diff --git a/CONFIG.md b/CONFIG.md index ea8855a..57fa06a 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -201,7 +201,7 @@ log: **Required**: No **Description**: OpenTelemetry configuration controlling whether the Prometheus exporter is enabled. -**Important**: This block is optional—omit it to run without telemetry. When present, the `/metrics` endpoint is exposed only if `enableMetrics: true`. +**Important**: This block is optional—omit it to run without telemetry. When present, the `/metrics` endpoint is exposed on a separate port (configurable via `metricsPort`) only if `enableMetrics: true`. ##### Parameters: @@ -238,6 +238,12 @@ log: **Default**: `"development"` **Description**: Sets the `deployment.environment` attribute (e.g., `development`, `staging`, `production`). +###### `config.metricsPort` +**Type**: `string` +**Required**: No +**Default**: `"9090"` +**Description**: Port on which the metrics HTTP server will listen. The metrics endpoint is hosted on a separate server from the main application. + **Example - Enable Metrics** (matches `config/local-simple.yaml`): ```yaml plugins: @@ -248,22 +254,25 @@ plugins: serviceVersion: "1.0.0" enableMetrics: "true" environment: "development" + metricsPort: "9090" ``` ### Accessing Metrics -When `plugins.otelsetup.config.enableMetrics: "true"`, scrape metrics at: +When `plugins.otelsetup.config.enableMetrics: "true"`, the metrics endpoint is hosted on a separate HTTP server. Scrape metrics at: ``` -http://your-server:port/metrics +http://your-server:9090/metrics ``` +**Note**: The metrics server runs on the port specified by `config.metricsPort` (default: `9090`), which is separate from the main application port configured in `http.port`. + ### Metrics Collected Metrics are organized by module for better maintainability and encapsulation: #### OTel Setup (from `otelsetup` plugin) -- Prometheus exporter & `/metrics` handler registration +- Prometheus exporter & `/metrics` endpoint on separate HTTP server - Go runtime instrumentation (`go_*`), resource attributes, and meter provider wiring #### Step Execution Metrics (from `telemetry` package) @@ -1141,7 +1150,7 @@ routingRules: - Embedded Ed25519 keys - Local Redis - Simplified routing -- Optional metrics collection (available at `/metrics` when enabled) +- Optional metrics collection (available on separate port when enabled) **Use Case**: Quick local development and testing @@ -1164,7 +1173,7 @@ modules: config: {} ``` -**Metrics Access**: When enabled, access metrics at `http://localhost:8081/metrics` +**Metrics Access**: When enabled, access metrics at `http://localhost:9090/metrics` (default metrics port, configurable via `plugins.otelsetup.config.metricsPort`) ### 2. Local Development (Vault Mode) @@ -1199,7 +1208,7 @@ modules: - Production Redis - Remote plugin loading - Pub/Sub integration -- OpenTelemetry metrics enabled (available at `/metrics` endpoint) +- OpenTelemetry metrics enabled (available on separate port, default: 9090) **Use Case**: Single deployment serving both roles @@ -1237,7 +1246,7 @@ modules: ``` **Metrics Access**: -- Prometheus scraping: `http://your-server:port/metrics` +- Prometheus scraping: `http://your-server:9090/metrics` (default metrics port, configurable via `plugins.otelsetup.config.metricsPort`) ### 4. Production BAP-Only Mode diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 3e896bc..9f477fa 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -17,7 +17,6 @@ import ( "github.com/beckn-one/beckn-onix/core/module/handler" "github.com/beckn-one/beckn-onix/pkg/log" "github.com/beckn-one/beckn-onix/pkg/plugin" - "github.com/beckn-one/beckn-onix/pkg/telemetry" ) // ApplicationPlugins holds application-level plugin configurations. @@ -99,29 +98,24 @@ func validateConfig(cfg *Config) error { } // initPlugins initializes application-level plugins including telemetry. -func initPlugins(ctx context.Context, mgr *plugin.Manager, otelSetupCfg *plugin.Config) (*telemetry.Provider, error) { +func initPlugins(ctx context.Context, mgr *plugin.Manager, otelSetupCfg *plugin.Config) error { if otelSetupCfg == nil { log.Info(ctx, "Telemetry config not provided; skipping OpenTelemetry setup") - return nil, nil + return nil } log.Infof(ctx, "Initializing telemetry via plugin id=%s", otelSetupCfg.ID) - otelProvider, err := mgr.OtelSetup(ctx, otelSetupCfg) + _, err := mgr.OtelSetup(ctx, otelSetupCfg) if err != nil { - return nil, fmt.Errorf("failed to initialize telemetry plugin: %w", err) + return fmt.Errorf("failed to initialize telemetry plugin: %w", err) } - return otelProvider, nil + return nil } // newServer creates and initializes the HTTP server. -func newServer(ctx context.Context, mgr handler.PluginManager, cfg *Config, otelProvider *telemetry.Provider) (http.Handler, error) { +func newServer(ctx context.Context, mgr handler.PluginManager, cfg *Config) (http.Handler, error) { mux := http.NewServeMux() - if otelProvider != nil && otelProvider.MetricsHandler != nil { - mux.Handle("/metrics", otelProvider.MetricsHandler) - log.Infof(ctx, "Metrics endpoint registered at /metrics") - } - if err := module.Register(ctx, cfg.Modules, mux, mgr); err != nil { return nil, fmt.Errorf("failed to register modules: %w", err) } @@ -154,14 +148,13 @@ func run(ctx context.Context, configPath string) error { log.Debug(ctx, "Plugin manager loaded.") // Initialize plugins including telemetry. - otelProvider, err := initPlugins(ctx, mgr, cfg.Plugins.OtelSetup) - if err != nil { + if err := initPlugins(ctx, mgr, cfg.Plugins.OtelSetup); err != nil { return fmt.Errorf("failed to initialize plugins: %w", err) } // Initialize HTTP server. log.Infof(ctx, "Initializing HTTP server") - srv, err := newServerFunc(ctx, mgr, cfg, otelProvider) + srv, err := newServerFunc(ctx, mgr, cfg) if err != nil { return fmt.Errorf("failed to initialize server: %w", err) } diff --git a/cmd/adapter/main_test.go b/cmd/adapter/main_test.go index e274a5e..4961a3a 100644 --- a/cmd/adapter/main_test.go +++ b/cmd/adapter/main_test.go @@ -15,7 +15,6 @@ import ( "github.com/beckn-one/beckn-onix/core/module/handler" "github.com/beckn-one/beckn-onix/pkg/plugin" "github.com/beckn-one/beckn-onix/pkg/plugin/definition" - "github.com/beckn-one/beckn-onix/pkg/telemetry" "github.com/stretchr/testify/mock" ) @@ -125,7 +124,7 @@ func TestRunSuccess(t *testing.T) { defer func() { newManagerFunc = originalNewManager }() originalNewServer := newServerFunc - newServerFunc = func(ctx context.Context, mgr handler.PluginManager, cfg *Config, provider *telemetry.Provider) (http.Handler, error) { + newServerFunc = func(ctx context.Context, mgr handler.PluginManager, cfg *Config) (http.Handler, error) { return http.NewServeMux(), nil } defer func() { newServerFunc = originalNewServer }() @@ -176,14 +175,19 @@ func TestRunFailure(t *testing.T) { // Mock dependencies originalNewManager := newManagerFunc - // newManagerFunc = func(ctx context.Context, cfg *plugin.ManagerConfig) (*plugin.Manager, func(), error) { - // return tt.mockMgr() - // } - newManagerFunc = nil + // Ensure newManagerFunc is never nil to avoid panic if invoked. + newManagerFunc = func(ctx context.Context, cfg *plugin.ManagerConfig) (*plugin.Manager, func(), error) { + _, closer, err := tt.mockMgr() + if err != nil { + return nil, closer, err + } + // Return a deterministic error so the code path exits cleanly if reached. + return nil, closer, errors.New("mock manager error") + } defer func() { newManagerFunc = originalNewManager }() - originalNewServer := newServerFunc - newServerFunc = func(ctx context.Context, mgr handler.PluginManager, cfg *Config, provider *telemetry.Provider) (http.Handler, error) { + originalNewServer := newServerFunc + newServerFunc = func(ctx context.Context, mgr handler.PluginManager, cfg *Config) (http.Handler, error) { return tt.mockServer(ctx, mgr, cfg) } defer func() { newServerFunc = originalNewServer }() @@ -314,7 +318,7 @@ func TestNewServerSuccess(t *testing.T) { }, } - handler, err := newServer(context.Background(), mockMgr, cfg, nil) + handler, err := newServer(context.Background(), mockMgr, cfg) if err != nil { t.Errorf("Expected no error, but got: %v", err) @@ -359,7 +363,7 @@ func TestNewServerFailure(t *testing.T) { }, } - handler, err := newServer(context.Background(), mockMgr, cfg, nil) + handler, err := newServer(context.Background(), mockMgr, cfg) if err == nil { t.Errorf("Expected an error, but got nil") diff --git a/cmd/adapter/metrics_integration_test.go b/cmd/adapter/metrics_integration_test.go index 16bc6b7..ea03195 100644 --- a/cmd/adapter/metrics_integration_test.go +++ b/cmd/adapter/metrics_integration_test.go @@ -2,7 +2,6 @@ package main import ( "context" - "net/http/httptest" "testing" "github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelsetup" @@ -21,12 +20,6 @@ func TestMetricsEndpointExposesPrometheus(t *testing.T) { require.NoError(t, err) defer provider.Shutdown(context.Background()) - rec := httptest.NewRecorder() - req := httptest.NewRequest("GET", "/metrics", nil) - provider.MetricsHandler.ServeHTTP(rec, req) - - require.Equal(t, 200, rec.Code) - body := rec.Body.String() - require.Contains(t, body, "# HELP") - require.Contains(t, body, "# TYPE") + // Metrics are served by the plugin’s own HTTP server; just ensure provider is initialized. + require.NotNil(t, provider.MeterProvider) } diff --git a/pkg/plugin/implementation/otelsetup/cmd/plugin.go b/pkg/plugin/implementation/otelsetup/cmd/plugin.go index 30db907..232d290 100644 --- a/pkg/plugin/implementation/otelsetup/cmd/plugin.go +++ b/pkg/plugin/implementation/otelsetup/cmd/plugin.go @@ -27,6 +27,7 @@ func (m metricsProvider) New(ctx context.Context, config map[string]string) (*te ServiceName: config["serviceName"], ServiceVersion: config["serviceVersion"], Environment: config["environment"], + MetricsPort: config["metricsPort"], } // Parse enableMetrics as boolean diff --git a/pkg/plugin/implementation/otelsetup/cmd/plugin_test.go b/pkg/plugin/implementation/otelsetup/cmd/plugin_test.go index 2f35c17..79a2783 100644 --- a/pkg/plugin/implementation/otelsetup/cmd/plugin_test.go +++ b/pkg/plugin/implementation/otelsetup/cmd/plugin_test.go @@ -56,7 +56,7 @@ func TestMetricsProviderNew_Success(t *testing.T) { require.NoError(t, err, "New() should not return error") require.NotNil(t, telemetryProvider, "New() should return non-nil provider") - // Test cleanup function if it exists + // Metrics server is started inside provider when enabled; MetricsHandler is not exposed. if cleanup != nil { err := cleanup() assert.NoError(t, err, "cleanup() should not return error") @@ -165,12 +165,6 @@ func TestMetricsProviderNew_ConfigConversion(t *testing.T) { require.NoError(t, err, "New() should not return error") require.NotNil(t, telemetryProvider, "New() should return non-nil provider") - // Verify that the provider was created (we can't directly check internal config, - // but we can verify the provider is functional) - if tt.expectedConfig.EnableMetrics { - assert.NotNil(t, telemetryProvider.MetricsHandler, "MetricsHandler should be set when metrics enabled") - } - if cleanup != nil { err := cleanup() assert.NoError(t, err, "cleanup() should not return error") @@ -231,11 +225,6 @@ func TestMetricsProviderNew_BooleanParsing(t *testing.T) { require.NoError(t, err, "New() should not return error") require.NotNil(t, telemetryProvider, "New() should return non-nil provider") - // Verify metrics handler is set based on enableMetrics - if tt.expected { - assert.NotNil(t, telemetryProvider.MetricsHandler, "MetricsHandler should be set when metrics enabled") - } - if cleanup != nil { err := cleanup() assert.NoError(t, err, "cleanup() should not return error") @@ -300,9 +289,6 @@ func TestMetricsProviderNew_DefaultValues(t *testing.T) { require.NoError(t, err, "New() should not return error with empty config") require.NotNil(t, telemetryProvider, "New() should return non-nil provider") - // Verify defaults are applied by checking that provider is functional - assert.NotNil(t, telemetryProvider.MetricsHandler, "MetricsHandler should be set with defaults") - if cleanup != nil { err := cleanup() assert.NoError(t, err, "cleanup() should not return error") diff --git a/pkg/plugin/implementation/otelsetup/otelsetup.go b/pkg/plugin/implementation/otelsetup/otelsetup.go index 8f7c0ce..5af7d59 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup.go @@ -3,6 +3,10 @@ package otelsetup import ( "context" "fmt" + "net" + "net/http" + "sync" + "time" clientprom "github.com/prometheus/client_golang/prometheus" clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp" @@ -28,6 +32,7 @@ type Config struct { ServiceVersion string `yaml:"serviceVersion"` EnableMetrics bool `yaml:"enableMetrics"` Environment string `yaml:"environment"` + MetricsPort string `yaml:"metricsPort"` } // DefaultConfig returns sensible defaults for telemetry configuration. @@ -37,6 +42,7 @@ func DefaultConfig() *Config { ServiceVersion: "dev", EnableMetrics: true, Environment: "development", + MetricsPort: "9090", } } @@ -49,6 +55,7 @@ func ToPluginConfig(cfg *Config) *plugin.Config { "serviceVersion": cfg.ServiceVersion, "enableMetrics": fmt.Sprintf("%t", cfg.EnableMetrics), "environment": cfg.Environment, + "metricsPort": cfg.MetricsPort, }, } } @@ -71,6 +78,9 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) if cfg.Environment == "" { cfg.Environment = DefaultConfig().Environment } + if cfg.MetricsPort == "" { + cfg.MetricsPort = DefaultConfig().MetricsPort + } if !cfg.EnableMetrics { log.Info(ctx, "OpenTelemetry metrics disabled") @@ -115,11 +125,44 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err) } + // Create metrics handler + metricsHandler := clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}) + + // Create and start metrics HTTP server + metricsMux := http.NewServeMux() + metricsMux.Handle("/metrics", metricsHandler) + + metricsServer := &http.Server{ + Addr: net.JoinHostPort("", cfg.MetricsPort), + Handler: metricsMux, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + IdleTimeout: 30 * time.Second, + } + + var serverWg sync.WaitGroup + serverWg.Add(1) + go func() { + defer serverWg.Done() + log.Infof(ctx, "Metrics server listening on %s", metricsServer.Addr) + if err := metricsServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Errorf(ctx, fmt.Errorf("metrics server ListenAndServe: %w", err), "error listening and serving metrics") + } + }() + return &telemetry.Provider{ - MeterProvider: meterProvider, - MetricsHandler: clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}), - Shutdown: func(ctx context.Context) error { - return meterProvider.Shutdown(ctx) + MeterProvider: meterProvider, + Shutdown: func(shutdownCtx context.Context) error { + log.Infof(ctx, "Shutting down metrics server...") + // Shutdown the metrics server + serverShutdownCtx, cancel := context.WithTimeout(shutdownCtx, 10*time.Second) + defer cancel() + if err := metricsServer.Shutdown(serverShutdownCtx); err != nil { + log.Errorf(ctx, fmt.Errorf("metrics server shutdown: %w", err), "error shutting down metrics server") + } + serverWg.Wait() + // Shutdown the meter provider + return meterProvider.Shutdown(shutdownCtx) }, }, nil } diff --git a/pkg/plugin/implementation/otelsetup/otelsetup_test.go b/pkg/plugin/implementation/otelsetup/otelsetup_test.go index 98ce4eb..916b632 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup_test.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup_test.go @@ -2,8 +2,6 @@ package otelsetup import ( "context" - "net/http" - "net/http/httptest" "testing" "github.com/stretchr/testify/assert" @@ -56,16 +54,7 @@ func TestSetup_New_Success(t *testing.T) { require.NotNil(t, provider.Shutdown, "Provider should have shutdown function") if tt.cfg.EnableMetrics { - assert.NotNil(t, provider.MetricsHandler, "MetricsHandler should be set when metrics enabled") assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set when metrics enabled") - - // Test that metrics handler works - rec := httptest.NewRecorder() - req := httptest.NewRequest("GET", "/metrics", nil) - provider.MetricsHandler.ServeHTTP(rec, req) - assert.Equal(t, http.StatusOK, rec.Code) - } else { - assert.Nil(t, provider.MetricsHandler, "MetricsHandler should be nil when metrics disabled") } // Test shutdown @@ -123,15 +112,8 @@ func TestSetup_New_DefaultValues(t *testing.T) { require.NotNil(t, provider) // Verify defaults are applied by checking that provider is functional - assert.NotNil(t, provider.MetricsHandler, "MetricsHandler should be set with defaults") assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set with defaults") - // Test metrics endpoint - rec := httptest.NewRecorder() - req := httptest.NewRequest("GET", "/metrics", nil) - provider.MetricsHandler.ServeHTTP(rec, req) - assert.Equal(t, http.StatusOK, rec.Code) - // Cleanup err = provider.Shutdown(ctx) assert.NoError(t, err) @@ -152,8 +134,7 @@ func TestSetup_New_MetricsDisabled(t *testing.T) { require.NoError(t, err) require.NotNil(t, provider) - // When metrics are disabled, MetricsHandler should be nil - assert.Nil(t, provider.MetricsHandler, "MetricsHandler should be nil when metrics disabled") + // When metrics are disabled, MetricsHandler should be nil and MeterProvider should be nil assert.Nil(t, provider.MeterProvider, "MeterProvider should be nil when metrics disabled") // Shutdown should still work @@ -182,6 +163,7 @@ func TestToPluginConfig_Success(t *testing.T) { "serviceVersion": "1.0.0", "enableMetrics": "true", "environment": "test", + "metricsPort": "", }, }, { @@ -198,6 +180,7 @@ func TestToPluginConfig_Success(t *testing.T) { "serviceVersion": "2.0.0", "enableMetrics": "false", "environment": "production", + "metricsPort": "", }, }, { @@ -214,6 +197,7 @@ func TestToPluginConfig_Success(t *testing.T) { "serviceVersion": "", "enableMetrics": "true", "environment": "", + "metricsPort": "", }, }, } @@ -263,11 +247,13 @@ func TestToPluginConfig_BooleanConversion(t *testing.T) { ServiceVersion: "1.0.0", EnableMetrics: tt.enableMetrics, Environment: "test", + MetricsPort: "", } result := ToPluginConfig(cfg) require.NotNil(t, result) assert.Equal(t, tt.expected, result.Config["enableMetrics"], "enableMetrics should be converted to string correctly") + assert.Equal(t, "", result.Config["metricsPort"], "metricsPort should be included even when empty") }) } }