Merge pull request #568 from Beckn-One/feat/observability

Feat/observability
This commit is contained in:
Mayuresh A Nirhali
2025-12-18 15:27:47 +05:30
committed by GitHub
31 changed files with 2033 additions and 45 deletions

View File

@@ -0,0 +1,15 @@
package definition
import (
"context"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
)
// OtelSetupMetricsProvider encapsulates initialization of OpenTelemetry metrics
// providers. Implementations wire exporters and return a Provider that the core
// application can manage.
type OtelSetupMetricsProvider interface {
// New initializes a new telemetry provider instance with the given configuration.
// config is the plugin's string-keyed settings map. The results are the
// telemetry provider, a cleanup function (which may be nil), and an error.
New(ctx context.Context, config map[string]string) (*telemetry.Provider, func() error, error)
}

View File

@@ -7,7 +7,12 @@ import (
"os"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
"github.com/redis/go-redis/extra/redisotel/v9"
"github.com/redis/go-redis/v9"
)
@@ -31,7 +36,8 @@ type Config struct {
// Cache wraps a Redis client to provide basic caching operations.
type Cache struct {
Client RedisClient
Client RedisClient
metrics *CacheMetrics
}
// Error variables to describe common failure modes.
@@ -77,26 +83,80 @@ func New(ctx context.Context, cfg *Config) (*Cache, func() error, error) {
return nil, nil, fmt.Errorf("%w: %v", ErrConnectionFail, err)
}
// Enable OpenTelemetry instrumentation for tracing and metrics
// This will automatically collect Redis operation metrics and expose them via /metrics endpoint
if redisClient, ok := client.(*redis.Client); ok {
if err := redisotel.InstrumentTracing(redisClient); err != nil {
// Log error but don't fail - instrumentation is optional
log.Debugf(ctx, "Failed to instrument Redis tracing: %v", err)
}
if err := redisotel.InstrumentMetrics(redisClient); err != nil {
// Log error but don't fail - instrumentation is optional
log.Debugf(ctx, "Failed to instrument Redis metrics: %v", err)
}
}
metrics, _ := GetCacheMetrics(ctx)
log.Infof(ctx, "Cache connection to Redis established successfully")
return &Cache{Client: client}, client.Close, nil
return &Cache{Client: client, metrics: metrics}, client.Close, nil
}
// Get retrieves the value for the specified key from Redis.
func (c *Cache) Get(ctx context.Context, key string) (string, error) {
return c.Client.Get(ctx, key).Result()
result, err := c.Client.Get(ctx, key).Result()
if c.metrics != nil {
attrs := []attribute.KeyValue{
telemetry.AttrOperation.String("get"),
}
switch {
case err == redis.Nil:
c.metrics.CacheMissesTotal.Add(ctx, 1, metric.WithAttributes(attrs...))
c.metrics.CacheOperationsTotal.Add(ctx, 1,
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("miss"))...))
case err != nil:
c.metrics.CacheOperationsTotal.Add(ctx, 1,
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("error"))...))
default:
c.metrics.CacheHitsTotal.Add(ctx, 1, metric.WithAttributes(attrs...))
c.metrics.CacheOperationsTotal.Add(ctx, 1,
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("hit"))...))
}
}
return result, err
}
// Set stores the given key-value pair in Redis with the specified TTL (time to live).
func (c *Cache) Set(ctx context.Context, key, value string, ttl time.Duration) error {
return c.Client.Set(ctx, key, value, ttl).Err()
err := c.Client.Set(ctx, key, value, ttl).Err()
c.recordOperation(ctx, "set", err)
return err
}
// Delete removes the specified key from Redis.
func (c *Cache) Delete(ctx context.Context, key string) error {
return c.Client.Del(ctx, key).Err()
err := c.Client.Del(ctx, key).Err()
c.recordOperation(ctx, "delete", err)
return err
}
// Clear removes all keys in the currently selected Redis database.
//
// The outcome is reported on the cache operations counter under the "clear"
// operation, the same way Set and Delete report theirs, so flushes are visible
// in the operation metrics. It returns the error from FlushDB unchanged.
func (c *Cache) Clear(ctx context.Context) error {
	err := c.Client.FlushDB(ctx).Err()
	c.recordOperation(ctx, "clear", err)
	return err
}
// recordOperation adds one sample to the cache operations counter for op. The
// sample is tagged with status "error" when err is non-nil and "success"
// otherwise. It is a no-op when the cache has no metrics attached.
func (c *Cache) recordOperation(ctx context.Context, op string, err error) {
	m := c.metrics
	if m == nil {
		return
	}

	outcome := "error"
	if err == nil {
		outcome = "success"
	}

	labels := metric.WithAttributes(
		telemetry.AttrOperation.String(op),
		telemetry.AttrStatus.String(outcome),
	)
	m.CacheOperationsTotal.Add(ctx, 1, labels)
}

View File

@@ -0,0 +1,69 @@
package cache
import (
"context"
"fmt"
"sync"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/metric"
)
// CacheMetrics exposes cache-related metric instruments.
type CacheMetrics struct {
// CacheOperationsTotal is registered as "onix_cache_operations_total".
CacheOperationsTotal metric.Int64Counter
// CacheHitsTotal is registered as "onix_cache_hits_total".
CacheHitsTotal metric.Int64Counter
// CacheMissesTotal is registered as "onix_cache_misses_total".
CacheMissesTotal metric.Int64Counter
}
// Package-level state behind GetCacheMetrics: the shared instrument set, the
// sync.Once guarding its construction, and the error that construction returned.
var (
cacheMetricsInstance *CacheMetrics
cacheMetricsOnce sync.Once
cacheMetricsErr error
)
// GetCacheMetrics returns the shared cache instruments, constructing them on
// the first call. Whatever that first attempt produced, including its error,
// is what every later call receives. ctx is not used.
func GetCacheMetrics(ctx context.Context) (*CacheMetrics, error) {
	build := func() {
		cacheMetricsInstance, cacheMetricsErr = newCacheMetrics()
	}
	cacheMetricsOnce.Do(build)
	return cacheMetricsInstance, cacheMetricsErr
}
// newCacheMetrics registers the three cache counters on a meter taken from the
// global OpenTelemetry meter provider. It stops at the first counter that
// fails to register and returns an error prefixed with that counter's name.
func newCacheMetrics() (*CacheMetrics, error) {
	meter := otel.GetMeterProvider().Meter(
		"github.com/beckn-one/beckn-onix/cache",
		metric.WithInstrumentationVersion("1.0.0"),
	)

	operations, err := meter.Int64Counter(
		"onix_cache_operations_total",
		metric.WithDescription("Redis cache operations"),
		metric.WithUnit("{operation}"),
	)
	if err != nil {
		return nil, fmt.Errorf("onix_cache_operations_total: %w", err)
	}

	hits, err := meter.Int64Counter(
		"onix_cache_hits_total",
		metric.WithDescription("Redis cache hits"),
		metric.WithUnit("{hit}"),
	)
	if err != nil {
		return nil, fmt.Errorf("onix_cache_hits_total: %w", err)
	}

	misses, err := meter.Int64Counter(
		"onix_cache_misses_total",
		metric.WithDescription("Redis cache misses"),
		metric.WithUnit("{miss}"),
	)
	if err != nil {
		return nil, fmt.Errorf("onix_cache_misses_total: %w", err)
	}

	return &CacheMetrics{
		CacheOperationsTotal: operations,
		CacheHitsTotal:       hits,
		CacheMissesTotal:     misses,
	}, nil
}

View File

@@ -0,0 +1,79 @@
package main
import (
"context"
"errors"
"strconv"
"time"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelsetup"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
)
// metricsProvider implements the OtelSetupMetricsProvider interface for the otelsetup plugin.
type metricsProvider struct {
// impl is the otelsetup.Setup whose New method builds the telemetry provider.
impl otelsetup.Setup
}
// New maps the string-keyed plugin configuration onto an otelsetup.Config,
// fills in defaults for empty fields, and delegates to the wrapped
// otelsetup.Setup.
//
// The keys read are "serviceName", "serviceVersion", "environment",
// "metricsPort" and "enableMetrics". Metrics stay enabled unless
// "enableMetrics" parses (strconv.ParseBool) to false; a value that does not
// parse is logged as a warning and treated as true.
//
// The returned closer is non-nil when the provider has a Shutdown function and
// invokes it with a five-second timeout. A nil ctx is rejected with an error.
func (m metricsProvider) New(ctx context.Context, config map[string]string) (*telemetry.Provider, func() error, error) {
	if ctx == nil {
		return nil, nil, errors.New("context cannot be nil")
	}

	defaults := otelsetup.DefaultConfig()
	orDefault := func(value, fallback string) string {
		if value == "" {
			return fallback
		}
		return value
	}

	// Start from "metrics enabled" and only override it on a successful parse.
	telemetryConfig := &otelsetup.Config{
		ServiceName:    orDefault(config["serviceName"], defaults.ServiceName),
		ServiceVersion: orDefault(config["serviceVersion"], defaults.ServiceVersion),
		Environment:    orDefault(config["environment"], defaults.Environment),
		MetricsPort:    config["metricsPort"],
		EnableMetrics:  true,
	}

	if raw := config["enableMetrics"]; raw != "" {
		parsed, parseErr := strconv.ParseBool(raw)
		if parseErr != nil {
			log.Warnf(ctx, "Invalid enableMetrics value '%s', defaulting to true: %v", raw, parseErr)
		} else {
			telemetryConfig.EnableMetrics = parsed
		}
	}

	log.Debugf(ctx, "Telemetry config mapped: %+v", telemetryConfig)

	provider, err := m.impl.New(ctx, telemetryConfig)
	if err != nil {
		log.Errorf(ctx, err, "Failed to create telemetry provider instance")
		return nil, nil, err
	}

	// Adapt Shutdown(ctx) to the parameterless closer signature.
	var closer func() error
	if provider != nil && provider.Shutdown != nil {
		closer = func() error {
			shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
			defer cancel()
			return provider.Shutdown(shutdownCtx)
		}
	}

	log.Infof(ctx, "Telemetry provider instance created successfully")
	return provider, closer, nil
}
// Provider is the exported plugin instance. It is a zero-value
// metricsProvider, so its impl field is a zero-value otelsetup.Setup.
var Provider = metricsProvider{}

View File

@@ -0,0 +1,296 @@
package main
import (
"context"
"testing"
"github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelsetup"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMetricsProviderNew_Success runs New against several valid configuration
// maps and checks that each call returns a non-nil provider without error and
// that the returned cleanup function, when present, also succeeds.
func TestMetricsProviderNew_Success(t *testing.T) {
provider := metricsProvider{}
tests := []struct {
name string
ctx context.Context
config map[string]string
}{
{
name: "Valid config with all fields",
ctx: context.Background(),
config: map[string]string{
"serviceName": "test-service",
"serviceVersion": "1.0.0",
"enableMetrics": "true",
"environment": "test",
},
},
{
name: "Valid config with minimal fields (uses defaults)",
ctx: context.Background(),
config: map[string]string{},
},
{
name: "Valid config with enableMetrics false",
ctx: context.Background(),
config: map[string]string{
"enableMetrics": "false",
},
},
{
name: "Valid config with partial fields",
ctx: context.Background(),
config: map[string]string{
"serviceName": "custom-service",
"serviceVersion": "2.0.0",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
telemetryProvider, cleanup, err := provider.New(tt.ctx, tt.config)
require.NoError(t, err, "New() should not return error")
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
// The metrics server is started inside the provider when metrics are
// enabled, so this test does not mount or probe a handler itself.
if cleanup != nil {
err := cleanup()
assert.NoError(t, err, "cleanup() should not return error")
}
})
}
}
// TestMetricsProviderNew_Failure checks the error path of New. The only case
// is a nil context, which must yield an error with a nil provider and a nil
// cleanup function.
func TestMetricsProviderNew_Failure(t *testing.T) {
provider := metricsProvider{}
tests := []struct {
name string
ctx context.Context
config map[string]string
wantErr bool
}{
{
name: "Nil context",
ctx: nil,
config: map[string]string{},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
telemetryProvider, cleanup, err := provider.New(tt.ctx, tt.config)
if tt.wantErr {
assert.Error(t, err, "New() should return error for nil context")
assert.Nil(t, telemetryProvider, "New() should return nil provider on error")
assert.Nil(t, cleanup, "New() should return nil cleanup on error")
} else {
assert.NoError(t, err, "New() should not return error")
assert.NotNil(t, telemetryProvider, "New() should return non-nil provider")
}
})
}
}
// TestMetricsProviderNew_ConfigConversion feeds New a range of configuration
// maps and checks that each call succeeds and cleans up.
//
// NOTE(review): expectedConfig is filled in for every case but never compared
// against anything, so the map-to-Config conversion itself is not verified
// here. Only "New succeeds" is asserted.
func TestMetricsProviderNew_ConfigConversion(t *testing.T) {
provider := metricsProvider{}
ctx := context.Background()
tests := []struct {
name string
config map[string]string
expectedConfig *otelsetup.Config
}{
{
name: "All fields provided",
config: map[string]string{
"serviceName": "my-service",
"serviceVersion": "3.0.0",
"enableMetrics": "true",
"environment": "production",
},
expectedConfig: &otelsetup.Config{
ServiceName: "my-service",
ServiceVersion: "3.0.0",
EnableMetrics: true,
Environment: "production",
},
},
{
name: "Empty config uses defaults",
config: map[string]string{},
expectedConfig: &otelsetup.Config{
ServiceName: otelsetup.DefaultConfig().ServiceName,
ServiceVersion: otelsetup.DefaultConfig().ServiceVersion,
EnableMetrics: true, // Default when not specified
Environment: otelsetup.DefaultConfig().Environment,
},
},
{
name: "EnableMetrics false",
config: map[string]string{
"enableMetrics": "false",
},
expectedConfig: &otelsetup.Config{
ServiceName: otelsetup.DefaultConfig().ServiceName,
ServiceVersion: otelsetup.DefaultConfig().ServiceVersion,
EnableMetrics: false,
Environment: otelsetup.DefaultConfig().Environment,
},
},
{
name: "Invalid enableMetrics defaults to true",
config: map[string]string{
"enableMetrics": "invalid",
},
expectedConfig: &otelsetup.Config{
ServiceName: otelsetup.DefaultConfig().ServiceName,
ServiceVersion: otelsetup.DefaultConfig().ServiceVersion,
EnableMetrics: true, // Defaults to true on parse error
Environment: otelsetup.DefaultConfig().Environment,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
telemetryProvider, cleanup, err := provider.New(ctx, tt.config)
require.NoError(t, err, "New() should not return error")
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
if cleanup != nil {
err := cleanup()
assert.NoError(t, err, "cleanup() should not return error")
}
})
}
}
// TestMetricsProviderNew_BooleanParsing passes different "enableMetrics"
// strings to New and checks that every one of them, including unparsable and
// empty values, results in a successful call.
//
// NOTE(review): the expected field records the intended parse result but is
// never asserted; the test only proves that New does not fail.
func TestMetricsProviderNew_BooleanParsing(t *testing.T) {
provider := metricsProvider{}
ctx := context.Background()
tests := []struct {
name string
enableMetrics string
expected bool
}{
{
name: "True string",
enableMetrics: "true",
expected: true,
},
{
name: "False string",
enableMetrics: "false",
expected: false,
},
{
name: "True uppercase",
enableMetrics: "TRUE",
expected: true,
},
{
name: "False uppercase",
enableMetrics: "FALSE",
expected: false,
},
{
name: "Invalid value defaults to true",
enableMetrics: "invalid",
expected: true, // Defaults to true on parse error
},
{
name: "Empty string defaults to true",
enableMetrics: "",
expected: true, // Defaults to true when not specified
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := map[string]string{
"enableMetrics": tt.enableMetrics,
}
telemetryProvider, cleanup, err := provider.New(ctx, config)
require.NoError(t, err, "New() should not return error")
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
if cleanup != nil {
err := cleanup()
assert.NoError(t, err, "cleanup() should not return error")
}
})
}
}
// TestMetricsProviderNew_CleanupFunction verifies that a fully specified
// configuration yields a non-nil cleanup function and that calling it succeeds.
func TestMetricsProviderNew_CleanupFunction(t *testing.T) {
	settings := map[string]string{
		"serviceName":    "test-service",
		"serviceVersion": "1.0.0",
		"enableMetrics":  "true",
		"environment":    "test",
	}

	sut := metricsProvider{}
	telemetryProvider, cleanup, err := sut.New(context.Background(), settings)
	require.NoError(t, err, "New() should not return error")
	require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
	require.NotNil(t, cleanup, "New() should return non-nil cleanup function")

	// The cleanup hook must be callable without error.
	assert.NoError(t, cleanup(), "cleanup() should not return error")
}
// TestProviderVariable exercises the exported Provider value end to end: it
// must be usable as-is to create a telemetry provider and clean it up.
func TestProviderVariable(t *testing.T) {
	assert.NotNil(t, Provider, "Provider should not be nil")

	// Drive the exported instance through its New method.
	settings := map[string]string{
		"serviceName":    "test",
		"serviceVersion": "1.0.0",
		"enableMetrics":  "true",
	}
	telemetryProvider, cleanup, err := Provider.New(context.Background(), settings)
	require.NoError(t, err, "Provider.New() should not return error")
	require.NotNil(t, telemetryProvider, "Provider.New() should return non-nil provider")

	if cleanup == nil {
		return
	}
	assert.NoError(t, cleanup(), "cleanup() should not return error")
}
// TestMetricsProviderNew_DefaultValues checks that New succeeds when given a
// completely empty configuration map.
func TestMetricsProviderNew_DefaultValues(t *testing.T) {
	sut := metricsProvider{}
	empty := map[string]string{}

	telemetryProvider, cleanup, err := sut.New(context.Background(), empty)
	require.NoError(t, err, "New() should not return error with empty config")
	require.NotNil(t, telemetryProvider, "New() should return non-nil provider")

	if cleanup == nil {
		return
	}
	assert.NoError(t, cleanup(), "cleanup() should not return error")
}

View File

@@ -0,0 +1,169 @@
package otelsetup
import (
"context"
"fmt"
"net"
"net/http"
"sync"
"time"
clientprom "github.com/prometheus/client_golang/prometheus"
clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/instrumentation/runtime"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/plugin"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
)
// Setup wires the telemetry provider. This is the concrete implementation
// behind the OtelSetupMetricsProvider interface.
// Setup has no fields; its New method takes a typed *Config rather than the
// string map used by that interface.
type Setup struct{}
// Config represents OpenTelemetry related configuration.
type Config struct {
// ServiceName is reported as the "service.name" resource attribute.
ServiceName string `yaml:"serviceName"`
// ServiceVersion is reported as the "service.version" resource attribute.
ServiceVersion string `yaml:"serviceVersion"`
// EnableMetrics turns metrics setup on; when false New returns a provider
// with only a no-op Shutdown.
EnableMetrics bool `yaml:"enableMetrics"`
// Environment is reported as the "deployment.environment" resource attribute.
Environment string `yaml:"environment"`
// MetricsPort is the port the metrics HTTP server listens on.
MetricsPort string `yaml:"metricsPort"`
}
// DefaultConfig returns a freshly allocated Config holding the built-in
// defaults: service "beckn-onix", version "dev", environment "development",
// metrics enabled, and metrics port "9090".
func DefaultConfig() *Config {
	defaults := Config{EnableMetrics: true}
	defaults.ServiceName = "beckn-onix"
	defaults.ServiceVersion = "dev"
	defaults.Environment = "development"
	defaults.MetricsPort = "9090"
	return &defaults
}
// ToPluginConfig renders cfg as a plugin.Config with ID "otelsetup". Every
// field is written to the string map, including empty ones, and EnableMetrics
// is formatted as "true" or "false". cfg must be non-nil; a nil cfg panics.
func ToPluginConfig(cfg *Config) *plugin.Config {
	settings := make(map[string]string, 5)
	settings["serviceName"] = cfg.ServiceName
	settings["serviceVersion"] = cfg.ServiceVersion
	settings["enableMetrics"] = fmt.Sprintf("%t", cfg.EnableMetrics)
	settings["environment"] = cfg.Environment
	settings["metricsPort"] = cfg.MetricsPort

	out := &plugin.Config{ID: "otelsetup"}
	out.Config = settings
	return out
}
// New initializes the underlying telemetry provider. The returned provider
// exposes the HTTP handler and shutdown hooks that the core application can
// manage directly.
//
// Empty ServiceName, ServiceVersion, Environment and MetricsPort fields fall
// back to DefaultConfig. The defaults are resolved on a private copy, so the
// Config passed in by the caller is never modified.
//
// When cfg.EnableMetrics is false the returned provider carries only a no-op
// Shutdown. Otherwise New installs a Prometheus-backed meter provider as the
// global OpenTelemetry meter provider, starts Go runtime instrumentation, and
// serves "/metrics" on the configured port from a background goroutine. The
// provider's Shutdown stops that server, waits for the goroutine to exit, and
// then shuts the meter provider down.
//
// It returns an error when cfg is nil or when the resource or the Prometheus
// exporter cannot be created.
func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) {
	if cfg == nil {
		return nil, fmt.Errorf("telemetry config cannot be nil")
	}

	// Resolve defaults on a copy: callers keep ownership of cfg and must not
	// see their struct rewritten as a side effect of this call.
	settings := *cfg
	defaults := DefaultConfig()
	if settings.ServiceName == "" {
		settings.ServiceName = defaults.ServiceName
	}
	if settings.ServiceVersion == "" {
		settings.ServiceVersion = defaults.ServiceVersion
	}
	if settings.Environment == "" {
		settings.Environment = defaults.Environment
	}
	if settings.MetricsPort == "" {
		settings.MetricsPort = defaults.MetricsPort
	}

	if !settings.EnableMetrics {
		log.Info(ctx, "OpenTelemetry metrics disabled")
		return &telemetry.Provider{
			Shutdown: func(context.Context) error { return nil },
		}, nil
	}

	res, err := resource.New(
		ctx,
		resource.WithAttributes(
			attribute.String("service.name", settings.ServiceName),
			attribute.String("service.version", settings.ServiceVersion),
			attribute.String("deployment.environment", settings.Environment),
		),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create telemetry resource: %w", err)
	}

	// A dedicated registry keeps the exported series limited to what this
	// exporter registers.
	registry := clientprom.NewRegistry()
	exporter, err := otelprom.New(
		otelprom.WithRegisterer(registry),
		otelprom.WithoutUnits(),
		otelprom.WithoutScopeInfo(),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create prometheus exporter: %w", err)
	}

	meterProvider := metric.NewMeterProvider(
		metric.WithReader(exporter),
		metric.WithResource(res),
	)
	otel.SetMeterProvider(meterProvider)
	log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s",
		settings.ServiceName, settings.ServiceVersion, settings.Environment)

	// Runtime instrumentation is best-effort; a failure is logged, not returned.
	if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(0)); err != nil {
		log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err)
	}

	// Create metrics handler
	metricsHandler := clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{})

	// Create and start metrics HTTP server
	metricsMux := http.NewServeMux()
	metricsMux.Handle("/metrics", metricsHandler)
	metricsServer := &http.Server{
		Addr:         net.JoinHostPort("", settings.MetricsPort),
		Handler:      metricsMux,
		ReadTimeout:  10 * time.Second,
		WriteTimeout: 10 * time.Second,
		IdleTimeout:  30 * time.Second,
	}

	var serverWg sync.WaitGroup
	serverWg.Add(1)
	go func() {
		defer serverWg.Done()
		log.Infof(ctx, "Metrics server listening on %s", metricsServer.Addr)
		// NOTE(review): a bind failure (for example the port already being in
		// use) is only logged here; New has already returned successfully.
		if err := metricsServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Errorf(ctx, fmt.Errorf("metrics server ListenAndServe: %w", err), "error listening and serving metrics")
		}
	}()

	return &telemetry.Provider{
		MeterProvider:  meterProvider,
		MetricsHandler: metricsHandler,
		Shutdown: func(shutdownCtx context.Context) error {
			log.Infof(ctx, "Shutting down metrics server...")
			// Shutdown the metrics server
			serverShutdownCtx, cancel := context.WithTimeout(shutdownCtx, 10*time.Second)
			defer cancel()
			if err := metricsServer.Shutdown(serverShutdownCtx); err != nil {
				log.Errorf(ctx, fmt.Errorf("metrics server shutdown: %w", err), "error shutting down metrics server")
			}
			// Wait for the serving goroutine so the port is released before
			// the meter provider is torn down.
			serverWg.Wait()
			// Shutdown the meter provider
			return meterProvider.Shutdown(shutdownCtx)
		},
	}, nil
}

View File

@@ -0,0 +1,259 @@
package otelsetup
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestSetup_New_Success runs Setup.New with valid configurations (metrics on,
// metrics off, and empty string fields) and checks that each returns a
// provider with a working Shutdown. MeterProvider is additionally required
// whenever metrics are enabled.
func TestSetup_New_Success(t *testing.T) {
setup := Setup{}
ctx := context.Background()
tests := []struct {
name string
cfg *Config
}{
{
name: "Valid config with all fields",
cfg: &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
},
},
{
name: "Valid config with metrics disabled",
cfg: &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: false,
Environment: "test",
},
},
{
name: "Config with empty fields uses defaults",
cfg: &Config{
ServiceName: "",
ServiceVersion: "",
EnableMetrics: true,
Environment: "",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
provider, err := setup.New(ctx, tt.cfg)
require.NoError(t, err, "New() should not return error")
require.NotNil(t, provider, "New() should return non-nil provider")
require.NotNil(t, provider.Shutdown, "Provider should have shutdown function")
if tt.cfg.EnableMetrics {
assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set when metrics enabled")
}
// Test shutdown
err = provider.Shutdown(ctx)
assert.NoError(t, err, "Shutdown should not return error")
})
}
}
// TestSetup_New_Failure checks the error path of Setup.New. The only case is
// a nil config, which must return an error and a nil provider.
func TestSetup_New_Failure(t *testing.T) {
setup := Setup{}
ctx := context.Background()
tests := []struct {
name string
cfg *Config
wantErr bool
}{
{
name: "Nil config",
cfg: nil,
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
provider, err := setup.New(ctx, tt.cfg)
if tt.wantErr {
assert.Error(t, err, "New() should return error")
assert.Nil(t, provider, "New() should return nil provider on error")
} else {
assert.NoError(t, err, "New() should not return error")
assert.NotNil(t, provider, "New() should return non-nil provider")
}
})
}
}
// TestSetup_New_DefaultValues checks that a Config whose string fields are all
// empty still produces a functional provider, i.e. one with a MeterProvider
// and a Shutdown that succeeds.
func TestSetup_New_DefaultValues(t *testing.T) {
	ctx := context.Background()

	// Leave every string field at its zero value so New has to fall back to
	// its defaults.
	blank := &Config{EnableMetrics: true}
	blank.ServiceName = ""
	blank.ServiceVersion = ""
	blank.Environment = ""

	provider, err := Setup{}.New(ctx, blank)
	require.NoError(t, err)
	require.NotNil(t, provider)

	// A populated MeterProvider is the observable sign that setup completed.
	assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set with defaults")

	// Cleanup
	assert.NoError(t, provider.Shutdown(ctx))
}
// TestSetup_New_MetricsDisabled checks the provider returned when
// EnableMetrics is false: it must carry neither a MeterProvider nor a
// MetricsHandler, and its Shutdown must still succeed.
func TestSetup_New_MetricsDisabled(t *testing.T) {
	setup := Setup{}
	ctx := context.Background()
	cfg := &Config{
		ServiceName:    "test-service",
		ServiceVersion: "1.0.0",
		EnableMetrics:  false,
		Environment:    "test",
	}

	provider, err := setup.New(ctx, cfg)
	require.NoError(t, err)
	require.NotNil(t, provider)

	// When metrics are disabled, both MeterProvider and MetricsHandler must be
	// nil. Both are asserted so the test matches what it documents.
	assert.Nil(t, provider.MeterProvider, "MeterProvider should be nil when metrics disabled")
	assert.Nil(t, provider.MetricsHandler, "MetricsHandler should be nil when metrics disabled")

	// Shutdown should still work
	err = provider.Shutdown(ctx)
	assert.NoError(t, err, "Shutdown should work even when metrics disabled")
}
// TestToPluginConfig_Success checks that ToPluginConfig produces the plugin ID
// "otelsetup" and a string map mirroring every Config field. None of the cases
// sets MetricsPort, so "metricsPort" is expected to be the empty string.
func TestToPluginConfig_Success(t *testing.T) {
tests := []struct {
name string
cfg *Config
expectedID string
expectedConfig map[string]string
}{
{
name: "Valid config with all fields",
cfg: &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
},
expectedID: "otelsetup",
expectedConfig: map[string]string{
"serviceName": "test-service",
"serviceVersion": "1.0.0",
"enableMetrics": "true",
"environment": "test",
"metricsPort": "",
},
},
{
name: "Config with enableMetrics false",
cfg: &Config{
ServiceName: "my-service",
ServiceVersion: "2.0.0",
EnableMetrics: false,
Environment: "production",
},
expectedID: "otelsetup",
expectedConfig: map[string]string{
"serviceName": "my-service",
"serviceVersion": "2.0.0",
"enableMetrics": "false",
"environment": "production",
"metricsPort": "",
},
},
{
name: "Config with empty fields",
cfg: &Config{
ServiceName: "",
ServiceVersion: "",
EnableMetrics: true,
Environment: "",
},
expectedID: "otelsetup",
expectedConfig: map[string]string{
"serviceName": "",
"serviceVersion": "",
"enableMetrics": "true",
"environment": "",
"metricsPort": "",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := ToPluginConfig(tt.cfg)
require.NotNil(t, result, "ToPluginConfig should return non-nil config")
assert.Equal(t, tt.expectedID, result.ID, "Plugin ID should be 'otelsetup'")
assert.Equal(t, tt.expectedConfig, result.Config, "Config map should match expected values")
})
}
}
// TestToPluginConfig_NilConfig pins the current behaviour for a nil *Config:
// ToPluginConfig reads cfg's fields without a nil guard, so the call panics.
func TestToPluginConfig_NilConfig(t *testing.T) {
// The panic is asserted rather than avoided: the function expects a valid
// config, and callers are responsible for checking for nil first.
assert.Panics(t, func() {
ToPluginConfig(nil)
}, "ToPluginConfig should panic when given nil config")
}
// TestToPluginConfig_BooleanConversion checks that EnableMetrics is rendered
// as the string "true" or "false", and that the "metricsPort" key is present
// in the map even when MetricsPort is empty.
func TestToPluginConfig_BooleanConversion(t *testing.T) {
tests := []struct {
name string
enableMetrics bool
expected string
}{
{
name: "EnableMetrics true",
enableMetrics: true,
expected: "true",
},
{
name: "EnableMetrics false",
enableMetrics: false,
expected: "false",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := &Config{
ServiceName: "test",
ServiceVersion: "1.0.0",
EnableMetrics: tt.enableMetrics,
Environment: "test",
MetricsPort: "",
}
result := ToPluginConfig(cfg)
require.NotNil(t, result)
assert.Equal(t, tt.expected, result.Config["enableMetrics"], "enableMetrics should be converted to string correctly")
assert.Equal(t, "", result.Config["metricsPort"], "metricsPort should be included even when empty")
})
}
}

View File

@@ -683,7 +683,6 @@ func TestExcludeActionWithNonURLTargetTypes(t *testing.T) {
}
}
// TestV2RouteSuccess tests v2 routing with domain-agnostic behavior
func TestV2RouteSuccess(t *testing.T) {
ctx := context.Background()

View File

@@ -15,6 +15,7 @@ import (
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/plugin/definition"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
)
type onixPlugin interface {
@@ -196,6 +197,33 @@ func (m *Manager) Middleware(ctx context.Context, cfg *Config) (func(http.Handle
return mwp.New(ctx, cfg.Config)
}
// OtelSetup initializes OpenTelemetry through the plugin identified by cfg.ID
// and returns the telemetry Provider that plugin creates.
//
// A nil cfg means telemetry is not configured: the call logs that fact and
// returns (nil, nil). When the plugin hands back a cleanup function it is
// appended to the manager's closers; an error from that cleanup is logged
// rather than returned.
func (m *Manager) OtelSetup(ctx context.Context, cfg *Config) (*telemetry.Provider, error) {
	if cfg == nil {
		log.Info(ctx, "Telemetry config not provided; skipping OpenTelemetry setup")
		return nil, nil
	}

	setup, err := provider[definition.OtelSetupMetricsProvider](m.plugins, cfg.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to load provider for %s: %w", cfg.ID, err)
	}

	telemetryProvider, shutdown, err := setup.New(ctx, cfg.Config)
	if err != nil {
		return nil, err
	}
	if shutdown == nil {
		return telemetryProvider, nil
	}

	// Register the plugin's cleanup so it runs with the manager's other closers.
	m.closers = append(m.closers, func() {
		if closeErr := shutdown(); closeErr != nil {
			log.Errorf(context.Background(), closeErr, "Failed to shutdown telemetry provider")
		}
	})
	return telemetryProvider, nil
}
// TransportWrapper returns a TransportWrapper instance based on the provided configuration.
func (m *Manager) TransportWrapper(ctx context.Context, cfg *Config) (definition.TransportWrapper, error) {
twp, err := provider[definition.TransportWrapperProvider](m.plugins, cfg.ID)

View File

@@ -0,0 +1,28 @@
package telemetry
import (
"context"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/require"
)
// TestNewProviderAndMetrics builds a test provider, obtains the shared
// instruments, and confirms that the provider's metrics handler answers a GET
// on /metrics with status 200 before the provider is shut down.
func TestNewProviderAndMetrics(t *testing.T) {
	ctx := context.Background()

	tp, err := NewTestProvider(ctx)
	require.NoError(t, err)
	require.NotNil(t, tp)
	require.NotNil(t, tp.MetricsHandler)

	instruments, err := GetMetrics(ctx)
	require.NoError(t, err)
	require.NotNil(t, instruments)

	// Scrape the handler directly; no network listener is involved.
	recorder := httptest.NewRecorder()
	tp.MetricsHandler.ServeHTTP(recorder, httptest.NewRequest("GET", "/metrics", nil))
	require.Equal(t, 200, recorder.Code)

	require.NoError(t, tp.Shutdown(context.Background()))
}

View File

@@ -0,0 +1,86 @@
package telemetry
import (
"context"
"fmt"
"sync"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// Metrics exposes strongly typed metric instruments used across the adapter.
// Note: Most metrics have been moved to their respective modules. Only plugin-level
// metrics remain here. See:
// - OTel setup: pkg/plugin/implementation/otelsetup
// - Step metrics: core/module/handler/step_metrics.go
// - Cache metrics: pkg/plugin/implementation/cache/cache_metrics.go
// - Handler metrics: core/module/handler/handlerMetrics.go
type Metrics struct {
// PluginExecutionDuration is registered as
// "onix_plugin_execution_duration_seconds" with unit "s".
PluginExecutionDuration metric.Float64Histogram
// PluginErrorsTotal is registered as "onix_plugin_errors_total".
PluginErrorsTotal metric.Int64Counter
}
// Package-level state behind GetMetrics: the shared instrument set, the
// sync.Once guarding its construction, and the error that construction returned.
var (
metricsInstance *Metrics
metricsOnce sync.Once
metricsErr error
)
// Attribute keys shared across instruments.
// Each value is an attribute.Key; the string in the call is the label name
// attached to recorded samples.
var (
AttrModule = attribute.Key("module")
AttrSubsystem = attribute.Key("subsystem")
AttrName = attribute.Key("name")
AttrStep = attribute.Key("step")
AttrRole = attribute.Key("role")
AttrAction = attribute.Key("action")
AttrHTTPMethod = attribute.Key("http_method")
AttrHTTPStatus = attribute.Key("http_status_code")
AttrStatus = attribute.Key("status")
AttrErrorType = attribute.Key("error_type")
AttrPluginID = attribute.Key("plugin_id")
AttrPluginType = attribute.Key("plugin_type")
AttrOperation = attribute.Key("operation")
AttrRouteType = attribute.Key("route_type")
AttrTargetType = attribute.Key("target_type")
AttrSchemaVersion = attribute.Key("schema_version")
)
// GetMetrics returns the shared plugin-level instruments, constructing them on
// the first call. Whatever that first attempt produced, including its error,
// is what every later call receives. ctx is not used.
func GetMetrics(ctx context.Context) (*Metrics, error) {
	build := func() {
		metricsInstance, metricsErr = newMetrics()
	}
	metricsOnce.Do(build)
	return metricsInstance, metricsErr
}
// newMetrics registers the plugin-level instruments on a meter taken from the
// global OpenTelemetry meter provider. It stops at the first instrument that
// fails to register and returns an error prefixed with that instrument's name.
func newMetrics() (*Metrics, error) {
	meter := otel.GetMeterProvider().Meter(
		"github.com/beckn-one/beckn-onix/telemetry",
		metric.WithInstrumentationVersion("1.0.0"),
	)

	duration, err := meter.Float64Histogram(
		"onix_plugin_execution_duration_seconds",
		metric.WithDescription("Plugin execution time"),
		metric.WithUnit("s"),
		metric.WithExplicitBucketBoundaries(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1),
	)
	if err != nil {
		return nil, fmt.Errorf("onix_plugin_execution_duration_seconds: %w", err)
	}

	pluginErrors, err := meter.Int64Counter(
		"onix_plugin_errors_total",
		metric.WithDescription("Plugin level errors"),
		metric.WithUnit("{error}"),
	)
	if err != nil {
		return nil, fmt.Errorf("onix_plugin_errors_total: %w", err)
	}

	return &Metrics{
		PluginExecutionDuration: duration,
		PluginErrorsTotal:       pluginErrors,
	}, nil
}

View File

@@ -0,0 +1,15 @@
package telemetry
import (
"context"
"net/http"
"go.opentelemetry.io/otel/sdk/metric"
)
// Provider holds references to telemetry components that need coordinated shutdown.
type Provider struct {
// MeterProvider is the SDK meter provider backing the metrics pipeline.
MeterProvider *metric.MeterProvider
// MetricsHandler is the HTTP handler that serves the collected metrics.
MetricsHandler http.Handler
// Shutdown releases the telemetry resources held by this provider.
Shutdown func(context.Context) error
}

View File

@@ -0,0 +1,54 @@
package telemetry
import (
"context"
clientprom "github.com/prometheus/client_golang/prometheus"
clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
)
// NewTestProvider creates a minimal telemetry provider for testing purposes.
// This avoids import cycles by not depending on the otelsetup package.
//
// It builds a Prometheus exporter on a fresh registry, installs the resulting
// meter provider as the global OpenTelemetry meter provider, and returns a
// Provider whose MetricsHandler serves that registry and whose Shutdown shuts
// the meter provider down. No HTTP server is started.
func NewTestProvider(ctx context.Context) (*Provider, error) {
	testAttrs := resource.WithAttributes(
		attribute.String("service.name", "test-service"),
		attribute.String("service.version", "test"),
		attribute.String("deployment.environment", "test"),
	)
	res, err := resource.New(ctx, testAttrs)
	if err != nil {
		return nil, err
	}

	reg := clientprom.NewRegistry()
	exp, err := otelprom.New(
		otelprom.WithRegisterer(reg),
		otelprom.WithoutUnits(),
		otelprom.WithoutScopeInfo(),
	)
	if err != nil {
		return nil, err
	}

	mp := metric.NewMeterProvider(
		metric.WithReader(exp),
		metric.WithResource(res),
	)
	otel.SetMeterProvider(mp)

	out := &Provider{
		MeterProvider:  mp,
		MetricsHandler: clientpromhttp.HandlerFor(reg, clientpromhttp.HandlerOpts{}),
		Shutdown:       mp.Shutdown,
	}
	return out, nil
}