Merge pull request #568 from Beckn-One/feat/observability
Feat/observability
This commit is contained in:
15
pkg/plugin/definition/metrics.go
Normal file
15
pkg/plugin/definition/metrics.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package definition
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/telemetry"
|
||||
)
|
||||
|
||||
// OtelSetupMetricsProvider encapsulates initialization of OpenTelemetry metrics
|
||||
// providers. Implementations wire exporters and return a Provider that the core
|
||||
// application can manage.
|
||||
type OtelSetupMetricsProvider interface {
|
||||
// New initializes a new telemetry provider instance with the given configuration.
|
||||
New(ctx context.Context, config map[string]string) (*telemetry.Provider, func() error, error)
|
||||
}
|
||||
70
pkg/plugin/implementation/cache/cache.go
vendored
70
pkg/plugin/implementation/cache/cache.go
vendored
@@ -7,7 +7,12 @@ import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/telemetry"
|
||||
"github.com/redis/go-redis/extra/redisotel/v9"
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
@@ -31,7 +36,8 @@ type Config struct {
|
||||
|
||||
// Cache wraps a Redis client to provide basic caching operations.
|
||||
type Cache struct {
|
||||
Client RedisClient
|
||||
Client RedisClient
|
||||
metrics *CacheMetrics
|
||||
}
|
||||
|
||||
// Error variables to describe common failure modes.
|
||||
@@ -77,26 +83,80 @@ func New(ctx context.Context, cfg *Config) (*Cache, func() error, error) {
|
||||
return nil, nil, fmt.Errorf("%w: %v", ErrConnectionFail, err)
|
||||
}
|
||||
|
||||
// Enable OpenTelemetry instrumentation for tracing and metrics
|
||||
// This will automatically collect Redis operation metrics and expose them via /metrics endpoint
|
||||
if redisClient, ok := client.(*redis.Client); ok {
|
||||
if err := redisotel.InstrumentTracing(redisClient); err != nil {
|
||||
// Log error but don't fail - instrumentation is optional
|
||||
log.Debugf(ctx, "Failed to instrument Redis tracing: %v", err)
|
||||
}
|
||||
|
||||
if err := redisotel.InstrumentMetrics(redisClient); err != nil {
|
||||
// Log error but don't fail - instrumentation is optional
|
||||
log.Debugf(ctx, "Failed to instrument Redis metrics: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
metrics, _ := GetCacheMetrics(ctx)
|
||||
|
||||
log.Infof(ctx, "Cache connection to Redis established successfully")
|
||||
return &Cache{Client: client}, client.Close, nil
|
||||
return &Cache{Client: client, metrics: metrics}, client.Close, nil
|
||||
}
|
||||
|
||||
// Get retrieves the value for the specified key from Redis.
|
||||
func (c *Cache) Get(ctx context.Context, key string) (string, error) {
|
||||
return c.Client.Get(ctx, key).Result()
|
||||
result, err := c.Client.Get(ctx, key).Result()
|
||||
if c.metrics != nil {
|
||||
attrs := []attribute.KeyValue{
|
||||
telemetry.AttrOperation.String("get"),
|
||||
}
|
||||
switch {
|
||||
case err == redis.Nil:
|
||||
c.metrics.CacheMissesTotal.Add(ctx, 1, metric.WithAttributes(attrs...))
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("miss"))...))
|
||||
case err != nil:
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("error"))...))
|
||||
default:
|
||||
c.metrics.CacheHitsTotal.Add(ctx, 1, metric.WithAttributes(attrs...))
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("hit"))...))
|
||||
}
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
|
||||
// Set stores the given key-value pair in Redis with the specified TTL (time to live).
|
||||
func (c *Cache) Set(ctx context.Context, key, value string, ttl time.Duration) error {
|
||||
return c.Client.Set(ctx, key, value, ttl).Err()
|
||||
err := c.Client.Set(ctx, key, value, ttl).Err()
|
||||
c.recordOperation(ctx, "set", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Delete removes the specified key from Redis.
|
||||
func (c *Cache) Delete(ctx context.Context, key string) error {
|
||||
return c.Client.Del(ctx, key).Err()
|
||||
err := c.Client.Del(ctx, key).Err()
|
||||
c.recordOperation(ctx, "delete", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Clear removes all keys in the currently selected Redis database.
|
||||
func (c *Cache) Clear(ctx context.Context) error {
|
||||
return c.Client.FlushDB(ctx).Err()
|
||||
}
|
||||
|
||||
func (c *Cache) recordOperation(ctx context.Context, op string, err error) {
|
||||
if c.metrics == nil {
|
||||
return
|
||||
}
|
||||
status := "success"
|
||||
if err != nil {
|
||||
status = "error"
|
||||
}
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(
|
||||
telemetry.AttrOperation.String(op),
|
||||
telemetry.AttrStatus.String(status),
|
||||
))
|
||||
}
|
||||
|
||||
69
pkg/plugin/implementation/cache/cache_metrics.go
vendored
Normal file
69
pkg/plugin/implementation/cache/cache_metrics.go
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
package cache
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
)
|
||||
|
||||
// CacheMetrics exposes cache-related metric instruments.
|
||||
type CacheMetrics struct {
|
||||
CacheOperationsTotal metric.Int64Counter
|
||||
CacheHitsTotal metric.Int64Counter
|
||||
CacheMissesTotal metric.Int64Counter
|
||||
}
|
||||
|
||||
var (
|
||||
cacheMetricsInstance *CacheMetrics
|
||||
cacheMetricsOnce sync.Once
|
||||
cacheMetricsErr error
|
||||
)
|
||||
|
||||
// GetCacheMetrics lazily initializes cache metric instruments and returns a cached reference.
|
||||
func GetCacheMetrics(ctx context.Context) (*CacheMetrics, error) {
|
||||
cacheMetricsOnce.Do(func() {
|
||||
cacheMetricsInstance, cacheMetricsErr = newCacheMetrics()
|
||||
})
|
||||
return cacheMetricsInstance, cacheMetricsErr
|
||||
}
|
||||
|
||||
func newCacheMetrics() (*CacheMetrics, error) {
|
||||
meter := otel.GetMeterProvider().Meter(
|
||||
"github.com/beckn-one/beckn-onix/cache",
|
||||
metric.WithInstrumentationVersion("1.0.0"),
|
||||
)
|
||||
|
||||
m := &CacheMetrics{}
|
||||
var err error
|
||||
|
||||
if m.CacheOperationsTotal, err = meter.Int64Counter(
|
||||
"onix_cache_operations_total",
|
||||
metric.WithDescription("Redis cache operations"),
|
||||
metric.WithUnit("{operation}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_operations_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheHitsTotal, err = meter.Int64Counter(
|
||||
"onix_cache_hits_total",
|
||||
metric.WithDescription("Redis cache hits"),
|
||||
metric.WithUnit("{hit}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_hits_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheMissesTotal, err = meter.Int64Counter(
|
||||
"onix_cache_misses_total",
|
||||
metric.WithDescription("Redis cache misses"),
|
||||
metric.WithUnit("{miss}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_misses_total: %w", err)
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
|
||||
79
pkg/plugin/implementation/otelsetup/cmd/plugin.go
Normal file
79
pkg/plugin/implementation/otelsetup/cmd/plugin.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelsetup"
|
||||
"github.com/beckn-one/beckn-onix/pkg/telemetry"
|
||||
)
|
||||
|
||||
// metricsProvider implements the OtelSetupMetricsProvider interface for the otelsetup plugin.
|
||||
type metricsProvider struct {
|
||||
impl otelsetup.Setup
|
||||
}
|
||||
|
||||
// New creates a new telemetry provider instance.
|
||||
func (m metricsProvider) New(ctx context.Context, config map[string]string) (*telemetry.Provider, func() error, error) {
|
||||
if ctx == nil {
|
||||
return nil, nil, errors.New("context cannot be nil")
|
||||
}
|
||||
|
||||
// Convert map[string]string to otelsetup.Config
|
||||
telemetryConfig := &otelsetup.Config{
|
||||
ServiceName: config["serviceName"],
|
||||
ServiceVersion: config["serviceVersion"],
|
||||
Environment: config["environment"],
|
||||
MetricsPort: config["metricsPort"],
|
||||
}
|
||||
|
||||
// Parse enableMetrics as boolean
|
||||
if enableMetricsStr, ok := config["enableMetrics"]; ok && enableMetricsStr != "" {
|
||||
enableMetrics, err := strconv.ParseBool(enableMetricsStr)
|
||||
if err != nil {
|
||||
log.Warnf(ctx, "Invalid enableMetrics value '%s', defaulting to true: %v", enableMetricsStr, err)
|
||||
telemetryConfig.EnableMetrics = true
|
||||
} else {
|
||||
telemetryConfig.EnableMetrics = enableMetrics
|
||||
}
|
||||
} else {
|
||||
telemetryConfig.EnableMetrics = true // Default to true if not specified or empty
|
||||
}
|
||||
|
||||
// Apply defaults if fields are empty
|
||||
if telemetryConfig.ServiceName == "" {
|
||||
telemetryConfig.ServiceName = otelsetup.DefaultConfig().ServiceName
|
||||
}
|
||||
if telemetryConfig.ServiceVersion == "" {
|
||||
telemetryConfig.ServiceVersion = otelsetup.DefaultConfig().ServiceVersion
|
||||
}
|
||||
if telemetryConfig.Environment == "" {
|
||||
telemetryConfig.Environment = otelsetup.DefaultConfig().Environment
|
||||
}
|
||||
|
||||
log.Debugf(ctx, "Telemetry config mapped: %+v", telemetryConfig)
|
||||
provider, err := m.impl.New(ctx, telemetryConfig)
|
||||
if err != nil {
|
||||
log.Errorf(ctx, err, "Failed to create telemetry provider instance")
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Wrap the Shutdown function to match the closer signature
|
||||
var closer func() error
|
||||
if provider != nil && provider.Shutdown != nil {
|
||||
closer = func() error {
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
return provider.Shutdown(shutdownCtx)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof(ctx, "Telemetry provider instance created successfully")
|
||||
return provider, closer, nil
|
||||
}
|
||||
|
||||
// Provider is the exported plugin instance
|
||||
var Provider = metricsProvider{}
|
||||
296
pkg/plugin/implementation/otelsetup/cmd/plugin_test.go
Normal file
296
pkg/plugin/implementation/otelsetup/cmd/plugin_test.go
Normal file
@@ -0,0 +1,296 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelsetup"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestMetricsProviderNew_Success(t *testing.T) {
|
||||
provider := metricsProvider{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
ctx context.Context
|
||||
config map[string]string
|
||||
}{
|
||||
{
|
||||
name: "Valid config with all fields",
|
||||
ctx: context.Background(),
|
||||
config: map[string]string{
|
||||
"serviceName": "test-service",
|
||||
"serviceVersion": "1.0.0",
|
||||
"enableMetrics": "true",
|
||||
"environment": "test",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Valid config with minimal fields (uses defaults)",
|
||||
ctx: context.Background(),
|
||||
config: map[string]string{},
|
||||
},
|
||||
{
|
||||
name: "Valid config with enableMetrics false",
|
||||
ctx: context.Background(),
|
||||
config: map[string]string{
|
||||
"enableMetrics": "false",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Valid config with partial fields",
|
||||
ctx: context.Background(),
|
||||
config: map[string]string{
|
||||
"serviceName": "custom-service",
|
||||
"serviceVersion": "2.0.0",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
telemetryProvider, cleanup, err := provider.New(tt.ctx, tt.config)
|
||||
|
||||
require.NoError(t, err, "New() should not return error")
|
||||
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
|
||||
|
||||
// Metrics server is started inside provider when enabled; MetricsHandler is not exposed.
|
||||
if cleanup != nil {
|
||||
err := cleanup()
|
||||
assert.NoError(t, err, "cleanup() should not return error")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsProviderNew_Failure(t *testing.T) {
|
||||
provider := metricsProvider{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
ctx context.Context
|
||||
config map[string]string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "Nil context",
|
||||
ctx: nil,
|
||||
config: map[string]string{},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
telemetryProvider, cleanup, err := provider.New(tt.ctx, tt.config)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err, "New() should return error for nil context")
|
||||
assert.Nil(t, telemetryProvider, "New() should return nil provider on error")
|
||||
assert.Nil(t, cleanup, "New() should return nil cleanup on error")
|
||||
} else {
|
||||
assert.NoError(t, err, "New() should not return error")
|
||||
assert.NotNil(t, telemetryProvider, "New() should return non-nil provider")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsProviderNew_ConfigConversion(t *testing.T) {
|
||||
provider := metricsProvider{}
|
||||
ctx := context.Background()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
config map[string]string
|
||||
expectedConfig *otelsetup.Config
|
||||
}{
|
||||
{
|
||||
name: "All fields provided",
|
||||
config: map[string]string{
|
||||
"serviceName": "my-service",
|
||||
"serviceVersion": "3.0.0",
|
||||
"enableMetrics": "true",
|
||||
"environment": "production",
|
||||
},
|
||||
expectedConfig: &otelsetup.Config{
|
||||
ServiceName: "my-service",
|
||||
ServiceVersion: "3.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "production",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Empty config uses defaults",
|
||||
config: map[string]string{},
|
||||
expectedConfig: &otelsetup.Config{
|
||||
ServiceName: otelsetup.DefaultConfig().ServiceName,
|
||||
ServiceVersion: otelsetup.DefaultConfig().ServiceVersion,
|
||||
EnableMetrics: true, // Default when not specified
|
||||
Environment: otelsetup.DefaultConfig().Environment,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "EnableMetrics false",
|
||||
config: map[string]string{
|
||||
"enableMetrics": "false",
|
||||
},
|
||||
expectedConfig: &otelsetup.Config{
|
||||
ServiceName: otelsetup.DefaultConfig().ServiceName,
|
||||
ServiceVersion: otelsetup.DefaultConfig().ServiceVersion,
|
||||
EnableMetrics: false,
|
||||
Environment: otelsetup.DefaultConfig().Environment,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Invalid enableMetrics defaults to true",
|
||||
config: map[string]string{
|
||||
"enableMetrics": "invalid",
|
||||
},
|
||||
expectedConfig: &otelsetup.Config{
|
||||
ServiceName: otelsetup.DefaultConfig().ServiceName,
|
||||
ServiceVersion: otelsetup.DefaultConfig().ServiceVersion,
|
||||
EnableMetrics: true, // Defaults to true on parse error
|
||||
Environment: otelsetup.DefaultConfig().Environment,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
telemetryProvider, cleanup, err := provider.New(ctx, tt.config)
|
||||
|
||||
require.NoError(t, err, "New() should not return error")
|
||||
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
|
||||
|
||||
if cleanup != nil {
|
||||
err := cleanup()
|
||||
assert.NoError(t, err, "cleanup() should not return error")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsProviderNew_BooleanParsing(t *testing.T) {
|
||||
provider := metricsProvider{}
|
||||
ctx := context.Background()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
enableMetrics string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "True string",
|
||||
enableMetrics: "true",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "False string",
|
||||
enableMetrics: "false",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "True uppercase",
|
||||
enableMetrics: "TRUE",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "False uppercase",
|
||||
enableMetrics: "FALSE",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "Invalid value defaults to true",
|
||||
enableMetrics: "invalid",
|
||||
expected: true, // Defaults to true on parse error
|
||||
},
|
||||
{
|
||||
name: "Empty string defaults to true",
|
||||
enableMetrics: "",
|
||||
expected: true, // Defaults to true when not specified
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
config := map[string]string{
|
||||
"enableMetrics": tt.enableMetrics,
|
||||
}
|
||||
|
||||
telemetryProvider, cleanup, err := provider.New(ctx, config)
|
||||
|
||||
require.NoError(t, err, "New() should not return error")
|
||||
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
|
||||
|
||||
if cleanup != nil {
|
||||
err := cleanup()
|
||||
assert.NoError(t, err, "cleanup() should not return error")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsProviderNew_CleanupFunction(t *testing.T) {
|
||||
provider := metricsProvider{}
|
||||
ctx := context.Background()
|
||||
|
||||
config := map[string]string{
|
||||
"serviceName": "test-service",
|
||||
"serviceVersion": "1.0.0",
|
||||
"enableMetrics": "true",
|
||||
"environment": "test",
|
||||
}
|
||||
|
||||
telemetryProvider, cleanup, err := provider.New(ctx, config)
|
||||
|
||||
require.NoError(t, err, "New() should not return error")
|
||||
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
|
||||
require.NotNil(t, cleanup, "New() should return non-nil cleanup function")
|
||||
|
||||
// Test that cleanup can be called successfully
|
||||
err = cleanup()
|
||||
assert.NoError(t, err, "cleanup() should not return error")
|
||||
}
|
||||
|
||||
func TestProviderVariable(t *testing.T) {
|
||||
assert.NotNil(t, Provider, "Provider should not be nil")
|
||||
|
||||
// Verify Provider implements the interface correctly
|
||||
ctx := context.Background()
|
||||
config := map[string]string{
|
||||
"serviceName": "test",
|
||||
"serviceVersion": "1.0.0",
|
||||
"enableMetrics": "true",
|
||||
}
|
||||
|
||||
telemetryProvider, cleanup, err := Provider.New(ctx, config)
|
||||
|
||||
require.NoError(t, err, "Provider.New() should not return error")
|
||||
require.NotNil(t, telemetryProvider, "Provider.New() should return non-nil provider")
|
||||
|
||||
if cleanup != nil {
|
||||
err := cleanup()
|
||||
assert.NoError(t, err, "cleanup() should not return error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsProviderNew_DefaultValues(t *testing.T) {
|
||||
provider := metricsProvider{}
|
||||
ctx := context.Background()
|
||||
|
||||
// Test with completely empty config
|
||||
config := map[string]string{}
|
||||
|
||||
telemetryProvider, cleanup, err := provider.New(ctx, config)
|
||||
|
||||
require.NoError(t, err, "New() should not return error with empty config")
|
||||
require.NotNil(t, telemetryProvider, "New() should return non-nil provider")
|
||||
|
||||
if cleanup != nil {
|
||||
err := cleanup()
|
||||
assert.NoError(t, err, "cleanup() should not return error")
|
||||
}
|
||||
}
|
||||
169
pkg/plugin/implementation/otelsetup/otelsetup.go
Normal file
169
pkg/plugin/implementation/otelsetup/otelsetup.go
Normal file
@@ -0,0 +1,169 @@
|
||||
package otelsetup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
clientprom "github.com/prometheus/client_golang/prometheus"
|
||||
clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/plugin"
|
||||
"github.com/beckn-one/beckn-onix/pkg/telemetry"
|
||||
)
|
||||
|
||||
// Setup is the concrete implementation behind the OtelSetupMetricsProvider
// interface. It carries no state; its New method wires the telemetry provider.
type Setup struct{}
|
||||
|
||||
// Config holds the OpenTelemetry related settings consumed by Setup.New.
type Config struct {
	// ServiceName is reported as the "service.name" resource attribute.
	ServiceName string `yaml:"serviceName"`
	// ServiceVersion is reported as the "service.version" resource attribute.
	ServiceVersion string `yaml:"serviceVersion"`
	// EnableMetrics turns metrics setup on; when false Setup.New returns a
	// provider that only has a no-op Shutdown.
	EnableMetrics bool `yaml:"enableMetrics"`
	// Environment is reported as the "deployment.environment" resource attribute.
	Environment string `yaml:"environment"`
	// MetricsPort is the port the metrics HTTP server listens on.
	MetricsPort string `yaml:"metricsPort"`
}
|
||||
|
||||
// DefaultConfig returns sensible defaults for telemetry configuration.
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
ServiceName: "beckn-onix",
|
||||
ServiceVersion: "dev",
|
||||
EnableMetrics: true,
|
||||
Environment: "development",
|
||||
MetricsPort: "9090",
|
||||
}
|
||||
}
|
||||
|
||||
// ToPluginConfig converts Config to plugin.Config format.
|
||||
func ToPluginConfig(cfg *Config) *plugin.Config {
|
||||
return &plugin.Config{
|
||||
ID: "otelsetup",
|
||||
Config: map[string]string{
|
||||
"serviceName": cfg.ServiceName,
|
||||
"serviceVersion": cfg.ServiceVersion,
|
||||
"enableMetrics": fmt.Sprintf("%t", cfg.EnableMetrics),
|
||||
"environment": cfg.Environment,
|
||||
"metricsPort": cfg.MetricsPort,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// New initializes the underlying telemetry provider. The returned provider
|
||||
// exposes the HTTP handler and shutdown hooks that the core application can
|
||||
// manage directly.
|
||||
func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) {
|
||||
if cfg == nil {
|
||||
return nil, fmt.Errorf("telemetry config cannot be nil")
|
||||
}
|
||||
|
||||
// Apply defaults if fields are empty
|
||||
if cfg.ServiceName == "" {
|
||||
cfg.ServiceName = DefaultConfig().ServiceName
|
||||
}
|
||||
if cfg.ServiceVersion == "" {
|
||||
cfg.ServiceVersion = DefaultConfig().ServiceVersion
|
||||
}
|
||||
if cfg.Environment == "" {
|
||||
cfg.Environment = DefaultConfig().Environment
|
||||
}
|
||||
if cfg.MetricsPort == "" {
|
||||
cfg.MetricsPort = DefaultConfig().MetricsPort
|
||||
}
|
||||
|
||||
if !cfg.EnableMetrics {
|
||||
log.Info(ctx, "OpenTelemetry metrics disabled")
|
||||
return &telemetry.Provider{
|
||||
Shutdown: func(context.Context) error { return nil },
|
||||
}, nil
|
||||
}
|
||||
|
||||
res, err := resource.New(
|
||||
ctx,
|
||||
resource.WithAttributes(
|
||||
attribute.String("service.name", cfg.ServiceName),
|
||||
attribute.String("service.version", cfg.ServiceVersion),
|
||||
attribute.String("deployment.environment", cfg.Environment),
|
||||
),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create telemetry resource: %w", err)
|
||||
}
|
||||
|
||||
registry := clientprom.NewRegistry()
|
||||
|
||||
exporter, err := otelprom.New(
|
||||
otelprom.WithRegisterer(registry),
|
||||
otelprom.WithoutUnits(),
|
||||
otelprom.WithoutScopeInfo(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create prometheus exporter: %w", err)
|
||||
}
|
||||
|
||||
meterProvider := metric.NewMeterProvider(
|
||||
metric.WithReader(exporter),
|
||||
metric.WithResource(res),
|
||||
)
|
||||
|
||||
otel.SetMeterProvider(meterProvider)
|
||||
log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s",
|
||||
cfg.ServiceName, cfg.ServiceVersion, cfg.Environment)
|
||||
|
||||
if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(0)); err != nil {
|
||||
log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err)
|
||||
}
|
||||
|
||||
// Create metrics handler
|
||||
metricsHandler := clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{})
|
||||
|
||||
// Create and start metrics HTTP server
|
||||
metricsMux := http.NewServeMux()
|
||||
metricsMux.Handle("/metrics", metricsHandler)
|
||||
|
||||
metricsServer := &http.Server{
|
||||
Addr: net.JoinHostPort("", cfg.MetricsPort),
|
||||
Handler: metricsMux,
|
||||
ReadTimeout: 10 * time.Second,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
IdleTimeout: 30 * time.Second,
|
||||
}
|
||||
|
||||
var serverWg sync.WaitGroup
|
||||
serverWg.Add(1)
|
||||
go func() {
|
||||
defer serverWg.Done()
|
||||
log.Infof(ctx, "Metrics server listening on %s", metricsServer.Addr)
|
||||
if err := metricsServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
log.Errorf(ctx, fmt.Errorf("metrics server ListenAndServe: %w", err), "error listening and serving metrics")
|
||||
}
|
||||
}()
|
||||
|
||||
return &telemetry.Provider{
|
||||
MeterProvider: meterProvider,
|
||||
MetricsHandler: metricsHandler,
|
||||
Shutdown: func(shutdownCtx context.Context) error {
|
||||
log.Infof(ctx, "Shutting down metrics server...")
|
||||
// Shutdown the metrics server
|
||||
serverShutdownCtx, cancel := context.WithTimeout(shutdownCtx, 10*time.Second)
|
||||
defer cancel()
|
||||
if err := metricsServer.Shutdown(serverShutdownCtx); err != nil {
|
||||
log.Errorf(ctx, fmt.Errorf("metrics server shutdown: %w", err), "error shutting down metrics server")
|
||||
}
|
||||
serverWg.Wait()
|
||||
// Shutdown the meter provider
|
||||
return meterProvider.Shutdown(shutdownCtx)
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
259
pkg/plugin/implementation/otelsetup/otelsetup_test.go
Normal file
259
pkg/plugin/implementation/otelsetup/otelsetup_test.go
Normal file
@@ -0,0 +1,259 @@
|
||||
package otelsetup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSetup_New_Success(t *testing.T) {
|
||||
setup := Setup{}
|
||||
ctx := context.Background()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
cfg *Config
|
||||
}{
|
||||
{
|
||||
name: "Valid config with all fields",
|
||||
cfg: &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Valid config with metrics disabled",
|
||||
cfg: &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: false,
|
||||
Environment: "test",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Config with empty fields uses defaults",
|
||||
cfg: &Config{
|
||||
ServiceName: "",
|
||||
ServiceVersion: "",
|
||||
EnableMetrics: true,
|
||||
Environment: "",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
provider, err := setup.New(ctx, tt.cfg)
|
||||
|
||||
require.NoError(t, err, "New() should not return error")
|
||||
require.NotNil(t, provider, "New() should return non-nil provider")
|
||||
require.NotNil(t, provider.Shutdown, "Provider should have shutdown function")
|
||||
|
||||
if tt.cfg.EnableMetrics {
|
||||
assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set when metrics enabled")
|
||||
}
|
||||
|
||||
// Test shutdown
|
||||
err = provider.Shutdown(ctx)
|
||||
assert.NoError(t, err, "Shutdown should not return error")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetup_New_Failure(t *testing.T) {
|
||||
setup := Setup{}
|
||||
ctx := context.Background()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
cfg *Config
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "Nil config",
|
||||
cfg: nil,
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
provider, err := setup.New(ctx, tt.cfg)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err, "New() should return error")
|
||||
assert.Nil(t, provider, "New() should return nil provider on error")
|
||||
} else {
|
||||
assert.NoError(t, err, "New() should not return error")
|
||||
assert.NotNil(t, provider, "New() should return non-nil provider")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetup_New_DefaultValues(t *testing.T) {
|
||||
setup := Setup{}
|
||||
ctx := context.Background()
|
||||
|
||||
// Test with empty fields - should use defaults
|
||||
cfg := &Config{
|
||||
ServiceName: "",
|
||||
ServiceVersion: "",
|
||||
EnableMetrics: true,
|
||||
Environment: "",
|
||||
}
|
||||
|
||||
provider, err := setup.New(ctx, cfg)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, provider)
|
||||
|
||||
// Verify defaults are applied by checking that provider is functional
|
||||
assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set with defaults")
|
||||
|
||||
// Cleanup
|
||||
err = provider.Shutdown(ctx)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestSetup_New_MetricsDisabled(t *testing.T) {
|
||||
setup := Setup{}
|
||||
ctx := context.Background()
|
||||
|
||||
cfg := &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: false,
|
||||
Environment: "test",
|
||||
}
|
||||
|
||||
provider, err := setup.New(ctx, cfg)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, provider)
|
||||
|
||||
// When metrics are disabled, MetricsHandler should be nil and MeterProvider should be nil
|
||||
assert.Nil(t, provider.MeterProvider, "MeterProvider should be nil when metrics disabled")
|
||||
|
||||
// Shutdown should still work
|
||||
err = provider.Shutdown(ctx)
|
||||
assert.NoError(t, err, "Shutdown should work even when metrics disabled")
|
||||
}
|
||||
|
||||
func TestToPluginConfig_Success(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
cfg *Config
|
||||
expectedID string
|
||||
expectedConfig map[string]string
|
||||
}{
|
||||
{
|
||||
name: "Valid config with all fields",
|
||||
cfg: &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
},
|
||||
expectedID: "otelsetup",
|
||||
expectedConfig: map[string]string{
|
||||
"serviceName": "test-service",
|
||||
"serviceVersion": "1.0.0",
|
||||
"enableMetrics": "true",
|
||||
"environment": "test",
|
||||
"metricsPort": "",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Config with enableMetrics false",
|
||||
cfg: &Config{
|
||||
ServiceName: "my-service",
|
||||
ServiceVersion: "2.0.0",
|
||||
EnableMetrics: false,
|
||||
Environment: "production",
|
||||
},
|
||||
expectedID: "otelsetup",
|
||||
expectedConfig: map[string]string{
|
||||
"serviceName": "my-service",
|
||||
"serviceVersion": "2.0.0",
|
||||
"enableMetrics": "false",
|
||||
"environment": "production",
|
||||
"metricsPort": "",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Config with empty fields",
|
||||
cfg: &Config{
|
||||
ServiceName: "",
|
||||
ServiceVersion: "",
|
||||
EnableMetrics: true,
|
||||
Environment: "",
|
||||
},
|
||||
expectedID: "otelsetup",
|
||||
expectedConfig: map[string]string{
|
||||
"serviceName": "",
|
||||
"serviceVersion": "",
|
||||
"enableMetrics": "true",
|
||||
"environment": "",
|
||||
"metricsPort": "",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := ToPluginConfig(tt.cfg)
|
||||
|
||||
require.NotNil(t, result, "ToPluginConfig should return non-nil config")
|
||||
assert.Equal(t, tt.expectedID, result.ID, "Plugin ID should be 'otelsetup'")
|
||||
assert.Equal(t, tt.expectedConfig, result.Config, "Config map should match expected values")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestToPluginConfig_NilConfig(t *testing.T) {
|
||||
// Test that ToPluginConfig handles nil config
|
||||
// Note: This will panic if nil is passed, which is acceptable behavior
|
||||
// as the function expects a valid config. In practice, callers should check for nil.
|
||||
assert.Panics(t, func() {
|
||||
ToPluginConfig(nil)
|
||||
}, "ToPluginConfig should panic when given nil config")
|
||||
}
|
||||
|
||||
func TestToPluginConfig_BooleanConversion(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
enableMetrics bool
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "EnableMetrics true",
|
||||
enableMetrics: true,
|
||||
expected: "true",
|
||||
},
|
||||
{
|
||||
name: "EnableMetrics false",
|
||||
enableMetrics: false,
|
||||
expected: "false",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cfg := &Config{
|
||||
ServiceName: "test",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: tt.enableMetrics,
|
||||
Environment: "test",
|
||||
MetricsPort: "",
|
||||
}
|
||||
|
||||
result := ToPluginConfig(cfg)
|
||||
require.NotNil(t, result)
|
||||
assert.Equal(t, tt.expected, result.Config["enableMetrics"], "enableMetrics should be converted to string correctly")
|
||||
assert.Equal(t, "", result.Config["metricsPort"], "metricsPort should be included even when empty")
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -683,7 +683,6 @@ func TestExcludeActionWithNonURLTargetTypes(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TestV2RouteSuccess tests v2 routing with domain-agnostic behavior
|
||||
func TestV2RouteSuccess(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/plugin/definition"
|
||||
"github.com/beckn-one/beckn-onix/pkg/telemetry"
|
||||
)
|
||||
|
||||
type onixPlugin interface {
|
||||
@@ -196,6 +197,33 @@ func (m *Manager) Middleware(ctx context.Context, cfg *Config) (func(http.Handle
|
||||
return mwp.New(ctx, cfg.Config)
|
||||
}
|
||||
|
||||
// OtelSetup initializes OpenTelemetry via a dedicated plugin. The plugin is
|
||||
// expected to return a telemetry Provider that the core application can use for
|
||||
// instrumentation.
|
||||
func (m *Manager) OtelSetup(ctx context.Context, cfg *Config) (*telemetry.Provider, error) {
|
||||
if cfg == nil {
|
||||
log.Info(ctx, "Telemetry config not provided; skipping OpenTelemetry setup")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
otp, err := provider[definition.OtelSetupMetricsProvider](m.plugins, cfg.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load provider for %s: %w", cfg.ID, err)
|
||||
}
|
||||
provider, closer, err := otp.New(ctx, cfg.Config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if closer != nil {
|
||||
m.closers = append(m.closers, func() {
|
||||
if err := closer(); err != nil {
|
||||
log.Errorf(context.Background(), err, "Failed to shutdown telemetry provider")
|
||||
}
|
||||
})
|
||||
}
|
||||
return provider, nil
|
||||
}
|
||||
|
||||
// TransportWrapper returns a TransportWrapper instance based on the provided configuration.
|
||||
func (m *Manager) TransportWrapper(ctx context.Context, cfg *Config) (definition.TransportWrapper, error) {
|
||||
twp, err := provider[definition.TransportWrapperProvider](m.plugins, cfg.ID)
|
||||
|
||||
Reference in New Issue
Block a user