make changes as per the doc

This commit is contained in:
Manendra Pal Singh
2025-11-21 16:01:59 +05:30
parent bf2c132ab3
commit 8ef4904076
28 changed files with 937 additions and 1062 deletions

View File

@@ -1,24 +0,0 @@
package metrics
import (
"net/http"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)
// HTTPMiddleware returns handler wrapped in otelhttp instrumentation under the
// given operation name. When IsEnabled reports false the handler is returned
// untouched.
func HTTPMiddleware(handler http.Handler, operation string) http.Handler {
	if IsEnabled() {
		return otelhttp.NewHandler(handler, operation)
	}
	return handler
}
// HTTPHandler is the http.HandlerFunc flavour of HTTPMiddleware: it adapts the
// function to an http.Handler and delegates to HTTPMiddleware.
func HTTPHandler(handler http.HandlerFunc, operation string) http.Handler {
	var h http.Handler = handler
	return HTTPMiddleware(h, operation)
}

View File

@@ -1,186 +0,0 @@
package metrics
import (
"context"
"errors"
"fmt"
"net/http"
"sync"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
otelmetric "go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
)
// Package-level metrics state. The non-error variables are assigned inside
// InitMetrics.
var (
// mp is the SDK meter provider; IsEnabled reports true once it is non-nil.
mp *metric.MeterProvider
// meter is the package meter returned by GetMeter.
meter otelmetric.Meter
// prometheusRegistry backs the handler returned by MetricsHandler.
prometheusRegistry *prometheus.Registry
// once guards the initialization body of InitMetrics.
once sync.Once
// shutdownFunc is invoked by Shutdown; nil until InitMetrics has run.
shutdownFunc func(context.Context) error
// ErrInvalidExporter is wrapped by Config.validate for unsupported exporter types.
ErrInvalidExporter = errors.New("invalid metrics exporter type")
// ErrMetricsNotInit is a sentinel error; it is not referenced in the code visible here.
ErrMetricsNotInit = errors.New("metrics not initialized")
)
// ExporterType represents the type of metrics exporter.
type ExporterType string
const (
// ExporterPrometheus exports metrics in Prometheus format.
// It is the only value accepted by Config.validate.
ExporterPrometheus ExporterType = "prometheus"
)
// Config represents the configuration for metrics.
type Config struct {
// Enabled turns metrics on; when false InitMetrics returns immediately.
Enabled bool `yaml:"enabled"`
// ExporterType must be ExporterPrometheus when Enabled is true.
ExporterType ExporterType `yaml:"exporterType"`
// ServiceName is reported as the "service.name" resource attribute;
// validate defaults it to "beckn-onix" when empty.
ServiceName string `yaml:"serviceName"`
// ServiceVersion, when non-empty, is reported as "service.version".
ServiceVersion string `yaml:"serviceVersion"`
// Prometheus holds exporter-specific settings.
Prometheus PrometheusConfig `yaml:"prometheus"`
}
// PrometheusConfig represents Prometheus exporter configuration.
// NOTE(review): neither field is read by the code visible in this file;
// presumably the caller uses them to expose the /metrics endpoint — confirm.
type PrometheusConfig struct {
Port string `yaml:"port"`
Path string `yaml:"path"`
}
// validate checks the configuration and fills in defaults.
// A disabled configuration is always valid. For an enabled one the exporter
// type must be ExporterPrometheus (otherwise ErrInvalidExporter is wrapped and
// returned) and an empty ServiceName is replaced by "beckn-onix".
func (c *Config) validate() error {
	switch {
	case !c.Enabled:
		return nil
	case c.ExporterType != ExporterPrometheus:
		return fmt.Errorf("%w: %s", ErrInvalidExporter, c.ExporterType)
	}

	if len(c.ServiceName) == 0 {
		c.ServiceName = "beckn-onix"
	}
	return nil
}
// initMetricsErr remembers the outcome of the one-time setup performed by
// InitMetrics so that every caller observes the same result.
var initMetricsErr error

// InitMetrics initializes the OpenTelemetry metrics SDK.
//
// A disabled configuration is a no-op that returns nil. Otherwise the setup
// runs at most once per process (guarded by once); the first enabled
// configuration wins. The result of that single run is remembered, so if the
// first attempt failed, later calls keep returning the same error instead of
// silently reporting success while nothing is initialized.
func InitMetrics(cfg Config) error {
	if !cfg.Enabled {
		return nil
	}
	once.Do(func() {
		initMetricsErr = setupMetrics(cfg)
	})
	return initMetricsErr
}

// setupMetrics validates cfg, builds the Prometheus-backed meter provider and
// publishes it through the package-level state and the global OTel provider.
func setupMetrics(cfg Config) error {
	if err := cfg.validate(); err != nil {
		return err
	}

	// Resource describing this service.
	attrs := []attribute.KeyValue{
		attribute.String("service.name", cfg.ServiceName),
	}
	if cfg.ServiceVersion != "" {
		attrs = append(attrs, attribute.String("service.version", cfg.ServiceVersion))
	}
	res, err := resource.New(
		context.Background(),
		resource.WithAttributes(attrs...),
	)
	if err != nil {
		return fmt.Errorf("failed to create resource: %w", err)
	}

	// A dedicated registry lets MetricsHandler serve exactly what the
	// exporter collects.
	promRegistry := prometheus.NewRegistry()
	promExporter, err := otelprom.New(otelprom.WithRegisterer(promRegistry))
	if err != nil {
		return fmt.Errorf("failed to create Prometheus exporter: %w", err)
	}
	prometheusRegistry = promRegistry

	mp = metric.NewMeterProvider(
		metric.WithResource(res),
		metric.WithReader(promExporter),
	)

	// Publish the provider globally and create the package meter.
	otel.SetMeterProvider(mp)
	meter = mp.Meter("github.com/beckn-one/beckn-onix")

	shutdownFunc = func(ctx context.Context) error {
		return mp.Shutdown(ctx)
	}
	return nil
}
// GetMeter returns the package meter created by InitMetrics. Before
// initialization it falls back to otel.Meter("noop") from the global provider.
func GetMeter() otelmetric.Meter {
	if meter != nil {
		return meter
	}
	return otel.Meter("noop")
}
// Shutdown invokes the shutdown function registered by InitMetrics, passing
// ctx through. It returns nil when metrics were never initialized.
func Shutdown(ctx context.Context) error {
	if stop := shutdownFunc; stop != nil {
		return stop(ctx)
	}
	return nil
}
// IsEnabled returns whether metrics are enabled.
// It reports true once InitMetrics has created the meter provider. Shutdown
// does not reset the provider variable, so this stays true after Shutdown.
func IsEnabled() bool {
return mp != nil
}
// MetricsHandler returns a promhttp handler serving the Prometheus registry
// created by InitMetrics. It returns nil when no registry exists yet.
func MetricsHandler() http.Handler {
	registry := prometheusRegistry
	if registry == nil {
		return nil
	}
	return promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
}
// InitAllMetrics runs InitRequestMetrics followed by InitRuntimeMetrics.
// It is a no-op when IsEnabled reports false. The first failing step aborts
// the sequence and its error is returned wrapped with the step's name.
func InitAllMetrics() error {
	if !IsEnabled() {
		return nil
	}

	steps := []struct {
		what string
		init func() error
	}{
		{what: "request metrics", init: InitRequestMetrics},
		{what: "runtime metrics", init: InitRuntimeMetrics},
	}
	for _, step := range steps {
		if err := step.init(); err != nil {
			return fmt.Errorf("failed to initialize %s: %w", step.what, err)
		}
	}
	return nil
}

View File

@@ -1,200 +0,0 @@
package metrics
import (
"context"
"net/http"
"strconv"
"time"
"go.opentelemetry.io/otel/attribute"
otelmetric "go.opentelemetry.io/otel/metric"
)
// Request metric instruments. All of them are nil until InitRequestMetrics
// has created them.
var (
// Inbound request metrics
// inboundRequestsTotal is "beckn.inbound.requests.total".
inboundRequestsTotal otelmetric.Int64Counter
// inboundSignValidationTotal is "beckn.inbound.sign_validation.total".
inboundSignValidationTotal otelmetric.Int64Counter
// inboundSchemaValidationTotal is "beckn.inbound.schema_validation.total".
inboundSchemaValidationTotal otelmetric.Int64Counter
// Outbound request metrics
// outboundRequestsTotal is "beckn.outbound.requests.total".
outboundRequestsTotal otelmetric.Int64Counter
// outboundRequests2XX/4XX/5XX count outbound responses by status class.
outboundRequests2XX otelmetric.Int64Counter
outboundRequests4XX otelmetric.Int64Counter
outboundRequests5XX otelmetric.Int64Counter
// outboundRequestDuration is "beckn.outbound.request.duration", unit "ms".
outboundRequestDuration otelmetric.Float64Histogram
)
// InitRequestMetrics creates the inbound/outbound request instruments on the
// meter returned by GetMeter and stores them in the package-level variables.
// It does nothing when IsEnabled reports false. Instruments are created in a
// fixed order; the first creation error stops the process and is returned.
func InitRequestMetrics() error {
	if !IsEnabled() {
		return nil
	}
	m := GetMeter()

	// Counters in creation order; target is where the instrument is stored.
	counters := []struct {
		target      *otelmetric.Int64Counter
		name        string
		description string
	}{
		{&inboundRequestsTotal, "beckn.inbound.requests.total", "Total number of inbound requests per host"},
		{&inboundSignValidationTotal, "beckn.inbound.sign_validation.total", "Total number of inbound requests with sign validation per host"},
		{&inboundSchemaValidationTotal, "beckn.inbound.schema_validation.total", "Total number of inbound requests with schema validation per host"},
		{&outboundRequestsTotal, "beckn.outbound.requests.total", "Total number of outbound requests per host"},
		{&outboundRequests2XX, "beckn.outbound.requests.2xx", "Total number of outbound requests with 2XX status code per host"},
		{&outboundRequests4XX, "beckn.outbound.requests.4xx", "Total number of outbound requests with 4XX status code per host"},
		{&outboundRequests5XX, "beckn.outbound.requests.5xx", "Total number of outbound requests with 5XX status code per host"},
	}
	for _, c := range counters {
		inst, err := m.Int64Counter(c.name, otelmetric.WithDescription(c.description))
		*c.target = inst
		if err != nil {
			return err
		}
	}

	// Histogram used for latency percentiles (p99, p95, p75).
	hist, err := m.Float64Histogram(
		"beckn.outbound.request.duration",
		otelmetric.WithDescription("Duration of outbound requests in milliseconds"),
		otelmetric.WithUnit("ms"),
	)
	outboundRequestDuration = hist
	return err
}
// RecordInboundRequest adds one to the inbound request counter, labelled with
// host. It is a no-op while the counter has not been created.
func RecordInboundRequest(ctx context.Context, host string) {
	counter := inboundRequestsTotal
	if counter == nil {
		return
	}
	hostAttr := attribute.String("host", host)
	counter.Add(ctx, 1, otelmetric.WithAttributes(hostAttr))
}
// RecordInboundSignValidation adds one to the inbound sign-validation counter,
// labelled with host. It is a no-op while the counter has not been created.
func RecordInboundSignValidation(ctx context.Context, host string) {
	counter := inboundSignValidationTotal
	if counter == nil {
		return
	}
	hostAttr := attribute.String("host", host)
	counter.Add(ctx, 1, otelmetric.WithAttributes(hostAttr))
}
// RecordInboundSchemaValidation adds one to the inbound schema-validation
// counter, labelled with host. It is a no-op while the counter has not been
// created.
func RecordInboundSchemaValidation(ctx context.Context, host string) {
	counter := inboundSchemaValidationTotal
	if counter == nil {
		return
	}
	hostAttr := attribute.String("host", host)
	counter.Add(ctx, 1, otelmetric.WithAttributes(hostAttr))
}
// RecordOutboundRequest records one outbound request.
//
// It increments the total counter and, for 2xx/4xx/5xx responses, the matching
// status-class counter; other classes only count towards the total. The
// request duration is recorded in milliseconds. Every measurement carries the
// "host" and "status_code" attributes. The function is a no-op while the total
// counter has not been created, and each other instrument is skipped
// individually if it is nil (e.g. after a partially failed initialization).
func RecordOutboundRequest(ctx context.Context, host string, statusCode int, duration time.Duration) {
	if outboundRequestsTotal == nil {
		return
	}

	// Build the attribute option once and share it across all instruments.
	attrsOpt := otelmetric.WithAttributes(
		attribute.String("host", host),
		attribute.String("status_code", strconv.Itoa(statusCode)),
	)

	outboundRequestsTotal.Add(ctx, 1, attrsOpt)

	// Pick the counter for the status class, if any.
	var classCounter otelmetric.Int64Counter
	switch statusCode / 100 {
	case 2:
		classCounter = outboundRequests2XX
	case 4:
		classCounter = outboundRequests4XX
	case 5:
		classCounter = outboundRequests5XX
	}
	if classCounter != nil {
		classCounter.Add(ctx, 1, attrsOpt)
	}

	// Keep fractional milliseconds: Duration.Milliseconds() truncates, which
	// would record every sub-millisecond request as 0.
	if outboundRequestDuration != nil {
		ms := float64(duration) / float64(time.Millisecond)
		outboundRequestDuration.Record(ctx, ms, attrsOpt)
	}
}
// HTTPTransport wraps an http.RoundTripper to track outbound request metrics.
type HTTPTransport struct {
// Transport is the underlying round tripper that RoundTrip delegates to.
Transport http.RoundTripper
}
// RoundTrip implements http.RoundTripper. It forwards req to the wrapped
// transport, measures how long the call took and reports the outcome through
// RecordOutboundRequest using the request's context.
//
// A nil Transport falls back to http.DefaultTransport so the zero value is
// usable. When no response is available and an error occurred, the request is
// recorded with status 500. The response and error of the wrapped transport
// are returned unchanged.
func (t *HTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	base := t.Transport
	if base == nil {
		base = http.DefaultTransport
	}

	// Guard against a nil URL; the wrapped transport reports that as an error.
	var host string
	if req.URL != nil {
		host = req.URL.Host
	}

	start := time.Now()
	resp, err := base.RoundTrip(req)
	elapsed := time.Since(start)

	statusCode := 0
	switch {
	case resp != nil:
		statusCode = resp.StatusCode
	case err != nil:
		// Network error - treat as 5XX
		statusCode = 500
	}

	RecordOutboundRequest(req.Context(), host, statusCode, elapsed)
	return resp, err
}
// WrapHTTPTransport returns transport wrapped in an HTTPTransport when
// IsEnabled reports true, and transport itself otherwise.
func WrapHTTPTransport(transport http.RoundTripper) http.RoundTripper {
	if IsEnabled() {
		return &HTTPTransport{Transport: transport}
	}
	return transport
}

View File

@@ -1,346 +0,0 @@
package metrics
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestInitRequestMetrics(t *testing.T) {
tests := []struct {
name string
enabled bool
wantError bool
}{
{
name: "metrics enabled",
enabled: true,
wantError: false,
},
{
name: "metrics disabled",
enabled: false,
wantError: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Setup: Initialize metrics with enabled state
cfg := Config{
Enabled: tt.enabled,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
// Test InitRequestMetrics
err = InitRequestMetrics()
if tt.wantError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
// Cleanup
Shutdown(context.Background())
})
}
}
func TestRecordInboundRequest(t *testing.T) {
// Setup
cfg := Config{
Enabled: true,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
err = InitRequestMetrics()
require.NoError(t, err)
ctx := context.Background()
host := "example.com"
// Test: Record inbound request
RecordInboundRequest(ctx, host)
// Verify: No error should occur
// Note: We can't easily verify the metric value without exporting,
// but we can verify the function doesn't panic
assert.NotPanics(t, func() {
RecordInboundRequest(ctx, host)
})
}
func TestRecordInboundSignValidation(t *testing.T) {
// Setup
cfg := Config{
Enabled: true,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
err = InitRequestMetrics()
require.NoError(t, err)
ctx := context.Background()
host := "example.com"
// Test: Record sign validation
RecordInboundSignValidation(ctx, host)
// Verify: No error should occur
assert.NotPanics(t, func() {
RecordInboundSignValidation(ctx, host)
})
}
func TestRecordInboundSchemaValidation(t *testing.T) {
// Setup
cfg := Config{
Enabled: true,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
err = InitRequestMetrics()
require.NoError(t, err)
ctx := context.Background()
host := "example.com"
// Test: Record schema validation
RecordInboundSchemaValidation(ctx, host)
// Verify: No error should occur
assert.NotPanics(t, func() {
RecordInboundSchemaValidation(ctx, host)
})
}
func TestRecordOutboundRequest(t *testing.T) {
// Setup
cfg := Config{
Enabled: true,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
err = InitRequestMetrics()
require.NoError(t, err)
ctx := context.Background()
host := "example.com"
tests := []struct {
name string
statusCode int
duration time.Duration
}{
{
name: "2XX status code",
statusCode: 200,
duration: 100 * time.Millisecond,
},
{
name: "4XX status code",
statusCode: 404,
duration: 50 * time.Millisecond,
},
{
name: "5XX status code",
statusCode: 500,
duration: 200 * time.Millisecond,
},
{
name: "3XX status code",
statusCode: 301,
duration: 75 * time.Millisecond,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test: Record outbound request
RecordOutboundRequest(ctx, host, tt.statusCode, tt.duration)
// Verify: No error should occur
assert.NotPanics(t, func() {
RecordOutboundRequest(ctx, host, tt.statusCode, tt.duration)
})
})
}
}
// TestHTTPTransport_RoundTrip sends requests through HTTPTransport to a local
// httptest server and checks that the response is passed through unchanged.
// Every response body is closed so the test does not leak connections.
func TestHTTPTransport_RoundTrip(t *testing.T) {
	// Setup
	cfg := Config{
		Enabled:      true,
		ExporterType: ExporterPrometheus,
		ServiceName:  "test-service",
	}
	err := InitMetrics(cfg)
	require.NoError(t, err)
	defer Shutdown(context.Background())
	err = InitRequestMetrics()
	require.NoError(t, err)

	// Local server answering 200 OK.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte("OK"))
	}))
	defer server.Close()

	transport := &HTTPTransport{
		Transport: http.DefaultTransport,
	}

	req, err := http.NewRequest("GET", server.URL, nil)
	require.NoError(t, err)
	req = req.WithContext(context.Background())

	// First round trip: response must come back intact.
	resp, err := transport.RoundTrip(req)
	require.NoError(t, err)
	require.NotNil(t, resp)
	defer resp.Body.Close()
	assert.Equal(t, http.StatusOK, resp.StatusCode)

	// Second round trip: recording metrics again must not panic.
	assert.NotPanics(t, func() {
		again, err := transport.RoundTrip(req)
		assert.NoError(t, err)
		if assert.NotNil(t, again) {
			again.Body.Close()
		}
	})
}
// failingRoundTripper is a stub http.RoundTripper that fails every request
// without touching the network.
type failingRoundTripper struct{}

// RoundTrip always returns a nil response and context.DeadlineExceeded.
func (failingRoundTripper) RoundTrip(*http.Request) (*http.Response, error) {
	return nil, context.DeadlineExceeded
}

// TestHTTPTransport_RoundTrip_Error checks that HTTPTransport passes a
// transport error through to the caller and still records metrics without
// panicking. A stub transport produces the failure so the test does not depend
// on DNS resolution or network access.
func TestHTTPTransport_RoundTrip_Error(t *testing.T) {
	// Setup
	cfg := Config{
		Enabled:      true,
		ExporterType: ExporterPrometheus,
		ServiceName:  "test-service",
	}
	err := InitMetrics(cfg)
	require.NoError(t, err)
	defer Shutdown(context.Background())
	err = InitRequestMetrics()
	require.NoError(t, err)

	transport := &HTTPTransport{
		Transport: failingRoundTripper{},
	}

	req, err := http.NewRequest("GET", "http://invalid-host-that-does-not-exist:9999", nil)
	require.NoError(t, err)
	req = req.WithContext(context.Background())

	// The stub's error must surface and no response must be returned.
	resp, err := transport.RoundTrip(req)
	assert.Error(t, err)
	assert.Nil(t, resp)

	// Recording the failed request a second time must not panic.
	assert.NotPanics(t, func() {
		_, _ = transport.RoundTrip(req)
	})
}
func TestWrapHTTPTransport_Enabled(t *testing.T) {
// Setup
cfg := Config{
Enabled: true,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
// Create a new transport
transport := http.DefaultTransport.(*http.Transport).Clone()
// Test: Wrap transport
wrapped := WrapHTTPTransport(transport)
// Verify: Should be wrapped
assert.NotEqual(t, transport, wrapped)
_, ok := wrapped.(*HTTPTransport)
assert.True(t, ok, "Should be wrapped with HTTPTransport")
}
func TestWrapHTTPTransport_Disabled(t *testing.T) {
// Setup: Initialize metrics with disabled state
cfg := Config{
Enabled: false,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
// Create a new transport
transport := http.DefaultTransport.(*http.Transport).Clone()
// Test: Wrap transport when metrics disabled
wrapped := WrapHTTPTransport(transport)
// Verify: When metrics are disabled, IsEnabled() returns false
// So WrapHTTPTransport should return the original transport
// Note: This test verifies the behavior when IsEnabled() returns false
if !IsEnabled() {
assert.Equal(t, transport, wrapped, "Should return original transport when metrics disabled")
} else {
// If metrics are still enabled from previous test, just verify it doesn't panic
assert.NotNil(t, wrapped)
}
}
func TestRecordInboundRequest_WhenDisabled(t *testing.T) {
// Setup: Metrics disabled
cfg := Config{
Enabled: false,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
ctx := context.Background()
host := "example.com"
// Test: Should not panic when metrics are disabled
assert.NotPanics(t, func() {
RecordInboundRequest(ctx, host)
RecordInboundSignValidation(ctx, host)
RecordInboundSchemaValidation(ctx, host)
RecordOutboundRequest(ctx, host, 200, time.Second)
})
}

View File

@@ -1,27 +0,0 @@
package metrics
import (
otelruntime "go.opentelemetry.io/contrib/instrumentation/runtime"
)
// InitRuntimeMetrics starts the OpenTelemetry contrib runtime instrumentation
// with a minimum ReadMemStats interval of 0 and returns the error from Start,
// if any. It does nothing when IsEnabled reports false.
func InitRuntimeMetrics() error {
	if IsEnabled() {
		return otelruntime.Start(otelruntime.WithMinimumReadMemStatsInterval(0))
	}
	return nil
}

View File

@@ -1,91 +0,0 @@
package metrics
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestInitRuntimeMetrics(t *testing.T) {
tests := []struct {
name string
enabled bool
wantError bool
}{
{
name: "metrics enabled",
enabled: true,
wantError: false,
},
{
name: "metrics disabled",
enabled: false,
wantError: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Setup: Initialize metrics with enabled state
cfg := Config{
Enabled: tt.enabled,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
// Test InitRuntimeMetrics
err = InitRuntimeMetrics()
if tt.wantError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
// Cleanup
Shutdown(context.Background())
})
}
}
func TestInitRuntimeMetrics_MultipleCalls(t *testing.T) {
// Setup
cfg := Config{
Enabled: true,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
// Test: Multiple calls should not cause errors
err = InitRuntimeMetrics()
require.NoError(t, err)
// Note: Second call might fail if runtime.Start is already called,
// but that's expected behavior
err = InitRuntimeMetrics()
// We don't assert on error here as it depends on internal state
_ = err
}
func TestInitRuntimeMetrics_WhenDisabled(t *testing.T) {
// Setup: Metrics disabled
cfg := Config{
Enabled: false,
ExporterType: ExporterPrometheus,
ServiceName: "test-service",
}
err := InitMetrics(cfg)
require.NoError(t, err)
defer Shutdown(context.Background())
// Test: Should return nil without error when disabled
err = InitRuntimeMetrics()
assert.NoError(t, err)
}

View File

@@ -7,7 +7,11 @@ import (
"os"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
"github.com/redis/go-redis/extra/redisotel/v9"
"github.com/redis/go-redis/v9"
)
@@ -32,7 +36,8 @@ type Config struct {
// Cache wraps a Redis client to provide basic caching operations.
type Cache struct {
	// Client is the Redis client used for every cache operation.
	Client RedisClient
	// metrics holds the telemetry instruments used to count cache operations;
	// the cache methods skip recording when it is nil.
	metrics *telemetry.Metrics
}
// Error variables to describe common failure modes.
@@ -92,26 +97,66 @@ func New(ctx context.Context, cfg *Config) (*Cache, func() error, error) {
}
}
metrics, _ := telemetry.GetMetrics(ctx)
log.Infof(ctx, "Cache connection to Redis established successfully")
return &Cache{Client: client}, client.Close, nil
return &Cache{Client: client, metrics: metrics}, client.Close, nil
}
// Get retrieves the value for the specified key from Redis.
//
// When metrics are available, each call increments CacheOperationsTotal with
// operation "get" and a status of "hit", "miss" (the client returned
// redis.Nil) or "error"; hits and misses additionally increment
// CacheHitsTotal / CacheMissesTotal. The value and error from Redis are
// returned unchanged.
func (c *Cache) Get(ctx context.Context, key string) (string, error) {
	result, err := c.Client.Get(ctx, key).Result()
	if c.metrics == nil {
		return result, err
	}

	op := telemetry.AttrOperation.String("get")
	status := "hit"
	switch {
	case err == redis.Nil:
		status = "miss"
		c.metrics.CacheMissesTotal.Add(ctx, 1, metric.WithAttributes(op))
	case err != nil:
		status = "error"
	default:
		c.metrics.CacheHitsTotal.Add(ctx, 1, metric.WithAttributes(op))
	}
	c.metrics.CacheOperationsTotal.Add(ctx, 1,
		metric.WithAttributes(op, telemetry.AttrStatus.String(status)))

	return result, err
}
// Set stores the given key-value pair in Redis with the specified TTL (time to live).
// The outcome is counted through recordOperation under the "set" operation.
func (c *Cache) Set(ctx context.Context, key, value string, ttl time.Duration) error {
	err := c.Client.Set(ctx, key, value, ttl).Err()
	c.recordOperation(ctx, "set", err)
	return err
}
// Delete removes the specified key from Redis.
// The outcome is counted through recordOperation under the "delete" operation.
func (c *Cache) Delete(ctx context.Context, key string) error {
	err := c.Client.Del(ctx, key).Err()
	c.recordOperation(ctx, "delete", err)
	return err
}
// Clear removes all keys in the currently selected Redis database.
// Like Set and Delete, the outcome is counted through recordOperation, under
// the "clear" operation.
func (c *Cache) Clear(ctx context.Context) error {
	err := c.Client.FlushDB(ctx).Err()
	c.recordOperation(ctx, "clear", err)
	return err
}
// recordOperation increments CacheOperationsTotal for op, tagging the
// measurement with status "error" when err is non-nil and "success" otherwise.
// It does nothing when the cache has no metrics.
func (c *Cache) recordOperation(ctx context.Context, op string, err error) {
	if c.metrics == nil {
		return
	}

	var outcome string
	if err == nil {
		outcome = "success"
	} else {
		outcome = "error"
	}

	labels := metric.WithAttributes(
		telemetry.AttrOperation.String(op),
		telemetry.AttrStatus.String(outcome),
	)
	c.metrics.CacheOperationsTotal.Add(ctx, 1, labels)
}

View File

@@ -0,0 +1,21 @@
package main
import (
"context"
"net/http"
"github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelmetrics"
)
// middlewareProvider builds the otelmetrics HTTP middleware.
type middlewareProvider struct{}

// New constructs the otelmetrics middleware from cfg and returns its Handler
// method as the middleware function. Construction errors are returned as-is.
func (middlewareProvider) New(ctx context.Context, cfg map[string]string) (func(http.Handler) http.Handler, error) {
	middleware, err := otelmetrics.New(ctx, cfg)
	if err == nil {
		return middleware.Handler, nil
	}
	return nil, err
}

// Provider is exported for plugin loader.
var Provider = middlewareProvider{}

View File

@@ -0,0 +1,134 @@
package otelmetrics
import (
"context"
"net/http"
"strings"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
)
// Middleware instruments inbound HTTP handlers with OpenTelemetry metrics.
type Middleware struct {
// metrics holds the shared instruments; Handler is a pass-through when it is nil.
metrics *telemetry.Metrics
// enabled is taken from the plugin configuration; Handler is a pass-through when false.
enabled bool
}
// New constructs middleware based on plugin configuration.
//
// The middleware is enabled unless cfg["enabled"] is "false"; the comparison
// ignores case and surrounding whitespace, so values such as "False" or
// " FALSE " also disable it. A missing key leaves it enabled.
//
// Failure to obtain the telemetry instruments is logged as a warning rather
// than returned: the resulting middleware carries whatever GetMetrics returned
// and Handler becomes a pass-through when that is nil. The returned error is
// always nil.
func New(ctx context.Context, cfg map[string]string) (*Middleware, error) {
	enabled := !strings.EqualFold(strings.TrimSpace(cfg["enabled"]), "false")

	metrics, err := telemetry.GetMetrics(ctx)
	if err != nil {
		log.Warnf(ctx, "OpenTelemetry metrics unavailable: %v", err)
	}

	return &Middleware{
		metrics: metrics,
		enabled: enabled,
	}, nil
}
// Handler wraps next with request instrumentation. It returns next unchanged
// when the middleware is disabled or has no metrics.
//
// For each request the wrapper tracks the in-flight gauge, the request size
// (when ContentLength is positive), the request count and duration, the
// response size (when bytes were written) and, for known Beckn actions, the
// Beckn message counter. Module and role labels come from the X-Module-Name
// and X-Role request headers; the action label is the last path segment.
func (m *Middleware) Handler(next http.Handler) http.Handler {
	if m.metrics == nil || !m.enabled {
		return next
	}

	instrumented := func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		action := extractAction(r.URL.Path)
		role := r.Header.Get("X-Role")

		baseAttrs := []attribute.KeyValue{
			telemetry.AttrModule.String(r.Header.Get("X-Module-Name")),
			telemetry.AttrRole.String(role),
			telemetry.AttrAction.String(action),
			telemetry.AttrHTTPMethod.String(r.Method),
		}
		baseOpt := metric.WithAttributes(baseAttrs...)

		m.metrics.HTTPRequestsInFlight.Add(ctx, 1, baseOpt)
		defer m.metrics.HTTPRequestsInFlight.Add(ctx, -1, baseOpt)

		if r.ContentLength > 0 {
			m.metrics.HTTPRequestSize.Record(ctx, r.ContentLength, baseOpt)
		}

		// Wrap the writer so the status code and body size can be observed.
		recorder := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
		begin := time.Now()
		next.ServeHTTP(recorder, r)
		seconds := time.Since(begin).Seconds()

		outcome := "success"
		if recorder.statusCode >= 400 {
			outcome = "error"
		}

		resultOpt := metric.WithAttributes(append(baseAttrs,
			telemetry.AttrHTTPStatus.Int(recorder.statusCode),
			telemetry.AttrStatus.String(outcome),
		)...)

		m.metrics.HTTPRequestsTotal.Add(ctx, 1, resultOpt)
		m.metrics.HTTPRequestDuration.Record(ctx, seconds, resultOpt)
		if recorder.bytesWritten > 0 {
			m.metrics.HTTPResponseSize.Record(ctx, int64(recorder.bytesWritten), resultOpt)
		}

		if !isBecknAction(action) {
			return
		}
		m.metrics.BecknMessagesTotal.Add(ctx, 1, metric.WithAttributes(
			telemetry.AttrAction.String(action),
			telemetry.AttrRole.String(role),
			telemetry.AttrStatus.String(outcome),
		))
	}
	return http.HandlerFunc(instrumented)
}
// responseWriter decorates an http.ResponseWriter so the middleware can
// observe the status code and the number of body bytes sent to the client.
type responseWriter struct {
	http.ResponseWriter
	// statusCode is the status reported to metrics. The creator seeds it with
	// the implicit default (200); it is replaced by the first final status.
	statusCode int
	// bytesWritten is the running total of body bytes accepted by Write.
	bytesWritten int
	// wroteHeader is set once the final status is fixed, either explicitly via
	// WriteHeader or implicitly by the first Write.
	wroteHeader bool
}

// WriteHeader records the first final status code and forwards the call.
// Informational 1xx codes (other than 101) may precede the final header and
// are forwarded without being recorded; later superfluous calls are forwarded
// too but no longer change the recorded status.
func (rw *responseWriter) WriteHeader(code int) {
	informational := code >= 100 && code < 200 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.statusCode = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the underlying writer and adds the number of bytes it
// accepted to bytesWritten. A Write before any WriteHeader fixes the status at
// the seeded value (200 if none was seeded).
func (rw *responseWriter) Write(b []byte) (int, error) {
	if !rw.wroteHeader {
		rw.wroteHeader = true
		if rw.statusCode == 0 {
			rw.statusCode = http.StatusOK
		}
	}
	n, err := rw.ResponseWriter.Write(b)
	rw.bytesWritten += n
	return n, err
}

// Unwrap exposes the underlying writer so http.ResponseController can reach
// optional interfaces (flush, hijack, deadlines) that the wrapper hides.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}
// extractAction returns the last segment of path after stripping leading and
// trailing slashes, or "root" when nothing remains.
func extractAction(path string) string {
	cleaned := strings.Trim(path, "/")
	if cleaned == "" {
		return "root"
	}
	if idx := strings.LastIndexByte(cleaned, '/'); idx >= 0 {
		return cleaned[idx+1:]
	}
	return cleaned
}
// isBecknAction reports whether action is one of the recognised Beckn request
// actions or their "on_" callback counterparts. The match is exact and
// case-sensitive.
func isBecknAction(action string) bool {
	switch action {
	case "discover", "select", "init", "confirm", "status", "track",
		"cancel", "update", "rating", "support",
		"on_discover", "on_select", "on_init", "on_confirm", "on_status",
		"on_track", "on_cancel", "on_update", "on_rating", "on_support":
		return true
	default:
		return false
	}
}

View File

@@ -681,4 +681,4 @@ func TestExcludeActionWithNonURLTargetTypes(t *testing.T) {
}
})
}
}
}

222
pkg/telemetry/metrics.go Normal file
View File

@@ -0,0 +1,222 @@
package telemetry
import (
"context"
"fmt"
"sync"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// Metrics exposes strongly typed metric instruments used across the adapter.
// Every field is populated by newMetrics; the comment on each group gives the
// instrument name prefix registered there.
type Metrics struct {
// HTTP server instruments ("http_server_*").
HTTPRequestsTotal metric.Int64Counter
HTTPRequestDuration metric.Float64Histogram
HTTPRequestsInFlight metric.Int64UpDownCounter
HTTPRequestSize metric.Int64Histogram
HTTPResponseSize metric.Int64Histogram
// Processing step instruments ("onix_step_*").
StepExecutionDuration metric.Float64Histogram
StepExecutionTotal metric.Int64Counter
StepErrorsTotal metric.Int64Counter
// Plugin instruments ("onix_plugin_*").
PluginExecutionDuration metric.Float64Histogram
PluginErrorsTotal metric.Int64Counter
// Beckn protocol instruments ("beckn_*").
BecknMessagesTotal metric.Int64Counter
SignatureValidationsTotal metric.Int64Counter
SchemaValidationsTotal metric.Int64Counter
// Cache instruments ("onix_cache_*").
CacheOperationsTotal metric.Int64Counter
CacheHitsTotal metric.Int64Counter
CacheMissesTotal metric.Int64Counter
// Routing instrument ("onix_routing_decisions_total").
RoutingDecisionsTotal metric.Int64Counter
}
// Singleton state behind GetMetrics.
var (
// metricsInstance is the cached result of newMetrics.
metricsInstance *Metrics
// metricsOnce ensures newMetrics runs at most once.
metricsOnce sync.Once
// metricsErr is the error from that single newMetrics call, if any.
metricsErr error
)
// Attribute keys shared across instruments.
// Each variable is an attribute.Key; its string value is the label name
// attached to measurements.
var (
AttrModule = attribute.Key("module")
AttrSubsystem = attribute.Key("subsystem")
AttrName = attribute.Key("name")
AttrStep = attribute.Key("step")
AttrRole = attribute.Key("role")
AttrAction = attribute.Key("action")
AttrHTTPMethod = attribute.Key("http_method")
AttrHTTPStatus = attribute.Key("http_status_code")
AttrStatus = attribute.Key("status")
AttrErrorType = attribute.Key("error_type")
AttrPluginID = attribute.Key("plugin_id")
AttrPluginType = attribute.Key("plugin_type")
AttrOperation = attribute.Key("operation")
AttrRouteType = attribute.Key("route_type")
AttrTargetType = attribute.Key("target_type")
AttrSchemaVersion = attribute.Key("schema_version")
)
// GetMetrics lazily initializes instruments and returns a cached reference.
// newMetrics runs only on the first call; its result, including any error, is
// cached and returned by every later call. The ctx parameter is currently
// unused.
func GetMetrics(ctx context.Context) (*Metrics, error) {
metricsOnce.Do(func() {
metricsInstance, metricsErr = newMetrics()
})
return metricsInstance, metricsErr
}
// newMetrics creates every instrument of Metrics on a meter obtained from the
// global meter provider. Instruments are created in field order; the first
// creation failure stops further creation and is returned wrapped with the
// instrument name, together with a nil *Metrics.
func newMetrics() (*Metrics, error) {
	meter := otel.GetMeterProvider().Meter(
		"github.com/beckn-one/beckn-onix/telemetry",
		metric.WithInstrumentationVersion("1.0.0"),
	)

	// firstErr latches the first failure; once set, the helpers below stop
	// creating instruments.
	var firstErr error
	fail := func(name string, err error) {
		firstErr = fmt.Errorf("%s: %w", name, err)
	}

	counter := func(name, desc, unit string) metric.Int64Counter {
		if firstErr != nil {
			return nil
		}
		inst, err := meter.Int64Counter(name, metric.WithDescription(desc), metric.WithUnit(unit))
		if err != nil {
			fail(name, err)
		}
		return inst
	}
	upDownCounter := func(name, desc, unit string) metric.Int64UpDownCounter {
		if firstErr != nil {
			return nil
		}
		inst, err := meter.Int64UpDownCounter(name, metric.WithDescription(desc), metric.WithUnit(unit))
		if err != nil {
			fail(name, err)
		}
		return inst
	}
	floatHistogram := func(name, desc, unit string, bounds ...float64) metric.Float64Histogram {
		if firstErr != nil {
			return nil
		}
		inst, err := meter.Float64Histogram(name,
			metric.WithDescription(desc),
			metric.WithUnit(unit),
			metric.WithExplicitBucketBoundaries(bounds...),
		)
		if err != nil {
			fail(name, err)
		}
		return inst
	}
	intHistogram := func(name, desc, unit string, bounds ...float64) metric.Int64Histogram {
		if firstErr != nil {
			return nil
		}
		inst, err := meter.Int64Histogram(name,
			metric.WithDescription(desc),
			metric.WithUnit(unit),
			metric.WithExplicitBucketBoundaries(bounds...),
		)
		if err != nil {
			fail(name, err)
		}
		return inst
	}

	// Field initializers run in lexical order, which fixes the creation order.
	m := &Metrics{
		HTTPRequestsTotal: counter("http_server_requests_total",
			"Total number of HTTP requests processed", "{request}"),
		HTTPRequestDuration: floatHistogram("http_server_request_duration_seconds",
			"HTTP request duration in seconds", "s",
			0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10),
		HTTPRequestsInFlight: upDownCounter("http_server_requests_in_flight",
			"Number of HTTP requests currently being processed", "{request}"),
		HTTPRequestSize: intHistogram("http_server_request_size_bytes",
			"Size of HTTP request payloads", "By",
			100, 1000, 10000, 100000, 1000000),
		HTTPResponseSize: intHistogram("http_server_response_size_bytes",
			"Size of HTTP responses", "By",
			100, 1000, 10000, 100000, 1000000),
		StepExecutionDuration: floatHistogram("onix_step_execution_duration_seconds",
			"Duration of individual processing steps", "s",
			0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5),
		StepExecutionTotal: counter("onix_step_executions_total",
			"Total processing step executions", "{execution}"),
		StepErrorsTotal: counter("onix_step_errors_total",
			"Processing step errors", "{error}"),
		PluginExecutionDuration: floatHistogram("onix_plugin_execution_duration_seconds",
			"Plugin execution time", "s",
			0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1),
		PluginErrorsTotal: counter("onix_plugin_errors_total",
			"Plugin level errors", "{error}"),
		BecknMessagesTotal: counter("beckn_messages_total",
			"Total Beckn protocol messages processed", "{message}"),
		SignatureValidationsTotal: counter("beckn_signature_validations_total",
			"Signature validation attempts", "{validation}"),
		SchemaValidationsTotal: counter("beckn_schema_validations_total",
			"Schema validation attempts", "{validation}"),
		CacheOperationsTotal: counter("onix_cache_operations_total",
			"Redis cache operations", "{operation}"),
		CacheHitsTotal: counter("onix_cache_hits_total",
			"Redis cache hits", "{hit}"),
		CacheMissesTotal: counter("onix_cache_misses_total",
			"Redis cache misses", "{miss}"),
		RoutingDecisionsTotal: counter("onix_routing_decisions_total",
			"Routing decisions taken by handler", "{decision}"),
	}

	if firstErr != nil {
		return nil, firstErr
	}
	return m, nil
}

View File

@@ -0,0 +1,33 @@
package telemetry
import (
"context"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/require"
)
func TestNewProviderAndMetrics(t *testing.T) {
ctx := context.Background()
provider, err := NewProvider(ctx, &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
})
require.NoError(t, err)
require.NotNil(t, provider)
require.NotNil(t, provider.MetricsHandler)
metrics, err := GetMetrics(ctx)
require.NoError(t, err)
require.NotNil(t, metrics)
rec := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/metrics", nil)
provider.MetricsHandler.ServeHTTP(rec, req)
require.Equal(t, 200, rec.Code)
require.NoError(t, provider.Shutdown(context.Background()))
}

View File

@@ -0,0 +1,78 @@
package telemetry
import (
"context"
"errors"
"fmt"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/model"
"github.com/beckn-one/beckn-onix/pkg/plugin/definition"
)
// InstrumentedStep wraps a definition.Step so that each Run call is counted,
// timed and, when the step fails, recorded as an error measurement.
type InstrumentedStep struct {
// step is the wrapped step that Run delegates to.
step definition.Step
// stepName is attached to every measurement via AttrStep and appears in the
// failure log line.
stepName string
// moduleName is attached to every measurement via AttrModule.
moduleName string
// metrics holds the instruments Run records to; when it is nil, Run calls the
// wrapped step without recording anything.
metrics *Metrics
}
// NewInstrumentedStep builds a telemetry wrapper around step, labelling its
// measurements with stepName and moduleName. The instruments are obtained from
// GetMetrics using a background context; if that call fails, its error is
// returned unchanged and no wrapper is created.
func NewInstrumentedStep(step definition.Step, stepName, moduleName string) (*InstrumentedStep, error) {
	instruments, err := GetMetrics(context.Background())
	if err != nil {
		return nil, err
	}

	wrapped := new(InstrumentedStep)
	wrapped.step = step
	wrapped.stepName = stepName
	wrapped.moduleName = moduleName
	wrapped.metrics = instruments
	return wrapped, nil
}
// becknError is satisfied by errors that can expose a structured *model.Error.
// InstrumentedStep.Run looks for it with errors.As so that a failure can be
// labelled with that error's Code rather than with its Go type name.
type becknError interface {
// BecknError returns the structured error; callers in this file tolerate a
// nil result or an empty Code.
BecknError() *model.Error
}
// Run invokes the wrapped step and reports rate, error and duration metrics
// for it.
//
// Every call adds one to StepExecutionTotal and records the elapsed time in
// seconds on StepExecutionDuration, both tagged with module, step and role.
// When the step fails, StepErrorsTotal is also incremented with an extra
// error-type attribute and the failure is logged. The step's error is always
// returned to the caller unchanged. If no instruments are configured the step
// is executed without any recording.
func (is *InstrumentedStep) Run(ctx *model.StepContext) error {
	if is.metrics == nil {
		return is.step.Run(ctx)
	}

	startedAt := time.Now()
	runErr := is.step.Run(ctx)
	elapsed := time.Since(startedAt).Seconds()

	baseAttrs := []attribute.KeyValue{
		AttrModule.String(is.moduleName),
		AttrStep.String(is.stepName),
		AttrRole.String(string(ctx.Role)),
	}
	baseOpt := metric.WithAttributes(baseAttrs...)
	is.metrics.StepExecutionTotal.Add(ctx.Context, 1, baseOpt)
	is.metrics.StepExecutionDuration.Record(ctx.Context, elapsed, baseOpt)

	if runErr == nil {
		return nil
	}

	// Default to the Go type name; prefer the structured error code when the
	// error chain carries one.
	errKind := fmt.Sprintf("%T", runErr)
	var coded becknError
	if errors.As(runErr, &coded) {
		if detail := coded.BecknError(); detail != nil && detail.Code != "" {
			errKind = detail.Code
		}
	}

	failureAttrs := make([]attribute.KeyValue, 0, len(baseAttrs)+1)
	failureAttrs = append(failureAttrs, baseAttrs...)
	failureAttrs = append(failureAttrs, AttrErrorType.String(errKind))
	is.metrics.StepErrorsTotal.Add(ctx.Context, 1, metric.WithAttributes(failureAttrs...))
	log.Errorf(ctx.Context, runErr, "Step %s failed", is.stepName)

	return runErr
}

View File

@@ -0,0 +1,60 @@
package telemetry
import (
"context"
"errors"
"testing"
"github.com/beckn-one/beckn-onix/pkg/model"
"github.com/stretchr/testify/require"
)
// stubStep is a test double for a processing step: its Run method returns a
// preset error.
type stubStep struct {
// err is the value Run returns; leaving it nil makes the step succeed.
err error
}
// Run ignores the step context and returns the error the stub was built with.
func (stub stubStep) Run(ctx *model.StepContext) error {
	return stub.err
}
func TestInstrumentedStepSuccess(t *testing.T) {
ctx := context.Background()
provider, err := NewProvider(ctx, &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
})
require.NoError(t, err)
defer provider.Shutdown(context.Background())
step, err := NewInstrumentedStep(stubStep{}, "test-step", "test-module")
require.NoError(t, err)
stepCtx := &model.StepContext{
Context: context.Background(),
Role: model.RoleBAP,
}
require.NoError(t, step.Run(stepCtx))
}
func TestInstrumentedStepError(t *testing.T) {
ctx := context.Background()
provider, err := NewProvider(ctx, &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
})
require.NoError(t, err)
defer provider.Shutdown(context.Background())
step, err := NewInstrumentedStep(stubStep{err: errors.New("boom")}, "test-step", "test-module")
require.NoError(t, err)
stepCtx := &model.StepContext{
Context: context.Background(),
Role: model.RoleBAP,
}
require.Error(t, step.Run(stepCtx))
}

110
pkg/telemetry/telemetry.go Normal file
View File

@@ -0,0 +1,110 @@
package telemetry
import (
"context"
"fmt"
"net/http"
clientprom "github.com/prometheus/client_golang/prometheus"
clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/instrumentation/runtime"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"github.com/beckn-one/beckn-onix/pkg/log"
)
// Config represents OpenTelemetry related configuration.
// NewProvider substitutes the DefaultConfig value for any string field left
// empty.
type Config struct {
// ServiceName is published as the "service.name" resource attribute.
ServiceName string `yaml:"serviceName"`
// ServiceVersion is published as the "service.version" resource attribute.
ServiceVersion string `yaml:"serviceVersion"`
// EnableMetrics turns metric collection on; when false, NewProvider returns a
// Provider that only carries a no-op Shutdown.
EnableMetrics bool `yaml:"enableMetrics"`
// Environment is published as the "deployment.environment" resource attribute.
Environment string `yaml:"environment"`
}
// Provider holds references to telemetry components that need coordinated shutdown.
type Provider struct {
// MeterProvider is the SDK meter provider built by NewProvider; it is left nil
// when metrics are disabled.
MeterProvider *metric.MeterProvider
// MetricsHandler serves the Prometheus registry populated by the exporter; it
// is left nil when metrics are disabled.
MetricsHandler http.Handler
// Shutdown shuts the meter provider down. NewProvider always sets it, using a
// function that does nothing and returns nil when metrics are disabled.
Shutdown func(context.Context) error
}
// DefaultConfig returns a freshly allocated Config populated with the stock
// settings: service "beckn-onix", version "dev", environment "development",
// and metrics enabled. Each call yields an independent value.
func DefaultConfig() *Config {
	defaults := Config{
		ServiceName:    "beckn-onix",
		ServiceVersion: "dev",
		EnableMetrics:  true,
		Environment:    "development",
	}
	return &defaults
}
// NewProvider wires OpenTelemetry metrics to a Prometheus exporter and returns
// the handles needed to serve and shut them down.
//
// A nil cfg is treated as DefaultConfig(). Empty ServiceName, ServiceVersion or
// Environment fields fall back to their DefaultConfig values. Defaults are
// resolved on a private copy, so the Config passed in by the caller is never
// modified.
//
// When EnableMetrics is false, the returned Provider carries only a no-op
// Shutdown; MeterProvider and MetricsHandler are nil. Otherwise the new meter
// provider is installed as the global OpenTelemetry meter provider, Go runtime
// instrumentation is started (a failure there is logged as a warning, not
// returned), and MetricsHandler serves the exporter's dedicated registry.
//
// An error is returned if the resource or the Prometheus exporter cannot be
// created.
func NewProvider(ctx context.Context, cfg *Config) (*Provider, error) {
	defaults := DefaultConfig()

	// Work on a copy: writing defaults through cfg would silently alter a
	// struct the caller may share or reuse.
	settings := *defaults
	if cfg != nil {
		settings = *cfg
	}
	if settings.ServiceName == "" {
		settings.ServiceName = defaults.ServiceName
	}
	if settings.ServiceVersion == "" {
		settings.ServiceVersion = defaults.ServiceVersion
	}
	if settings.Environment == "" {
		settings.Environment = defaults.Environment
	}

	if !settings.EnableMetrics {
		log.Info(ctx, "OpenTelemetry metrics disabled")
		return &Provider{
			Shutdown: func(context.Context) error { return nil },
		}, nil
	}

	res, err := resource.New(
		ctx,
		resource.WithAttributes(
			attribute.String("service.name", settings.ServiceName),
			attribute.String("service.version", settings.ServiceVersion),
			attribute.String("deployment.environment", settings.Environment),
		),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create telemetry resource: %w", err)
	}

	// A dedicated registry keeps the handler's output limited to what this
	// exporter registers.
	registry := clientprom.NewRegistry()
	exporter, err := otelprom.New(
		otelprom.WithRegisterer(registry),
		otelprom.WithoutUnits(),
		otelprom.WithoutScopeInfo(),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create prometheus exporter: %w", err)
	}

	meterProvider := metric.NewMeterProvider(
		metric.WithReader(exporter),
		metric.WithResource(res),
	)
	otel.SetMeterProvider(meterProvider)
	log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s",
		settings.ServiceName, settings.ServiceVersion, settings.Environment)

	// Runtime metrics are best-effort: the provider remains usable without them.
	if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(0)); err != nil {
		log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err)
	}

	return &Provider{
		MeterProvider:  meterProvider,
		MetricsHandler: clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}),
		Shutdown:       meterProvider.Shutdown,
	}, nil
}