make changes as per the doc
This commit is contained in:
@@ -1,24 +0,0 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
||||
)
|
||||
|
||||
// HTTPMiddleware wraps an HTTP handler with OpenTelemetry instrumentation.
|
||||
func HTTPMiddleware(handler http.Handler, operation string) http.Handler {
|
||||
if !IsEnabled() {
|
||||
return handler
|
||||
}
|
||||
|
||||
return otelhttp.NewHandler(
|
||||
handler,
|
||||
operation,
|
||||
)
|
||||
}
|
||||
|
||||
// HTTPHandler wraps an HTTP handler function with OpenTelemetry instrumentation.
|
||||
func HTTPHandler(handler http.HandlerFunc, operation string) http.Handler {
|
||||
return HTTPMiddleware(handler, operation)
|
||||
}
|
||||
@@ -1,186 +0,0 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
|
||||
otelmetric "go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
)
|
||||
|
||||
var (
|
||||
mp *metric.MeterProvider
|
||||
meter otelmetric.Meter
|
||||
prometheusRegistry *prometheus.Registry
|
||||
once sync.Once
|
||||
shutdownFunc func(context.Context) error
|
||||
ErrInvalidExporter = errors.New("invalid metrics exporter type")
|
||||
ErrMetricsNotInit = errors.New("metrics not initialized")
|
||||
)
|
||||
|
||||
// ExporterType names a metrics exporter backend as it appears in configuration.
type ExporterType string
|
||||
|
||||
const (
|
||||
// ExporterPrometheus exports metrics in Prometheus format.
|
||||
ExporterPrometheus ExporterType = "prometheus"
|
||||
)
|
||||
|
||||
// Config represents the configuration for metrics.
|
||||
type Config struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
ExporterType ExporterType `yaml:"exporterType"`
|
||||
ServiceName string `yaml:"serviceName"`
|
||||
ServiceVersion string `yaml:"serviceVersion"`
|
||||
Prometheus PrometheusConfig `yaml:"prometheus"`
|
||||
}
|
||||
|
||||
// PrometheusConfig holds the settings for the Prometheus exporter.
//
// NOTE(review): neither field is read by the code visible in this package;
// presumably the caller that mounts MetricsHandler consumes them — confirm.
type PrometheusConfig struct {
	Port string `yaml:"port"`
	Path string `yaml:"path"`
}
|
||||
|
||||
// validate validates the metrics configuration.
|
||||
func (c *Config) validate() error {
|
||||
if !c.Enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
if c.ExporterType != ExporterPrometheus {
|
||||
return fmt.Errorf("%w: %s", ErrInvalidExporter, c.ExporterType)
|
||||
}
|
||||
|
||||
if c.ServiceName == "" {
|
||||
c.ServiceName = "beckn-onix"
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// InitMetrics initializes the OpenTelemetry metrics SDK.
|
||||
func InitMetrics(cfg Config) error {
|
||||
if !cfg.Enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
var initErr error
|
||||
once.Do(func() {
|
||||
if initErr = cfg.validate(); initErr != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Create resource with service information.
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("service.name", cfg.ServiceName),
|
||||
}
|
||||
if cfg.ServiceVersion != "" {
|
||||
attrs = append(attrs, attribute.String("service.version", cfg.ServiceVersion))
|
||||
}
|
||||
res, err := resource.New(
|
||||
context.Background(),
|
||||
resource.WithAttributes(attrs...),
|
||||
)
|
||||
if err != nil {
|
||||
initErr = fmt.Errorf("failed to create resource: %w", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Always create Prometheus exporter for /metrics endpoint
|
||||
// Create a custom registry for the exporter so we can use it for HTTP serving
|
||||
promRegistry := prometheus.NewRegistry()
|
||||
promExporter, err := otelprom.New(otelprom.WithRegisterer(promRegistry))
|
||||
if err != nil {
|
||||
initErr = fmt.Errorf("failed to create Prometheus exporter: %w", err)
|
||||
return
|
||||
}
|
||||
prometheusRegistry = promRegistry
|
||||
|
||||
// Create readers based on configuration.
|
||||
var readers []metric.Reader
|
||||
|
||||
// Always add Prometheus reader for /metrics endpoint
|
||||
readers = append(readers, promExporter)
|
||||
|
||||
// Create meter provider with all readers
|
||||
opts := []metric.Option{
|
||||
metric.WithResource(res),
|
||||
}
|
||||
for _, reader := range readers {
|
||||
opts = append(opts, metric.WithReader(reader))
|
||||
}
|
||||
mp = metric.NewMeterProvider(opts...)
|
||||
|
||||
// Set global meter provider.
|
||||
otel.SetMeterProvider(mp)
|
||||
|
||||
// Create meter for this package.
|
||||
meter = mp.Meter("github.com/beckn-one/beckn-onix")
|
||||
|
||||
// Store shutdown function.
|
||||
shutdownFunc = func(ctx context.Context) error {
|
||||
return mp.Shutdown(ctx)
|
||||
}
|
||||
})
|
||||
|
||||
return initErr
|
||||
}
|
||||
|
||||
// GetMeter returns the global meter instance.
|
||||
func GetMeter() otelmetric.Meter {
|
||||
if meter == nil {
|
||||
// Return a no-op meter if not initialized.
|
||||
return otel.Meter("noop")
|
||||
}
|
||||
return meter
|
||||
}
|
||||
|
||||
// Shutdown gracefully shuts down the metrics provider.
|
||||
func Shutdown(ctx context.Context) error {
|
||||
if shutdownFunc == nil {
|
||||
return nil
|
||||
}
|
||||
return shutdownFunc(ctx)
|
||||
}
|
||||
|
||||
// IsEnabled returns whether metrics are enabled.
|
||||
func IsEnabled() bool {
|
||||
return mp != nil
|
||||
}
|
||||
|
||||
// MetricsHandler returns the HTTP handler for the /metrics endpoint.
|
||||
// Returns nil if metrics are not enabled.
|
||||
func MetricsHandler() http.Handler {
|
||||
if prometheusRegistry == nil {
|
||||
return nil
|
||||
}
|
||||
// Use promhttp to serve the Prometheus registry
|
||||
return promhttp.HandlerFor(prometheusRegistry, promhttp.HandlerOpts{})
|
||||
}
|
||||
|
||||
// InitAllMetrics initializes all metrics subsystems.
|
||||
// This includes request metrics and runtime metrics.
|
||||
// Returns an error if any initialization fails.
|
||||
func InitAllMetrics() error {
|
||||
if !IsEnabled() {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := InitRequestMetrics(); err != nil {
|
||||
return fmt.Errorf("failed to initialize request metrics: %w", err)
|
||||
}
|
||||
if err := InitRuntimeMetrics(); err != nil {
|
||||
return fmt.Errorf("failed to initialize runtime metrics: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
otelmetric "go.opentelemetry.io/otel/metric"
|
||||
)
|
||||
|
||||
var (
|
||||
// Inbound request metrics
|
||||
inboundRequestsTotal otelmetric.Int64Counter
|
||||
inboundSignValidationTotal otelmetric.Int64Counter
|
||||
inboundSchemaValidationTotal otelmetric.Int64Counter
|
||||
|
||||
// Outbound request metrics
|
||||
outboundRequestsTotal otelmetric.Int64Counter
|
||||
outboundRequests2XX otelmetric.Int64Counter
|
||||
outboundRequests4XX otelmetric.Int64Counter
|
||||
outboundRequests5XX otelmetric.Int64Counter
|
||||
outboundRequestDuration otelmetric.Float64Histogram
|
||||
)
|
||||
|
||||
// InitRequestMetrics initializes request-related metrics instruments.
|
||||
func InitRequestMetrics() error {
|
||||
if !IsEnabled() {
|
||||
return nil
|
||||
}
|
||||
|
||||
meter := GetMeter()
|
||||
var err error
|
||||
|
||||
// Inbound request metrics
|
||||
inboundRequestsTotal, err = meter.Int64Counter(
|
||||
"beckn.inbound.requests.total",
|
||||
otelmetric.WithDescription("Total number of inbound requests per host"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
inboundSignValidationTotal, err = meter.Int64Counter(
|
||||
"beckn.inbound.sign_validation.total",
|
||||
otelmetric.WithDescription("Total number of inbound requests with sign validation per host"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
inboundSchemaValidationTotal, err = meter.Int64Counter(
|
||||
"beckn.inbound.schema_validation.total",
|
||||
otelmetric.WithDescription("Total number of inbound requests with schema validation per host"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Outbound request metrics
|
||||
outboundRequestsTotal, err = meter.Int64Counter(
|
||||
"beckn.outbound.requests.total",
|
||||
otelmetric.WithDescription("Total number of outbound requests per host"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
outboundRequests2XX, err = meter.Int64Counter(
|
||||
"beckn.outbound.requests.2xx",
|
||||
otelmetric.WithDescription("Total number of outbound requests with 2XX status code per host"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
outboundRequests4XX, err = meter.Int64Counter(
|
||||
"beckn.outbound.requests.4xx",
|
||||
otelmetric.WithDescription("Total number of outbound requests with 4XX status code per host"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
outboundRequests5XX, err = meter.Int64Counter(
|
||||
"beckn.outbound.requests.5xx",
|
||||
otelmetric.WithDescription("Total number of outbound requests with 5XX status code per host"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Outbound request duration histogram (for p99, p95, p75)
|
||||
outboundRequestDuration, err = meter.Float64Histogram(
|
||||
"beckn.outbound.request.duration",
|
||||
otelmetric.WithDescription("Duration of outbound requests in milliseconds"),
|
||||
otelmetric.WithUnit("ms"),
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RecordInboundRequest records an inbound request.
|
||||
func RecordInboundRequest(ctx context.Context, host string) {
|
||||
if inboundRequestsTotal == nil {
|
||||
return
|
||||
}
|
||||
inboundRequestsTotal.Add(ctx, 1, otelmetric.WithAttributes(
|
||||
attribute.String("host", host),
|
||||
))
|
||||
}
|
||||
|
||||
// RecordInboundSignValidation records an inbound request with sign validation.
|
||||
func RecordInboundSignValidation(ctx context.Context, host string) {
|
||||
if inboundSignValidationTotal == nil {
|
||||
return
|
||||
}
|
||||
inboundSignValidationTotal.Add(ctx, 1, otelmetric.WithAttributes(
|
||||
attribute.String("host", host),
|
||||
))
|
||||
}
|
||||
|
||||
// RecordInboundSchemaValidation records an inbound request with schema validation.
|
||||
func RecordInboundSchemaValidation(ctx context.Context, host string) {
|
||||
if inboundSchemaValidationTotal == nil {
|
||||
return
|
||||
}
|
||||
inboundSchemaValidationTotal.Add(ctx, 1, otelmetric.WithAttributes(
|
||||
attribute.String("host", host),
|
||||
))
|
||||
}
|
||||
|
||||
// RecordOutboundRequest records an outbound request with status code and duration.
|
||||
func RecordOutboundRequest(ctx context.Context, host string, statusCode int, duration time.Duration) {
|
||||
if outboundRequestsTotal == nil {
|
||||
return
|
||||
}
|
||||
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("host", host),
|
||||
attribute.String("status_code", strconv.Itoa(statusCode)),
|
||||
}
|
||||
|
||||
// Record total
|
||||
outboundRequestsTotal.Add(ctx, 1, otelmetric.WithAttributes(attrs...))
|
||||
|
||||
// Record by status code category
|
||||
statusClass := statusCode / 100
|
||||
switch statusClass {
|
||||
case 2:
|
||||
outboundRequests2XX.Add(ctx, 1, otelmetric.WithAttributes(attrs...))
|
||||
case 4:
|
||||
outboundRequests4XX.Add(ctx, 1, otelmetric.WithAttributes(attrs...))
|
||||
case 5:
|
||||
outboundRequests5XX.Add(ctx, 1, otelmetric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
// Record duration for percentile calculations (p99, p95, p75)
|
||||
if outboundRequestDuration != nil {
|
||||
outboundRequestDuration.Record(ctx, float64(duration.Milliseconds()), otelmetric.WithAttributes(attrs...))
|
||||
}
|
||||
}
|
||||
|
||||
// HTTPTransport is an http.RoundTripper decorator that reports every request
// it forwards through RecordOutboundRequest.
type HTTPTransport struct {
	// Transport is the underlying round tripper that performs the request.
	Transport http.RoundTripper
}
|
||||
|
||||
// RoundTrip implements http.RoundTripper interface and tracks metrics.
|
||||
func (t *HTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
start := time.Now()
|
||||
host := req.URL.Host
|
||||
|
||||
resp, err := t.Transport.RoundTrip(req)
|
||||
|
||||
duration := time.Since(start)
|
||||
statusCode := 0
|
||||
if resp != nil {
|
||||
statusCode = resp.StatusCode
|
||||
} else if err != nil {
|
||||
// Network error - treat as 5XX
|
||||
statusCode = 500
|
||||
}
|
||||
|
||||
RecordOutboundRequest(req.Context(), host, statusCode, duration)
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// WrapHTTPTransport wraps an http.RoundTripper with metrics tracking.
|
||||
func WrapHTTPTransport(transport http.RoundTripper) http.RoundTripper {
|
||||
if !IsEnabled() {
|
||||
return transport
|
||||
}
|
||||
return &HTTPTransport{Transport: transport}
|
||||
}
|
||||
@@ -1,346 +0,0 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestInitRequestMetrics(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
enabled bool
|
||||
wantError bool
|
||||
}{
|
||||
{
|
||||
name: "metrics enabled",
|
||||
enabled: true,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
name: "metrics disabled",
|
||||
enabled: false,
|
||||
wantError: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Setup: Initialize metrics with enabled state
|
||||
cfg := Config{
|
||||
Enabled: tt.enabled,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test InitRequestMetrics
|
||||
err = InitRequestMetrics()
|
||||
if tt.wantError {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
Shutdown(context.Background())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordInboundRequest(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
err = InitRequestMetrics()
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx := context.Background()
|
||||
host := "example.com"
|
||||
|
||||
// Test: Record inbound request
|
||||
RecordInboundRequest(ctx, host)
|
||||
|
||||
// Verify: No error should occur
|
||||
// Note: We can't easily verify the metric value without exporting,
|
||||
// but we can verify the function doesn't panic
|
||||
assert.NotPanics(t, func() {
|
||||
RecordInboundRequest(ctx, host)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRecordInboundSignValidation(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
err = InitRequestMetrics()
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx := context.Background()
|
||||
host := "example.com"
|
||||
|
||||
// Test: Record sign validation
|
||||
RecordInboundSignValidation(ctx, host)
|
||||
|
||||
// Verify: No error should occur
|
||||
assert.NotPanics(t, func() {
|
||||
RecordInboundSignValidation(ctx, host)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRecordInboundSchemaValidation(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
err = InitRequestMetrics()
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx := context.Background()
|
||||
host := "example.com"
|
||||
|
||||
// Test: Record schema validation
|
||||
RecordInboundSchemaValidation(ctx, host)
|
||||
|
||||
// Verify: No error should occur
|
||||
assert.NotPanics(t, func() {
|
||||
RecordInboundSchemaValidation(ctx, host)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRecordOutboundRequest(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
err = InitRequestMetrics()
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx := context.Background()
|
||||
host := "example.com"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
statusCode int
|
||||
duration time.Duration
|
||||
}{
|
||||
{
|
||||
name: "2XX status code",
|
||||
statusCode: 200,
|
||||
duration: 100 * time.Millisecond,
|
||||
},
|
||||
{
|
||||
name: "4XX status code",
|
||||
statusCode: 404,
|
||||
duration: 50 * time.Millisecond,
|
||||
},
|
||||
{
|
||||
name: "5XX status code",
|
||||
statusCode: 500,
|
||||
duration: 200 * time.Millisecond,
|
||||
},
|
||||
{
|
||||
name: "3XX status code",
|
||||
statusCode: 301,
|
||||
duration: 75 * time.Millisecond,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Test: Record outbound request
|
||||
RecordOutboundRequest(ctx, host, tt.statusCode, tt.duration)
|
||||
|
||||
// Verify: No error should occur
|
||||
assert.NotPanics(t, func() {
|
||||
RecordOutboundRequest(ctx, host, tt.statusCode, tt.duration)
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPTransport_RoundTrip(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
err = InitRequestMetrics()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create a test server
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("OK"))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
// Create transport wrapper
|
||||
transport := &HTTPTransport{
|
||||
Transport: http.DefaultTransport,
|
||||
}
|
||||
|
||||
// Create request
|
||||
req, err := http.NewRequest("GET", server.URL, nil)
|
||||
require.NoError(t, err)
|
||||
req = req.WithContext(context.Background())
|
||||
|
||||
// Test: RoundTrip should track metrics
|
||||
resp, err := transport.RoundTrip(req)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp)
|
||||
assert.Equal(t, http.StatusOK, resp.StatusCode)
|
||||
|
||||
// Verify: Metrics should be recorded
|
||||
assert.NotPanics(t, func() {
|
||||
resp, err = transport.RoundTrip(req)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, resp)
|
||||
})
|
||||
}
|
||||
|
||||
func TestHTTPTransport_RoundTrip_Error(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
err = InitRequestMetrics()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create transport with invalid URL to cause error
|
||||
transport := &HTTPTransport{
|
||||
Transport: http.DefaultTransport,
|
||||
}
|
||||
|
||||
// Create request with invalid URL
|
||||
req, err := http.NewRequest("GET", "http://invalid-host-that-does-not-exist:9999", nil)
|
||||
require.NoError(t, err)
|
||||
req = req.WithContext(context.Background())
|
||||
|
||||
// Test: RoundTrip should handle error and still record metrics
|
||||
resp, err := transport.RoundTrip(req)
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, resp)
|
||||
|
||||
// Verify: Metrics should still be recorded (with 500 status)
|
||||
assert.NotPanics(t, func() {
|
||||
_, _ = transport.RoundTrip(req)
|
||||
})
|
||||
}
|
||||
|
||||
func TestWrapHTTPTransport_Enabled(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
// Create a new transport
|
||||
transport := http.DefaultTransport.(*http.Transport).Clone()
|
||||
|
||||
// Test: Wrap transport
|
||||
wrapped := WrapHTTPTransport(transport)
|
||||
|
||||
// Verify: Should be wrapped
|
||||
assert.NotEqual(t, transport, wrapped)
|
||||
_, ok := wrapped.(*HTTPTransport)
|
||||
assert.True(t, ok, "Should be wrapped with HTTPTransport")
|
||||
}
|
||||
|
||||
func TestWrapHTTPTransport_Disabled(t *testing.T) {
|
||||
// Setup: Initialize metrics with disabled state
|
||||
cfg := Config{
|
||||
Enabled: false,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
// Create a new transport
|
||||
transport := http.DefaultTransport.(*http.Transport).Clone()
|
||||
|
||||
// Test: Wrap transport when metrics disabled
|
||||
wrapped := WrapHTTPTransport(transport)
|
||||
|
||||
// Verify: When metrics are disabled, IsEnabled() returns false
|
||||
// So WrapHTTPTransport should return the original transport
|
||||
// Note: This test verifies the behavior when IsEnabled() returns false
|
||||
if !IsEnabled() {
|
||||
assert.Equal(t, transport, wrapped, "Should return original transport when metrics disabled")
|
||||
} else {
|
||||
// If metrics are still enabled from previous test, just verify it doesn't panic
|
||||
assert.NotNil(t, wrapped)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordInboundRequest_WhenDisabled(t *testing.T) {
|
||||
// Setup: Metrics disabled
|
||||
cfg := Config{
|
||||
Enabled: false,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
ctx := context.Background()
|
||||
host := "example.com"
|
||||
|
||||
// Test: Should not panic when metrics are disabled
|
||||
assert.NotPanics(t, func() {
|
||||
RecordInboundRequest(ctx, host)
|
||||
RecordInboundSignValidation(ctx, host)
|
||||
RecordInboundSchemaValidation(ctx, host)
|
||||
RecordOutboundRequest(ctx, host, 200, time.Second)
|
||||
})
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
otelruntime "go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
)
|
||||
|
||||
// InitRuntimeMetrics initializes Go runtime metrics instrumentation.
|
||||
// This includes CPU, memory, GC, and goroutine metrics.
|
||||
// The runtime instrumentation automatically collects:
|
||||
// - CPU usage (go_cpu_*)
|
||||
// - Memory allocation and heap stats (go_memstats_*)
|
||||
// - GC statistics (go_memstats_gc_*)
|
||||
// - Goroutine count (go_goroutines)
|
||||
func InitRuntimeMetrics() error {
|
||||
if !IsEnabled() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Start OpenTelemetry runtime metrics collection
|
||||
// This automatically collects Go runtime metrics
|
||||
err := otelruntime.Start(otelruntime.WithMinimumReadMemStatsInterval(0))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,91 +0,0 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestInitRuntimeMetrics(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
enabled bool
|
||||
wantError bool
|
||||
}{
|
||||
{
|
||||
name: "metrics enabled",
|
||||
enabled: true,
|
||||
wantError: false,
|
||||
},
|
||||
{
|
||||
name: "metrics disabled",
|
||||
enabled: false,
|
||||
wantError: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Setup: Initialize metrics with enabled state
|
||||
cfg := Config{
|
||||
Enabled: tt.enabled,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test InitRuntimeMetrics
|
||||
err = InitRuntimeMetrics()
|
||||
if tt.wantError {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
Shutdown(context.Background())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitRuntimeMetrics_MultipleCalls(t *testing.T) {
|
||||
// Setup
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
// Test: Multiple calls should not cause errors
|
||||
err = InitRuntimeMetrics()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Note: Second call might fail if runtime.Start is already called,
|
||||
// but that's expected behavior
|
||||
err = InitRuntimeMetrics()
|
||||
// We don't assert on error here as it depends on internal state
|
||||
_ = err
|
||||
}
|
||||
|
||||
func TestInitRuntimeMetrics_WhenDisabled(t *testing.T) {
|
||||
// Setup: Metrics disabled
|
||||
cfg := Config{
|
||||
Enabled: false,
|
||||
ExporterType: ExporterPrometheus,
|
||||
ServiceName: "test-service",
|
||||
}
|
||||
err := InitMetrics(cfg)
|
||||
require.NoError(t, err)
|
||||
defer Shutdown(context.Background())
|
||||
|
||||
// Test: Should return nil without error when disabled
|
||||
err = InitRuntimeMetrics()
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
55
pkg/plugin/implementation/cache/cache.go
vendored
55
pkg/plugin/implementation/cache/cache.go
vendored
@@ -7,7 +7,11 @@ import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/telemetry"
|
||||
"github.com/redis/go-redis/extra/redisotel/v9"
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
@@ -32,7 +36,8 @@ type Config struct {
|
||||
|
||||
// Cache wraps a Redis client to provide basic caching operations.
|
||||
type Cache struct {
|
||||
Client RedisClient
|
||||
Client RedisClient
|
||||
metrics *telemetry.Metrics
|
||||
}
|
||||
|
||||
// Error variables to describe common failure modes.
|
||||
@@ -92,26 +97,66 @@ func New(ctx context.Context, cfg *Config) (*Cache, func() error, error) {
|
||||
}
|
||||
}
|
||||
|
||||
metrics, _ := telemetry.GetMetrics(ctx)
|
||||
|
||||
log.Infof(ctx, "Cache connection to Redis established successfully")
|
||||
return &Cache{Client: client}, client.Close, nil
|
||||
return &Cache{Client: client, metrics: metrics}, client.Close, nil
|
||||
}
|
||||
|
||||
// Get retrieves the value for the specified key from Redis.
|
||||
func (c *Cache) Get(ctx context.Context, key string) (string, error) {
|
||||
return c.Client.Get(ctx, key).Result()
|
||||
result, err := c.Client.Get(ctx, key).Result()
|
||||
if c.metrics != nil {
|
||||
attrs := []attribute.KeyValue{
|
||||
telemetry.AttrOperation.String("get"),
|
||||
}
|
||||
switch {
|
||||
case err == redis.Nil:
|
||||
c.metrics.CacheMissesTotal.Add(ctx, 1, metric.WithAttributes(attrs...))
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("miss"))...))
|
||||
case err != nil:
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("error"))...))
|
||||
default:
|
||||
c.metrics.CacheHitsTotal.Add(ctx, 1, metric.WithAttributes(attrs...))
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("hit"))...))
|
||||
}
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
|
||||
// Set stores the given key-value pair in Redis with the specified TTL (time to live).
|
||||
func (c *Cache) Set(ctx context.Context, key, value string, ttl time.Duration) error {
|
||||
return c.Client.Set(ctx, key, value, ttl).Err()
|
||||
err := c.Client.Set(ctx, key, value, ttl).Err()
|
||||
c.recordOperation(ctx, "set", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Delete removes the specified key from Redis.
|
||||
func (c *Cache) Delete(ctx context.Context, key string) error {
|
||||
return c.Client.Del(ctx, key).Err()
|
||||
err := c.Client.Del(ctx, key).Err()
|
||||
c.recordOperation(ctx, "delete", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Clear removes all keys in the currently selected Redis database.
|
||||
func (c *Cache) Clear(ctx context.Context) error {
|
||||
return c.Client.FlushDB(ctx).Err()
|
||||
}
|
||||
|
||||
func (c *Cache) recordOperation(ctx context.Context, op string, err error) {
|
||||
if c.metrics == nil {
|
||||
return
|
||||
}
|
||||
status := "success"
|
||||
if err != nil {
|
||||
status = "error"
|
||||
}
|
||||
c.metrics.CacheOperationsTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(
|
||||
telemetry.AttrOperation.String(op),
|
||||
telemetry.AttrStatus.String(status),
|
||||
))
|
||||
}
|
||||
|
||||
21
pkg/plugin/implementation/otelmetrics/cmd/plugin.go
Normal file
21
pkg/plugin/implementation/otelmetrics/cmd/plugin.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelmetrics"
|
||||
)
|
||||
|
||||
// middlewareProvider is the stateless type behind the exported Provider value;
// its New method builds the otelmetrics HTTP middleware.
type middlewareProvider struct{}
|
||||
|
||||
func (middlewareProvider) New(ctx context.Context, cfg map[string]string) (func(http.Handler) http.Handler, error) {
|
||||
mw, err := otelmetrics.New(ctx, cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return mw.Handler, nil
|
||||
}
|
||||
|
||||
// Provider is exported for plugin loader.
|
||||
var Provider = middlewareProvider{}
|
||||
134
pkg/plugin/implementation/otelmetrics/otelmetrics.go
Normal file
134
pkg/plugin/implementation/otelmetrics/otelmetrics.go
Normal file
@@ -0,0 +1,134 @@
|
||||
package otelmetrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/telemetry"
|
||||
)
|
||||
|
||||
// Middleware instruments inbound HTTP handlers with OpenTelemetry metrics.
|
||||
type Middleware struct {
|
||||
metrics *telemetry.Metrics
|
||||
enabled bool
|
||||
}
|
||||
|
||||
// New constructs middleware based on plugin configuration.
|
||||
func New(ctx context.Context, cfg map[string]string) (*Middleware, error) {
|
||||
enabled := cfg["enabled"] != "false"
|
||||
|
||||
metrics, err := telemetry.GetMetrics(ctx)
|
||||
if err != nil {
|
||||
log.Warnf(ctx, "OpenTelemetry metrics unavailable: %v", err)
|
||||
}
|
||||
|
||||
return &Middleware{
|
||||
metrics: metrics,
|
||||
enabled: enabled,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Handler returns an http.Handler middleware compatible with plugin expectations.
|
||||
func (m *Middleware) Handler(next http.Handler) http.Handler {
|
||||
if !m.enabled || m.metrics == nil {
|
||||
return next
|
||||
}
|
||||
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
action := extractAction(r.URL.Path)
|
||||
module := r.Header.Get("X-Module-Name")
|
||||
role := r.Header.Get("X-Role")
|
||||
|
||||
attrs := []attribute.KeyValue{
|
||||
telemetry.AttrModule.String(module),
|
||||
telemetry.AttrRole.String(role),
|
||||
telemetry.AttrAction.String(action),
|
||||
telemetry.AttrHTTPMethod.String(r.Method),
|
||||
}
|
||||
|
||||
m.metrics.HTTPRequestsInFlight.Add(ctx, 1, metric.WithAttributes(attrs...))
|
||||
defer m.metrics.HTTPRequestsInFlight.Add(ctx, -1, metric.WithAttributes(attrs...))
|
||||
|
||||
if r.ContentLength > 0 {
|
||||
m.metrics.HTTPRequestSize.Record(ctx, r.ContentLength, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
|
||||
start := time.Now()
|
||||
next.ServeHTTP(rw, r)
|
||||
duration := time.Since(start).Seconds()
|
||||
|
||||
status := "success"
|
||||
if rw.statusCode >= 400 {
|
||||
status = "error"
|
||||
}
|
||||
|
||||
statusAttrs := append(attrs,
|
||||
telemetry.AttrHTTPStatus.Int(rw.statusCode),
|
||||
telemetry.AttrStatus.String(status),
|
||||
)
|
||||
|
||||
m.metrics.HTTPRequestsTotal.Add(ctx, 1, metric.WithAttributes(statusAttrs...))
|
||||
m.metrics.HTTPRequestDuration.Record(ctx, duration, metric.WithAttributes(statusAttrs...))
|
||||
if rw.bytesWritten > 0 {
|
||||
m.metrics.HTTPResponseSize.Record(ctx, int64(rw.bytesWritten), metric.WithAttributes(statusAttrs...))
|
||||
}
|
||||
|
||||
if isBecknAction(action) {
|
||||
m.metrics.BecknMessagesTotal.Add(ctx, 1,
|
||||
metric.WithAttributes(
|
||||
telemetry.AttrAction.String(action),
|
||||
telemetry.AttrRole.String(role),
|
||||
telemetry.AttrStatus.String(status),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// responseWriter wraps an http.ResponseWriter and remembers the response
// status and the number of body bytes written so they can be reported as
// metrics after the handler returns.
type responseWriter struct {
	http.ResponseWriter
	// statusCode is the final status recorded for the response.
	statusCode int
	// bytesWritten is the running total of bytes accepted by Write.
	bytesWritten int
	// wroteHeader is true once the final status has been decided, either by
	// WriteHeader or implicitly by the first Write.
	wroteHeader bool
}

// WriteHeader forwards code to the wrapped writer and records it as the
// response status.
//
// Only the first final status is recorded: later calls are still forwarded
// (so the wrapped writer can report them as it sees fit) but do not overwrite
// the recorded value. Informational 1xx codes other than 101 are forwarded
// without being recorded, because a final status still follows them.
func (rw *responseWriter) WriteHeader(code int) {
	informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.statusCode = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the wrapped writer and adds the number of bytes it
// accepted to bytesWritten. It returns the wrapped writer's result unchanged.
func (rw *responseWriter) Write(b []byte) (int, error) {
	if !rw.wroteHeader {
		// A Write before WriteHeader sends an implicit 200, so the status is
		// final from here on.
		rw.wroteHeader = true
		if rw.statusCode == 0 {
			rw.statusCode = http.StatusOK
		}
	}
	n, err := rw.ResponseWriter.Write(b)
	rw.bytesWritten += n
	return n, err
}

// Unwrap returns the wrapped writer so that http.ResponseController can reach
// optional interfaces of the underlying writer.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}
|
||||
|
||||
// extractAction derives the action label from a request path: the text after
// the last "/" once leading and trailing slashes have been removed. A path
// that is empty or consists only of slashes yields "root".
func extractAction(path string) string {
	clean := strings.Trim(path, "/")
	if len(clean) == 0 {
		return "root"
	}
	// LastIndexByte returns -1 when there is no slash, so +1 selects the
	// whole string in that case.
	return clean[strings.LastIndexByte(clean, '/')+1:]
}
|
||||
|
||||
// isBecknAction reports whether action exactly matches one of the Beckn
// request actions listed below or its "on_" callback counterpart. The match
// is case-sensitive.
func isBecknAction(action string) bool {
	switch action {
	case "discover", "select", "init", "confirm", "status", "track",
		"cancel", "update", "rating", "support",
		"on_discover", "on_select", "on_init", "on_confirm", "on_status",
		"on_track", "on_cancel", "on_update", "on_rating", "on_support":
		return true
	default:
		return false
	}
}
|
||||
@@ -681,4 +681,4 @@ func TestExcludeActionWithNonURLTargetTypes(t *testing.T) {
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
222
pkg/telemetry/metrics.go
Normal file
222
pkg/telemetry/metrics.go
Normal file
@@ -0,0 +1,222 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
)
|
||||
|
||||
// Metrics exposes strongly typed metric instruments used across the adapter.
|
||||
type Metrics struct {
|
||||
HTTPRequestsTotal metric.Int64Counter
|
||||
HTTPRequestDuration metric.Float64Histogram
|
||||
HTTPRequestsInFlight metric.Int64UpDownCounter
|
||||
HTTPRequestSize metric.Int64Histogram
|
||||
HTTPResponseSize metric.Int64Histogram
|
||||
|
||||
StepExecutionDuration metric.Float64Histogram
|
||||
StepExecutionTotal metric.Int64Counter
|
||||
StepErrorsTotal metric.Int64Counter
|
||||
|
||||
PluginExecutionDuration metric.Float64Histogram
|
||||
PluginErrorsTotal metric.Int64Counter
|
||||
|
||||
BecknMessagesTotal metric.Int64Counter
|
||||
SignatureValidationsTotal metric.Int64Counter
|
||||
SchemaValidationsTotal metric.Int64Counter
|
||||
CacheOperationsTotal metric.Int64Counter
|
||||
CacheHitsTotal metric.Int64Counter
|
||||
CacheMissesTotal metric.Int64Counter
|
||||
RoutingDecisionsTotal metric.Int64Counter
|
||||
}
|
||||
|
||||
var (
|
||||
metricsInstance *Metrics
|
||||
metricsOnce sync.Once
|
||||
metricsErr error
|
||||
)
|
||||
|
||||
// Attribute keys shared across instruments.
|
||||
var (
|
||||
AttrModule = attribute.Key("module")
|
||||
AttrSubsystem = attribute.Key("subsystem")
|
||||
AttrName = attribute.Key("name")
|
||||
AttrStep = attribute.Key("step")
|
||||
AttrRole = attribute.Key("role")
|
||||
AttrAction = attribute.Key("action")
|
||||
AttrHTTPMethod = attribute.Key("http_method")
|
||||
AttrHTTPStatus = attribute.Key("http_status_code")
|
||||
AttrStatus = attribute.Key("status")
|
||||
AttrErrorType = attribute.Key("error_type")
|
||||
AttrPluginID = attribute.Key("plugin_id")
|
||||
AttrPluginType = attribute.Key("plugin_type")
|
||||
AttrOperation = attribute.Key("operation")
|
||||
AttrRouteType = attribute.Key("route_type")
|
||||
AttrTargetType = attribute.Key("target_type")
|
||||
AttrSchemaVersion = attribute.Key("schema_version")
|
||||
)
|
||||
|
||||
// GetMetrics lazily initializes instruments and returns a cached reference.
|
||||
func GetMetrics(ctx context.Context) (*Metrics, error) {
|
||||
metricsOnce.Do(func() {
|
||||
metricsInstance, metricsErr = newMetrics()
|
||||
})
|
||||
return metricsInstance, metricsErr
|
||||
}
|
||||
|
||||
func newMetrics() (*Metrics, error) {
|
||||
meter := otel.GetMeterProvider().Meter(
|
||||
"github.com/beckn-one/beckn-onix/telemetry",
|
||||
metric.WithInstrumentationVersion("1.0.0"),
|
||||
)
|
||||
|
||||
m := &Metrics{}
|
||||
var err error
|
||||
|
||||
if m.HTTPRequestsTotal, err = meter.Int64Counter(
|
||||
"http_server_requests_total",
|
||||
metric.WithDescription("Total number of HTTP requests processed"),
|
||||
metric.WithUnit("{request}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_requests_total: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPRequestDuration, err = meter.Float64Histogram(
|
||||
"http_server_request_duration_seconds",
|
||||
metric.WithDescription("HTTP request duration in seconds"),
|
||||
metric.WithUnit("s"),
|
||||
metric.WithExplicitBucketBoundaries(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_request_duration_seconds: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPRequestsInFlight, err = meter.Int64UpDownCounter(
|
||||
"http_server_requests_in_flight",
|
||||
metric.WithDescription("Number of HTTP requests currently being processed"),
|
||||
metric.WithUnit("{request}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_requests_in_flight: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPRequestSize, err = meter.Int64Histogram(
|
||||
"http_server_request_size_bytes",
|
||||
metric.WithDescription("Size of HTTP request payloads"),
|
||||
metric.WithUnit("By"),
|
||||
metric.WithExplicitBucketBoundaries(100, 1000, 10000, 100000, 1000000),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_request_size_bytes: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPResponseSize, err = meter.Int64Histogram(
|
||||
"http_server_response_size_bytes",
|
||||
metric.WithDescription("Size of HTTP responses"),
|
||||
metric.WithUnit("By"),
|
||||
metric.WithExplicitBucketBoundaries(100, 1000, 10000, 100000, 1000000),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_response_size_bytes: %w", err)
|
||||
}
|
||||
|
||||
if m.StepExecutionDuration, err = meter.Float64Histogram(
|
||||
"onix_step_execution_duration_seconds",
|
||||
metric.WithDescription("Duration of individual processing steps"),
|
||||
metric.WithUnit("s"),
|
||||
metric.WithExplicitBucketBoundaries(0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_step_execution_duration_seconds: %w", err)
|
||||
}
|
||||
|
||||
if m.StepExecutionTotal, err = meter.Int64Counter(
|
||||
"onix_step_executions_total",
|
||||
metric.WithDescription("Total processing step executions"),
|
||||
metric.WithUnit("{execution}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_step_executions_total: %w", err)
|
||||
}
|
||||
|
||||
if m.StepErrorsTotal, err = meter.Int64Counter(
|
||||
"onix_step_errors_total",
|
||||
metric.WithDescription("Processing step errors"),
|
||||
metric.WithUnit("{error}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_step_errors_total: %w", err)
|
||||
}
|
||||
|
||||
if m.PluginExecutionDuration, err = meter.Float64Histogram(
|
||||
"onix_plugin_execution_duration_seconds",
|
||||
metric.WithDescription("Plugin execution time"),
|
||||
metric.WithUnit("s"),
|
||||
metric.WithExplicitBucketBoundaries(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_plugin_execution_duration_seconds: %w", err)
|
||||
}
|
||||
|
||||
if m.PluginErrorsTotal, err = meter.Int64Counter(
|
||||
"onix_plugin_errors_total",
|
||||
metric.WithDescription("Plugin level errors"),
|
||||
metric.WithUnit("{error}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_plugin_errors_total: %w", err)
|
||||
}
|
||||
|
||||
if m.BecknMessagesTotal, err = meter.Int64Counter(
|
||||
"beckn_messages_total",
|
||||
metric.WithDescription("Total Beckn protocol messages processed"),
|
||||
metric.WithUnit("{message}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("beckn_messages_total: %w", err)
|
||||
}
|
||||
|
||||
if m.SignatureValidationsTotal, err = meter.Int64Counter(
|
||||
"beckn_signature_validations_total",
|
||||
metric.WithDescription("Signature validation attempts"),
|
||||
metric.WithUnit("{validation}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("beckn_signature_validations_total: %w", err)
|
||||
}
|
||||
|
||||
if m.SchemaValidationsTotal, err = meter.Int64Counter(
|
||||
"beckn_schema_validations_total",
|
||||
metric.WithDescription("Schema validation attempts"),
|
||||
metric.WithUnit("{validation}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("beckn_schema_validations_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheOperationsTotal, err = meter.Int64Counter(
|
||||
"onix_cache_operations_total",
|
||||
metric.WithDescription("Redis cache operations"),
|
||||
metric.WithUnit("{operation}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_operations_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheHitsTotal, err = meter.Int64Counter(
|
||||
"onix_cache_hits_total",
|
||||
metric.WithDescription("Redis cache hits"),
|
||||
metric.WithUnit("{hit}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_hits_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheMissesTotal, err = meter.Int64Counter(
|
||||
"onix_cache_misses_total",
|
||||
metric.WithDescription("Redis cache misses"),
|
||||
metric.WithUnit("{miss}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_misses_total: %w", err)
|
||||
}
|
||||
|
||||
if m.RoutingDecisionsTotal, err = meter.Int64Counter(
|
||||
"onix_routing_decisions_total",
|
||||
metric.WithDescription("Routing decisions taken by handler"),
|
||||
metric.WithUnit("{decision}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_routing_decisions_total: %w", err)
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
33
pkg/telemetry/metrics_test.go
Normal file
33
pkg/telemetry/metrics_test.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewProviderAndMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
provider, err := NewProvider(ctx, &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, provider)
|
||||
require.NotNil(t, provider.MetricsHandler)
|
||||
|
||||
metrics, err := GetMetrics(ctx)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, metrics)
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest("GET", "/metrics", nil)
|
||||
provider.MetricsHandler.ServeHTTP(rec, req)
|
||||
require.Equal(t, 200, rec.Code)
|
||||
|
||||
require.NoError(t, provider.Shutdown(context.Background()))
|
||||
}
|
||||
78
pkg/telemetry/step_instrumentor.go
Normal file
78
pkg/telemetry/step_instrumentor.go
Normal file
@@ -0,0 +1,78 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/model"
|
||||
"github.com/beckn-one/beckn-onix/pkg/plugin/definition"
|
||||
)
|
||||
|
||||
// InstrumentedStep wraps a processing step with telemetry instrumentation.
|
||||
type InstrumentedStep struct {
|
||||
step definition.Step
|
||||
stepName string
|
||||
moduleName string
|
||||
metrics *Metrics
|
||||
}
|
||||
|
||||
// NewInstrumentedStep returns a telemetry enabled wrapper around a definition.Step.
|
||||
func NewInstrumentedStep(step definition.Step, stepName, moduleName string) (*InstrumentedStep, error) {
|
||||
metrics, err := GetMetrics(context.Background())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &InstrumentedStep{
|
||||
step: step,
|
||||
stepName: stepName,
|
||||
moduleName: moduleName,
|
||||
metrics: metrics,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type becknError interface {
|
||||
BecknError() *model.Error
|
||||
}
|
||||
|
||||
// Run executes the underlying step and records RED style metrics.
|
||||
func (is *InstrumentedStep) Run(ctx *model.StepContext) error {
|
||||
if is.metrics == nil {
|
||||
return is.step.Run(ctx)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
err := is.step.Run(ctx)
|
||||
duration := time.Since(start).Seconds()
|
||||
|
||||
attrs := []attribute.KeyValue{
|
||||
AttrModule.String(is.moduleName),
|
||||
AttrStep.String(is.stepName),
|
||||
AttrRole.String(string(ctx.Role)),
|
||||
}
|
||||
|
||||
is.metrics.StepExecutionTotal.Add(ctx.Context, 1, metric.WithAttributes(attrs...))
|
||||
is.metrics.StepExecutionDuration.Record(ctx.Context, duration, metric.WithAttributes(attrs...))
|
||||
|
||||
if err != nil {
|
||||
errorType := fmt.Sprintf("%T", err)
|
||||
var becknErr becknError
|
||||
if errors.As(err, &becknErr) {
|
||||
if be := becknErr.BecknError(); be != nil && be.Code != "" {
|
||||
errorType = be.Code
|
||||
}
|
||||
}
|
||||
|
||||
errorAttrs := append(attrs, AttrErrorType.String(errorType))
|
||||
is.metrics.StepErrorsTotal.Add(ctx.Context, 1, metric.WithAttributes(errorAttrs...))
|
||||
log.Errorf(ctx.Context, err, "Step %s failed", is.stepName)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
60
pkg/telemetry/step_instrumentor_test.go
Normal file
60
pkg/telemetry/step_instrumentor_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/model"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type stubStep struct {
|
||||
err error
|
||||
}
|
||||
|
||||
func (s stubStep) Run(ctx *model.StepContext) error {
|
||||
return s.err
|
||||
}
|
||||
|
||||
func TestInstrumentedStepSuccess(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
provider, err := NewProvider(ctx, &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer provider.Shutdown(context.Background())
|
||||
|
||||
step, err := NewInstrumentedStep(stubStep{}, "test-step", "test-module")
|
||||
require.NoError(t, err)
|
||||
|
||||
stepCtx := &model.StepContext{
|
||||
Context: context.Background(),
|
||||
Role: model.RoleBAP,
|
||||
}
|
||||
require.NoError(t, step.Run(stepCtx))
|
||||
}
|
||||
|
||||
func TestInstrumentedStepError(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
provider, err := NewProvider(ctx, &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer provider.Shutdown(context.Background())
|
||||
|
||||
step, err := NewInstrumentedStep(stubStep{err: errors.New("boom")}, "test-step", "test-module")
|
||||
require.NoError(t, err)
|
||||
|
||||
stepCtx := &model.StepContext{
|
||||
Context: context.Background(),
|
||||
Role: model.RoleBAP,
|
||||
}
|
||||
require.Error(t, step.Run(stepCtx))
|
||||
}
|
||||
110
pkg/telemetry/telemetry.go
Normal file
110
pkg/telemetry/telemetry.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
clientprom "github.com/prometheus/client_golang/prometheus"
|
||||
clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
)
|
||||
|
||||
// Config represents OpenTelemetry related configuration.
|
||||
type Config struct {
|
||||
ServiceName string `yaml:"serviceName"`
|
||||
ServiceVersion string `yaml:"serviceVersion"`
|
||||
EnableMetrics bool `yaml:"enableMetrics"`
|
||||
Environment string `yaml:"environment"`
|
||||
}
|
||||
|
||||
// Provider holds references to telemetry components that need coordinated shutdown.
|
||||
type Provider struct {
|
||||
MeterProvider *metric.MeterProvider
|
||||
MetricsHandler http.Handler
|
||||
Shutdown func(context.Context) error
|
||||
}
|
||||
|
||||
// DefaultConfig returns sensible defaults for telemetry configuration.
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
ServiceName: "beckn-onix",
|
||||
ServiceVersion: "dev",
|
||||
EnableMetrics: true,
|
||||
Environment: "development",
|
||||
}
|
||||
}
|
||||
|
||||
// NewProvider wires OpenTelemetry with a Prometheus exporter and exposes /metrics handler.
|
||||
func NewProvider(ctx context.Context, cfg *Config) (*Provider, error) {
|
||||
if cfg == nil {
|
||||
cfg = DefaultConfig()
|
||||
}
|
||||
if cfg.ServiceName == "" {
|
||||
cfg.ServiceName = DefaultConfig().ServiceName
|
||||
}
|
||||
if cfg.ServiceVersion == "" {
|
||||
cfg.ServiceVersion = DefaultConfig().ServiceVersion
|
||||
}
|
||||
if cfg.Environment == "" {
|
||||
cfg.Environment = DefaultConfig().Environment
|
||||
}
|
||||
|
||||
if !cfg.EnableMetrics {
|
||||
log.Info(ctx, "OpenTelemetry metrics disabled")
|
||||
return &Provider{
|
||||
Shutdown: func(context.Context) error { return nil },
|
||||
}, nil
|
||||
}
|
||||
|
||||
res, err := resource.New(
|
||||
ctx,
|
||||
resource.WithAttributes(
|
||||
attribute.String("service.name", cfg.ServiceName),
|
||||
attribute.String("service.version", cfg.ServiceVersion),
|
||||
attribute.String("deployment.environment", cfg.Environment),
|
||||
),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create telemetry resource: %w", err)
|
||||
}
|
||||
|
||||
registry := clientprom.NewRegistry()
|
||||
|
||||
exporter, err := otelprom.New(
|
||||
otelprom.WithRegisterer(registry),
|
||||
otelprom.WithoutUnits(),
|
||||
otelprom.WithoutScopeInfo(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create prometheus exporter: %w", err)
|
||||
}
|
||||
|
||||
meterProvider := metric.NewMeterProvider(
|
||||
metric.WithReader(exporter),
|
||||
metric.WithResource(res),
|
||||
)
|
||||
|
||||
otel.SetMeterProvider(meterProvider)
|
||||
log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s",
|
||||
cfg.ServiceName, cfg.ServiceVersion, cfg.Environment)
|
||||
|
||||
if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(0)); err != nil {
|
||||
log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err)
|
||||
}
|
||||
|
||||
return &Provider{
|
||||
MeterProvider: meterProvider,
|
||||
MetricsHandler: clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}),
|
||||
Shutdown: func(ctx context.Context) error {
|
||||
return meterProvider.Shutdown(ctx)
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user