make changes as per the doc

This commit is contained in:
Manendra Pal Singh
2025-11-21 16:01:59 +05:30
parent bf2c132ab3
commit 8ef4904076
28 changed files with 937 additions and 1062 deletions

222
pkg/telemetry/metrics.go Normal file
View File

@@ -0,0 +1,222 @@
package telemetry
import (
"context"
"fmt"
"sync"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// Metrics exposes strongly typed metric instruments used across the adapter.
type Metrics struct {
HTTPRequestsTotal metric.Int64Counter
HTTPRequestDuration metric.Float64Histogram
HTTPRequestsInFlight metric.Int64UpDownCounter
HTTPRequestSize metric.Int64Histogram
HTTPResponseSize metric.Int64Histogram
StepExecutionDuration metric.Float64Histogram
StepExecutionTotal metric.Int64Counter
StepErrorsTotal metric.Int64Counter
PluginExecutionDuration metric.Float64Histogram
PluginErrorsTotal metric.Int64Counter
BecknMessagesTotal metric.Int64Counter
SignatureValidationsTotal metric.Int64Counter
SchemaValidationsTotal metric.Int64Counter
CacheOperationsTotal metric.Int64Counter
CacheHitsTotal metric.Int64Counter
CacheMissesTotal metric.Int64Counter
RoutingDecisionsTotal metric.Int64Counter
}
var (
metricsInstance *Metrics
metricsOnce sync.Once
metricsErr error
)
// Attribute keys shared across instruments.
var (
AttrModule = attribute.Key("module")
AttrSubsystem = attribute.Key("subsystem")
AttrName = attribute.Key("name")
AttrStep = attribute.Key("step")
AttrRole = attribute.Key("role")
AttrAction = attribute.Key("action")
AttrHTTPMethod = attribute.Key("http_method")
AttrHTTPStatus = attribute.Key("http_status_code")
AttrStatus = attribute.Key("status")
AttrErrorType = attribute.Key("error_type")
AttrPluginID = attribute.Key("plugin_id")
AttrPluginType = attribute.Key("plugin_type")
AttrOperation = attribute.Key("operation")
AttrRouteType = attribute.Key("route_type")
AttrTargetType = attribute.Key("target_type")
AttrSchemaVersion = attribute.Key("schema_version")
)
// GetMetrics lazily initializes instruments and returns a cached reference.
func GetMetrics(ctx context.Context) (*Metrics, error) {
metricsOnce.Do(func() {
metricsInstance, metricsErr = newMetrics()
})
return metricsInstance, metricsErr
}
func newMetrics() (*Metrics, error) {
meter := otel.GetMeterProvider().Meter(
"github.com/beckn-one/beckn-onix/telemetry",
metric.WithInstrumentationVersion("1.0.0"),
)
m := &Metrics{}
var err error
if m.HTTPRequestsTotal, err = meter.Int64Counter(
"http_server_requests_total",
metric.WithDescription("Total number of HTTP requests processed"),
metric.WithUnit("{request}"),
); err != nil {
return nil, fmt.Errorf("http_server_requests_total: %w", err)
}
if m.HTTPRequestDuration, err = meter.Float64Histogram(
"http_server_request_duration_seconds",
metric.WithDescription("HTTP request duration in seconds"),
metric.WithUnit("s"),
metric.WithExplicitBucketBoundaries(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10),
); err != nil {
return nil, fmt.Errorf("http_server_request_duration_seconds: %w", err)
}
if m.HTTPRequestsInFlight, err = meter.Int64UpDownCounter(
"http_server_requests_in_flight",
metric.WithDescription("Number of HTTP requests currently being processed"),
metric.WithUnit("{request}"),
); err != nil {
return nil, fmt.Errorf("http_server_requests_in_flight: %w", err)
}
if m.HTTPRequestSize, err = meter.Int64Histogram(
"http_server_request_size_bytes",
metric.WithDescription("Size of HTTP request payloads"),
metric.WithUnit("By"),
metric.WithExplicitBucketBoundaries(100, 1000, 10000, 100000, 1000000),
); err != nil {
return nil, fmt.Errorf("http_server_request_size_bytes: %w", err)
}
if m.HTTPResponseSize, err = meter.Int64Histogram(
"http_server_response_size_bytes",
metric.WithDescription("Size of HTTP responses"),
metric.WithUnit("By"),
metric.WithExplicitBucketBoundaries(100, 1000, 10000, 100000, 1000000),
); err != nil {
return nil, fmt.Errorf("http_server_response_size_bytes: %w", err)
}
if m.StepExecutionDuration, err = meter.Float64Histogram(
"onix_step_execution_duration_seconds",
metric.WithDescription("Duration of individual processing steps"),
metric.WithUnit("s"),
metric.WithExplicitBucketBoundaries(0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5),
); err != nil {
return nil, fmt.Errorf("onix_step_execution_duration_seconds: %w", err)
}
if m.StepExecutionTotal, err = meter.Int64Counter(
"onix_step_executions_total",
metric.WithDescription("Total processing step executions"),
metric.WithUnit("{execution}"),
); err != nil {
return nil, fmt.Errorf("onix_step_executions_total: %w", err)
}
if m.StepErrorsTotal, err = meter.Int64Counter(
"onix_step_errors_total",
metric.WithDescription("Processing step errors"),
metric.WithUnit("{error}"),
); err != nil {
return nil, fmt.Errorf("onix_step_errors_total: %w", err)
}
if m.PluginExecutionDuration, err = meter.Float64Histogram(
"onix_plugin_execution_duration_seconds",
metric.WithDescription("Plugin execution time"),
metric.WithUnit("s"),
metric.WithExplicitBucketBoundaries(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1),
); err != nil {
return nil, fmt.Errorf("onix_plugin_execution_duration_seconds: %w", err)
}
if m.PluginErrorsTotal, err = meter.Int64Counter(
"onix_plugin_errors_total",
metric.WithDescription("Plugin level errors"),
metric.WithUnit("{error}"),
); err != nil {
return nil, fmt.Errorf("onix_plugin_errors_total: %w", err)
}
if m.BecknMessagesTotal, err = meter.Int64Counter(
"beckn_messages_total",
metric.WithDescription("Total Beckn protocol messages processed"),
metric.WithUnit("{message}"),
); err != nil {
return nil, fmt.Errorf("beckn_messages_total: %w", err)
}
if m.SignatureValidationsTotal, err = meter.Int64Counter(
"beckn_signature_validations_total",
metric.WithDescription("Signature validation attempts"),
metric.WithUnit("{validation}"),
); err != nil {
return nil, fmt.Errorf("beckn_signature_validations_total: %w", err)
}
if m.SchemaValidationsTotal, err = meter.Int64Counter(
"beckn_schema_validations_total",
metric.WithDescription("Schema validation attempts"),
metric.WithUnit("{validation}"),
); err != nil {
return nil, fmt.Errorf("beckn_schema_validations_total: %w", err)
}
if m.CacheOperationsTotal, err = meter.Int64Counter(
"onix_cache_operations_total",
metric.WithDescription("Redis cache operations"),
metric.WithUnit("{operation}"),
); err != nil {
return nil, fmt.Errorf("onix_cache_operations_total: %w", err)
}
if m.CacheHitsTotal, err = meter.Int64Counter(
"onix_cache_hits_total",
metric.WithDescription("Redis cache hits"),
metric.WithUnit("{hit}"),
); err != nil {
return nil, fmt.Errorf("onix_cache_hits_total: %w", err)
}
if m.CacheMissesTotal, err = meter.Int64Counter(
"onix_cache_misses_total",
metric.WithDescription("Redis cache misses"),
metric.WithUnit("{miss}"),
); err != nil {
return nil, fmt.Errorf("onix_cache_misses_total: %w", err)
}
if m.RoutingDecisionsTotal, err = meter.Int64Counter(
"onix_routing_decisions_total",
metric.WithDescription("Routing decisions taken by handler"),
metric.WithUnit("{decision}"),
); err != nil {
return nil, fmt.Errorf("onix_routing_decisions_total: %w", err)
}
return m, nil
}

View File

@@ -0,0 +1,33 @@
package telemetry
import (
"context"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/require"
)
func TestNewProviderAndMetrics(t *testing.T) {
ctx := context.Background()
provider, err := NewProvider(ctx, &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
})
require.NoError(t, err)
require.NotNil(t, provider)
require.NotNil(t, provider.MetricsHandler)
metrics, err := GetMetrics(ctx)
require.NoError(t, err)
require.NotNil(t, metrics)
rec := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/metrics", nil)
provider.MetricsHandler.ServeHTTP(rec, req)
require.Equal(t, 200, rec.Code)
require.NoError(t, provider.Shutdown(context.Background()))
}

View File

@@ -0,0 +1,78 @@
package telemetry
import (
"context"
"errors"
"fmt"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/model"
"github.com/beckn-one/beckn-onix/pkg/plugin/definition"
)
// InstrumentedStep wraps a processing step with telemetry instrumentation.
type InstrumentedStep struct {
step definition.Step
stepName string
moduleName string
metrics *Metrics
}
// NewInstrumentedStep returns a telemetry enabled wrapper around a definition.Step.
func NewInstrumentedStep(step definition.Step, stepName, moduleName string) (*InstrumentedStep, error) {
metrics, err := GetMetrics(context.Background())
if err != nil {
return nil, err
}
return &InstrumentedStep{
step: step,
stepName: stepName,
moduleName: moduleName,
metrics: metrics,
}, nil
}
type becknError interface {
BecknError() *model.Error
}
// Run executes the underlying step and records RED style metrics.
func (is *InstrumentedStep) Run(ctx *model.StepContext) error {
if is.metrics == nil {
return is.step.Run(ctx)
}
start := time.Now()
err := is.step.Run(ctx)
duration := time.Since(start).Seconds()
attrs := []attribute.KeyValue{
AttrModule.String(is.moduleName),
AttrStep.String(is.stepName),
AttrRole.String(string(ctx.Role)),
}
is.metrics.StepExecutionTotal.Add(ctx.Context, 1, metric.WithAttributes(attrs...))
is.metrics.StepExecutionDuration.Record(ctx.Context, duration, metric.WithAttributes(attrs...))
if err != nil {
errorType := fmt.Sprintf("%T", err)
var becknErr becknError
if errors.As(err, &becknErr) {
if be := becknErr.BecknError(); be != nil && be.Code != "" {
errorType = be.Code
}
}
errorAttrs := append(attrs, AttrErrorType.String(errorType))
is.metrics.StepErrorsTotal.Add(ctx.Context, 1, metric.WithAttributes(errorAttrs...))
log.Errorf(ctx.Context, err, "Step %s failed", is.stepName)
}
return err
}

View File

@@ -0,0 +1,60 @@
package telemetry
import (
"context"
"errors"
"testing"
"github.com/beckn-one/beckn-onix/pkg/model"
"github.com/stretchr/testify/require"
)
type stubStep struct {
err error
}
func (s stubStep) Run(ctx *model.StepContext) error {
return s.err
}
func TestInstrumentedStepSuccess(t *testing.T) {
ctx := context.Background()
provider, err := NewProvider(ctx, &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
})
require.NoError(t, err)
defer provider.Shutdown(context.Background())
step, err := NewInstrumentedStep(stubStep{}, "test-step", "test-module")
require.NoError(t, err)
stepCtx := &model.StepContext{
Context: context.Background(),
Role: model.RoleBAP,
}
require.NoError(t, step.Run(stepCtx))
}
func TestInstrumentedStepError(t *testing.T) {
ctx := context.Background()
provider, err := NewProvider(ctx, &Config{
ServiceName: "test-service",
ServiceVersion: "1.0.0",
EnableMetrics: true,
Environment: "test",
})
require.NoError(t, err)
defer provider.Shutdown(context.Background())
step, err := NewInstrumentedStep(stubStep{err: errors.New("boom")}, "test-step", "test-module")
require.NoError(t, err)
stepCtx := &model.StepContext{
Context: context.Background(),
Role: model.RoleBAP,
}
require.Error(t, step.Run(stepCtx))
}

110
pkg/telemetry/telemetry.go Normal file
View File

@@ -0,0 +1,110 @@
package telemetry
import (
"context"
"fmt"
"net/http"
clientprom "github.com/prometheus/client_golang/prometheus"
clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/instrumentation/runtime"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"github.com/beckn-one/beckn-onix/pkg/log"
)
// Config represents OpenTelemetry related configuration.
type Config struct {
ServiceName string `yaml:"serviceName"`
ServiceVersion string `yaml:"serviceVersion"`
EnableMetrics bool `yaml:"enableMetrics"`
Environment string `yaml:"environment"`
}
// Provider holds references to telemetry components that need coordinated shutdown.
type Provider struct {
MeterProvider *metric.MeterProvider
MetricsHandler http.Handler
Shutdown func(context.Context) error
}
// DefaultConfig returns sensible defaults for telemetry configuration.
func DefaultConfig() *Config {
return &Config{
ServiceName: "beckn-onix",
ServiceVersion: "dev",
EnableMetrics: true,
Environment: "development",
}
}
// NewProvider wires OpenTelemetry with a Prometheus exporter and exposes /metrics handler.
func NewProvider(ctx context.Context, cfg *Config) (*Provider, error) {
if cfg == nil {
cfg = DefaultConfig()
}
if cfg.ServiceName == "" {
cfg.ServiceName = DefaultConfig().ServiceName
}
if cfg.ServiceVersion == "" {
cfg.ServiceVersion = DefaultConfig().ServiceVersion
}
if cfg.Environment == "" {
cfg.Environment = DefaultConfig().Environment
}
if !cfg.EnableMetrics {
log.Info(ctx, "OpenTelemetry metrics disabled")
return &Provider{
Shutdown: func(context.Context) error { return nil },
}, nil
}
res, err := resource.New(
ctx,
resource.WithAttributes(
attribute.String("service.name", cfg.ServiceName),
attribute.String("service.version", cfg.ServiceVersion),
attribute.String("deployment.environment", cfg.Environment),
),
)
if err != nil {
return nil, fmt.Errorf("failed to create telemetry resource: %w", err)
}
registry := clientprom.NewRegistry()
exporter, err := otelprom.New(
otelprom.WithRegisterer(registry),
otelprom.WithoutUnits(),
otelprom.WithoutScopeInfo(),
)
if err != nil {
return nil, fmt.Errorf("failed to create prometheus exporter: %w", err)
}
meterProvider := metric.NewMeterProvider(
metric.WithReader(exporter),
metric.WithResource(res),
)
otel.SetMeterProvider(meterProvider)
log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s",
cfg.ServiceName, cfg.ServiceVersion, cfg.Environment)
if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(0)); err != nil {
log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err)
}
return &Provider{
MeterProvider: meterProvider,
MetricsHandler: clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}),
Shutdown: func(ctx context.Context) error {
return meterProvider.Shutdown(ctx)
},
}, nil
}