make changes as per the doc
This commit is contained in:
222
pkg/telemetry/metrics.go
Normal file
222
pkg/telemetry/metrics.go
Normal file
@@ -0,0 +1,222 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
)
|
||||
|
||||
// Metrics exposes strongly typed metric instruments used across the adapter.
|
||||
type Metrics struct {
|
||||
HTTPRequestsTotal metric.Int64Counter
|
||||
HTTPRequestDuration metric.Float64Histogram
|
||||
HTTPRequestsInFlight metric.Int64UpDownCounter
|
||||
HTTPRequestSize metric.Int64Histogram
|
||||
HTTPResponseSize metric.Int64Histogram
|
||||
|
||||
StepExecutionDuration metric.Float64Histogram
|
||||
StepExecutionTotal metric.Int64Counter
|
||||
StepErrorsTotal metric.Int64Counter
|
||||
|
||||
PluginExecutionDuration metric.Float64Histogram
|
||||
PluginErrorsTotal metric.Int64Counter
|
||||
|
||||
BecknMessagesTotal metric.Int64Counter
|
||||
SignatureValidationsTotal metric.Int64Counter
|
||||
SchemaValidationsTotal metric.Int64Counter
|
||||
CacheOperationsTotal metric.Int64Counter
|
||||
CacheHitsTotal metric.Int64Counter
|
||||
CacheMissesTotal metric.Int64Counter
|
||||
RoutingDecisionsTotal metric.Int64Counter
|
||||
}
|
||||
|
||||
var (
|
||||
metricsInstance *Metrics
|
||||
metricsOnce sync.Once
|
||||
metricsErr error
|
||||
)
|
||||
|
||||
// Attribute keys shared across instruments.
|
||||
var (
|
||||
AttrModule = attribute.Key("module")
|
||||
AttrSubsystem = attribute.Key("subsystem")
|
||||
AttrName = attribute.Key("name")
|
||||
AttrStep = attribute.Key("step")
|
||||
AttrRole = attribute.Key("role")
|
||||
AttrAction = attribute.Key("action")
|
||||
AttrHTTPMethod = attribute.Key("http_method")
|
||||
AttrHTTPStatus = attribute.Key("http_status_code")
|
||||
AttrStatus = attribute.Key("status")
|
||||
AttrErrorType = attribute.Key("error_type")
|
||||
AttrPluginID = attribute.Key("plugin_id")
|
||||
AttrPluginType = attribute.Key("plugin_type")
|
||||
AttrOperation = attribute.Key("operation")
|
||||
AttrRouteType = attribute.Key("route_type")
|
||||
AttrTargetType = attribute.Key("target_type")
|
||||
AttrSchemaVersion = attribute.Key("schema_version")
|
||||
)
|
||||
|
||||
// GetMetrics lazily initializes instruments and returns a cached reference.
|
||||
func GetMetrics(ctx context.Context) (*Metrics, error) {
|
||||
metricsOnce.Do(func() {
|
||||
metricsInstance, metricsErr = newMetrics()
|
||||
})
|
||||
return metricsInstance, metricsErr
|
||||
}
|
||||
|
||||
func newMetrics() (*Metrics, error) {
|
||||
meter := otel.GetMeterProvider().Meter(
|
||||
"github.com/beckn-one/beckn-onix/telemetry",
|
||||
metric.WithInstrumentationVersion("1.0.0"),
|
||||
)
|
||||
|
||||
m := &Metrics{}
|
||||
var err error
|
||||
|
||||
if m.HTTPRequestsTotal, err = meter.Int64Counter(
|
||||
"http_server_requests_total",
|
||||
metric.WithDescription("Total number of HTTP requests processed"),
|
||||
metric.WithUnit("{request}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_requests_total: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPRequestDuration, err = meter.Float64Histogram(
|
||||
"http_server_request_duration_seconds",
|
||||
metric.WithDescription("HTTP request duration in seconds"),
|
||||
metric.WithUnit("s"),
|
||||
metric.WithExplicitBucketBoundaries(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_request_duration_seconds: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPRequestsInFlight, err = meter.Int64UpDownCounter(
|
||||
"http_server_requests_in_flight",
|
||||
metric.WithDescription("Number of HTTP requests currently being processed"),
|
||||
metric.WithUnit("{request}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_requests_in_flight: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPRequestSize, err = meter.Int64Histogram(
|
||||
"http_server_request_size_bytes",
|
||||
metric.WithDescription("Size of HTTP request payloads"),
|
||||
metric.WithUnit("By"),
|
||||
metric.WithExplicitBucketBoundaries(100, 1000, 10000, 100000, 1000000),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_request_size_bytes: %w", err)
|
||||
}
|
||||
|
||||
if m.HTTPResponseSize, err = meter.Int64Histogram(
|
||||
"http_server_response_size_bytes",
|
||||
metric.WithDescription("Size of HTTP responses"),
|
||||
metric.WithUnit("By"),
|
||||
metric.WithExplicitBucketBoundaries(100, 1000, 10000, 100000, 1000000),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("http_server_response_size_bytes: %w", err)
|
||||
}
|
||||
|
||||
if m.StepExecutionDuration, err = meter.Float64Histogram(
|
||||
"onix_step_execution_duration_seconds",
|
||||
metric.WithDescription("Duration of individual processing steps"),
|
||||
metric.WithUnit("s"),
|
||||
metric.WithExplicitBucketBoundaries(0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_step_execution_duration_seconds: %w", err)
|
||||
}
|
||||
|
||||
if m.StepExecutionTotal, err = meter.Int64Counter(
|
||||
"onix_step_executions_total",
|
||||
metric.WithDescription("Total processing step executions"),
|
||||
metric.WithUnit("{execution}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_step_executions_total: %w", err)
|
||||
}
|
||||
|
||||
if m.StepErrorsTotal, err = meter.Int64Counter(
|
||||
"onix_step_errors_total",
|
||||
metric.WithDescription("Processing step errors"),
|
||||
metric.WithUnit("{error}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_step_errors_total: %w", err)
|
||||
}
|
||||
|
||||
if m.PluginExecutionDuration, err = meter.Float64Histogram(
|
||||
"onix_plugin_execution_duration_seconds",
|
||||
metric.WithDescription("Plugin execution time"),
|
||||
metric.WithUnit("s"),
|
||||
metric.WithExplicitBucketBoundaries(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_plugin_execution_duration_seconds: %w", err)
|
||||
}
|
||||
|
||||
if m.PluginErrorsTotal, err = meter.Int64Counter(
|
||||
"onix_plugin_errors_total",
|
||||
metric.WithDescription("Plugin level errors"),
|
||||
metric.WithUnit("{error}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_plugin_errors_total: %w", err)
|
||||
}
|
||||
|
||||
if m.BecknMessagesTotal, err = meter.Int64Counter(
|
||||
"beckn_messages_total",
|
||||
metric.WithDescription("Total Beckn protocol messages processed"),
|
||||
metric.WithUnit("{message}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("beckn_messages_total: %w", err)
|
||||
}
|
||||
|
||||
if m.SignatureValidationsTotal, err = meter.Int64Counter(
|
||||
"beckn_signature_validations_total",
|
||||
metric.WithDescription("Signature validation attempts"),
|
||||
metric.WithUnit("{validation}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("beckn_signature_validations_total: %w", err)
|
||||
}
|
||||
|
||||
if m.SchemaValidationsTotal, err = meter.Int64Counter(
|
||||
"beckn_schema_validations_total",
|
||||
metric.WithDescription("Schema validation attempts"),
|
||||
metric.WithUnit("{validation}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("beckn_schema_validations_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheOperationsTotal, err = meter.Int64Counter(
|
||||
"onix_cache_operations_total",
|
||||
metric.WithDescription("Redis cache operations"),
|
||||
metric.WithUnit("{operation}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_operations_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheHitsTotal, err = meter.Int64Counter(
|
||||
"onix_cache_hits_total",
|
||||
metric.WithDescription("Redis cache hits"),
|
||||
metric.WithUnit("{hit}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_hits_total: %w", err)
|
||||
}
|
||||
|
||||
if m.CacheMissesTotal, err = meter.Int64Counter(
|
||||
"onix_cache_misses_total",
|
||||
metric.WithDescription("Redis cache misses"),
|
||||
metric.WithUnit("{miss}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_cache_misses_total: %w", err)
|
||||
}
|
||||
|
||||
if m.RoutingDecisionsTotal, err = meter.Int64Counter(
|
||||
"onix_routing_decisions_total",
|
||||
metric.WithDescription("Routing decisions taken by handler"),
|
||||
metric.WithUnit("{decision}"),
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("onix_routing_decisions_total: %w", err)
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
33
pkg/telemetry/metrics_test.go
Normal file
33
pkg/telemetry/metrics_test.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewProviderAndMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
provider, err := NewProvider(ctx, &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, provider)
|
||||
require.NotNil(t, provider.MetricsHandler)
|
||||
|
||||
metrics, err := GetMetrics(ctx)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, metrics)
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest("GET", "/metrics", nil)
|
||||
provider.MetricsHandler.ServeHTTP(rec, req)
|
||||
require.Equal(t, 200, rec.Code)
|
||||
|
||||
require.NoError(t, provider.Shutdown(context.Background()))
|
||||
}
|
||||
78
pkg/telemetry/step_instrumentor.go
Normal file
78
pkg/telemetry/step_instrumentor.go
Normal file
@@ -0,0 +1,78 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
"github.com/beckn-one/beckn-onix/pkg/model"
|
||||
"github.com/beckn-one/beckn-onix/pkg/plugin/definition"
|
||||
)
|
||||
|
||||
// InstrumentedStep wraps a processing step with telemetry instrumentation.
|
||||
type InstrumentedStep struct {
|
||||
step definition.Step
|
||||
stepName string
|
||||
moduleName string
|
||||
metrics *Metrics
|
||||
}
|
||||
|
||||
// NewInstrumentedStep returns a telemetry enabled wrapper around a definition.Step.
|
||||
func NewInstrumentedStep(step definition.Step, stepName, moduleName string) (*InstrumentedStep, error) {
|
||||
metrics, err := GetMetrics(context.Background())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &InstrumentedStep{
|
||||
step: step,
|
||||
stepName: stepName,
|
||||
moduleName: moduleName,
|
||||
metrics: metrics,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type becknError interface {
|
||||
BecknError() *model.Error
|
||||
}
|
||||
|
||||
// Run executes the underlying step and records RED style metrics.
|
||||
func (is *InstrumentedStep) Run(ctx *model.StepContext) error {
|
||||
if is.metrics == nil {
|
||||
return is.step.Run(ctx)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
err := is.step.Run(ctx)
|
||||
duration := time.Since(start).Seconds()
|
||||
|
||||
attrs := []attribute.KeyValue{
|
||||
AttrModule.String(is.moduleName),
|
||||
AttrStep.String(is.stepName),
|
||||
AttrRole.String(string(ctx.Role)),
|
||||
}
|
||||
|
||||
is.metrics.StepExecutionTotal.Add(ctx.Context, 1, metric.WithAttributes(attrs...))
|
||||
is.metrics.StepExecutionDuration.Record(ctx.Context, duration, metric.WithAttributes(attrs...))
|
||||
|
||||
if err != nil {
|
||||
errorType := fmt.Sprintf("%T", err)
|
||||
var becknErr becknError
|
||||
if errors.As(err, &becknErr) {
|
||||
if be := becknErr.BecknError(); be != nil && be.Code != "" {
|
||||
errorType = be.Code
|
||||
}
|
||||
}
|
||||
|
||||
errorAttrs := append(attrs, AttrErrorType.String(errorType))
|
||||
is.metrics.StepErrorsTotal.Add(ctx.Context, 1, metric.WithAttributes(errorAttrs...))
|
||||
log.Errorf(ctx.Context, err, "Step %s failed", is.stepName)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
60
pkg/telemetry/step_instrumentor_test.go
Normal file
60
pkg/telemetry/step_instrumentor_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/model"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// stubStep is a test double for a processing step; err is the value its Run
// method hands back.
type stubStep struct{ err error }
|
||||
|
||||
func (s stubStep) Run(ctx *model.StepContext) error {
|
||||
return s.err
|
||||
}
|
||||
|
||||
func TestInstrumentedStepSuccess(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
provider, err := NewProvider(ctx, &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer provider.Shutdown(context.Background())
|
||||
|
||||
step, err := NewInstrumentedStep(stubStep{}, "test-step", "test-module")
|
||||
require.NoError(t, err)
|
||||
|
||||
stepCtx := &model.StepContext{
|
||||
Context: context.Background(),
|
||||
Role: model.RoleBAP,
|
||||
}
|
||||
require.NoError(t, step.Run(stepCtx))
|
||||
}
|
||||
|
||||
func TestInstrumentedStepError(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
provider, err := NewProvider(ctx, &Config{
|
||||
ServiceName: "test-service",
|
||||
ServiceVersion: "1.0.0",
|
||||
EnableMetrics: true,
|
||||
Environment: "test",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer provider.Shutdown(context.Background())
|
||||
|
||||
step, err := NewInstrumentedStep(stubStep{err: errors.New("boom")}, "test-step", "test-module")
|
||||
require.NoError(t, err)
|
||||
|
||||
stepCtx := &model.StepContext{
|
||||
Context: context.Background(),
|
||||
Role: model.RoleBAP,
|
||||
}
|
||||
require.Error(t, step.Run(stepCtx))
|
||||
}
|
||||
110
pkg/telemetry/telemetry.go
Normal file
110
pkg/telemetry/telemetry.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
clientprom "github.com/prometheus/client_golang/prometheus"
|
||||
clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
|
||||
"github.com/beckn-one/beckn-onix/pkg/log"
|
||||
)
|
||||
|
||||
// Config carries the telemetry settings read from YAML configuration.
type Config struct {
	// ServiceName is reported as the service.name resource attribute.
	ServiceName string `yaml:"serviceName"`
	// ServiceVersion is reported as the service.version resource attribute.
	ServiceVersion string `yaml:"serviceVersion"`
	// EnableMetrics turns metric collection and the metrics handler on or off.
	EnableMetrics bool `yaml:"enableMetrics"`
	// Environment is reported as the deployment.environment resource attribute.
	Environment string `yaml:"environment"`
}
|
||||
|
||||
// Provider holds references to telemetry components that need coordinated shutdown.
|
||||
type Provider struct {
|
||||
MeterProvider *metric.MeterProvider
|
||||
MetricsHandler http.Handler
|
||||
Shutdown func(context.Context) error
|
||||
}
|
||||
|
||||
// DefaultConfig returns sensible defaults for telemetry configuration.
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
ServiceName: "beckn-onix",
|
||||
ServiceVersion: "dev",
|
||||
EnableMetrics: true,
|
||||
Environment: "development",
|
||||
}
|
||||
}
|
||||
|
||||
// NewProvider wires OpenTelemetry with a Prometheus exporter and exposes /metrics handler.
|
||||
func NewProvider(ctx context.Context, cfg *Config) (*Provider, error) {
|
||||
if cfg == nil {
|
||||
cfg = DefaultConfig()
|
||||
}
|
||||
if cfg.ServiceName == "" {
|
||||
cfg.ServiceName = DefaultConfig().ServiceName
|
||||
}
|
||||
if cfg.ServiceVersion == "" {
|
||||
cfg.ServiceVersion = DefaultConfig().ServiceVersion
|
||||
}
|
||||
if cfg.Environment == "" {
|
||||
cfg.Environment = DefaultConfig().Environment
|
||||
}
|
||||
|
||||
if !cfg.EnableMetrics {
|
||||
log.Info(ctx, "OpenTelemetry metrics disabled")
|
||||
return &Provider{
|
||||
Shutdown: func(context.Context) error { return nil },
|
||||
}, nil
|
||||
}
|
||||
|
||||
res, err := resource.New(
|
||||
ctx,
|
||||
resource.WithAttributes(
|
||||
attribute.String("service.name", cfg.ServiceName),
|
||||
attribute.String("service.version", cfg.ServiceVersion),
|
||||
attribute.String("deployment.environment", cfg.Environment),
|
||||
),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create telemetry resource: %w", err)
|
||||
}
|
||||
|
||||
registry := clientprom.NewRegistry()
|
||||
|
||||
exporter, err := otelprom.New(
|
||||
otelprom.WithRegisterer(registry),
|
||||
otelprom.WithoutUnits(),
|
||||
otelprom.WithoutScopeInfo(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create prometheus exporter: %w", err)
|
||||
}
|
||||
|
||||
meterProvider := metric.NewMeterProvider(
|
||||
metric.WithReader(exporter),
|
||||
metric.WithResource(res),
|
||||
)
|
||||
|
||||
otel.SetMeterProvider(meterProvider)
|
||||
log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s",
|
||||
cfg.ServiceName, cfg.ServiceVersion, cfg.Environment)
|
||||
|
||||
if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(0)); err != nil {
|
||||
log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err)
|
||||
}
|
||||
|
||||
return &Provider{
|
||||
MeterProvider: meterProvider,
|
||||
MetricsHandler: clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}),
|
||||
Shutdown: func(ctx context.Context) error {
|
||||
return meterProvider.Shutdown(ctx)
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user