Files
onix/pkg/plugin/implementation/otelsetup/otelsetup.go
2026-02-27 19:05:09 +05:30

215 lines
7.7 KiB
Go

package otelsetup
import (
"context"
"fmt"
"time"
"github.com/beckn-one/beckn-onix/pkg/log"
"github.com/beckn-one/beckn-onix/pkg/plugin"
"github.com/beckn-one/beckn-onix/pkg/telemetry"
"go.opentelemetry.io/contrib/instrumentation/runtime"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/log/global"
logsdk "go.opentelemetry.io/otel/sdk/log"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Setup wires the telemetry provider. This is the concrete implementation
// behind the OtelSetupMetricsProvider interface.
type Setup struct{}
// Config represents OpenTelemetry related configuration.
type Config struct {
ServiceName string `yaml:"serviceName"`
ServiceVersion string `yaml:"serviceVersion"`
Environment string `yaml:"environment"`
Domain string `yaml:"domain"`
DeviceID string `yaml:"deviceID"`
EnableMetrics bool `yaml:"enableMetrics"`
EnableTracing bool `yaml:"enableTracing"`
EnableLogs bool `yaml:"enableLogs"`
OtlpEndpoint string `yaml:"otlpEndpoint"`
TimeInterval int64 `yaml:"timeInterval"`
AuditFieldsConfig string `yaml:"auditFieldsConfig"`
Producer string `yaml:"producer"`
ProducerType string `yaml:"producerType"`
}
// DefaultConfig returns sensible defaults for telemetry configuration.
func DefaultConfig() *Config {
return &Config{
ServiceName: "beckn-onix",
ServiceVersion: "dev",
Environment: "development",
Domain: "",
DeviceID: "beckn-onix-device",
OtlpEndpoint: "localhost:4317",
TimeInterval: 5,
}
}
// ToPluginConfig converts Config to plugin.Config format.
func ToPluginConfig(cfg *Config) *plugin.Config {
return &plugin.Config{
ID: "otelsetup",
Config: map[string]string{
"serviceName": cfg.ServiceName,
"serviceVersion": cfg.ServiceVersion,
"environment": cfg.Environment,
"domain": cfg.Domain,
"enableMetrics": fmt.Sprintf("%t", cfg.EnableMetrics),
"enableTracing": fmt.Sprintf("%t", cfg.EnableTracing),
"enableLogs": fmt.Sprintf("%t", cfg.EnableLogs),
"otlpEndpoint": cfg.OtlpEndpoint,
"deviceID": cfg.DeviceID,
"timeInterval": fmt.Sprintf("%d", cfg.TimeInterval),
"auditFieldsConfig": cfg.AuditFieldsConfig,
},
}
}
// New initializes the underlying telemetry provider. The returned provider
// exposes the HTTP handler and shutdown hooks that the core application can
// manage directly.
func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) {
if cfg == nil {
return nil, fmt.Errorf("telemetry config cannot be nil")
}
// Apply defaults if fields are empty
if cfg.ServiceName == "" {
cfg.ServiceName = DefaultConfig().ServiceName
}
if cfg.ServiceVersion == "" {
cfg.ServiceVersion = DefaultConfig().ServiceVersion
}
if cfg.Environment == "" {
cfg.Environment = DefaultConfig().Environment
}
if cfg.Domain == "" {
cfg.Domain = DefaultConfig().Domain
}
if cfg.DeviceID == "" {
cfg.DeviceID = DefaultConfig().DeviceID
}
if cfg.TimeInterval == 0 {
cfg.TimeInterval = DefaultConfig().TimeInterval
}
if !cfg.EnableMetrics && !cfg.EnableTracing && !cfg.EnableLogs {
log.Info(ctx, "OpenTelemetry metrics, tracing, and logs are all disabled")
return &telemetry.Provider{
Shutdown: func(context.Context) error { return nil },
}, nil
}
baseAttrs := []attribute.KeyValue{
attribute.String("service.name", cfg.ServiceName),
attribute.String("service.version", cfg.ServiceVersion),
attribute.String("environment", cfg.Environment),
attribute.String("domain", cfg.Domain),
attribute.String("device_id", cfg.DeviceID),
attribute.String("producerType", cfg.ProducerType),
attribute.String("producer", cfg.Producer),
}
var meterProvider *metric.MeterProvider
if cfg.EnableMetrics {
resMetric, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "METRIC")...))
if err != nil {
return nil, fmt.Errorf("failed to create telemetry resource for metric: %w", err)
}
metricExporter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(cfg.OtlpEndpoint),
otlpmetricgrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())))
if err != nil {
return nil, fmt.Errorf("failed to create OTLP metric exporter: %w", err)
}
reader := metric.NewPeriodicReader(metricExporter, metric.WithInterval(time.Second*time.Duration(cfg.TimeInterval)))
meterProvider = metric.NewMeterProvider(metric.WithReader(reader), metric.WithResource(resMetric))
otel.SetMeterProvider(meterProvider)
log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s (OTLP endpoint=%s)",
cfg.ServiceName, cfg.ServiceVersion, cfg.Environment, cfg.OtlpEndpoint)
if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(runtime.DefaultMinimumReadMemStatsInterval)); err != nil {
log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err)
}
}
var traceProvider *trace.TracerProvider
if cfg.EnableTracing {
resTrace, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "API")...))
if err != nil {
return nil, fmt.Errorf("failed to create trace resource: %w", err)
}
traceExporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithEndpoint(cfg.OtlpEndpoint), otlptracegrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())))
if err != nil {
return nil, fmt.Errorf("failed to create OTLP trace exporter: %w", err)
}
traceProvider = trace.NewTracerProvider(trace.WithBatcher(traceExporter), trace.WithResource(resTrace))
otel.SetTracerProvider(traceProvider)
log.Infof(ctx, "OpenTelemetry tracing initialized for service=%s (OTLP endpoint=%s)",
cfg.ServiceName, cfg.OtlpEndpoint)
}
var logProvider *logsdk.LoggerProvider
if cfg.EnableLogs {
resAudit, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "AUDIT")...))
if err != nil {
return nil, fmt.Errorf("failed to create audit resource: %w", err)
}
logExporter, err := otlploggrpc.New(ctx, otlploggrpc.WithEndpoint(cfg.OtlpEndpoint), otlploggrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials())))
if err != nil {
return nil, fmt.Errorf("failed to create OTLP logs exporter: %w", err)
}
processor := logsdk.NewBatchProcessor(logExporter)
logProvider = logsdk.NewLoggerProvider(logsdk.WithProcessor(processor), logsdk.WithResource(resAudit))
global.SetLoggerProvider(logProvider)
}
return &telemetry.Provider{
MeterProvider: meterProvider,
TraceProvider: traceProvider,
LogProvider: logProvider,
Shutdown: func(shutdownCtx context.Context) error {
var errs []error
if traceProvider != nil {
if err := traceProvider.Shutdown(shutdownCtx); err != nil {
errs = append(errs, fmt.Errorf("tracer shutdown: %w", err))
}
}
if meterProvider != nil {
if err := meterProvider.Shutdown(shutdownCtx); err != nil {
errs = append(errs, fmt.Errorf("meter shutdown: %w", err))
}
}
if logProvider != nil {
if err := logProvider.Shutdown(shutdownCtx); err != nil {
errs = append(errs, fmt.Errorf("logs shutdown: %w", err))
}
}
if len(errs) > 0 {
return fmt.Errorf("shutdown errors: %v", errs)
}
return nil
},
}, nil
}
func buildAtts(base []attribute.KeyValue, eid string) []attribute.KeyValue {
atts := make([]attribute.KeyValue, 0, len(base)+1)
atts = append(atts, base...)
atts = append(atts, attribute.String("eid", eid))
return atts
}