From 8948479dea2d5b0e2427d1dc79039fc6b2e8f885 Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Wed, 28 Jan 2026 21:29:37 +0530 Subject: [PATCH 1/9] Feat: Update the logger config to show the parent_id --- .gitignore | 1 + cmd/adapter/main.go | 38 +++++++++++++++- pkg/log/log_test.go | 45 +++++++++++++++---- pkg/model/error_test.go | 1 + pkg/model/model.go | 9 +++- .../reqpreprocessor/cmd/plugin.go | 6 +++ .../reqpreprocessor/cmd/plugin_test.go | 5 ++- .../reqpreprocessor/reqpreprocessor.go | 6 +++ .../reqpreprocessor/reqpreprocessor_test.go | 6 ++- 9 files changed, 104 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 3e925e6..cfb4d29 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Logs .DS_Store logs +.idea *.log npm-debug.log* yarn-debug.log* diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 43ad48d..518a419 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/beckn-one/beckn-onix/pkg/model" "gopkg.in/yaml.v2" "github.com/beckn-one/beckn-onix/core/module" @@ -52,7 +53,7 @@ var runFunc = run func main() { // Define and parse command-line flags. - flag.StringVar(&configPath, "config", "../../config/onix/adapter.yaml", "Path to the configuration file") + flag.StringVar(&configPath, "config", "config/onix/adapter.yaml", "Path to the configuration file") flag.Parse() // Use custom log for initial setup messages. @@ -153,6 +154,9 @@ func run(ctx context.Context, configPath string) error { return fmt.Errorf("failed to initialize logger: %w", err) } + //to add the parent_id in the context value so it get passed to the logs + ctx = addParentIdCtx(ctx, cfg) + // Initialize plugin manager. 
log.Infof(ctx, "Initializing plugin manager") mgr, closer, err := newManagerFunc(ctx, cfg.PluginManager) @@ -220,3 +224,35 @@ func shutdown(ctx context.Context, httpServer *http.Server, wg *sync.WaitGroup, } }() } + +func addParentIdCtx(ctx context.Context, config *Config) context.Context { + var parentID string + var podName string + + if p := os.Getenv("POD_NAME"); p != "" { + log.Infof(ctx, "Adding POD name: %s", p) + podName = p + } else { + log.Info(ctx, "POD_NAME environment variable not set falling back to hostname") + if hostname, err := os.Hostname(); err == nil { + log.Infof(ctx, "Setting POD name as hostname: %s", hostname) + podName = hostname + } else { + log.Info(ctx, "failed to get POD name") + } + } + + for _, m := range config.Modules { + if m.Handler.Role != "" && m.Handler.SubscriberID != "" { + parentID = string(m.Handler.Role) + ":" + m.Handler.SubscriberID + ":" + podName + break + } + } + + if parentID != "" { + ctx = context.WithValue(ctx, model.ContextKeyParentID, parentID) + } else { + log.Warnf(ctx, "Failed to find parent ID in config please add the role and subscriber_id in the handler config ") + } + return ctx +} diff --git a/pkg/log/log_test.go b/pkg/log/log_test.go index 30245ba..334788b 100644 --- a/pkg/log/log_test.go +++ b/pkg/log/log_test.go @@ -20,6 +20,11 @@ import ( type ctxKey any var requestID ctxKey = "requestID" +var transaction_id ctxKey = "transactionID" +var message_id ctxKey = "messageID" +var subscriber_id ctxKey = "subscriberID" +var module_id ctxKey = "moduleID" +var parent_id ctxKey = "parentID" const testLogFilePath = "./test_logs/test.log" @@ -69,6 +74,7 @@ func setupLogger(t *testing.T, l level) string { model.ContextKeyMsgID, model.ContextKeySubscriberID, model.ContextKeyModuleID, + model.ContextKeyParentID, }, } @@ -103,16 +109,25 @@ func parseLogLine(t *testing.T, line string) map[string]interface{} { func TestDebug(t *testing.T) { t.Helper() logPath := setupLogger(t, DebugLevel) - ctx := 
context.WithValue(context.Background(), model.ContextKeySubscriberID, "12345") + ctx := context.WithValue(context.Background(), model.ContextKeySubscriberID, "subscriber-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyTxnID, "trx-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyMsgID, "message-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyModuleID, "module-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyParentID, "parent-id-12345") + Debug(ctx, "Debug message") lines := readLogFile(t, logPath) if len(lines) == 0 { t.Fatal("No logs were written.") } expected := map[string]interface{}{ - "level": "debug", - "subscriber_id": "12345", - "message": "Debug message", + "level": "debug", + "transaction_id": "trx-id-12345", + "message_id": "message-id-12345", + "subscriber_id": "subscriber-id-12345", + "module_id": "module-id-12345", + "parent_id": "parent-id-12345", + "message": "Debug message", } var found bool @@ -135,16 +150,24 @@ func TestDebug(t *testing.T) { func TestInfo(t *testing.T) { logPath := setupLogger(t, InfoLevel) - ctx := context.WithValue(context.Background(), model.ContextKeySubscriberID, "12345") + ctx := context.WithValue(context.Background(), model.ContextKeySubscriberID, "subscriber-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyTxnID, "trx-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyMsgID, "message-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyModuleID, "module-id-12345") + ctx = context.WithValue(ctx, model.ContextKeyParentID, "parent-id-12345") Info(ctx, "Info message") lines := readLogFile(t, logPath) if len(lines) == 0 { t.Fatal("No logs were written.") } expected := map[string]interface{}{ - "level": "info", - "subscriber_id": "12345", - "message": "Info message", + "level": "info", + "transaction_id": "trx-id-12345", + "message_id": "message-id-12345", + "subscriber_id": "subscriber-id-12345", + "module_id": "module-id-12345", + "parent_id": "parent-id-12345", + 
"message": "Info message", } var found bool @@ -227,6 +250,12 @@ func TestError(t *testing.T) { func TestRequest(t *testing.T) { logPath := setupLogger(t, InfoLevel) ctx := context.WithValue(context.Background(), requestID, "abc-123") + ctx = context.WithValue(context.Background(), transaction_id, "transaction-id-123-") + ctx = context.WithValue(context.Background(), message_id, "message-id-123") + ctx = context.WithValue(context.Background(), subscriber_id, "subscriber-id-123") + ctx = context.WithValue(context.Background(), module_id, "module-id-123") + ctx = context.WithValue(context.Background(), parent_id, "parent-id-123") + req, _ := http.NewRequest("POST", "/api/test", bytes.NewBuffer([]byte(`{"key":"value"}`))) req.RemoteAddr = "127.0.0.1:8080" Request(ctx, req, []byte(`{"key":"value"}`)) diff --git a/pkg/model/error_test.go b/pkg/model/error_test.go index 1ac952e..6cf69fc 100644 --- a/pkg/model/error_test.go +++ b/pkg/model/error_test.go @@ -208,6 +208,7 @@ func TestParseContextKey_ValidKeys(t *testing.T) { {"message_id", ContextKeyMsgID}, {"subscriber_id", ContextKeySubscriberID}, {"module_id", ContextKeyModuleID}, + {"parent_id", ContextKeyParentID}, } for _, tt := range tests { diff --git a/pkg/model/model.go b/pkg/model/model.go index 8adc569..1cd08cc 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -53,6 +53,9 @@ const ( // ContextKeyModuleID is the context key for storing and retrieving the model ID from a request context. ContextKeyModuleID ContextKey = "module_id" + + // ContextKeyParentID is the context key for storing and retrieving the parent ID from a request context + ContextKeyParentID ContextKey = "parent_id" ) var contextKeys = map[string]ContextKey{ @@ -60,6 +63,7 @@ var contextKeys = map[string]ContextKey{ "message_id": ContextKeyMsgID, "subscriber_id": ContextKeySubscriberID, "module_id": ContextKeyModuleID, + "parent_id": ContextKeyParentID, } // ParseContextKey converts a string into a valid ContextKey. 
@@ -100,6 +104,8 @@ const ( RoleGateway Role = "gateway" // RoleRegistery represents the Registry that maintains network participant details. RoleRegistery Role = "registery" + // RoleCDS represents the CDS for that network + RoleCDS Role = "cds" ) var validRoles = map[Role]bool{ @@ -107,6 +113,7 @@ var validRoles = map[Role]bool{ RoleBPP: true, RoleGateway: true, RoleRegistery: true, + RoleCDS: true, } // UnmarshalYAML implements custom YAML unmarshalling for Role to ensure only valid values are accepted. @@ -184,4 +191,4 @@ type Message struct { // Response represents the main response structure. type Response struct { Message Message `json:"message"` -} \ No newline at end of file +} diff --git a/pkg/plugin/implementation/reqpreprocessor/cmd/plugin.go b/pkg/plugin/implementation/reqpreprocessor/cmd/plugin.go index e02d477..494d6d1 100644 --- a/pkg/plugin/implementation/reqpreprocessor/cmd/plugin.go +++ b/pkg/plugin/implementation/reqpreprocessor/cmd/plugin.go @@ -5,6 +5,7 @@ import ( "net/http" "strings" + "github.com/beckn-one/beckn-onix/pkg/model" "github.com/beckn-one/beckn-onix/pkg/plugin/implementation/reqpreprocessor" ) @@ -18,6 +19,11 @@ func (p provider) New(ctx context.Context, c map[string]string) (func(http.Handl if contextKeys, ok := c["contextKeys"]; ok { config.ContextKeys = strings.Split(contextKeys, ",") } + + if v := ctx.Value(model.ContextKeyParentID); v != nil { + config.ParentID = v.(string) + } + return reqpreprocessor.NewPreProcessor(config) } diff --git a/pkg/plugin/implementation/reqpreprocessor/cmd/plugin_test.go b/pkg/plugin/implementation/reqpreprocessor/cmd/plugin_test.go index cb5c7e6..2df83e0 100644 --- a/pkg/plugin/implementation/reqpreprocessor/cmd/plugin_test.go +++ b/pkg/plugin/implementation/reqpreprocessor/cmd/plugin_test.go @@ -7,6 +7,7 @@ import ( "strings" "testing" + "github.com/beckn-one/beckn-onix/pkg/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -92,7 +93,9 @@ func 
TestProviderNew(t *testing.T) { }` p := provider{} - middleware, err := p.New(context.Background(), tc.config) + ctx := context.Background() + ctx = context.WithValue(ctx, model.ContextKeyParentID, "bap:bap-1:instanceID") + middleware, err := p.New(ctx, tc.config) if tc.expectedError { assert.Error(t, err) return diff --git a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go index 12e6581..fa32898 100644 --- a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go +++ b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go @@ -17,6 +17,7 @@ import ( type Config struct { Role string ContextKeys []string + ParentID string } const contextKey = "context" @@ -58,6 +59,11 @@ func NewPreProcessor(cfg *Config) (func(http.Handler) http.Handler, error) { log.Debugf(ctx, "adding subscriberId to request:%s, %v", model.ContextKeySubscriberID, subID) ctx = context.WithValue(ctx, model.ContextKeySubscriberID, subID) } + + if cfg.ParentID != "" { + log.Debugf(ctx, "adding parentID to request:%s, %v", model.ContextKeyParentID, cfg.ParentID) + ctx = context.WithValue(ctx, model.ContextKeyParentID, cfg.ParentID) + } for _, key := range cfg.ContextKeys { ctxKey, _ := model.ParseContextKey(key) if v, ok := reqContext[key]; ok { diff --git a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor_test.go b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor_test.go index 97a147a..1e9f0c1 100644 --- a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor_test.go +++ b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor_test.go @@ -22,7 +22,8 @@ func TestNewPreProcessorSuccessCases(t *testing.T) { { name: "BAP role with valid context", config: &Config{ - Role: "bap", + Role: "bap", + ParentID: "bap:bap-123", }, requestBody: map[string]interface{}{ "context": map[string]interface{}{ @@ -38,7 +39,8 @@ func TestNewPreProcessorSuccessCases(t *testing.T) { { name: "BPP role with valid 
context", config: &Config{ - Role: "bpp", + Role: "bpp", + ParentID: "bap:bap-123", }, requestBody: map[string]interface{}{ "context": map[string]interface{}{ From 2745047b27a327fd9d780371b4da648ffd0c6d1c Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Thu, 29 Jan 2026 23:15:41 +0530 Subject: [PATCH 2/9] Fix : update the name RoleCDS to RoleDiscovery --- pkg/model/model.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/model/model.go b/pkg/model/model.go index 1cd08cc..de53254 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -104,8 +104,8 @@ const ( RoleGateway Role = "gateway" // RoleRegistery represents the Registry that maintains network participant details. RoleRegistery Role = "registery" - // RoleCDS represents the CDS for that network - RoleCDS Role = "cds" + // RoleDiscovery represents the discovery for that network + RoleDiscovery Role = "discovery" ) var validRoles = map[Role]bool{ @@ -113,7 +113,7 @@ var validRoles = map[Role]bool{ RoleBPP: true, RoleGateway: true, RoleRegistery: true, - RoleCDS: true, + RoleDiscovery: true, } // UnmarshalYAML implements custom YAML unmarshalling for Role to ensure only valid values are accepted. 
From ab891027112db508eb6a2a3f6da6199f45aa67fd Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Mon, 23 Feb 2026 16:08:44 +0530 Subject: [PATCH 3/9] Feat: configure audit fields and metrics for onix adapter and add local configuration for onix adapter --- config/onix/adapter.local.yaml | 221 ++++++++ config/onix/adapter.yaml | 212 +++---- config/onix/audit-fields.yaml | 24 + config/onix/bapTxnCaller-routing.yaml | 48 +- config/onix/bapTxnReciever-routing.yaml | 51 +- core/module/handler/http_metric.go | 120 ++++ core/module/handler/stdHandler.go | 110 +++- core/module/handler/step.go | 66 ++- core/module/handler/step_instrumentor.go | 34 +- core/module/handler/step_metrics.go | 6 +- core/module/handler/step_metrics_test.go | 119 +++- go.mod | 38 +- go.sum | 100 +++- pkg/log/log.go | 6 + pkg/model/model.go | 4 + pkg/plugin/implementation/cache/cache.go | 19 +- .../implementation/otelsetup/cmd/plugin.go | 82 ++- .../implementation/otelsetup/otelsetup.go | 225 +++++--- .../otelsetup/otelsetup_test.go | 100 +++- .../reqpreprocessor/reqpreprocessor.go | 14 + .../simplekeymanager/simplekeymanager.go | 52 +- pkg/plugin/manager.go | 4 +- pkg/telemetry/audit.go | 56 ++ pkg/telemetry/audit_fields.go | 216 ++++++++ pkg/telemetry/audit_fields_test.go | 518 ++++++++++++++++++ pkg/telemetry/metrics_test.go | 31 +- pkg/telemetry/pluginMetrics.go | 64 ++- pkg/telemetry/telemetry.go | 15 +- pkg/telemetry/test_helper.go | 53 +- 29 files changed, 2167 insertions(+), 441 deletions(-) create mode 100644 config/onix/adapter.local.yaml create mode 100644 config/onix/audit-fields.yaml create mode 100644 core/module/handler/http_metric.go create mode 100644 pkg/telemetry/audit.go create mode 100644 pkg/telemetry/audit_fields.go create mode 100644 pkg/telemetry/audit_fields_test.go diff --git a/config/onix/adapter.local.yaml b/config/onix/adapter.local.yaml new file mode 100644 index 0000000..ebaff50 --- /dev/null +++ b/config/onix/adapter.local.yaml @@ -0,0 +1,221 @@ +appName:
"onix" +log: + level: debug + destinations: + - type: stdout + contextKeys: + - transaction_id + - message_id + - subscriber_id + - module_id +http: + port: 8080 + timeout: + read: 30 + write: 30 + idle: 30 +pluginManager: + root: ./plugins + remoteRoot: "" +modules: + - name: bapTxnReciever + path: /bap/reciever/ + handler: + type: std + role: bap + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 10.81.192.4:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signValidator: + id: signvalidator + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bapTxnReciever-routing.yaml + middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bap + steps: + - validateSign + - addRoute + - validateSchema + - name: bapTxnCaller + path: /bap/caller/ + handler: + type: std + role: bap + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 192.168.1.1:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signer: + id: signer + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bapTxnCaller-routing.yaml + 
middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bap + steps: + - validateSchema + - addRoute + - sign + - name: bppTxnReciever + path: /bpp/reciever/ + handler: + type: std + role: bpp + subscriberId: bpp1 + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 192.168.1.1:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signValidator: + id: signvalidator + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bppTxnReciever-routing.yaml + middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bpp + steps: + - validateSign + - addRoute + - validateSchema + - name: bppTxnCaller + path: /bpp/caller/ + handler: + type: std + role: bpp + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 192.168.1.1:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signer: + id: signer + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bppTxnCaller-routing.yaml + middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bpp + 
steps: + - validateSchema + - addRoute + - sign diff --git a/config/onix/adapter.yaml b/config/onix/adapter.yaml index 403f616..4349e57 100644 --- a/config/onix/adapter.yaml +++ b/config/onix/adapter.yaml @@ -1,4 +1,4 @@ -appName: "onix" +appName: "onix-ev-charging" log: level: debug destinations: @@ -8,21 +8,49 @@ log: - message_id - subscriber_id - module_id + - parent_id + + +# OpenTelemetry (OTLP) - metrics and traces sent to OTEL collector, then to Loki/backend +plugins: + otelsetup: + id: otelsetup + config: + serviceName: "onix-ev-charging-bap" + serviceVersion: "1.0.0" + environment: "development" + domain: "ev_charging" + otlpEndpoint: "otel-collector:4317" + enableMetrics: "true" + networkMetricsGranularity: "2min" + networkMetricsFrequency: "4min" + enableTracing: "true" + enableLogs: "true" + timeInterval: "5" + auditFieldsConfig: "/app/config/audit-fields.yaml" + + +# this is the port for the bap plugin where bap app can dump the requests to the plugin http: - port: 8080 + port: 8001 timeout: read: 30 write: 30 idle: 30 + pluginManager: root: /app/plugins - remoteRoot: /mnt/gcs/plugins/plugins_bundle.zip + modules: - - name: bapTxnReciever - path: /bap/reciever/ + # BAP Receiver - Receives callbacks from CDS (Phase 1) and BPPs (Phase 2+) + # Phase 1: Receives on_search from CDS with aggregated catalog + # Phase 2+: Receives callbacks from BPPs (on_select, on_init, on_confirm, etc.) 
+ - name: bapTxnReceiver + path: /bap/receiver/ handler: type: std role: bap + subscriberId: ev-charging.sandbox1.com httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 @@ -32,47 +60,55 @@ modules: registry: id: registry config: - url: http://localhost:8080/reg + url: http://mock-registry:3030 retry_max: 3 retry_wait_min: 100ms retry_wait_max: 500ms keyManager: - id: secretskeymanager + id: simplekeymanager config: - projectID: ${projectID} + networkParticipant: example-bap.com + keyId: bap-key-1 + signingPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= + signingPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= + encrPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= + encrPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= cache: - id: redis + id: cache config: - addr: 10.81.192.4:6379 + addr: redis-bap:6379 schemaValidator: - id: schemavalidator + id: schemav2validator config: - schemaDir: /mnt/gcs/configs/schemas + type: url + location: https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/api/beckn.yaml + cacheTTL: "3600" signValidator: id: signvalidator - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever router: id: router config: - routingConfigPath: /mnt/gcs/configs/bapTxnReciever-routing.yaml + routingConfig: /app/config/bapTxnReciever-routing.yaml middleware: - id: reqpreprocessor config: - contextKeys: transaction_id,message_id + contextKeys: transaction_id,message_id,parent_id role: bap steps: - validateSign - addRoute - validateSchema + + # BAP Caller - Entry point for all requests from BAP + # Phase 1: Routes search to external CDS for aggregation + # Phase 2+: Routes other requests directly to BPP (bypasses CDS) + # Uses bpp_uri from context for dynamic routing in Phase 2+ - name: bapTxnCaller path: /bap/caller/ handler: type: std role: bap + subscriberId: ev-charging.sandbox1.com httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 @@ 
-82,140 +118,42 @@ modules: registry: id: registry config: - url: http://localhost:8080/reg + url: http://mock-registry:3030 retry_max: 3 retry_wait_min: 100ms retry_wait_max: 500ms keyManager: - id: secretskeymanager + id: simplekeymanager config: - projectID: ${projectID} + networkParticipant: example-bap.com + keyId: bap-key-1 + signingPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= + signingPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= + encrPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= + encrPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= cache: - id: redis + id: cache config: - addr: 192.168.1.1:6379 + addr: redis-bap:6379 schemaValidator: - id: schemavalidator + id: schemav2validator config: - schemaDir: /mnt/gcs/configs/schemas - signer: - id: signer - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever + type: url + location: https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/api/beckn.yaml + cacheTTL: "3600" router: id: router config: - routingConfigPath: /mnt/gcs/configs/bapTxnCaller-routing.yaml + routingConfig: /app/config/bapTxnCaller-routing.yaml + signer: + id: signer middleware: - id: reqpreprocessor config: - contextKeys: transaction_id,message_id + contextKeys: transaction_id,message_id,parent_id role: bap + steps: - validateSchema - addRoute - - sign - - name: bppTxnReciever - path: /bpp/reciever/ - handler: - type: std - role: bpp - subscriberId: bpp1 - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - 
signValidator: - id: signvalidator - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bppTxnReciever-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bpp - steps: - - validateSign - - addRoute - - validateSchema - - name: bppTxnCaller - path: /bpp/caller/ - handler: - type: std - role: bpp - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signer: - id: signer - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bppTxnCaller-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bpp - steps: - - validateSchema - - addRoute - - sign + - sign \ No newline at end of file diff --git a/config/onix/audit-fields.yaml b/config/onix/audit-fields.yaml new file mode 100644 index 0000000..3e332a2 --- /dev/null +++ b/config/onix/audit-fields.yaml @@ -0,0 +1,24 @@ +auditRules: + default: + - context.transaction_id + - context.message_id + - context.action + - context.domain + - context.bap_id + - context.bpp_id + + search: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.intent + + select: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:buyer.beckn:id + - 
message.order.beckn:orderItems.beckn:acceptedOffer.beckn:id + diff --git a/config/onix/bapTxnCaller-routing.yaml b/config/onix/bapTxnCaller-routing.yaml index b1d5a44..404d0fe 100644 --- a/config/onix/bapTxnCaller-routing.yaml +++ b/config/onix/bapTxnCaller-routing.yaml @@ -1,25 +1,41 @@ +# ONIX BAP Caller Routing Configuration + +# Supports Phase 1 (Discover Aggregation) and Phase 2+ (Direct BPP Routing) + +# Phase 1: Discover (Aggregation via CDS) + +# Phase 2+: Other Requests (Direct to BPP, NO CDS involvement) + +# These routes use bpp_uri from context (provided in on_discover aggregated response) + routingRules: - - domain: "ONDC:TRV10" - version: "2.0.0" - routingType: "bpp" + + # Phase 1: Discover to CDS + + - domain: ev_charging_network + version: "1.0.0" + targetType: url target: - url: "https://gateway.example.com" + url: http://mock-cds:8082/csd + excludeAction: false endpoints: - - search - - domain: "ONDC:TRV10" - version: "2.0.0" - routingType: "bpp" + - discover + + + + # Phase 2+: Other actions to BPP (via context_endpoint) + + - domain: ev_charging_network + version: "1.0.0" + targetType: bpp + target: {} endpoints: - select - init - confirm - status + - track - cancel - - domain: "ONDC:TRV12" - version: "2.0.0" - routingType: "bpp" - endpoints: - - select - - init - - confirm - - status \ No newline at end of file + - update + - rating + - support diff --git a/config/onix/bapTxnReciever-routing.yaml b/config/onix/bapTxnReciever-routing.yaml index ca4a478..dfdaa81 100644 --- a/config/onix/bapTxnReciever-routing.yaml +++ b/config/onix/bapTxnReciever-routing.yaml @@ -1,20 +1,47 @@ +# ONIX BAP Receiver Routing Configuration + +# Supports Phase 1 (Discover Aggregation) and Phase 2+ (Direct BPP Callbacks) + + + +# Phase 1: Discover Aggregation + +# Phase 2+: Other Callbacks (Direct from BPPs to BAP, NO CDS involvement) + +# These routes use bap_uri from context to route callbacks back to originating BAP + + + routingRules: - - domain: "ONDC:TRV10" - 
version: "2.0.0" - routingType: "url" + + # Phase 1: on_discover callback to BAP (routed to mock-bap for testing) + - domain: ev_charging_network + version: "1.0.0" + targetType: url target: - url: "https://services-backend/trv/v1" + url: http://mock-bap:9001 + excludeAction: false + endpoints: + - on_discover + + + + + # Phase 2+: Other callbacks to BAP (routed to mock-bap for testing) + + - domain: ev_charging_network + version: "1.0.0" + targetType: url + target: + url: http://mock-bap:9001 + excludeAction: false endpoints: - on_select - on_init - on_confirm - on_status - - on_update + - on_track - on_cancel - - domain: "ONDC:TRV10" - version: "2.0.0" - routingType: "msgq" - target: - topic_id: "trv_topic_id1" - endpoints: - - on_search \ No newline at end of file + - on_update + - on_rating + - on_support diff --git a/core/module/handler/http_metric.go b/core/module/handler/http_metric.go new file mode 100644 index 0000000..54e3323 --- /dev/null +++ b/core/module/handler/http_metric.go @@ -0,0 +1,120 @@ +package handler + +import ( + "context" + "fmt" + "net/http" + "strconv" + "sync" + "time" + + "github.com/beckn-one/beckn-onix/pkg/telemetry" + "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" +) + +type HTTPMetrics struct { + HttpRequestCount metric.Int64Counter +} + +var ( + httlMetricsInstance *HTTPMetrics + httpMetricsOnce sync.Once + httpMetricsErr error +) + +func newHTTPMetrics() (*HTTPMetrics, error) { + + meter := otel.GetMeterProvider().Meter(telemetry.ScopeName, + metric.WithInstrumentationVersion(telemetry.ScopeVersion)) + m := &HTTPMetrics{} + var err error + + if m.HttpRequestCount, err = meter.Int64Counter( + "onix_http_request_count", + metric.WithDescription("Total HTTP requests by status, route, method, role and calle "), + metric.WithUnit("1"), + ); err != nil { + return nil, fmt.Errorf("onix_http_request_count: %w", err) + } + + return m, nil +} + +func 
GetHTTPMetrics(ctx context.Context) (*HTTPMetrics, error) { + httpMetricsOnce.Do(func() { + httlMetricsInstance, httpMetricsErr = newHTTPMetrics() + }) + return httlMetricsInstance, httpMetricsErr +} + +// StatusClass returns the HTTP status class string (e.g. 200 -> "2xx"). +func StatusClass(statusCode int) string { + switch { + case statusCode >= 100 && statusCode < 200: + return "1xx" + case statusCode >= 200 && statusCode < 300: + return "2xx" + case statusCode >= 300 && statusCode < 400: + return "3xx" + case statusCode >= 400 && statusCode < 500: + return "4xx" + default: + return "5xx" + } +} + +func RecordHTTPRequest(ctx context.Context, statusCode int, action, role, caller string) { + m, err := GetHTTPMetrics(ctx) + if err != nil || m == nil { + return + } + status := StatusClass(statusCode) + attributes := []attribute.KeyValue{ + telemetry.AttrHTTPStatus.String(status), + telemetry.AttrAction.String(action), + telemetry.AttrRole.String(role), + telemetry.AttrCaller.String(caller), + } + + metric_code := action + "_api_total_count" + category := "NetworkHealth" + if action == "/search" || action == "/discovery" { + category = "Discovery" + } + attributes = append(attributes, specHttpMetricAttr(metric_code, category)...) 
//TODO: need to update as per the furthur discussion + m.HttpRequestCount.Add(ctx, 1, metric.WithAttributes(attributes...)) +} + +type responseRecorder struct { + http.ResponseWriter + statusCode int + written bool + record func() +} + +func (r *responseRecorder) WriteHeader(statusCode int) { + if !r.written { + r.written = true + r.statusCode = statusCode + if r.record != nil { + r.record() + } + } + r.ResponseWriter.WriteHeader(statusCode) +} + +func specHttpMetricAttr(metricCode, category string) []attribute.KeyValue { + + granularity, frequency := telemetry.GetNetworkMetricsConfig() + return []attribute.KeyValue{ + telemetry.AttrMetricUUID.String(uuid.New().String()), + telemetry.AttrMetricCode.String(metricCode), + telemetry.AttrMetricCategory.String(category), + telemetry.AttrMetricGranularity.String(granularity), + telemetry.AttrMetricFrequency.String(frequency), + telemetry.AttrObservedTimeUnixNano.String(strconv.FormatInt(time.Now().UnixNano(), 10)), + } +} diff --git a/core/module/handler/stdHandler.go b/core/module/handler/stdHandler.go index 9e9fefc..ab50ffb 100644 --- a/core/module/handler/stdHandler.go +++ b/core/module/handler/stdHandler.go @@ -7,12 +7,22 @@ import ( "io" "net/http" "net/http/httputil" + "strconv" + "time" "github.com/beckn-one/beckn-onix/pkg/log" "github.com/beckn-one/beckn-onix/pkg/model" "github.com/beckn-one/beckn-onix/pkg/plugin" "github.com/beckn-one/beckn-onix/pkg/plugin/definition" "github.com/beckn-one/beckn-onix/pkg/response" + "github.com/beckn-one/beckn-onix/pkg/telemetry" + "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + auditlog "go.opentelemetry.io/otel/log" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/trace" ) // stdHandler orchestrates the execution of defined processing steps. 
@@ -94,31 +104,74 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { r.Header.Del("X-Role") }() - ctx, err := h.stepCtx(r, w.Header()) + // to start a new trace + propagator := otel.GetTextMapPropagator() + traceCtx := propagator.Extract(r.Context(), propagation.HeaderCarrier(r.Header)) + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) + spanName := r.URL.Path + traceCtx, span := tracer.Start(traceCtx, spanName, trace.WithSpanKind(trace.SpanKindServer)) + + //to build the request with trace + r = r.WithContext(traceCtx) + + var recordOnce func() + wrapped := &responseRecorder{ + ResponseWriter: w, + statusCode: http.StatusOK, + record: nil, + } + + caller := "unknown" + if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok && v != "" { + caller = v + } + httpMeter, _ := GetHTTPMetrics(r.Context()) + if httpMeter != nil { + recordOnce = func() { + RecordHTTPRequest(r.Context(), wrapped.statusCode, r.URL.Path, string(h.role), caller) + } + wrapped.record = recordOnce + } + + // set beckn attribute + setBecknAttr(span, r, h) + + stepCtx, err := h.stepCtx(r, w.Header()) if err != nil { log.Errorf(r.Context(), err, "stepCtx(r):%v", err) - response.SendNack(r.Context(), w, err) + response.SendNack(r.Context(), wrapped, err) return } - log.Request(r.Context(), r, ctx.Body) + log.Request(r.Context(), r, stepCtx.Body) + + defer func() { + span.SetAttributes(attribute.Int("http.response.status_code", wrapped.statusCode), attribute.String("observedTimeUnixNano", strconv.FormatInt(time.Now().UnixNano(), 10))) + if wrapped.statusCode < 200 || wrapped.statusCode >= 400 { + span.SetStatus(codes.Error, "status code is invalid") + } + + body := stepCtx.Body + go telemetry.EmitAuditLogs(r.Context(), body, auditlog.Int("http.response.status_code", wrapped.statusCode)) + span.End() + }() // Execute processing steps. 
for _, step := range h.steps { - if err := step.Run(ctx); err != nil { - log.Errorf(ctx, err, "%T.run():%v", step, err) - response.SendNack(ctx, w, err) + if err := step.Run(stepCtx); err != nil { + log.Errorf(stepCtx, err, "%T.run():%v", step, err) + response.SendNack(stepCtx, wrapped, err) return } } // Restore request body before forwarding or publishing. - r.Body = io.NopCloser(bytes.NewReader(ctx.Body)) - if ctx.Route == nil { + r.Body = io.NopCloser(bytes.NewReader(stepCtx.Body)) + if stepCtx.Route == nil { response.SendAck(w) return } // Handle routing based on the defined route type. - route(ctx, r, w, h.publisher, h.httpClient) + route(stepCtx, r, wrapped, h.publisher, h.httpClient) } // stepCtx creates a new StepContext for processing an HTTP request. @@ -321,3 +374,42 @@ func (h *stdHandler) initSteps(ctx context.Context, mgr PluginManager, cfg *Conf log.Infof(ctx, "Processor steps initialized: %v", cfg.Steps) return nil } + +func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { + recipientID := h.SubscriberID + + if v, ok := r.Context().Value(model.ContextKeySubscriberID).(string); ok { + recipientID = v + } + senderID := "" + if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok { + senderID = v + } + + attrs := []attribute.KeyValue{ + attribute.String("recipient.id", recipientID), + attribute.String("sender.id", senderID), + attribute.String("span_uuid", uuid.New().String()), + attribute.String("http.request.method", r.Method), + attribute.String("http.route", r.URL.Path), + } + + if trxID, ok := r.Context().Value(model.ContextKeyTxnID).(string); ok { + attrs = append(attrs, attribute.String("transaction_id", trxID)) + } + if mesID, ok := r.Context().Value(model.ContextKeyMsgID).(string); ok { + attrs = append(attrs, attribute.String("message_id", mesID)) + } + if parentID, ok := r.Context().Value(model.ContextKeyParentID).(string); ok && parentID != "" { + attrs = append(attrs, attribute.String("parentSpanId", parentID)) 
+ } + if r.Host != "" { + attrs = append(attrs, attribute.String("server.address", r.Host)) + } + + if ua := r.UserAgent(); ua != "" { + attrs = append(attrs, attribute.String("user_agent.original", ua)) + } + + span.SetAttributes(attrs...) +} diff --git a/core/module/handler/step.go b/core/module/handler/step.go index f985031..2ea061f 100644 --- a/core/module/handler/step.go +++ b/core/module/handler/step.go @@ -7,7 +7,9 @@ import ( "strings" "time" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" "github.com/beckn-one/beckn-onix/pkg/log" "github.com/beckn-one/beckn-onix/pkg/model" @@ -38,24 +40,42 @@ func (s *signStep) Run(ctx *model.StepContext) error { if len(ctx.SubID) == 0 { return model.NewBadReqErr(fmt.Errorf("subscriberID not set")) } - keySet, err := s.km.Keyset(ctx, ctx.SubID) - if err != nil { - return fmt.Errorf("failed to get signing key: %w", err) - } - createdAt := time.Now().Unix() - validTill := time.Now().Add(5 * time.Minute).Unix() - sign, err := s.signer.Sign(ctx, ctx.Body, keySet.SigningPrivate, createdAt, validTill) - if err != nil { - return fmt.Errorf("failed to sign request: %w", err) + + tracer := otel.Tracer("beckn-onix") + + var keySet *model.Keyset + { + // start a span for the key set lookup + keySetCtx, keySetSpan := tracer.Start(ctx.Context, "keyset") + defer keySetSpan.End() + ks, err := s.km.Keyset(keySetCtx, ctx.SubID) + if err != nil { + return fmt.Errorf("failed to get signing key: %w", err) + } + keySet = ks + } - authHeader := s.generateAuthHeader(ctx.SubID, keySet.UniqueKeyID, createdAt, validTill, sign) - log.Debugf(ctx, "Signature generated: %v", sign) - header := model.AuthHeaderSubscriber - if ctx.Role == model.RoleGateway { - header = model.AuthHeaderGateway + { + // start a span for signing the request + signerCtx, signerSpan := tracer.Start(ctx.Context, "sign") + defer signerSpan.End() + createdAt := time.Now().Unix() + validTill := time.Now().Add(5 * time.Minute).Unix() + 
sign, err := s.signer.Sign(signerCtx, ctx.Body, keySet.SigningPrivate, createdAt, validTill) + if err != nil { + return fmt.Errorf("failed to sign request: %w", err) + } + authHeader := s.generateAuthHeader(ctx.SubID, keySet.UniqueKeyID, createdAt, validTill, sign) + log.Debugf(ctx, "Signature generated: %v", sign) + header := model.AuthHeaderSubscriber + if ctx.Role == model.RoleGateway { + header = model.AuthHeaderGateway + } + ctx.Request.Header.Set(header, authHeader) + } - ctx.Request.Header.Set(header, authHeader) + return nil } @@ -93,8 +113,20 @@ func newValidateSignStep(signValidator definition.SignValidator, km definition.K // Run executes the validation step. func (s *validateSignStep) Run(ctx *model.StepContext) error { - err := s.validateHeaders(ctx) - s.recordMetrics(ctx, err) + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) + spanCtx, span := tracer.Start(ctx.Context, "validate-sign") + defer span.End() + stepCtx := &model.StepContext{ + Context: spanCtx, + Request: ctx.Request, + Body: ctx.Body, + Role: ctx.Role, + SubID: ctx.SubID, + RespHeader: ctx.RespHeader, + Route: ctx.Route, + } + err := s.validateHeaders(stepCtx) + s.recordMetrics(stepCtx, err) return err } diff --git a/core/module/handler/step_instrumentor.go b/core/module/handler/step_instrumentor.go index 0869304..8b1787f 100644 --- a/core/module/handler/step_instrumentor.go +++ b/core/module/handler/step_instrumentor.go @@ -6,8 +6,10 @@ import ( "fmt" "time" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" "github.com/beckn-one/beckn-onix/pkg/log" "github.com/beckn-one/beckn-onix/pkg/model" @@ -52,18 +54,34 @@ func (is *InstrumentedStep) Run(ctx *model.StepContext) error { return is.step.Run(ctx) } + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) + stepName := "step:" + is.stepName + spanCtx, span 
:= tracer.Start(ctx.Context, stepName) + defer span.End() + + // run step with context that contains the step span + stepCtx := &model.StepContext{ + Context: spanCtx, + Request: ctx.Request, + Body: ctx.Body, + Role: ctx.Role, + SubID: ctx.SubID, + RespHeader: ctx.RespHeader, + Route: ctx.Route, + } + start := time.Now() - err := is.step.Run(ctx) + err := is.step.Run(stepCtx) duration := time.Since(start).Seconds() attrs := []attribute.KeyValue{ telemetry.AttrModule.String(is.moduleName), telemetry.AttrStep.String(is.stepName), - telemetry.AttrRole.String(string(ctx.Role)), + telemetry.AttrRole.String(string(stepCtx.Role)), } - is.metrics.StepExecutionTotal.Add(ctx.Context, 1, metric.WithAttributes(attrs...)) - is.metrics.StepExecutionDuration.Record(ctx.Context, duration, metric.WithAttributes(attrs...)) + is.metrics.StepExecutionTotal.Add(stepCtx.Context, 1, metric.WithAttributes(attrs...)) + is.metrics.StepExecutionDuration.Record(stepCtx.Context, duration, metric.WithAttributes(attrs...)) if err != nil { errorType := fmt.Sprintf("%T", err) @@ -75,10 +93,12 @@ func (is *InstrumentedStep) Run(ctx *model.StepContext) error { } errorAttrs := append(attrs, telemetry.AttrErrorType.String(errorType)) - is.metrics.StepErrorsTotal.Add(ctx.Context, 1, metric.WithAttributes(errorAttrs...)) - log.Errorf(ctx.Context, err, "Step %s failed", is.stepName) + is.metrics.StepErrorsTotal.Add(stepCtx.Context, 1, metric.WithAttributes(errorAttrs...)) + log.Errorf(stepCtx.Context, err, "Step %s failed", is.stepName) } + if stepCtx.Route != nil { + ctx.Route = stepCtx.Route + } return err } - diff --git a/core/module/handler/step_metrics.go b/core/module/handler/step_metrics.go index e3fc418..a4c3f74 100644 --- a/core/module/handler/step_metrics.go +++ b/core/module/handler/step_metrics.go @@ -5,6 +5,7 @@ import ( "fmt" "sync" + "github.com/beckn-one/beckn-onix/pkg/telemetry" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/metric" ) @@ -32,8 +33,8 @@ func GetStepMetrics(ctx 
context.Context) (*StepMetrics, error) { func newStepMetrics() (*StepMetrics, error) { meter := otel.GetMeterProvider().Meter( - "github.com/beckn-one/beckn-onix/telemetry", - metric.WithInstrumentationVersion("1.0.0"), + telemetry.ScopeName, + metric.WithInstrumentationVersion(telemetry.ScopeVersion), ) m := &StepMetrics{} @@ -66,4 +67,3 @@ func newStepMetrics() (*StepMetrics, error) { return m, nil } - diff --git a/core/module/handler/step_metrics_test.go b/core/module/handler/step_metrics_test.go index 777821b..c2498f6 100644 --- a/core/module/handler/step_metrics_test.go +++ b/core/module/handler/step_metrics_test.go @@ -2,7 +2,7 @@ package handler import ( "context" - "net/http/httptest" + "sync" "testing" "go.opentelemetry.io/otel/metric" @@ -103,11 +103,8 @@ func TestStepMetrics_Instruments(t *testing.T) { metric.WithAttributes(telemetry.AttrStep.String("test-step"), telemetry.AttrModule.String("test-module"))) }, "StepErrorsTotal.Add should not panic") - // Verify metrics are exposed via HTTP handler - rec := httptest.NewRecorder() - req := httptest.NewRequest("GET", "/metrics", nil) - provider.MetricsHandler.ServeHTTP(rec, req) - assert.Equal(t, 200, rec.Code, "Metrics endpoint should return 200") + // MeterProvider is set by NewTestProvider; metrics are recorded via OTel SDK + assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set") } func TestStepMetrics_MultipleCalls(t *testing.T) { @@ -129,3 +126,113 @@ func TestStepMetrics_MultipleCalls(t *testing.T) { } } +func TestStepMetrics_RecordWithDifferentAttributes(t *testing.T) { + ctx := context.Background() + provider, err := telemetry.NewTestProvider(ctx) + require.NoError(t, err) + defer provider.Shutdown(context.Background()) + + metrics, err := GetStepMetrics(ctx) + require.NoError(t, err) + require.NotNil(t, metrics) + + attrsList := []struct { + step string + module string + }{ + {"test-step", "test-module"}, + {"", "module-only"}, + {"step-only", ""}, + {"", ""}, + 
{"long-step-name-with-many-parts", "long-module-name"}, + } + + for _, a := range attrsList { + attrs := metric.WithAttributes( + telemetry.AttrStep.String(a.step), + telemetry.AttrModule.String(a.module), + ) + require.NotPanics(t, func() { + metrics.StepExecutionDuration.Record(ctx, 0.01, attrs) + metrics.StepExecutionTotal.Add(ctx, 1, attrs) + metrics.StepErrorsTotal.Add(ctx, 0, attrs) + }, "Recording with step=%q module=%q should not panic", a.step, a.module) + } +} + +func TestStepMetrics_DurationValues(t *testing.T) { + ctx := context.Background() + provider, err := telemetry.NewTestProvider(ctx) + require.NoError(t, err) + defer provider.Shutdown(context.Background()) + + metrics, err := GetStepMetrics(ctx) + require.NoError(t, err) + require.NotNil(t, metrics) + + attrs := metric.WithAttributes( + telemetry.AttrStep.String("test-step"), + telemetry.AttrModule.String("test-module"), + ) + + durations := []float64{0, 0.0005, 0.001, 0.01, 0.1, 0.5} + for _, d := range durations { + d := d + require.NotPanics(t, func() { + metrics.StepExecutionDuration.Record(ctx, d, attrs) + }, "StepExecutionDuration.Record(%.4f) should not panic", d) + } +} + +func TestStepMetrics_ConcurrentRecord(t *testing.T) { + ctx := context.Background() + provider, err := telemetry.NewTestProvider(ctx) + require.NoError(t, err) + defer provider.Shutdown(context.Background()) + + metrics, err := GetStepMetrics(ctx) + require.NoError(t, err) + require.NotNil(t, metrics) + + attrs := metric.WithAttributes( + telemetry.AttrStep.String("concurrent-step"), + telemetry.AttrModule.String("concurrent-module"), + ) + + var wg sync.WaitGroup + for i := 0; i < 20; i++ { + wg.Add(1) + go func() { + defer wg.Done() + metrics.StepExecutionDuration.Record(ctx, 0.05, attrs) + metrics.StepExecutionTotal.Add(ctx, 1, attrs) + metrics.StepErrorsTotal.Add(ctx, 0, attrs) + }() + } + wg.Wait() +} + +func TestStepMetrics_WithTraceProvider(t *testing.T) { + ctx := context.Background() + provider, sr, err := 
telemetry.NewTestProviderWithTrace(ctx) + require.NoError(t, err) + require.NotNil(t, provider) + require.NotNil(t, sr) + defer provider.Shutdown(ctx) + + metrics, err := GetStepMetrics(ctx) + require.NoError(t, err) + require.NotNil(t, metrics) + assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set") + assert.NotNil(t, provider.TraceProvider, "TraceProvider should be set") + + attrs := metric.WithAttributes( + telemetry.AttrStep.String("trace-test-step"), + telemetry.AttrModule.String("trace-test-module"), + ) + require.NotPanics(t, func() { + metrics.StepExecutionDuration.Record(ctx, 0.1, attrs) + metrics.StepExecutionTotal.Add(ctx, 1, attrs) + metrics.StepErrorsTotal.Add(ctx, 0, attrs) + }, "Step metrics should work when trace provider is also set") +} diff --git a/go.mod b/go.mod index b53f3db..2030ae7 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.24.0 require ( github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 - golang.org/x/crypto v0.36.0 + golang.org/x/crypto v0.47.0 ) require github.com/stretchr/testify v1.11.1 @@ -19,18 +19,20 @@ require ( require github.com/zenazn/pkcs7pad v0.0.0-20170308005700-253a5b1f0e03 -require golang.org/x/text v0.26.0 // indirect +require golang.org/x/text v0.33.0 // indirect require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect - github.com/go-jose/go-jose/v4 v4.0.1 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect 
github.com/hashicorp/go-rootcerts v1.0.2 // indirect @@ -55,12 +57,17 @@ require ( github.com/redis/go-redis/extra/rediscmd/v9 v9.16.0 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect github.com/woodsbury/decimal128 v1.3.0 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/otel/trace v1.38.0 // indirect - golang.org/x/net v0.38.0 // indirect - golang.org/x/sys v0.38.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.16.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect + go.opentelemetry.io/otel/log v0.16.0 // indirect + go.opentelemetry.io/proto/otlp v1.9.0 // indirect + golang.org/x/net v0.49.0 // indirect + golang.org/x/sys v0.40.0 // indirect golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1 // indirect - google.golang.org/protobuf v1.32.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect + google.golang.org/protobuf v1.36.11 // indirect ) require ( @@ -74,13 +81,18 @@ require ( github.com/redis/go-redis/extra/redisotel/v9 v9.16.0 github.com/redis/go-redis/v9 v9.16.0 github.com/rs/zerolog v1.34.0 - go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0 - go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/contrib/instrumentation/runtime v0.64.0 + go.opentelemetry.io/otel v1.40.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0 go.opentelemetry.io/otel/exporters/prometheus v0.46.0 - go.opentelemetry.io/otel/metric v1.38.0 - go.opentelemetry.io/otel/sdk v1.38.0 - go.opentelemetry.io/otel/sdk/metric v1.38.0 + go.opentelemetry.io/otel/metric v1.40.0 + go.opentelemetry.io/otel/sdk v1.40.0 + go.opentelemetry.io/otel/sdk/log v0.16.0 + 
go.opentelemetry.io/otel/sdk/metric v1.40.0 + go.opentelemetry.io/otel/trace v1.40.0 go.uber.org/automaxprocs v1.6.0 + google.golang.org/grpc v1.78.0 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v2 v2.4.0 ) diff --git a/go.sum b/go.sum index e684441..0e2eb1a 100644 --- a/go.sum +++ b/go.sum @@ -8,6 +8,8 @@ github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= @@ -24,8 +26,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/getkin/kin-openapi v0.133.0 h1:pJdmNohVIJ97r4AUFtEXRXwESr8b0bD721u/Tz6k8PQ= github.com/getkin/kin-openapi v0.133.0/go.mod h1:boAciF6cXk5FhPqe/NQeBTeenbjqU4LhWBf09ILVvWE= -github.com/go-jose/go-jose/v4 v4.0.1 h1:QVEPDE3OluqXBQZDcnNvQrInro2h0e4eqNbnZSWqS6U= -github.com/go-jose/go-jose/v4 v4.0.1/go.mod h1:WVf9LFMHh/QVrmqrOfqun0C45tMe3RoiKJMPvgWwLfY= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= 
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -38,10 +40,16 @@ github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM= github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 h1:X+2YciYSxvMQK0UZ7sg45ZVabVZBeBuvMkmuI2V3Fak= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fmfp2l3+ZkQ2me/+oecHYLOII= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -125,8 +133,8 @@ github.com/redis/go-redis/extra/redisotel/v9 v9.16.0 h1:+a9h9qxFXdf3gX0FXnDcz7X4 github.com/redis/go-redis/extra/redisotel/v9 v9.16.0/go.mod h1:EtTTC7vnKWgznfG6kBgl9ySLqd7NckRCFUBzVXdeHeI= github.com/redis/go-redis/v9 v9.16.0 h1:OotgqgLSRCmzfqChbQyG1PHC3tLNR89DG4jdOERSEP4= github.com/redis/go-redis/v9 v9.16.0/go.mod 
h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370= -github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= @@ -147,42 +155,78 @@ github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIj github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= github.com/zenazn/pkcs7pad v0.0.0-20170308005700-253a5b1f0e03 h1:m1h+vudopHsI67FPT9MOncyndWhTcdUoBtI1R1uajGY= github.com/zenazn/pkcs7pad v0.0.0-20170308005700-253a5b1f0e03/go.mod h1:8sheVFH84v3PCyFY/O02mIgSQY9I6wMYPWsq7mDnEZY= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0 h1:PeBoRj6af6xMI7qCupwFvTbbnd49V7n5YpG6pg8iDYQ= -go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0/go.mod h1:ingqBCtMCe8I4vpz/UVzCW6sxoqgZB37nao91mLQ3Bw= -go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= -go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/runtime v0.64.0 h1:/+/+UjlXjFcdDlXxKL1PouzX8Z2Vl0OxolRKeBEgYDw= +go.opentelemetry.io/contrib/instrumentation/runtime 
v0.64.0/go.mod h1:Ldm/PDuzY2DP7IypudopCR3OCOW42NJlN9+mNEroevo= +go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.16.0 h1:ZVg+kCXxd9LtAaQNKBxAvJ5NpMf7LpvEr4MIZqb0TMQ= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.16.0/go.mod h1:hh0tMeZ75CCXrHd9OXRYxTlCAdxcXioWHFIpYw2rZu8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0 h1:cEf8jF6WbuGQWUVcqgyWtTR0kOOAWY1DYZ+UhvdmQPw= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.39.0/go.mod h1:k1lzV5n5U3HkGvTCJHraTAGJ7MqsgL1wrGwTj1Isfiw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 h1:f0cb2XPmrqn4XMy9PNliTgRKJgS5WcL/u0/WRYGz4t0= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0/go.mod h1:vnakAaFckOMiMtOIhFI2MNH4FYrZzXCYxmb1LlhoGz8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0 h1:in9O8ESIOlwJAEGTkkf34DesGRAc/Pn8qJ7k3r/42LM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0/go.mod h1:Rp0EXBm5tfnv0WL+ARyO/PHBEaEAT8UUHQ6AGJcSq6c= go.opentelemetry.io/otel/exporters/prometheus v0.46.0 h1:I8WIFXR351FoLJYuloU4EgXbtNX2URfU/85pUPheIEQ= go.opentelemetry.io/otel/exporters/prometheus v0.46.0/go.mod h1:ztwVUHe5DTR/1v7PeuGRnU5Bbd4QKYwApWmuutKsJSs= -go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= -go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= -go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= -go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= -go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= -go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= -go.opentelemetry.io/otel/trace 
v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= -go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/otel/log v0.16.0 h1:DeuBPqCi6pQwtCK0pO4fvMB5eBq6sNxEnuTs88pjsN4= +go.opentelemetry.io/otel/log v0.16.0/go.mod h1:rWsmqNVTLIA8UnwYVOItjyEZDbKIkMxdQunsIhpUMes= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= +go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= +go.opentelemetry.io/otel/sdk/log v0.16.0 h1:e/b4bdlQwC5fnGtG3dlXUrNOnP7c8YLVSpSfEBIkTnI= +go.opentelemetry.io/otel/sdk/log v0.16.0/go.mod h1:JKfP3T6ycy7QEuv3Hj8oKDy7KItrEkus8XJE6EoSzw4= +go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= +go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= +go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A= +go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= -golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= 
-golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU= +golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc= +golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= +golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1 h1:NusfzzA6yGQ+ua51ck7E3omNUX/JuqbFSaRGqU8CcLI= golang.org/x/time 
v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= -google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= +google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= +google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 h1:merA0rdPeUV3YIIfHHcH4qBkiQAc1nfCKSI7lB4cV2M= +google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409/go.mod h1:fl8J1IvUjCilwZzQowmw2b7HQB2eAuYBabMXzWurF+I= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= +google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= +google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= +google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +google.golang.org/protobuf 
v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/pkg/log/log.go b/pkg/log/log.go index e1c788c..a80ebea 100644 --- a/pkg/log/log.go +++ b/pkg/log/log.go @@ -14,6 +14,7 @@ import ( "github.com/beckn-one/beckn-onix/pkg/model" "github.com/rs/zerolog" + "go.opentelemetry.io/otel/trace" "gopkg.in/natefinch/lumberjack.v2" ) @@ -273,6 +274,11 @@ func Request(ctx context.Context, r *http.Request, body []byte) { // addCtx adds context values to the log event based on configured context keys. func addCtx(ctx context.Context, event *zerolog.Event) { + span := trace.SpanFromContext(ctx) + if span.SpanContext().IsValid() { + event.Str("trace_id", span.SpanContext().TraceID().String()) + event.Str("span_id", span.SpanContext().SpanID().String()) + } for _, key := range cfg.ContextKeys { val, ok := ctx.Value(key).(string) if !ok { diff --git a/pkg/model/model.go b/pkg/model/model.go index de53254..ac3dc5b 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -56,6 +56,9 @@ const ( // ContextKeyParentID is the context key for storing and retrieving the parent ID from a request context ContextKeyParentID ContextKey = "parent_id" + + // ContextKeyCallerID is the context key for the caller who is calling the bap/bpp + ContextKeyCallerID ContextKey = "caller_id" ) var contextKeys = map[string]ContextKey{ @@ -64,6 +67,7 @@ var contextKeys = map[string]ContextKey{ "subscriber_id": ContextKeySubscriberID, "module_id": ContextKeyModuleID, "parent_id": ContextKeyParentID, + "caller_id": ContextKeyCallerID, } // ParseContextKey converts a string into a valid 
ContextKey. diff --git a/pkg/plugin/implementation/cache/cache.go b/pkg/plugin/implementation/cache/cache.go index fe91c17..334e0f1 100644 --- a/pkg/plugin/implementation/cache/cache.go +++ b/pkg/plugin/implementation/cache/cache.go @@ -8,13 +8,14 @@ import ( "os" "time" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/metric" - "github.com/beckn-one/beckn-onix/pkg/log" "github.com/beckn-one/beckn-onix/pkg/telemetry" "github.com/redis/go-redis/extra/redisotel/v9" "github.com/redis/go-redis/v9" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" ) // RedisCl global variable for the Redis client, can be overridden in tests @@ -103,10 +104,6 @@ func New(ctx context.Context, cfg *Config) (*Cache, func() error, error) { log.Debugf(ctx, "Failed to instrument Redis tracing: %v", err) } - if err := redisotel.InstrumentMetrics(redisClient); err != nil { - // Log error but don't fail - instrumentation is optional - log.Debugf(ctx, "Failed to instrument Redis metrics: %v", err) - } } metrics, _ := GetCacheMetrics(ctx) @@ -141,8 +138,12 @@ func (c *Cache) Get(ctx context.Context, key string) (string, error) { // Set stores the given key-value pair in Redis with the specified TTL (time to live). 
func (c *Cache) Set(ctx context.Context, key, value string, ttl time.Duration) error { - err := c.Client.Set(ctx, key, value, ttl).Err() - c.recordOperation(ctx, "set", err) + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) + spanCtx, span := tracer.Start(ctx, "redis_set") + defer span.End() + + err := c.Client.Set(spanCtx, key, value, ttl).Err() + c.recordOperation(spanCtx, "set", err) return err } diff --git a/pkg/plugin/implementation/otelsetup/cmd/plugin.go b/pkg/plugin/implementation/otelsetup/cmd/plugin.go index 260231e..a0407d7 100644 --- a/pkg/plugin/implementation/otelsetup/cmd/plugin.go +++ b/pkg/plugin/implementation/otelsetup/cmd/plugin.go @@ -4,9 +4,11 @@ import ( "context" "errors" "strconv" + "strings" "time" "github.com/beckn-one/beckn-onix/pkg/log" + "github.com/beckn-one/beckn-onix/pkg/model" "github.com/beckn-one/beckn-onix/pkg/plugin/implementation/otelsetup" "github.com/beckn-one/beckn-onix/pkg/telemetry" ) @@ -27,31 +29,81 @@ func (m metricsProvider) New(ctx context.Context, config map[string]string) (*te ServiceName: config["serviceName"], ServiceVersion: config["serviceVersion"], Environment: config["environment"], - MetricsPort: config["metricsPort"], + Domain: config["domain"], + OtlpEndpoint: config["otlpEndpoint"], + } + + // to extract the device id from the parent id from context + var deviceId string + var producer string + var producerType string + var err error + if v := ctx.Value(model.ContextKeyParentID); v != nil { + parentID := v.(string) + p := strings.Split(parentID, ":") + deviceId = p[len(p)-1] + producerType = p[0] + producer = p[1] + } + + if deviceId != "" { + telemetryConfig.DeviceID = deviceId + } + + if producer != "" { + telemetryConfig.Producer = producer + } + if producerType != "" { + telemetryConfig.ProducerType = producerType + } + + // Parse enableTracing from config + if enableTracingStr, ok := config["enableTracing"]; ok && enableTracingStr != "" { + 
telemetryConfig.EnableTracing, err = strconv.ParseBool(enableTracingStr) + if err != nil { + log.Warnf(ctx, "Invalid enableTracing value: %s, defaulting to False", enableTracingStr) + } } // Parse enableMetrics as boolean if enableMetricsStr, ok := config["enableMetrics"]; ok && enableMetricsStr != "" { - enableMetrics, err := strconv.ParseBool(enableMetricsStr) + telemetryConfig.EnableMetrics, err = strconv.ParseBool(enableMetricsStr) if err != nil { - log.Warnf(ctx, "Invalid enableMetrics value '%s', defaulting to true: %v", enableMetricsStr, err) - telemetryConfig.EnableMetrics = true - } else { - telemetryConfig.EnableMetrics = enableMetrics + log.Warnf(ctx, "Invalid enableMetrics value '%s', defaulting to False: %v", enableMetricsStr, err) } - } else { - telemetryConfig.EnableMetrics = true // Default to true if not specified or empty } - // Apply defaults if fields are empty - if telemetryConfig.ServiceName == "" { - telemetryConfig.ServiceName = otelsetup.DefaultConfig().ServiceName + // Parse enableLogs as boolean + if enableLogsStr, ok := config["enableLogs"]; ok && enableLogsStr != "" { + telemetryConfig.EnableLogs, err = strconv.ParseBool(enableLogsStr) + if err != nil { + log.Warnf(ctx, "Invalid enableLogs value '%s', defaulting to False: %v", enableLogsStr, err) + } } - if telemetryConfig.ServiceVersion == "" { - telemetryConfig.ServiceVersion = otelsetup.DefaultConfig().ServiceVersion + + // Parse timeInterval as int + if timeIntervalStr, ok := config["timeInterval"]; ok && timeIntervalStr != "" { + telemetryConfig.TimeInterval, err = strconv.ParseInt(timeIntervalStr, 10, 64) + if err != nil { + log.Warnf(ctx, "Invalid timeInterval value: %s, defaulting to 5 second ", timeIntervalStr) + } + } - if telemetryConfig.Environment == "" { - telemetryConfig.Environment = otelsetup.DefaultConfig().Environment + + // to set fields for audit logs + if v, ok := config["auditFieldsConfig"]; ok && v != "" { + if err := telemetry.LoadAuditFieldRules(ctx, v); err != 
nil { + log.Warnf(ctx, "Failed to load audit field rules: %v", err) + } + } + + //to set network leval matric frequency and granularity + if v, ok := config["networkMetricsGranularity"]; ok && v != "" { + telemetry.SetNetworkMetricsConfig(v, "") + } + + if v, ok := config["networkMetricsFrequency"]; ok && v != "" { + telemetry.SetNetworkMetricsConfig("", v) } log.Debugf(ctx, "Telemetry config mapped: %+v", telemetryConfig) diff --git a/pkg/plugin/implementation/otelsetup/otelsetup.go b/pkg/plugin/implementation/otelsetup/otelsetup.go index 4b52d78..adccc5e 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup.go @@ -3,23 +3,25 @@ package otelsetup import ( "context" "fmt" - "net" - "net/http" - "sync" - "time" - clientprom "github.com/prometheus/client_golang/prometheus" - clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp" - "go.opentelemetry.io/contrib/instrumentation/runtime" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/attribute" - otelprom "go.opentelemetry.io/otel/exporters/prometheus" - "go.opentelemetry.io/otel/sdk/metric" - "go.opentelemetry.io/otel/sdk/resource" + "time" "github.com/beckn-one/beckn-onix/pkg/log" "github.com/beckn-one/beckn-onix/pkg/plugin" "github.com/beckn-one/beckn-onix/pkg/telemetry" + "go.opentelemetry.io/contrib/instrumentation/runtime" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/log/global" + logsdk "go.opentelemetry.io/otel/sdk/log" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) // Setup wires the telemetry provider. 
This is the concrete implementation @@ -30,9 +32,16 @@ type Setup struct{} type Config struct { ServiceName string `yaml:"serviceName"` ServiceVersion string `yaml:"serviceVersion"` - EnableMetrics bool `yaml:"enableMetrics"` Environment string `yaml:"environment"` - MetricsPort string `yaml:"metricsPort"` + Domain string `yaml:"domain"` + DeviceID string `yaml:"deviceID"` + EnableMetrics bool `yaml:"enableMetrics"` + EnableTracing bool `yaml:"enableTracing"` + EnableLogs bool `yaml:"enableLogs"` + OtlpEndpoint string `yaml:"otlpEndpoint"` + TimeInterval int64 `yaml:"timeInterval"` + Producer string `yaml:"producer"` + ProducerType string `yaml:"producerType"` } // DefaultConfig returns sensible defaults for telemetry configuration. @@ -40,9 +49,11 @@ func DefaultConfig() *Config { return &Config{ ServiceName: "beckn-onix", ServiceVersion: "dev", - EnableMetrics: true, Environment: "development", - MetricsPort: "9090", + Domain: "", + DeviceID: "beckn-onix-device", + OtlpEndpoint: "localhost:4317", + TimeInterval: 5, } } @@ -53,9 +64,11 @@ func ToPluginConfig(cfg *Config) *plugin.Config { Config: map[string]string{ "serviceName": cfg.ServiceName, "serviceVersion": cfg.ServiceVersion, - "enableMetrics": fmt.Sprintf("%t", cfg.EnableMetrics), "environment": cfg.Environment, - "metricsPort": cfg.MetricsPort, + "enableMetrics": fmt.Sprintf("%t", cfg.EnableMetrics), + "enableTracing": fmt.Sprintf("%t", cfg.EnableTracing), + "otelEndpoint": cfg.OtlpEndpoint, + "deviceID": cfg.DeviceID, }, } } @@ -78,92 +91,126 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) if cfg.Environment == "" { cfg.Environment = DefaultConfig().Environment } - if cfg.MetricsPort == "" { - cfg.MetricsPort = DefaultConfig().MetricsPort + if cfg.Domain == "" { + cfg.Domain = DefaultConfig().Domain + } + if cfg.DeviceID == "" { + cfg.DeviceID = DefaultConfig().DeviceID + } + if cfg.TimeInterval == 0 { + cfg.TimeInterval = DefaultConfig().TimeInterval } - if 
!cfg.EnableMetrics { - log.Info(ctx, "OpenTelemetry metrics disabled") + if !cfg.EnableMetrics && !cfg.EnableTracing { + log.Info(ctx, "OpenTelemetry metrics and tracing are disabled") return &telemetry.Provider{ Shutdown: func(context.Context) error { return nil }, }, nil } - res, err := resource.New( - ctx, - resource.WithAttributes( - attribute.String("service.name", cfg.ServiceName), - attribute.String("service.version", cfg.ServiceVersion), - attribute.String("deployment.environment", cfg.Environment), - ), - ) + //this will be used by both matric and traces + + // to build resource with envelope metadata + baseAttrs := []attribute.KeyValue{ + attribute.String("service.name", cfg.ServiceName), + attribute.String("service.version", cfg.ServiceVersion), + attribute.String("environment", cfg.Environment), + attribute.String("domain", cfg.Domain), + attribute.String("device_id", cfg.DeviceID), + attribute.String("producerType", cfg.ProducerType), + attribute.String("producer", cfg.Producer), + } + + resMetric, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "METRIC")...)) if err != nil { - return nil, fmt.Errorf("failed to create telemetry resource: %w", err) + return nil, fmt.Errorf("failed to create telemetry resource for matric: %w", err) } - registry := clientprom.NewRegistry() - - exporter, err := otelprom.New( - otelprom.WithRegisterer(registry), - otelprom.WithoutUnits(), - otelprom.WithoutScopeInfo(), - ) - if err != nil { - return nil, fmt.Errorf("failed to create prometheus exporter: %w", err) - } - - meterProvider := metric.NewMeterProvider( - metric.WithReader(exporter), - metric.WithResource(res), - ) - - otel.SetMeterProvider(meterProvider) - log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s", - cfg.ServiceName, cfg.ServiceVersion, cfg.Environment) - - if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(0)); err != nil { - log.Warnf(ctx, "Failed to start Go runtime instrumentation: 
%v", err) - } - - // Create metrics handler - metricsHandler := clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}) - - // Create and start metrics HTTP server - metricsMux := http.NewServeMux() - metricsMux.Handle("/metrics", metricsHandler) - - metricsServer := &http.Server{ - Addr: net.JoinHostPort("", cfg.MetricsPort), - Handler: metricsMux, - ReadTimeout: 10 * time.Second, - WriteTimeout: 10 * time.Second, - IdleTimeout: 30 * time.Second, - } - - var serverWg sync.WaitGroup - serverWg.Add(1) - go func() { - defer serverWg.Done() - log.Infof(ctx, "Metrics server listening on %s", metricsServer.Addr) - if err := metricsServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { - log.Errorf(ctx, fmt.Errorf("metrics server ListenAndServe: %w", err), "error listening and serving metrics") + //OTLP matric + var meterProvider *metric.MeterProvider + if cfg.EnableMetrics { + metricExpoter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(cfg.OtlpEndpoint), + otlpmetricgrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials()))) + if err != nil { + return nil, fmt.Errorf("failed to create OTLP metric exporter: %w", err) } - }() + reader := metric.NewPeriodicReader(metricExpoter, metric.WithInterval(time.Second*time.Duration(cfg.TimeInterval))) + meterProvider = metric.NewMeterProvider(metric.WithReader(reader), metric.WithResource(resMetric)) + otel.SetMeterProvider(meterProvider) + log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s (OTLP endpoint=%s)", + cfg.ServiceName, cfg.ServiceVersion, cfg.Environment, cfg.OtlpEndpoint) + // for the go runtime matrics + if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(runtime.DefaultMinimumReadMemStatsInterval)); err != nil { + log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err) + } + } + + //OTLP traces + restrace, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "API")...)) + if err != nil { 
+ return nil, fmt.Errorf("failed to create trace resource: %w", err) + } + var traceProvider *trace.TracerProvider + if cfg.EnableTracing { + traceExpoter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithEndpoint(cfg.OtlpEndpoint), otlptracegrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials()))) + if err != nil { + return nil, fmt.Errorf("failed to create OTLP trace exporter: %w", err) + } + traceProvider = trace.NewTracerProvider(trace.WithBatcher(traceExpoter), trace.WithResource(restrace)) //TODO: need to add the trace sampleing rate + otel.SetTracerProvider(traceProvider) + log.Infof(ctx, "OpenTelemetry tracing initialized for service=%s (OTLP endpoint=%s)", + cfg.ServiceName, cfg.OtlpEndpoint) + } + + resAudit, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "AUDIT")...)) + if err != nil { + return nil, fmt.Errorf("failed to create audit resource: %w", err) + } + var logProvider *logsdk.LoggerProvider + if cfg.EnableLogs { + logExporter, err := otlploggrpc.New(ctx, otlploggrpc.WithEndpoint(cfg.OtlpEndpoint), otlploggrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials()))) + if err != nil { + return nil, fmt.Errorf("failed to create OTLP logs exporter: %w", err) + } + processor := logsdk.NewBatchProcessor(logExporter) + logProvider = logsdk.NewLoggerProvider(logsdk.WithProcessor(processor), logsdk.WithResource(resAudit)) + global.SetLoggerProvider(logProvider) + } return &telemetry.Provider{ - MeterProvider: meterProvider, - MetricsHandler: metricsHandler, + MeterProvider: meterProvider, + TraceProvider: traceProvider, + LogProvider: logProvider, Shutdown: func(shutdownCtx context.Context) error { - log.Infof(ctx, "Shutting down metrics server...") - // Shutdown the metrics server - serverShutdownCtx, cancel := context.WithTimeout(shutdownCtx, 10*time.Second) - defer cancel() - if err := metricsServer.Shutdown(serverShutdownCtx); err != nil { - log.Errorf(ctx, fmt.Errorf("metrics server shutdown: 
%w", err), "error shutting down metrics server") + + var errs []error + if traceProvider != nil { + if err := traceProvider.Shutdown(shutdownCtx); err != nil { + errs = append(errs, fmt.Errorf("tracer shutdown: %w", err)) + } } - serverWg.Wait() - // Shutdown the meter provider - return meterProvider.Shutdown(shutdownCtx) + if meterProvider != nil { + if err := meterProvider.Shutdown(shutdownCtx); err != nil { + errs = append(errs, fmt.Errorf("meter shutdown: %w", err)) + } + } + + if logProvider != nil { + if err := logProvider.Shutdown(shutdownCtx); err != nil { + errs = append(errs, fmt.Errorf("logs shutdown: %w", err)) + } + } + if len(errs) > 0 { + return fmt.Errorf("shutdown errors: %v", errs) + } + return nil }, }, nil } + +func buildAtts(base []attribute.KeyValue, eid string) []attribute.KeyValue { + atts := make([]attribute.KeyValue, 0, len(base)+1) + atts = append(atts, base...) + atts = append(atts, attribute.String("eid", eid)) + return atts +} diff --git a/pkg/plugin/implementation/otelsetup/otelsetup_test.go b/pkg/plugin/implementation/otelsetup/otelsetup_test.go index 916b632..81d5afb 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup_test.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup_test.go @@ -22,15 +22,21 @@ func TestSetup_New_Success(t *testing.T) { ServiceName: "test-service", ServiceVersion: "1.0.0", EnableMetrics: true, + EnableTracing: false, Environment: "test", + Domain: "test-domain", + DeviceID: "test-device", + OtlpEndpoint: "localhost:4317", + TimeInterval: 5, }, }, { - name: "Valid config with metrics disabled", + name: "Valid config with metrics and tracing disabled", cfg: &Config{ ServiceName: "test-service", ServiceVersion: "1.0.0", EnableMetrics: false, + EnableTracing: false, Environment: "test", }, }, @@ -40,6 +46,7 @@ func TestSetup_New_Success(t *testing.T) { ServiceName: "", ServiceVersion: "", EnableMetrics: true, + EnableTracing: false, Environment: "", }, }, @@ -56,10 +63,12 @@ func TestSetup_New_Success(t 
*testing.T) { if tt.cfg.EnableMetrics { assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set when metrics enabled") } + if tt.cfg.EnableTracing { + assert.NotNil(t, provider.TraceProvider, "TraceProvider should be set when tracing enabled") + } - // Test shutdown - err = provider.Shutdown(ctx) - assert.NoError(t, err, "Shutdown should not return error") + // Shutdown for cleanup. When metrics/tracing are enabled, shutdown may fail without a real OTLP backend. + _ = provider.Shutdown(ctx) }) } } @@ -104,7 +113,10 @@ func TestSetup_New_DefaultValues(t *testing.T) { ServiceName: "", ServiceVersion: "", EnableMetrics: true, + EnableTracing: false, Environment: "", + OtlpEndpoint: "localhost:4317", + TimeInterval: 5, } provider, err := setup.New(ctx, cfg) @@ -114,9 +126,8 @@ func TestSetup_New_DefaultValues(t *testing.T) { // Verify defaults are applied by checking that provider is functional assert.NotNil(t, provider.MeterProvider, "MeterProvider should be set with defaults") - // Cleanup - err = provider.Shutdown(ctx) - assert.NoError(t, err) + // Cleanup (shutdown may fail without a real OTLP backend) + _ = provider.Shutdown(ctx) } func TestSetup_New_MetricsDisabled(t *testing.T) { @@ -127,6 +138,7 @@ func TestSetup_New_MetricsDisabled(t *testing.T) { ServiceName: "test-service", ServiceVersion: "1.0.0", EnableMetrics: false, + EnableTracing: false, Environment: "test", } @@ -134,8 +146,9 @@ func TestSetup_New_MetricsDisabled(t *testing.T) { require.NoError(t, err) require.NotNil(t, provider) - // When metrics are disabled, MetricsHandler should be nil and MeterProvider should be nil + // When metrics and tracing are disabled, MeterProvider and TraceProvider should be nil assert.Nil(t, provider.MeterProvider, "MeterProvider should be nil when metrics disabled") + assert.Nil(t, provider.TraceProvider, "TraceProvider should be nil when tracing disabled") // Shutdown should still work err = provider.Shutdown(ctx) @@ -155,32 +168,42 @@ func 
TestToPluginConfig_Success(t *testing.T) { ServiceName: "test-service", ServiceVersion: "1.0.0", EnableMetrics: true, + EnableTracing: true, Environment: "test", + Domain: "test-domain", + DeviceID: "test-device", + OtlpEndpoint: "localhost:4317", + TimeInterval: 5, }, expectedID: "otelsetup", expectedConfig: map[string]string{ "serviceName": "test-service", "serviceVersion": "1.0.0", - "enableMetrics": "true", "environment": "test", - "metricsPort": "", + "enableMetrics": "true", + "enableTracing": "true", + "otelEndpoint": "localhost:4317", + "deviceID": "test-device", }, }, { - name: "Config with enableMetrics false", + name: "Config with enableMetrics and enableTracing false", cfg: &Config{ ServiceName: "my-service", ServiceVersion: "2.0.0", EnableMetrics: false, + EnableTracing: false, Environment: "production", }, expectedID: "otelsetup", expectedConfig: map[string]string{ "serviceName": "my-service", "serviceVersion": "2.0.0", - "enableMetrics": "false", "environment": "production", - "metricsPort": "", + "enableMetrics": "false", + "enableTracing": "false", + "otelEndpoint": "", + "deviceID": "", }, }, { @@ -189,15 +212,21 @@ func TestToPluginConfig_Success(t *testing.T) { ServiceName: "", ServiceVersion: "", EnableMetrics: true, + EnableTracing: false, Environment: "", + Domain: "", + DeviceID: "", + OtlpEndpoint: "", }, expectedID: "otelsetup", expectedConfig: map[string]string{ "serviceName": "", "serviceVersion": "", - "enableMetrics": "true", "environment": "", - "metricsPort": "", + "enableMetrics": "true", + "enableTracing": "false", + "otelEndpoint": "", + "deviceID": "", }, }, } @@ -224,19 +253,32 @@ func TestToPluginConfig_NilConfig(t *testing.T) { func TestToPluginConfig_BooleanConversion(t *testing.T) { tests := []struct { - name string - enableMetrics bool - expected string + name string + enableMetrics bool + enableTracing bool + expectedMetric string + expectedTrace string }{ { - name: "EnableMetrics true", - enableMetrics: true, - expected: 
"true", + name: "EnableMetrics and EnableTracing true", + enableMetrics: true, + enableTracing: true, + expectedMetric: "true", + expectedTrace: "true", }, { - name: "EnableMetrics false", - enableMetrics: false, - expected: "false", + name: "EnableMetrics and EnableTracing false", + enableMetrics: false, + enableTracing: false, + expectedMetric: "false", + expectedTrace: "false", + }, + { + name: "EnableMetrics true, EnableTracing false", + enableMetrics: true, + enableTracing: false, + expectedMetric: "true", + expectedTrace: "false", }, } @@ -246,14 +288,18 @@ func TestToPluginConfig_BooleanConversion(t *testing.T) { ServiceName: "test", ServiceVersion: "1.0.0", EnableMetrics: tt.enableMetrics, + EnableTracing: tt.enableTracing, Environment: "test", - MetricsPort: "", + OtlpEndpoint: "localhost:4317", + DeviceID: "test-device", } result := ToPluginConfig(cfg) require.NotNil(t, result) - assert.Equal(t, tt.expected, result.Config["enableMetrics"], "enableMetrics should be converted to string correctly") - assert.Equal(t, "", result.Config["metricsPort"], "metricsPort should be included even when empty") + assert.Equal(t, tt.expectedMetric, result.Config["enableMetrics"], "enableMetrics should be converted to string correctly") + assert.Equal(t, tt.expectedTrace, result.Config["enableTracing"], "enableTracing should be converted to string correctly") + assert.Equal(t, "localhost:4317", result.Config["otelEndpoint"], "otelEndpoint should be included") + assert.Equal(t, "test-device", result.Config["deviceID"], "deviceID should be included") }) } } diff --git a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go index fa32898..97d74a4 100644 --- a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go +++ b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go @@ -48,6 +48,7 @@ func NewPreProcessor(cfg *Config) (func(http.Handler) http.Handler, error) { http.Error(w, fmt.Sprintf("%s 
field not found or invalid.", contextKey), http.StatusBadRequest) return } + var subID any switch cfg.Role { case "bap": @@ -55,6 +56,14 @@ func NewPreProcessor(cfg *Config) (func(http.Handler) http.Handler, error) { case "bpp": subID = reqContext["bpp_id"] } + + var callerID any + switch cfg.Role { + case "bap": + callerID = reqContext["bpp_id"] + case "bpp": + callerID = reqContext["bap_id"] + } if subID != nil { log.Debugf(ctx, "adding subscriberId to request:%s, %v", model.ContextKeySubscriberID, subID) ctx = context.WithValue(ctx, model.ContextKeySubscriberID, subID) @@ -64,6 +73,11 @@ func NewPreProcessor(cfg *Config) (func(http.Handler) http.Handler, error) { log.Debugf(ctx, "adding parentID to request:%s, %v", model.ContextKeyParentID, cfg.ParentID) ctx = context.WithValue(ctx, model.ContextKeyParentID, cfg.ParentID) } + + if callerID != nil { + log.Debugf(ctx, "adding callerID to request:%s, %v", model.ContextKeyCallerID, callerID) + ctx = context.WithValue(ctx, model.ContextKeyCallerID, callerID) + } for _, key := range cfg.ContextKeys { ctxKey, _ := model.ParseContextKey(key) if v, ok := reqContext[key]; ok { diff --git a/pkg/plugin/implementation/simplekeymanager/simplekeymanager.go b/pkg/plugin/implementation/simplekeymanager/simplekeymanager.go index 2194250..88a499b 100644 --- a/pkg/plugin/implementation/simplekeymanager/simplekeymanager.go +++ b/pkg/plugin/implementation/simplekeymanager/simplekeymanager.go @@ -15,7 +15,10 @@ import ( "github.com/beckn-one/beckn-onix/pkg/log" "github.com/beckn-one/beckn-onix/pkg/model" "github.com/beckn-one/beckn-onix/pkg/plugin/definition" + "github.com/beckn-one/beckn-onix/pkg/telemetry" "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" ) // Config holds configuration parameters for SimpleKeyManager. 
@@ -245,28 +248,43 @@ func (skm *SimpleKeyMgr) LookupNPKeys(ctx context.Context, subscriberID, uniqueK return "", "", err } + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) cacheKey := fmt.Sprintf("%s_%s", subscriberID, uniqueKeyID) - cachedData, err := skm.Cache.Get(ctx, cacheKey) - if err == nil { - var keys model.Keyset - if err := json.Unmarshal([]byte(cachedData), &keys); err == nil { - log.Debugf(ctx, "Found cached keys for subscriber: %s, uniqueKeyID: %s", subscriberID, uniqueKeyID) - return keys.SigningPublic, keys.EncrPublic, nil + var cachedData string + + { + spanCtx, span := tracer.Start(ctx, "redis lookup") + defer span.End() + var err error + cachedData, err = skm.Cache.Get(spanCtx, cacheKey) + if err == nil { + var keys model.Keyset + if err := json.Unmarshal([]byte(cachedData), &keys); err == nil { + log.Debugf(ctx, "Found cached keys for subscriber: %s, uniqueKeyID: %s", subscriberID, uniqueKeyID) + return keys.SigningPublic, keys.EncrPublic, nil + } } } log.Debugf(ctx, "Cache miss, looking up registry for subscriber: %s, uniqueKeyID: %s", subscriberID, uniqueKeyID) - subscribers, err := skm.Registry.Lookup(ctx, &model.Subscription{ - Subscriber: model.Subscriber{ - SubscriberID: subscriberID, - }, - KeyID: uniqueKeyID, - }) - if err != nil { - return "", "", fmt.Errorf("failed to lookup registry: %w", err) - } - if len(subscribers) == 0 { - return "", "", ErrSubscriberNotFound + var subscribers []model.Subscription + { + spanCtx, span := tracer.Start(ctx, "registry lookup") + defer span.End() + var err error + + subscribers, err = skm.Registry.Lookup(spanCtx, &model.Subscription{ + Subscriber: model.Subscriber{ + SubscriberID: subscriberID, + }, + KeyID: uniqueKeyID, + }) + if err != nil { + return "", "", fmt.Errorf("failed to lookup registry: %w", err) + } + if len(subscribers) == 0 { + return "", "", ErrSubscriberNotFound + } } log.Debugf(ctx, "Successfully looked up keys for subscriber: %s, 
uniqueKeyID: %s", subscriberID, uniqueKeyID) diff --git a/pkg/plugin/manager.go b/pkg/plugin/manager.go index ebc4316..ef00dd8 100644 --- a/pkg/plugin/manager.go +++ b/pkg/plugin/manager.go @@ -197,9 +197,7 @@ func (m *Manager) Middleware(ctx context.Context, cfg *Config) (func(http.Handle return mwp.New(ctx, cfg.Config) } -// OtelSetup initializes OpenTelemetry via a dedicated plugin. The plugin is -// expected to return a telemetry Provider that the core application can use for -// instrumentation. +// OtelSetup initializes OpenTelemetry via a dedicated plugin. The plugin is expected to return a telemetry Provider that the core application can use for instrumentation. func (m *Manager) OtelSetup(ctx context.Context, cfg *Config) (*telemetry.Provider, error) { if cfg == nil { log.Info(ctx, "Telemetry config not provided; skipping OpenTelemetry setup") diff --git a/pkg/telemetry/audit.go b/pkg/telemetry/audit.go new file mode 100644 index 0000000..2398c3c --- /dev/null +++ b/pkg/telemetry/audit.go @@ -0,0 +1,56 @@ +package telemetry + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "time" + + logger "github.com/beckn-one/beckn-onix/pkg/log" + "github.com/beckn-one/beckn-onix/pkg/model" + "github.com/google/uuid" + "go.opentelemetry.io/otel/log" + "go.opentelemetry.io/otel/log/global" +) + +const auditLoggerName = "Beckn_ONIX" + +func EmitAuditLogs(ctx context.Context, body []byte, attrs ...log.KeyValue) { + + provider := global.GetLoggerProvider() + if provider == nil { + logger.Warnf(ctx, "failed to emit audit logs, logs disabled") + return + } + + //maskedBody := MaskPIIInAuditBody(body) + + sum := sha256.Sum256(body) + auditBody := selectAuditPayload(ctx, body) + auditlog := provider.Logger(auditLoggerName) + record := log.Record{} + record.SetBody(log.StringValue(string(auditBody))) + record.SetTimestamp(time.Now()) + record.SetObservedTimestamp(time.Now()) + record.SetSeverity(log.SeverityInfo) + + checkSum := hex.EncodeToString(sum[:]) + + txnID, _ 
:= ctx.Value(model.ContextKeyTxnID).(string) + msgID, _ := ctx.Value(model.ContextKeyMsgID).(string) + parentID, _ := ctx.Value(model.ContextKeyParentID).(string) + + record.AddAttributes( + log.String("checkSum", checkSum), + log.String("log_uuid", uuid.New().String()), + log.String("transaction_id", txnID), + log.String("message_id", msgID), + log.String("parent_id", parentID), + ) + + if len(attrs) > 0 { + record.AddAttributes(attrs...) + } + + auditlog.Emit(ctx, record) +} diff --git a/pkg/telemetry/audit_fields.go b/pkg/telemetry/audit_fields.go new file mode 100644 index 0000000..d5e5635 --- /dev/null +++ b/pkg/telemetry/audit_fields.go @@ -0,0 +1,216 @@ +package telemetry + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strings" + "sync" + + "github.com/beckn-one/beckn-onix/pkg/log" + "gopkg.in/yaml.v3" +) + +type auditFieldsRules struct { + AuditRules map[string][]string `yaml:"auditRules"` +} + +var ( + auditRules = map[string][]string{} + auditRulesMutex sync.RWMutex +) + +func LoadAuditFieldRules(ctx context.Context, configPath string) error { + + if strings.TrimSpace(configPath) == "" { + err := fmt.Errorf("config file path is empty") + log.Error(ctx, err, "there are no audit rules defined") + return err + } + + data, err := os.ReadFile(configPath) + if err != nil { + log.Error(ctx, err, "failed to read audit rules file") + return err + } + + var config auditFieldsRules + if err := yaml.Unmarshal(data, &config); err != nil { + log.Error(ctx, err, "failed to parse audit rules file") + return err + } + + if config.AuditRules == nil { + log.Warn(ctx, "audit rules are not defined") + config.AuditRules = map[string][]string{} + } + + auditRulesMutex.Lock() + auditRules = config.AuditRules + auditRulesMutex.Unlock() + log.Info(ctx, "audit rules loaded") + return nil +} + +func selectAuditPayload(ctx context.Context, body []byte) []byte { + + var root map[string]interface{} + if err := json.Unmarshal(body, &root); err != nil { + log.Warn(ctx, 
"failed to unmarshal audit payload ") + return nil + } + + action := "" + if c, ok := root["context"].(map[string]interface{}); ok { + if v, ok := c["action"].(string); ok { + action = strings.TrimSpace(v) + } + } + + fields := getFieldForAction(ctx, action) + if len(fields) == 0 { + return nil + } + + out := map[string]interface{}{} + for _, field := range fields { + parts := strings.Split(field, ".") + partial, ok := projectPath(root, parts) + if !ok { + continue + } + merged := deepMerge(out, partial) + if m, ok := merged.(map[string]interface{}); ok { + out = m + } + } + + body, err := json.Marshal(out) + if err != nil { + log.Warn(ctx, "failed to marshal audit payload") + return nil + } + return body +} + +func getFieldForAction(ctx context.Context, action string) []string { + auditRulesMutex.RLock() + defer auditRulesMutex.RUnlock() + + if action != "" { + if fields, ok := auditRules[action]; ok && len(fields) > 0 { + return fields + } + } + + log.Warn(ctx, "audit rules are not defined for this action send default") + return auditRules["default"] +} + +//func getByPath(root map[string]interface{}, path string) (interface{}, bool) { +// +// parts := strings.Split(path, ".") +// var cur interface{} = root +// +// for _, part := range parts { +// m, ok := cur.(map[string]interface{}) +// if !ok { +// return nil, false +// } +// v, ok := m[part] +// if !ok { +// return nil, false +// } +// cur = v +// } +// return cur, true +//} +// +//func setByPath(root map[string]interface{}, path string, value interface{}) { +// parts := strings.Split(path, ".") +// cur := root +// +// for i := 0; i < len(parts)-1; i++ { +// k := parts[i] +// next, ok := cur[k].(map[string]interface{}) +// if !ok { +// next = map[string]interface{}{} +// cur[k] = next +// } +// cur = next +// } +// cur[parts[len(parts)-1]] = value +//} + +func projectPath(cur interface{}, parts []string) (interface{}, bool) { + if len(parts) == 0 { + return cur, true + } + + switch node := cur.(type) { + case 
map[string]interface{}: + next, ok := node[parts[0]] + if !ok { + return nil, false + } + child, ok := projectPath(next, parts[1:]) + if !ok { + return nil, false + } + return map[string]interface{}{parts[0]: child}, true + + case []interface{}: + out := make([]interface{}, 0, len(node)) + found := false + + for _, n := range node { + child, ok := projectPath(n, parts) + if ok { + out = append(out, child) + found = true + } + } + if !found { + return nil, false + } + return out, true + + default: + return nil, false + } +} +func deepMerge(dst, src interface{}) interface{} { + if dst == nil { + return src + } + + dm, dok := dst.(map[string]interface{}) + sm, sok := src.(map[string]interface{}) + if dok && sok { + for k, sv := range sm { + if dv, ok := dm[k]; ok { + dm[k] = deepMerge(dv, sv) + } else { + dm[k] = sv + } + } + return dm + } + + da, dok := dst.([]interface{}) + sa, sok := src.([]interface{}) + if dok && sok { + if len(da) < len(sa) { + ext := make([]interface{}, len(sa)-len(da)) + da = append(da, ext...) 
+ } + + for i := range sa { + da[i] = deepMerge(da[i], sa[i]) + } + return da + } + + return src +} diff --git a/pkg/telemetry/audit_fields_test.go b/pkg/telemetry/audit_fields_test.go new file mode 100644 index 0000000..17141a7 --- /dev/null +++ b/pkg/telemetry/audit_fields_test.go @@ -0,0 +1,518 @@ +package telemetry + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Test projectPath + +func TestProjectPath_EmptyParts(t *testing.T) { + root := map[string]interface{}{"a": "v"} + got, ok := projectPath(root, nil) + require.True(t, ok) + assert.Equal(t, root, got) + + got, ok = projectPath(root, []string{}) + require.True(t, ok) + assert.Equal(t, root, got) +} + +func TestProjectPath_MapSingleLevel(t *testing.T) { + root := map[string]interface{}{"context": map[string]interface{}{"action": "search"}} + got, ok := projectPath(root, []string{"context"}) + require.True(t, ok) + assert.Equal(t, map[string]interface{}{"context": map[string]interface{}{"action": "search"}}, got) +} + +func TestProjectPath_MapNested(t *testing.T) { + root := map[string]interface{}{ + "context": map[string]interface{}{ + "action": "select", + "transaction_id": "tx-1", + }, + } + got, ok := projectPath(root, []string{"context", "action"}) + require.True(t, ok) + assert.Equal(t, map[string]interface{}{"context": map[string]interface{}{"action": "select"}}, got) +} + +func TestProjectPath_MissingKey(t *testing.T) { + root := map[string]interface{}{"context": map[string]interface{}{"action": "search"}} + got, ok := projectPath(root, []string{"context", "missing"}) + require.False(t, ok) + assert.Nil(t, got) +} + +func TestProjectPath_ArrayTraverseAndProject(t *testing.T) { + root := map[string]interface{}{ + "message": map[string]interface{}{ + "order": map[string]interface{}{ + "beckn:orderItems": []interface{}{ + map[string]interface{}{"beckn:orderedItem": "item-1"}, + 
map[string]interface{}{"beckn:orderedItem": "item-2"}, + }, + }, + }, + } + parts := []string{"message", "order", "beckn:orderItems", "beckn:orderedItem"} + got, ok := projectPath(root, parts) + require.True(t, ok) + + expected := map[string]interface{}{ + "message": map[string]interface{}{ + "order": map[string]interface{}{ + "beckn:orderItems": []interface{}{ + map[string]interface{}{"beckn:orderedItem": "item-1"}, + map[string]interface{}{"beckn:orderedItem": "item-2"}, + }, + }, + }, + } + assert.Equal(t, expected, got) +} + +func TestProjectPath_NonMapOrSlice(t *testing.T) { + _, ok := projectPath("string", []string{"a"}) + require.False(t, ok) + + _, ok = projectPath(42, []string{"a"}) + require.False(t, ok) +} + +func TestProjectPath_EmptyArray(t *testing.T) { + root := map[string]interface{}{"items": []interface{}{}} + got, ok := projectPath(root, []string{"items", "id"}) + require.False(t, ok) + assert.Nil(t, got) +} + +// Test deepMerge + +func TestDeepMerge_NilDst(t *testing.T) { + src := map[string]interface{}{"a": 1} + got := deepMerge(nil, src) + assert.Equal(t, src, got) +} + +func TestDeepMerge_MapIntoMap(t *testing.T) { + dst := map[string]interface{}{"a": 1, "b": 2} + src := map[string]interface{}{"b": 20, "c": 3} + got := deepMerge(dst, src) + assert.Equal(t, map[string]interface{}{"a": 1, "b": 20, "c": 3}, got) +} + +func TestDeepMerge_MapNested(t *testing.T) { + dst := map[string]interface{}{ + "context": map[string]interface{}{"action": "search", "domain": "retail"}, + } + src := map[string]interface{}{ + "context": map[string]interface{}{"action": "search", "transaction_id": "tx-1"}, + } + got := deepMerge(dst, src) + ctx, ok := got.(map[string]interface{})["context"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "search", ctx["action"]) + assert.Equal(t, "retail", ctx["domain"]) + assert.Equal(t, "tx-1", ctx["transaction_id"]) +} + +func TestDeepMerge_ArrayIntoArray(t *testing.T) { + dst := []interface{}{ + 
map[string]interface{}{"id": "a"}, + map[string]interface{}{"id": "b"}, + } + src := []interface{}{ + map[string]interface{}{"id": "a", "name": "A"}, + map[string]interface{}{"id": "b", "name": "B"}, + } + got := deepMerge(dst, src) + sl, ok := got.([]interface{}) + require.True(t, ok) + require.Len(t, sl, 2) + assert.Equal(t, map[string]interface{}{"id": "a", "name": "A"}, sl[0]) + assert.Equal(t, map[string]interface{}{"id": "b", "name": "B"}, sl[1]) +} + +func TestDeepMerge_ArraySrcLonger(t *testing.T) { + dst := []interface{}{map[string]interface{}{"a": 1}} + src := []interface{}{ + map[string]interface{}{"a": 1}, + map[string]interface{}{"a": 2}, + } + got := deepMerge(dst, src) + sl, ok := got.([]interface{}) + require.True(t, ok) + require.Len(t, sl, 2) +} + +func TestDeepMerge_ScalarSrc(t *testing.T) { + dst := map[string]interface{}{"a": 1} + src := "overwrite" + got := deepMerge(dst, src) + assert.Equal(t, "overwrite", got) +} + +// Test getFieldForAction and selectAuditPayload (require loaded rules via temp file) + +func writeAuditRulesFile(t *testing.T, content string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "audit-fields.yaml") + err := os.WriteFile(path, []byte(content), 0600) + require.NoError(t, err) + return path +} + +func TestGetFieldForAction_ActionMatch(t *testing.T) { + ctx := context.Background() + path := writeAuditRulesFile(t, ` +auditRules: + default: + - context.transaction_id + - context.action + search: + - context.action + - context.timestamp + select: + - context.action + - message.order +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + fields := getFieldForAction(ctx, "search") + assert.Equal(t, []string{"context.action", "context.timestamp"}, fields) + + fields = getFieldForAction(ctx, "select") + assert.Equal(t, []string{"context.action", "message.order"}, fields) +} + +func TestGetFieldForAction_FallbackToDefault(t *testing.T) { + ctx := context.Background() + path := 
writeAuditRulesFile(t, ` +auditRules: + default: + - context.transaction_id + - context.message_id + search: + - context.action +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + fields := getFieldForAction(ctx, "unknown_action") + assert.Equal(t, []string{"context.transaction_id", "context.message_id"}, fields) + + fields = getFieldForAction(ctx, "") + assert.Equal(t, []string{"context.transaction_id", "context.message_id"}, fields) +} + +func TestGetFieldForAction_EmptyDefault(t *testing.T) { + ctx := context.Background() + path := writeAuditRulesFile(t, ` +auditRules: + default: [] + search: + - context.action +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + fields := getFieldForAction(ctx, "other") + assert.Empty(t, fields) +} + +func TestSelectAuditPayload_InvalidJSON(t *testing.T) { + ctx := context.Background() + path := writeAuditRulesFile(t, ` +auditRules: + default: + - context.action +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + got := selectAuditPayload(ctx, []byte("not json")) + assert.Nil(t, got) +} + +func TestSelectAuditPayload_NoRulesLoaded(t *testing.T) { + ctx := context.Background() + // use a fresh context without loading any rules; auditRules may be from previous test + path := writeAuditRulesFile(t, ` +auditRules: + default: [] +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + body := []byte(`{"context":{"action":"search"}}`) + got := selectAuditPayload(ctx, body) + assert.Nil(t, got) +} + +func TestSelectAuditPayload_ContextAndActionOnly(t *testing.T) { + ctx := context.Background() + path := writeAuditRulesFile(t, ` +auditRules: + default: + - context.transaction_id + - context.message_id + - context.action +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + body := []byte(`{ + "context": { + "action": "search", + "transaction_id": "tx-1", + "message_id": "msg-1", + "domain": "retail" + }, + "message": {"intent": "buy"} + }`) + got := selectAuditPayload(ctx, body) + require.NotNil(t, 
got) + + var out map[string]interface{} + require.NoError(t, json.Unmarshal(got, &out)) + ctxMap, ok := out["context"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "search", ctxMap["action"]) + assert.Equal(t, "tx-1", ctxMap["transaction_id"]) + assert.Equal(t, "msg-1", ctxMap["message_id"]) + _, hasMessage := out["message"] + assert.False(t, hasMessage) +} + +func TestSelectAuditPayload_ActionSpecificRules(t *testing.T) { + ctx := context.Background() + path := writeAuditRulesFile(t, ` +auditRules: + default: + - context.action + search: + - context.action + - context.timestamp + - message.intent +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + body := []byte(`{ + "context": {"action": "search", "timestamp": "2024-01-15T10:30:00Z", "domain": "retail"}, + "message": {"intent": {"item": {"id": "x"}}} + }`) + got := selectAuditPayload(ctx, body) + require.NotNil(t, got) + + var out map[string]interface{} + require.NoError(t, json.Unmarshal(got, &out)) + ctxMap := out["context"].(map[string]interface{}) + assert.Equal(t, "search", ctxMap["action"]) + assert.Equal(t, "2024-01-15T10:30:00Z", ctxMap["timestamp"]) + msg := out["message"].(map[string]interface{}) + assert.NotNil(t, msg["intent"]) +} + +func TestSelectAuditPayload_ArrayFieldProjection(t *testing.T) { + ctx := context.Background() + path := writeAuditRulesFile(t, ` +auditRules: + default: + - context.action + select: + - context.transaction_id + - context.action + - message.order.beckn:orderItems.beckn:orderedItem +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + body := []byte(`{ + "context": {"action": "select", "transaction_id": "tx-2"}, + "message": { + "order": { + "beckn:orderItems": [ + {"beckn:orderedItem": "item-A", "other": "x"}, + {"beckn:orderedItem": "item-B", "other": "y"} + ] + } + } + }`) + got := selectAuditPayload(ctx, body) + require.NotNil(t, got) + + var out map[string]interface{} + require.NoError(t, json.Unmarshal(got, &out)) + ctxMap := 
out["context"].(map[string]interface{}) + assert.Equal(t, "select", ctxMap["action"]) + assert.Equal(t, "tx-2", ctxMap["transaction_id"]) + + order := out["message"].(map[string]interface{})["order"].(map[string]interface{}) + items := order["beckn:orderItems"].([]interface{}) + require.Len(t, items, 2) + assert.Equal(t, map[string]interface{}{"beckn:orderedItem": "item-A"}, items[0]) + assert.Equal(t, map[string]interface{}{"beckn:orderedItem": "item-B"}, items[1]) +} + +// TestSelectAuditPayload_SelectOrderExample uses a full select request payload and +// select audit rules to verify that only configured fields are projected into the +// audit log body. The request mirrors a real select with context, message.order, +// beckn:orderItems (array), beckn:acceptedOffer, and beckn:orderAttributes. +// Rules include array traversal (e.g. message.order.beckn:orderItems.beckn:orderedItem +// projects that field from each array element) and nested paths like +// message.order.beckn:orderItems.beckn:acceptedOffer.beckn:price.value. 
+func TestSelectAuditPayload_SelectOrderExample(t *testing.T) { + ctx := context.Background() + path := writeAuditRulesFile(t, ` +auditRules: + default: [] + select: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order + - message.order.beckn:seller + - message.order.beckn:buyer + - message.order.beckn:buyer.beckn:id + - message.order.beckn:orderItems + - message.order.beckn:orderItems.beckn:orderedItem + - message.order.beckn:orderItems.beckn:acceptedOffer + - message.order.beckn:orderItems.beckn:acceptedOffer.beckn:id + - message.order.beckn:orderItems.beckn:acceptedOffer.beckn:price + - message.order.beckn:orderItems.beckn:acceptedOffer.beckn:price.value + - message.order.beckn:orderAttributes + - message.order.beckn:orderAttributes.preferences + - message.order.beckn:orderAttributes.preferences.startTime +`) + require.NoError(t, LoadAuditFieldRules(ctx, path)) + + // Full select request example: context (version, action, domain, timestamp, ids, URIs, ttl) + // and message.order with orderStatus, seller, buyer, orderItems array (orderedItem, quantity, + // acceptedOffer with id, descriptor, items, provider, price), orderAttributes (buyerFinderFee, preferences). 
+ body := []byte(`{ + "context": { + "version": "1.0.0", + "action": "select", + "domain": "ev_charging", + "timestamp": "2024-01-15T10:30:00Z", + "message_id": "bb9f86db-9a3d-4e9c-8c11-81c8f1a7b901", + "transaction_id": "2b4d69aa-22e4-4c78-9f56-5a7b9e2b2002", + "bap_id": "bap.example.com", + "bap_uri": "https://bap.example.com", + "ttl": "PT30S", + "bpp_id": "bpp.example.com", + "bpp_uri": "https://bpp.example.com" + }, + "message": { + "order": { + "@context": "https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/schema/core/v2/context.jsonld", + "@type": "beckn:Order", + "beckn:orderStatus": "CREATED", + "beckn:seller": "ecopower-charging", + "beckn:buyer": { + "@context": "https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/schema/core/v2/context.jsonld", + "@type": "beckn:Buyer", + "beckn:id": "user-123", + "beckn:role": "BUYER", + "beckn:displayName": "Ravi Kumar", + "beckn:telephone": "+91-9876543210", + "beckn:email": "ravi.kumar@example.com", + "beckn:taxID": "GSTIN29ABCDE1234F1Z5" + }, + "beckn:orderItems": [ + { + "beckn:orderedItem": "IND*ecopower-charging*cs-01*IN*ECO*BTM*01*CCS2*A*CCS2-A", + "beckn:quantity": { + "unitText": "Kilowatt Hour", + "unitCode": "KWH", + "unitQuantity": 2.5 + }, + "beckn:acceptedOffer": { + "@context": "https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/schema/core/v2/context.jsonld", + "@type": "beckn:Offer", + "beckn:id": "offer-ccs2-60kw-kwh", + "beckn:descriptor": { + "@type": "beckn:Descriptor", + "schema:name": "Per-kWh Tariff - CCS2 60kW" + }, + "beckn:items": [ + "IND*ecopower-charging*cs-01*IN*ECO*BTM*01*CCS2*A*CCS2-A" + ], + "beckn:provider": "ecopower-charging", + "beckn:price": { + "currency": "INR", + "value": 45.0, + "applicableQuantity": { + "unitText": "Kilowatt Hour", + "unitCode": "KWH", + "unitQuantity": 1 + } + } + } + } + ], + "beckn:orderAttributes": { + "@context": 
"https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/schema/EvChargingSession/v1/context.jsonld", + "@type": "ChargingSession", + "buyerFinderFee": { + "feeType": "PERCENTAGE", + "feeValue": 2.5 + }, + "preferences": { + "startTime": "2026-01-04T08:00:00+05:30", + "endTime": "2026-01-04T20:00:00+05:30" + } + } + } + } +}`) + got := selectAuditPayload(ctx, body) + require.NotNil(t, got, "selectAuditPayload should return projected body for select action") + + var out map[string]interface{} + require.NoError(t, json.Unmarshal(got, &out)) + + // Context: only transaction_id, message_id, action, timestamp + ctxMap, ok := out["context"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "select", ctxMap["action"]) + assert.Equal(t, "2b4d69aa-22e4-4c78-9f56-5a7b9e2b2002", ctxMap["transaction_id"]) + assert.Equal(t, "bb9f86db-9a3d-4e9c-8c11-81c8f1a7b901", ctxMap["message_id"]) + assert.Equal(t, "2024-01-15T10:30:00Z", ctxMap["timestamp"]) + _, hasBapID := ctxMap["bap_id"] + assert.False(t, hasBapID, "context should not include bap_id when not in audit rules") + + // message.order: full order merged with projected array fields + msg, ok := out["message"].(map[string]interface{}) + require.True(t, ok) + order, ok := msg["order"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "ecopower-charging", order["beckn:seller"]) + buyer, ok := order["beckn:buyer"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "user-123", buyer["beckn:id"]) + + // beckn:orderItems: array with projected fields from each element (beckn:orderedItem, beckn:acceptedOffer with id, price, price.value) + items, ok := order["beckn:orderItems"].([]interface{}) + require.True(t, ok) + require.Len(t, items, 1) + item0, ok := items[0].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "IND*ecopower-charging*cs-01*IN*ECO*BTM*01*CCS2*A*CCS2-A", item0["beckn:orderedItem"]) + acceptedOffer, ok := 
item0["beckn:acceptedOffer"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "offer-ccs2-60kw-kwh", acceptedOffer["beckn:id"]) + price, ok := acceptedOffer["beckn:price"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, 45.0, price["value"]) + + // beckn:orderAttributes: only preferences and preferences.startTime + orderAttrs, ok := order["beckn:orderAttributes"].(map[string]interface{}) + require.True(t, ok) + prefs, ok := orderAttrs["preferences"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, "2026-01-04T08:00:00+05:30", prefs["startTime"]) +} diff --git a/pkg/telemetry/metrics_test.go b/pkg/telemetry/metrics_test.go index 1c3663a..289edd3 100644 --- a/pkg/telemetry/metrics_test.go +++ b/pkg/telemetry/metrics_test.go @@ -2,9 +2,9 @@ package telemetry import ( "context" - "net/http/httptest" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -13,16 +13,31 @@ func TestNewProviderAndMetrics(t *testing.T) { provider, err := NewTestProvider(ctx) require.NoError(t, err) require.NotNil(t, provider) - require.NotNil(t, provider.MetricsHandler) + require.NotNil(t, provider.MeterProvider, "MeterProvider should be set") metrics, err := GetMetrics(ctx) require.NoError(t, err) require.NotNil(t, metrics) - rec := httptest.NewRecorder() - req := httptest.NewRequest("GET", "/metrics", nil) - provider.MetricsHandler.ServeHTTP(rec, req) - require.Equal(t, 200, rec.Code) - - require.NoError(t, provider.Shutdown(context.Background())) + require.NoError(t, provider.Shutdown(ctx)) +} + +func TestNewProviderAndTraces(t *testing.T) { + ctx := context.Background() + provider, sr, err := NewTestProviderWithTrace(ctx) + require.NoError(t, err) + require.NotNil(t, provider) + require.NotNil(t, provider.MeterProvider, "MeterProvider should be set") + require.NotNil(t, provider.TraceProvider, "TraceProvider should be set") + require.NotNil(t, sr, "SpanRecorder should be set") + + tracer := 
provider.TraceProvider.Tracer("test-instrumentation") + _, span := tracer.Start(ctx, "test-span") + span.End() + + ended := sr.Ended() + require.Len(t, ended, 1, "exactly one span should be recorded") + assert.Equal(t, "test-span", ended[0].Name(), "recorded span should have expected name") + + require.NoError(t, provider.Shutdown(ctx)) } diff --git a/pkg/telemetry/pluginMetrics.go b/pkg/telemetry/pluginMetrics.go index c6d83ce..7bb73c8 100644 --- a/pkg/telemetry/pluginMetrics.go +++ b/pkg/telemetry/pluginMetrics.go @@ -30,24 +30,52 @@ var ( // Attribute keys shared across instruments. var ( - AttrModule = attribute.Key("module") - AttrSubsystem = attribute.Key("subsystem") - AttrName = attribute.Key("name") - AttrStep = attribute.Key("step") - AttrRole = attribute.Key("role") - AttrAction = attribute.Key("action") - AttrHTTPMethod = attribute.Key("http_method") - AttrHTTPStatus = attribute.Key("http_status_code") - AttrStatus = attribute.Key("status") - AttrErrorType = attribute.Key("error_type") - AttrPluginID = attribute.Key("plugin_id") - AttrPluginType = attribute.Key("plugin_type") - AttrOperation = attribute.Key("operation") - AttrRouteType = attribute.Key("route_type") - AttrTargetType = attribute.Key("target_type") - AttrSchemaVersion = attribute.Key("schema_version") + AttrModule = attribute.Key("module") + AttrCaller = attribute.Key("caller") // who is calling (bap/bpp), with their name + AttrStep = attribute.Key("step") + AttrRole = attribute.Key("role") + AttrAction = attribute.Key("action") // action is context.action + AttrHTTPStatus = attribute.Key("http_status_code") // status code is 2xx/3xx/4xx/5xx + AttrStatus = attribute.Key("status") + AttrErrorType = attribute.Key("error_type") + AttrPluginID = attribute.Key("plugin_id") // id of the plugin + AttrPluginType = attribute.Key("plugin_type") // type of the plugin + AttrOperation = attribute.Key("operation") + AttrRouteType = attribute.Key("route_type") // publish/ uri + AttrTargetType =
attribute.Key("target_type") + AttrSchemaVersion = attribute.Key("schema_version") + AttrMetricUUID = attribute.Key("metric_uuid") + AttrMetricCode = attribute.Key("metric.code") + AttrMetricCategory = attribute.Key("metric.category") + AttrMetricGranularity = attribute.Key("metric.granularity") + AttrMetricFrequency = attribute.Key("metric.frequency") + AttrObservedTimeUnixNano = attribute.Key("observedTimeUnixNano") + AttrMatricLabels = attribute.Key("metric.labels") ) +var ( + networkMetricsCfgMu sync.RWMutex + networkMetricsGranularity = "10mim" // default + networkMetricsFrequency = "10mim" // default +) + +func SetNetworkMetricsConfig(granularity, frequency string) { + networkMetricsCfgMu.Lock() + defer networkMetricsCfgMu.Unlock() + if granularity != "" { + networkMetricsGranularity = granularity + } + if frequency != "" { + networkMetricsFrequency = frequency + } +} + +func GetNetworkMetricsConfig() (granularity, frequency string) { + networkMetricsCfgMu.RLock() + defer networkMetricsCfgMu.RUnlock() + return networkMetricsGranularity, networkMetricsFrequency +} + // GetMetrics lazily initializes instruments and returns a cached reference. 
func GetMetrics(ctx context.Context) (*Metrics, error) { metricsOnce.Do(func() { @@ -58,8 +86,8 @@ func GetMetrics(ctx context.Context) (*Metrics, error) { func newMetrics() (*Metrics, error) { meter := otel.GetMeterProvider().Meter( - "github.com/beckn-one/beckn-onix/telemetry", - metric.WithInstrumentationVersion("1.0.0"), + ScopeName, + metric.WithInstrumentationVersion(ScopeVersion), ) m := &Metrics{} diff --git a/pkg/telemetry/telemetry.go b/pkg/telemetry/telemetry.go index c5b70df..3400e93 100644 --- a/pkg/telemetry/telemetry.go +++ b/pkg/telemetry/telemetry.go @@ -2,14 +2,21 @@ package telemetry import ( "context" - "net/http" + "go.opentelemetry.io/otel/sdk/log" "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/trace" +) + +const ( + ScopeName = "beckn-onix" + ScopeVersion = "v2.0.0" ) // Provider holds references to telemetry components that need coordinated shutdown. type Provider struct { - MeterProvider *metric.MeterProvider - MetricsHandler http.Handler - Shutdown func(context.Context) error + MeterProvider *metric.MeterProvider + TraceProvider *trace.TracerProvider + LogProvider *log.LoggerProvider + Shutdown func(context.Context) error } diff --git a/pkg/telemetry/test_helper.go b/pkg/telemetry/test_helper.go index 627965b..0e81dfc 100644 --- a/pkg/telemetry/test_helper.go +++ b/pkg/telemetry/test_helper.go @@ -4,12 +4,13 @@ import ( "context" clientprom "github.com/prometheus/client_golang/prometheus" - clientpromhttp "github.com/prometheus/client_golang/prometheus/promhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" otelprom "go.opentelemetry.io/otel/exporters/prometheus" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" ) // NewTestProvider creates a minimal telemetry provider for testing purposes. 
@@ -45,10 +46,56 @@ func NewTestProvider(ctx context.Context) (*Provider, error) { otel.SetMeterProvider(meterProvider) return &Provider{ - MeterProvider: meterProvider, - MetricsHandler: clientpromhttp.HandlerFor(registry, clientpromhttp.HandlerOpts{}), + MeterProvider: meterProvider, Shutdown: func(ctx context.Context) error { return meterProvider.Shutdown(ctx) }, }, nil } + +// NewTestProviderWithTrace creates a telemetry provider with both metrics and +// tracing enabled, using an in-memory span recorder. It returns the provider +// and the SpanRecorder so tests can assert on recorded spans. +func NewTestProviderWithTrace(ctx context.Context) (*Provider, *tracetest.SpanRecorder, error) { + provider, err := NewTestProvider(ctx) + if err != nil { + return nil, nil, err + } + + res, err := resource.New( + ctx, + resource.WithAttributes( + attribute.String("service.name", "test-service"), + attribute.String("service.version", "test"), + attribute.String("deployment.environment", "test"), + ), + ) + if err != nil { + return nil, nil, err + } + + sr := tracetest.NewSpanRecorder() + traceProvider := trace.NewTracerProvider( + trace.WithSpanProcessor(sr), + trace.WithResource(res), + ) + otel.SetTracerProvider(traceProvider) + + return &Provider{ + MeterProvider: provider.MeterProvider, + TraceProvider: traceProvider, + Shutdown: func(ctx context.Context) error { + var errs []error + if err := traceProvider.Shutdown(ctx); err != nil { + errs = append(errs, err) + } + if err := provider.MeterProvider.Shutdown(ctx); err != nil { + errs = append(errs, err) + } + if len(errs) > 0 { + return errs[0] + } + return nil + }, + }, sr, nil +} From c2f3dbce2cd650f0e8a328f18add14cb8c8cd848 Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Mon, 23 Feb 2026 23:16:49 +0530 Subject: [PATCH 4/9] fix: sender and recipient.id --- core/module/handler/stdHandler.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git 
a/core/module/handler/stdHandler.go b/core/module/handler/stdHandler.go index ab50ffb..facb668 100644 --- a/core/module/handler/stdHandler.go +++ b/core/module/handler/stdHandler.go @@ -8,6 +8,7 @@ import ( "net/http" "net/http/httputil" "strconv" + "strings" "time" "github.com/beckn-one/beckn-onix/pkg/log" @@ -376,18 +377,22 @@ func (h *stdHandler) initSteps(ctx context.Context, mgr PluginManager, cfg *Conf } func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { - recipientID := h.SubscriberID - - if v, ok := r.Context().Value(model.ContextKeySubscriberID).(string); ok { - recipientID = v - } - senderID := "" + selfID := h.SubscriberID + remoteID := "" if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok { - senderID = v + remoteID = v } + var senderID, receiverID string + if strings.Contains(h.moduleName, "Caller") { + senderID = selfID + receiverID = remoteID + } else { + senderID = remoteID + receiverID = selfID + } attrs := []attribute.KeyValue{ - attribute.String("recipient.id", recipientID), + attribute.String("recipient.id", receiverID), attribute.String("sender.id", senderID), attribute.String("span_uuid", uuid.New().String()), attribute.String("http.request.method", r.Method), From 18303eb1b5441f9d70d9506210be5e74cd303678 Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Tue, 24 Feb 2026 13:35:44 +0530 Subject: [PATCH 5/9] Fix : update the matric arrtibute --- core/module/handler/http_metric.go | 10 +++------- core/module/handler/stdHandler.go | 30 +++++++++++++++++++++++------- pkg/telemetry/pluginMetrics.go | 2 ++ 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/core/module/handler/http_metric.go b/core/module/handler/http_metric.go index 54e3323..f52fa2f 100644 --- a/core/module/handler/http_metric.go +++ b/core/module/handler/http_metric.go @@ -4,12 +4,9 @@ import ( "context" "fmt" "net/http" - "strconv" "sync" - "time" "github.com/beckn-one/beckn-onix/pkg/telemetry" - "github.com/google/uuid" 
"go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" @@ -66,7 +63,7 @@ func StatusClass(statusCode int) string { } } -func RecordHTTPRequest(ctx context.Context, statusCode int, action, role, caller string) { +func RecordHTTPRequest(ctx context.Context, statusCode int, action, role, senderID, recipientID string) { m, err := GetHTTPMetrics(ctx) if err != nil || m == nil { return @@ -76,7 +73,8 @@ func RecordHTTPRequest(ctx context.Context, statusCode int, action, role, caller telemetry.AttrHTTPStatus.String(status), telemetry.AttrAction.String(action), telemetry.AttrRole.String(role), - telemetry.AttrCaller.String(caller), + telemetry.AttrSenderID.String(senderID), + telemetry.AttrRecipientID.String(recipientID), } metric_code := action + "_api_total_count" @@ -110,11 +108,9 @@ func specHttpMetricAttr(metricCode, category string) []attribute.KeyValue { granularity, frequency := telemetry.GetNetworkMetricsConfig() return []attribute.KeyValue{ - telemetry.AttrMetricUUID.String(uuid.New().String()), telemetry.AttrMetricCode.String(metricCode), telemetry.AttrMetricCategory.String(category), telemetry.AttrMetricGranularity.String(granularity), telemetry.AttrMetricFrequency.String(frequency), - telemetry.AttrObservedTimeUnixNano.String(strconv.FormatInt(time.Now().UnixNano(), 10)), } } diff --git a/core/module/handler/stdHandler.go b/core/module/handler/stdHandler.go index facb668..6c2831a 100644 --- a/core/module/handler/stdHandler.go +++ b/core/module/handler/stdHandler.go @@ -122,14 +122,23 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { record: nil, } - caller := "unknown" - if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok && v != "" { - caller = v + selfID := h.SubscriberID + remoteID := "" + if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok { + remoteID = v + } + var senderID, receiverID string + if strings.Contains(h.moduleName, "Caller") { + senderID = 
selfID + receiverID = remoteID + } else { + senderID = remoteID + receiverID = selfID } httpMeter, _ := GetHTTPMetrics(r.Context()) if httpMeter != nil { recordOnce = func() { - RecordHTTPRequest(r.Context(), wrapped.statusCode, r.URL.Path, string(h.role), caller) + RecordHTTPRequest(r.Context(), wrapped.statusCode, r.URL.Path, string(h.role), senderID, receiverID) } wrapped.record = recordOnce } @@ -152,7 +161,7 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } body := stepCtx.Body - go telemetry.EmitAuditLogs(r.Context(), body, auditlog.Int("http.response.status_code", wrapped.statusCode)) + go telemetry.EmitAuditLogs(r.Context(), body, auditlog.Int("http.response.status_code", wrapped.statusCode), auditlog.String("http.response.error", errString(err))) span.End() }() @@ -392,8 +401,8 @@ func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { receiverID = selfID } attrs := []attribute.KeyValue{ - attribute.String("recipient.id", receiverID), - attribute.String("sender.id", senderID), + telemetry.AttrRecipientID.String(receiverID), + telemetry.AttrSenderID.String(senderID), attribute.String("span_uuid", uuid.New().String()), attribute.String("http.request.method", r.Method), attribute.String("http.route", r.URL.Path), @@ -418,3 +427,10 @@ func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { span.SetAttributes(attrs...) 
} + +func errString(e error) string { + if e == nil { + return "" + } + return e.Error() +} diff --git a/pkg/telemetry/pluginMetrics.go b/pkg/telemetry/pluginMetrics.go index 7bb73c8..a26601b 100644 --- a/pkg/telemetry/pluginMetrics.go +++ b/pkg/telemetry/pluginMetrics.go @@ -51,6 +51,8 @@ var ( AttrMetricFrequency = attribute.Key("metric.frequency") AttrObservedTimeUnixNano = attribute.Key("observedTimeUnixNano") AttrMatricLabels = attribute.Key("metric.labels") + AttrSenderID = attribute.Key("sender.id") + AttrRecipientID = attribute.Key("recipient.id") ) var ( From 9d57d3b8f1f896ef96dfdd533807feb40482214f Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Thu, 26 Feb 2026 14:45:35 +0530 Subject: [PATCH 6/9] Feat : Update PR as comment --- CONFIG.md | 108 +++++-- README.md | 19 +- cmd/adapter/main.go | 2 +- config/audit-fields.yaml | 89 ++++++ config/local-beckn-one-bap.yaml | 19 ++ config/local-beckn-one-bpp.yaml | 17 ++ config/onix/adapter.local.yaml | 221 --------------- config/onix/adapter.yaml | 220 +++++++++------ config/onix/audit-fields.yaml | 24 -- config/onix/bapTxnReciever-routing.yaml | 64 ++--- config/onix/bppTxnCaller-routing.yaml | 19 +- core/module/handler/http_metric.go | 8 +- core/module/handler/stdHandler.go | 6 +- core/module/handler/step_instrumentor.go | 1 + .../network-observability/docker-compose.yml | 264 ++++++++++++++++++ .../provisioning/dashboards/dashboards.yml | 23 ++ .../json/application/metrics-dashboard.json | 1 + .../json/application/traces-dashboard.json | 1 + .../network-observability-dashboard.json | 199 +++++++++++++ .../provisioning/datasources/datasources.yml | 32 +++ .../loki/loki-config.yml | 35 +++ .../otel-collector-bap/config.yaml | 83 ++++++ .../otel-collector-bpp/config.yaml | 83 ++++++ .../otel-collector-network/config.yaml | 60 ++++ .../prometheus/prometheus.yml | 20 ++ pkg/model/model.go | 6 +- .../implementation/otelsetup/cmd/plugin.go | 17 +- .../implementation/otelsetup/otelsetup.go | 8 +- 
.../reqpreprocessor/reqpreprocessor.go | 4 +- pkg/telemetry/audit_fields.go | 35 --- pkg/telemetry/pluginMetrics.go | 2 +- 31 files changed, 1212 insertions(+), 478 deletions(-) create mode 100644 config/audit-fields.yaml delete mode 100644 config/onix/adapter.local.yaml delete mode 100644 config/onix/audit-fields.yaml create mode 100644 install/network-observability/docker-compose.yml create mode 100644 install/network-observability/grafana/provisioning/dashboards/dashboards.yml create mode 100644 install/network-observability/grafana/provisioning/dashboards/json/application/metrics-dashboard.json create mode 100644 install/network-observability/grafana/provisioning/dashboards/json/application/traces-dashboard.json create mode 100644 install/network-observability/grafana/provisioning/dashboards/json/network/network-observability-dashboard.json create mode 100644 install/network-observability/grafana/provisioning/datasources/datasources.yml create mode 100644 install/network-observability/loki/loki-config.yml create mode 100644 install/network-observability/otel-collector-bap/config.yaml create mode 100644 install/network-observability/otel-collector-bpp/config.yaml create mode 100644 install/network-observability/otel-collector-network/config.yaml create mode 100644 install/network-observability/prometheus/prometheus.yml diff --git a/CONFIG.md b/CONFIG.md index e04a431..1c62d19 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -199,9 +199,7 @@ log: #### `plugins.otelsetup` **Type**: `object` **Required**: No -**Description**: OpenTelemetry configuration controlling whether the Prometheus exporter is enabled. - -**Important**: This block is optional—omit it to run without telemetry. When present, the `/metrics` endpoint is exposed on a separate port (configurable via `metricsPort`) only if `enableMetrics: true`. +**Description**: OpenTelemetry (OTLP) configuration for metrics, traces, and logs. When configured, telemetry is exported to an OTLP collector endpoint. 
Omit this block to run without telemetry. ##### Parameters: @@ -215,11 +213,10 @@ log: **Required**: Yes **Description**: Plugin configuration parameters. -###### `config.enableMetrics` -**Type**: `string` (boolean) -**Required**: No -**Default**: `"true"` -**Description**: Enables metrics collection and the `/metrics` endpoint. Must be `"true"` or `"false"` as a string. +###### `config.otlpEndpoint` +**Type**: `string` +**Required**: Yes (when OtelSetup is used) +**Description**: OTLP gRPC endpoint (host:port) for exporting metrics, traces, and logs. Example: `"localhost:4317"`, `"otel-collector-bap:4317"`. ###### `config.serviceName` **Type**: `string` @@ -238,47 +235,114 @@ log: **Default**: `"development"` **Description**: Sets the `deployment.environment` attribute (e.g., `development`, `staging`, `production`). -###### `config.metricsPort` +###### `config.domain` **Type**: `string` **Required**: No **Default**: `"9090"` **Description**: Port on which the metrics HTTP server will listen. The metrics endpoint is hosted on a separate server from the main application. -**Example - Enable Metrics** (matches `config/local-simple.yaml`): +###### `config.enableMetrics` +**Type**: `string` (boolean) +**Required**: No +**Default**: `"false"` +**Description**: Enables metrics collection and OTLP metric export. Use `"true"` or `"false"` as a string. + +###### `config.enableTracing` +**Type**: `string` (boolean) +**Required**: No +**Default**: `"false"` +**Description**: Enables trace export via OTLP. Use `"true"` or `"false"` as a string. + +###### `config.enableLogs` +**Type**: `string` (boolean) +**Required**: No +**Default**: `"false"` +**Description**: Enables log export via OTLP (e.g. audit logs). Use `"true"` or `"false"` as a string. + +###### `config.timeInterval` +**Type**: `string` (integer) +**Required**: No +**Default**: `"5"` +**Description**: Time interval in seconds used for periodic metric export or batching. 
+ +###### `config.auditFieldsConfig` +**Type**: `string` +**Required**: No +**Description**: Path to a YAML file that defines which request/response fields are included in audit logs, per action. See [Audit fields configuration](#audit-fields-configuration). Example: `"/app/config/audit-fields.yaml"`. + + +**Example - OTLP export with audit logs** (e.g. `config/local-beckn-one-bap.yaml`): ```yaml plugins: otelsetup: id: otelsetup config: - serviceName: "beckn-onix" + serviceName: "beckn-one-bap" serviceVersion: "1.0.0" - enableMetrics: "true" environment: "development" - metricsPort: "9090" + domain: "ev_charging" + otlpEndpoint: "otel-collector-bap:4317" + enableMetrics: "true" + enableTracing: "true" + enableLogs: "true" + timeInterval: "5" + networkMetricsGranularity: "2min" + networkMetricsFrequency: "4min" + auditFieldsConfig: "/app/config/audit-fields.yaml" ``` -### Accessing Metrics -When `plugins.otelsetup.config.enableMetrics: "true"`, the metrics endpoint is hosted on a separate HTTP server. Scrape metrics at: -``` -http://your-server:9090/metrics +### Audit fields configuration + +When `config.auditFieldsConfig` points to a YAML file, audit logs (emitted via OTLP when `enableLogs: "true"`) include only the fields you list per action. The file format: + +```yaml +auditRules: + default: # Optional: fallback for actions without a specific list + - context.transaction_id + - context.message_id + - context.action + - context.domain + - context.bap_id + - context.bpp_id + discover: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.filters + - message.spatial + select: + - context.transaction_id + - context.message_id + - context.action + - message.order.beckn:buyer.beckn:id + # ... more dot-path fields ``` -**Note**: The metrics server runs on the port specified by `config.metricsPort` (default: `9090`), which is separate from the main application port configured in `http.port`. 
+- **Top-level key**: `auditRules`. +- **Action keys**: Use Beckn action names (e.g. `discover`, `select`, `init`, `confirm`, `update`, `track`, `cancel`, `rating`, `support`). Use `default` for actions that do not have a specific list. +- **Values**: List of dot-path strings into the request/response JSON (e.g. `context.transaction_id`, `message.order.beckn:id`). Namespaced keys use colons (e.g. `beckn:id`). + +See `config/audit-fields.yaml` for a full example. ### Metrics Collected +When OtelSetup is configured with `otlpEndpoint`, metrics and traces are exported via OTLP (no separate metrics HTTP server). Scrape metrics from your OTLP collector. + Metrics are organized by module for better maintainability and encapsulation: #### OTel Setup (from `otelsetup` plugin) -- Prometheus exporter & `/metrics` endpoint on separate HTTP server -- Go runtime instrumentation (`go_*`), resource attributes, and meter provider wiring +- OTLP export for metrics, traces, and logs (gRPC endpoint). +- Go runtime instrumentation (`go_*`), resource attributes, and meter/tracer provider wiring. +- When `enableLogs: "true"` and `auditFieldsConfig` is set, audit logs are emitted via OTLP with fields defined in the audit-fields YAML. #### Step Execution Metrics (from `telemetry` package) - `onix_step_executions_total`, `onix_step_execution_duration_seconds`, `onix_step_errors_total` #### Handler Metrics (from `handler` module) +- `onix_http_request_count` – HTTP requests by status class, route, method, role, sender, recipient (and optional network metric attributes).
- `beckn_signature_validations_total` - Signature validation attempts - `beckn_schema_validations_total` - Schema validation attempts - `onix_routing_decisions_total` - Routing decisions taken by handler @@ -752,12 +816,12 @@ publisher: middleware: - id: reqpreprocessor config: - uuidKeys: transaction_id,message_id role: bap + contextKeys: transaction_id,message_id,subscriber_id,module_id ``` **Parameters**: -- `uuidKeys`: Comma-separated list of fields to auto-generate UUIDs for if missing +- `contextKeys`: Comma-separated list of fields to auto-generate UUIDs for if missing - `role`: BAP or BPP role for request processing --- diff --git a/README.md b/README.md index 484c0f5..1ca152c 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ The **Beckn Protocol** is an open protocol that enables location-aware, local co - **Decrypter**: AES decryption for encrypted data processing - **ReqPreprocessor**: Request preprocessing (UUID generation, headers) - **ReqMapper**: Middleware to transform payload either between Beckn versions or against other platforms. -- **OtelSetup**: Observability Setup to make metrics, traces and logs available +- **OtelSetup**: Observability setup for metrics, traces, and logs (OTLP). Supports optional audit log configuration via `auditFieldsConfig` (YAML mapping actions to fields). See [CONFIG.md](CONFIG.md) for details. ## Quick Start @@ -330,10 +330,11 @@ modules: ### Deployment Modes 1. **Combined Mode**: Single instance handling both BAP and BPP (`config/onix/`) - Uses `secretskeymanager` (HashiCorp Vault) for production key management -2. **BAP-Only Mode**: Dedicated buyer-side deployment (`config/onix-bap/`) -3. **BPP-Only Mode**: Dedicated seller-side deployment (`config/onix-bpp/`) -4. **Local Development Combined Mode**: Simplified configuration (`config/local-simple.yaml`) - Uses `simplekeymanager` with embedded Ed25519 keys, no vault setup needed. -5.
**Local Development Combined Mode (Alternative)**: Development configuration (`config/local-dev.yaml`) - Uses `keymanager` vault setup needed +2. **BAP-Only Mode**: Dedicated buyer-side deployment (`config/onix-bap/`) +3. **BPP-Only Mode**: Dedicated seller-side deployment (`config/onix-bpp/`) +4. **Local Development Combined Mode**: Simplified configuration (`config/local-simple.yaml`) - Uses `simplekeymanager` with embedded Ed25519 keys, no vault setup needed +5. **Local Development Combined Mode (Alternative)**: Development configuration (`config/local-dev.yaml`) - Uses `keymanager`, vault setup needed +6. **Local with Observability (BAP/BPP)**: Configs `config/local-beckn-one-bap.yaml` and `config/local-beckn-one-bpp.yaml` include OtelSetup (metrics, traces, audit logs) for use with an OTLP collector. Audit fields are configured via `config/audit-fields.yaml`. For a full stack (collectors, Grafana, Loki), see `install/network-observability/` ## API Endpoints @@ -359,14 +360,6 @@ modules: | POST | `/bpp/receiver/*` | Receives all BAP requests | | POST | `/bpp/caller/on_*` | Sends responses back to BAP | -### Observability Endpoints - -| Method | Endpoint | Description | -|--------|----------|-------------| -| GET | `/health` | Health check endpoint | -| GET | `/metrics` | Prometheus metrics endpoint (when telemetry is enabled) | - -**Note**: The `/metrics` endpoint is available when `telemetry.enableMetrics: true` in the configuration file. It returns metrics in Prometheus format. ## Documentation diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 518a419..2af4540 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -53,7 +53,7 @@ var runFunc = run func main() { // Define and parse command-line flags. 
- flag.StringVar(&configPath, "config", "config/onix/adapter.yaml", "Path to the configuration file") + flag.StringVar(&configPath, "config", "../../config/onix/adapter.yaml", "Path to the configuration file") flag.Parse() // Use custom log for initial setup messages. diff --git a/config/audit-fields.yaml b/config/audit-fields.yaml new file mode 100644 index 0000000..4fe1fce --- /dev/null +++ b/config/audit-fields.yaml @@ -0,0 +1,89 @@ +auditRules: + default: + - context.transaction_id + - context.message_id + - context.action + - context.domain + - context.bap_id + - context.bpp_id + + discover: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.filters + - message.spatial + + select: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:buyer.beckn:id + - message.order.beckn:seller + - message.order.beckn:orderItems.beckn:acceptedOffer.beckn:id + - message.order.beckn:orderAttributes + + init: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:buyer.beckn:id + - message.order.beckn:orderValue.value + - message.order.beckn:payment.beckn:paymentStatus + + confirm: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:orderStatus + - message.order.beckn:buyer.beckn:id + - message.order.beckn:payment.beckn:txnRef + - message.order.beckn:payment.beckn:paymentStatus + + update: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:orderStatus + - message.order.beckn:fulfillment.beckn:deliveryAttributes.sessionStatus + + track: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + + cancel: + - context.transaction_id + - 
context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:orderStatus + - message.order.beckn:buyer.beckn:id + + rating: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.id + - message.value + - message.category + + support: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.ref_id + - message.ref_type diff --git a/config/local-beckn-one-bap.yaml b/config/local-beckn-one-bap.yaml index 67ae76e..77a11e4 100644 --- a/config/local-beckn-one-bap.yaml +++ b/config/local-beckn-one-bap.yaml @@ -16,6 +16,25 @@ http: idle: 30 pluginManager: root: ./plugins + +# OpenTelemetry (OTLP) - metrics and traces sent to OTEL collector +plugins: + otelsetup: + id: otelsetup + config: + serviceName: "beckn-one-bap" + serviceVersion: "1.0.0" + environment: "development" + domain: "ev_charging" + otlpEndpoint: "otel-collector-bap:4317" + enableMetrics: "true" + networkMetricsGranularity: "2min" + networkMetricsFrequency: "4min" + enableTracing: "true" + enableLogs: "true" + timeInterval: "5" + auditFieldsConfig: "/app/config/audit-fields.yaml" + modules: - name: bapTxnReceiver path: /bap/receiver/ diff --git a/config/local-beckn-one-bpp.yaml b/config/local-beckn-one-bpp.yaml index af5ff8f..fa941ea 100644 --- a/config/local-beckn-one-bpp.yaml +++ b/config/local-beckn-one-bpp.yaml @@ -16,6 +16,23 @@ http: idle: 30 pluginManager: root: ./plugins +# OpenTelemetry (OTLP) - metrics and traces sent to OTEL collector, then to Loki/backend +plugins: + otelsetup: + id: otelsetup + config: + serviceName: "beckn-one-bpp" + serviceVersion: "1.0.0" + environment: "development" + domain: "ev_charging" + otlpEndpoint: "otel-collector-bpp:4317" + enableMetrics: "true" + networkMetricsGranularity: "2min" + networkMetricsFrequency: "4min" + enableTracing: "true" + enableLogs: "true" + timeInterval: "5" + auditFieldsConfig: 
"/app/config/audit-fields.yaml" modules: - name: bppTxnReceiver path: /bpp/receiver/ diff --git a/config/onix/adapter.local.yaml b/config/onix/adapter.local.yaml deleted file mode 100644 index ebaff50..0000000 --- a/config/onix/adapter.local.yaml +++ /dev/null @@ -1,221 +0,0 @@ -appName: "onix" -log: - level: debug - destinations: - - type: stdout - contextKeys: - - transaction_id - - message_id - - subscriber_id - - module_id -http: - port: 8080 - timeout: - read: 30 - write: 30 - idle: 30 -pluginManager: - root: ./plugins - remoteRoot: "" -modules: - - name: bapTxnReciever - path: /bap/reciever/ - handler: - type: std - role: bap - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 10.81.192.4:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signValidator: - id: signvalidator - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bapTxnReciever-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bap - steps: - - validateSign - - addRoute - - validateSchema - - name: bapTxnCaller - path: /bap/caller/ - handler: - type: std - role: bap - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - 
schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signer: - id: signer - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bapTxnCaller-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bap - steps: - - validateSchema - - addRoute - - sign - - name: bppTxnReciever - path: /bpp/reciever/ - handler: - type: std - role: bpp - subscriberId: bpp1 - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signValidator: - id: signvalidator - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bppTxnReciever-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bpp - steps: - - validateSign - - addRoute - - validateSchema - - name: bppTxnCaller - path: /bpp/caller/ - handler: - type: std - role: bpp - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signer: - id: 
signer - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bppTxnCaller-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bpp - steps: - - validateSchema - - addRoute - - sign diff --git a/config/onix/adapter.yaml b/config/onix/adapter.yaml index 4349e57..90dc3c3 100644 --- a/config/onix/adapter.yaml +++ b/config/onix/adapter.yaml @@ -1,114 +1,113 @@ -appName: "onix-ev-charging" -log: - level: debug - destinations: - - type: stdout - contextKeys: - - transaction_id - - message_id - - subscriber_id - - module_id - - parent_id - - -# OpenTelemetry (OTLP) - metrics and traces sent to OTEL collector, then to Loki/backend -plugins: - otelsetup: - id: otelsetup - config: - serviceName: "onix-ev-charging-bap" - serviceVersion: "1.0.0" - environment: "development" - domain: "ev_charging" - otlpEndpoint: "otel-collector:4317" - enableMetrics: "true" - networkMetricsGranularity: "2min" - networkMetricsFrequency: "4min" - enableTracing: "true" - enableLogs: "true" - timeInterval: "5" - auditFieldsConfig: "/app/config/audit-fields.yaml" - - -# this is the port for the bap plugin where bap app can dump the requests to the plugin http: - port: 8001 + port: 8080 timeout: read: 30 write: 30 idle: 30 - pluginManager: root: /app/plugins - + remoteRoot: /mnt/gcs/plugins/plugins_bundle.zip modules: - # BAP Receiver - Receives callbacks from CDS (Phase 1) and BPPs (Phase 2+) - # Phase 1: Receives on_search from CDS with aggregated catalog - # Phase 2+: Receives callbacks from BPPs (on_select, on_init, on_confirm, etc.) 
- - name: bapTxnReceiver - path: /bap/receiver/ + - name: bapTxnReciever + path: /bap/reciever/ handler: type: std role: bap - subscriberId: ev-charging.sandbox1.com httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: registry: id: registry config: - url: http://mock-registry:3030 + url: http://localhost:8080/reg retry_max: 3 retry_wait_min: 100ms retry_wait_max: 500ms keyManager: - id: simplekeymanager + id: secretskeymanager config: - networkParticipant: example-bap.com - keyId: bap-key-1 - signingPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - signingPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= - encrPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - encrPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= + projectID: ${projectID} cache: - id: cache + id: redis config: - addr: redis-bap:6379 + addr: 10.81.192.4:6379 schemaValidator: - id: schemav2validator + id: schemavalidator config: - type: url - location: https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/api/beckn.yaml - cacheTTL: "3600" + schemaDir: /mnt/gcs/configs/schemas signValidator: id: signvalidator + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever router: id: router config: - routingConfig: /app/config/bapTxnReciever-routing.yaml + routingConfigPath: /mnt/gcs/configs/bapTxnReciever-routing.yaml middleware: - id: reqpreprocessor config: - contextKeys: transaction_id,message_id,parent_id + contextKeys: transaction_id,message_id role: bap steps: - validateSign - addRoute - validateSchema - - # BAP Caller - Entry point for all requests from BAP - # Phase 1: Routes search to external CDS for aggregation - # Phase 2+: Routes other requests directly to BPP (bypasses CDS) - # Uses bpp_uri from context for dynamic routing in Phase 2+ - name: bapTxnCaller path: /bap/caller/ handler: type: std role: bap - subscriberId: 
ev-charging.sandbox1.com + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 192.168.1.1:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signer: + id: signer + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bapTxnCaller-routing.yaml + middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bap + steps: + - validateSchema + - addRoute + - sign + - name: bppTxnReciever + path: /bpp/reciever/ + handler: + type: std + role: bpp + subscriberId: bpp1 httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 @@ -118,41 +117,88 @@ modules: registry: id: registry config: - url: http://mock-registry:3030 + url: http://localhost:8080/reg retry_max: 3 retry_wait_min: 100ms retry_wait_max: 500ms keyManager: - id: simplekeymanager + id: secretskeymanager config: - networkParticipant: example-bap.com - keyId: bap-key-1 - signingPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - signingPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= - encrPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - encrPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= + projectID: ${projectID} cache: - id: cache + id: redis config: - addr: redis-bap:6379 + addr: 192.168.1.1:6379 schemaValidator: - id: schemav2validator + id: schemavalidator config: - type: url - location: https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/api/beckn.yaml - cacheTTL: "3600" + schemaDir: /mnt/gcs/configs/schemas + signValidator: + id: signvalidator + publisher: + id: publisher + config: + project: 
${projectID} + topic: bapNetworkReciever router: id: router config: - routingConfig: /app/config/bapTxnCaller-routing.yaml - signer: - id: signer + routingConfigPath: /mnt/gcs/configs/bppTxnReciever-routing.yaml middleware: - id: reqpreprocessor config: - contextKeys: transaction_id,message_id,parent_id - role: bap - + contextKeys: transaction_id,message_id + role: bpp + steps: + - validateSign + - addRoute + - validateSchema + - name: bppTxnCaller + path: /bpp/caller/ + handler: + type: std + role: bpp + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 192.168.1.1:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signer: + id: signer + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bppTxnCaller-routing.yaml + middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bpp steps: - validateSchema - addRoute diff --git a/config/onix/audit-fields.yaml b/config/onix/audit-fields.yaml deleted file mode 100644 index 3e332a2..0000000 --- a/config/onix/audit-fields.yaml +++ /dev/null @@ -1,24 +0,0 @@ -auditRules: - default: - - context.transaction_id - - context.message_id - - context.action - - context.domain - - context.bap_id - - context.bpp_id - - search: - - context.transaction_id - - context.message_id - - context.action - - context.timestamp - - message.intent - - select: - - context.transaction_id - - context.message_id - - context.action - - context.timestamp - - message.order.beckn:buyer.beckn:id - - 
message.order.beckn:orderItems.beckn:acceptedOffer.beckn:id - diff --git a/config/onix/bapTxnReciever-routing.yaml b/config/onix/bapTxnReciever-routing.yaml index dfdaa81..b1d5a44 100644 --- a/config/onix/bapTxnReciever-routing.yaml +++ b/config/onix/bapTxnReciever-routing.yaml @@ -1,47 +1,25 @@ -# ONIX BAP Receiver Routing Configuration - -# Supports Phase 1 (Discover Aggregation) and Phase 2+ (Direct BPP Callbacks) - - - -# Phase 1: Discover Aggregation - -# Phase 2+: Other Callbacks (Direct from BPPs to BAP, NO CDS involvement) - -# These routes use bap_uri from context to route callbacks back to originating BAP - - - routingRules: - - # Phase 1: on_discover callback to BAP (routed to mock-bap for testing) - - domain: ev_charging_network - version: "1.0.0" - targetType: url + - domain: "ONDC:TRV10" + version: "2.0.0" + routingType: "bpp" target: - url: http://mock-bap:9001 - excludeAction: false + url: "https://gateway.example.com" endpoints: - - on_discover - - - - - # Phase 2+: Other callbacks to BAP (routed to mock-bap for testing) - - - domain: ev_charging_network - version: "1.0.0" - targetType: url - target: - url: http://mock-bap:9001 - excludeAction: false + - search + - domain: "ONDC:TRV10" + version: "2.0.0" + routingType: "bpp" endpoints: - - on_select - - on_init - - on_confirm - - on_status - - on_track - - on_cancel - - on_update - - on_rating - - on_support + - select + - init + - confirm + - status + - cancel + - domain: "ONDC:TRV12" + version: "2.0.0" + routingType: "bpp" + endpoints: + - select + - init + - confirm + - status \ No newline at end of file diff --git a/config/onix/bppTxnCaller-routing.yaml b/config/onix/bppTxnCaller-routing.yaml index 0d9a670..ca4a478 100644 --- a/config/onix/bppTxnCaller-routing.yaml +++ b/config/onix/bppTxnCaller-routing.yaml @@ -1,23 +1,20 @@ routingRules: - domain: "ONDC:TRV10" version: "2.0.0" - routingType: "bap" + routingType: "url" + target: + url: "https://services-backend/trv/v1" endpoints: - - on_search 
- on_select - on_init - on_confirm - on_status - on_update - on_cancel - - domain: "ONDC:TRV11" + - domain: "ONDC:TRV10" version: "2.0.0" - routingType: "bap" + routingType: "msgq" + target: + topic_id: "trv_topic_id1" endpoints: - - on_search - - on_select - - on_init - - on_confirm - - on_status - - on_update - - on_cancel \ No newline at end of file + - on_search \ No newline at end of file diff --git a/core/module/handler/http_metric.go b/core/module/handler/http_metric.go index f52fa2f..9104978 100644 --- a/core/module/handler/http_metric.go +++ b/core/module/handler/http_metric.go @@ -17,7 +17,7 @@ type HTTPMetrics struct { } var ( - httlMetricsInstance *HTTPMetrics + httpMetricsInstance *HTTPMetrics httpMetricsOnce sync.Once httpMetricsErr error ) @@ -31,7 +31,7 @@ func newHTTPMetrics() (*HTTPMetrics, error) { if m.HttpRequestCount, err = meter.Int64Counter( "onix_http_request_count", - metric.WithDescription("Total HTTP requests by status, route, method, role and calle "), + metric.WithDescription("Total HTTP requests by status, route, method, role and caller"), metric.WithUnit("1"), ); err != nil { return nil, fmt.Errorf("onix_http_request_count: %w", err) @@ -42,9 +42,9 @@ func newHTTPMetrics() (*HTTPMetrics, error) { func GetHTTPMetrics(ctx context.Context) (*HTTPMetrics, error) { httpMetricsOnce.Do(func() { - httlMetricsInstance, httpMetricsErr = newHTTPMetrics() + httpMetricsInstance, httpMetricsErr = newHTTPMetrics() }) - return httlMetricsInstance, httpMetricsErr + return httpMetricsInstance, httpMetricsErr } // StatusClass returns the HTTP status class string (e.g. 200 -> "2xx"). 
diff --git a/core/module/handler/stdHandler.go b/core/module/handler/stdHandler.go index 6c2831a..39bb6f6 100644 --- a/core/module/handler/stdHandler.go +++ b/core/module/handler/stdHandler.go @@ -124,7 +124,7 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { selfID := h.SubscriberID remoteID := "" - if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok { + if v, ok := r.Context().Value(model.ContextKeyRemoteID).(string); ok { remoteID = v } var senderID, receiverID string @@ -176,7 +176,7 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // Restore request body before forwarding or publishing. r.Body = io.NopCloser(bytes.NewReader(stepCtx.Body)) if stepCtx.Route == nil { - response.SendAck(w) + response.SendAck(wrapped) return } @@ -388,7 +388,7 @@ func (h *stdHandler) initSteps(ctx context.Context, mgr PluginManager, cfg *Conf func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { selfID := h.SubscriberID remoteID := "" - if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok { + if v, ok := r.Context().Value(model.ContextKeyRemoteID).(string); ok { remoteID = v } diff --git a/core/module/handler/step_instrumentor.go b/core/module/handler/step_instrumentor.go index 8b1787f..15eade8 100644 --- a/core/module/handler/step_instrumentor.go +++ b/core/module/handler/step_instrumentor.go @@ -100,5 +100,6 @@ func (is *InstrumentedStep) Run(ctx *model.StepContext) error { if stepCtx.Route != nil { ctx.Route = stepCtx.Route } + ctx.WithContext(stepCtx.Context) return err } diff --git a/install/network-observability/docker-compose.yml b/install/network-observability/docker-compose.yml new file mode 100644 index 0000000..0c63e1b --- /dev/null +++ b/install/network-observability/docker-compose.yml @@ -0,0 +1,264 @@ +# Network observability + BAP/BPP adapters + BPP sandbox +# Builds Onix adapter once; BAP uses config/local-beckn-one-bap.yaml, BPP uses config/local-beckn-one-bpp.yaml. 
+# No mock-registry: BAP/BPP use real registry (api.testnet.beckn.one) from config. +# Run from repo root: docker compose -f install/network-observability/docker-compose.yml up -d +# UIs: Grafana http://localhost:3000 | Jaeger http://localhost:16686 | BAP http://localhost:8081 | BPP http://localhost:8082 + +services: + redis: + image: redis:alpine + pull_policy: always + container_name: redis-onix + ports: + - "6379:6379" + command: redis-server --requirepass your-redis-password + networks: + beckn_network: + aliases: + - redis + healthcheck: + test: ["CMD", "redis-cli", "-a", "your-redis-password", "ping"] + interval: 5s + timeout: 3s + retries: 5 + restart: unless-stopped + + onix-bap: + build: + context: ../.. + dockerfile: Dockerfile.adapter-with-plugins + image: onix-adapter:local + container_name: onix-bap + ports: + - "8081:8081" + volumes: + - ../../config:/app/config:ro + - ../../schemas:/app/schemas:ro + environment: + - CONFIG_FILE=/app/config/local-beckn-one-bap.yaml + - REDIS_PASSWORD=your-redis-password + - OTEL_EXPORTER_OTLP_INSECURE=true + - OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector-bap:4317 + command: ["./server", "--config=/app/config/local-beckn-one-bap.yaml"] + networks: + - beckn_network + - observability + restart: unless-stopped + depends_on: + redis: + condition: service_healthy + otel-collector-bap: + condition: service_started + + onix-bpp: + image: onix-adapter:local + container_name: onix-bpp + ports: + - "8082:8082" + volumes: + - ../../config:/app/config:ro + - ../../schemas:/app/schemas:ro + environment: + - CONFIG_FILE=/app/config/local-beckn-one-bpp.yaml + - REDIS_PASSWORD=your-redis-password + - OTEL_EXPORTER_OTLP_INSECURE=true + - OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector-bpp:4317 + command: ["./server", "--config=/app/config/local-beckn-one-bpp.yaml"] + networks: + - beckn_network + - observability + restart: unless-stopped + depends_on: + redis: + condition: service_healthy + otel-collector-bpp: + condition: service_started + 
+ otel-collector-bap: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-collector-bap + command: ["--config=/etc/otel/config.yaml"] + volumes: + - ./otel-collector-bap/config.yaml:/etc/otel/config.yaml:ro + ports: + - "4317:4317" + - "4318:4318" + - "8889:8889" + networks: + - observability + - beckn_network + restart: unless-stopped + depends_on: + - otel-collector-network + + otel-collector-bpp: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-collector-bpp + command: ["--config=/etc/otel/config.yaml"] + volumes: + - ./otel-collector-bpp/config.yaml:/etc/otel/config.yaml:ro + ports: + - "4321:4317" + - "4322:4318" + - "8891:8891" + networks: + - observability + - beckn_network + restart: unless-stopped + depends_on: + - otel-collector-network + + otel-collector-network: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-collector-network + command: ["--config=/etc/otel/config.yaml"] + volumes: + - ./otel-collector-network/config.yaml:/etc/otel/config.yaml:ro + ports: + - "4319:4317" + - "4320:4318" + - "8890:8890" + networks: + - observability + restart: unless-stopped + + zipkin: + image: openzipkin/zipkin:latest + container_name: zipkin + ports: + - "9411:9411" + networks: + - observability + restart: unless-stopped + + loki: + image: grafana/loki:latest + container_name: loki + command: -config.file=/etc/loki/loki-config.yml + volumes: + - ./loki/loki-config.yml:/etc/loki/loki-config.yml:ro + - loki_data:/loki + ports: + - "3100:3100" + networks: + - observability + restart: unless-stopped + + prometheus: + image: prom/prometheus:latest + container_name: prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-lifecycle + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus_data:/prometheus + ports: + - "9090:9090" + networks: + - observability + restart: unless-stopped + 
depends_on: + - otel-collector-bap + - otel-collector-bpp + + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger + environment: + - COLLECTOR_OTLP_ENABLED=true + ports: + - "16686:16686" + networks: + - observability + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: grafana + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + - GF_SERVER_ROOT_URL=http://localhost:3000 + volumes: + - grafana_data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning:ro + ports: + - "3000:3000" + networks: + - observability + restart: unless-stopped + depends_on: + - prometheus + - jaeger + - zipkin + - loki + + bpp-client: + image: fidedocker/protocol-server + container_name: bpp-client + platform: linux/amd64 + networks: + - beckn_network + ports: + - "6001:6001" + restart: unless-stopped + volumes: + - bpp_client_config_volume:/usr/src/app/config + - bpp_client_schemas_volume:/usr/src/app/schemas + - bpp_client_logs_volume:/usr/src/app/logs + + bpp-network: + image: fidedocker/protocol-server + container_name: bpp-network + platform: linux/amd64 + networks: + - beckn_network + ports: + - "6002:6002" + restart: unless-stopped + volumes: + - bpp_network_config_volume:/usr/src/app/config + - bpp_network_schemas_volume:/usr/src/app/schemas + - bpp_network_logs_volume:/usr/src/app/logs + + sandbox-api: + image: fidedocker/sandbox-api + container_name: sandbox-api + platform: linux/amd64 + networks: + - beckn_network + ports: + - "4010:4000" + restart: unless-stopped + environment: + - PORT=4000 + - WEBHOOK_URL=http://host.docker.internal:3001/webhook + +networks: + observability: + driver: bridge + beckn_network: + name: beckn_network + driver: bridge + +volumes: + prometheus_data: + grafana_data: + loki_data: + bpp_client_config_volume: + name: bpp_client_config_volume + external: true + bpp_client_schemas_volume: + name: 
bpp_client_schemas_volume + bpp_client_logs_volume: + name: bpp_client_logs_volume + bpp_network_config_volume: + name: bpp_network_config_volume + external: true + bpp_network_schemas_volume: + name: bpp_network_schemas_volume + bpp_network_logs_volume: + name: bpp_network_logs_volume diff --git a/install/network-observability/grafana/provisioning/dashboards/dashboards.yml b/install/network-observability/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..169c8aa --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,23 @@ +# Dashboard provisioning - load JSON dashboards from the json folder +apiVersion: 1 + +providers: + - name: 'Application' + orgId: 1 + folder: 'Onix / Application' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards/json/application + + - name: 'Network' + orgId: 1 + folder: 'Onix / Network' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards/json/network diff --git a/install/network-observability/grafana/provisioning/dashboards/json/application/metrics-dashboard.json b/install/network-observability/grafana/provisioning/dashboards/json/application/metrics-dashboard.json new file mode 100644 index 0000000..cb97b3b --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/json/application/metrics-dashboard.json @@ -0,0 +1 @@ 
+{"annotations":{"list":[]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":1,"id":null,"links":[],"liveNow":false,"panels":[{"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":200,"title":"Step","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto"},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":1},"id":1,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"histogram_quantile(0.99, sum(rate(onix_onix_step_execution_duration_seconds_bucket[5m])) by (le, module, role, step)) or histogram_quantile(0.99, sum(rate(onix_step_execution_duration_seconds_bucket[5m])) by (le, module, role, step))","legendFormat":"{{module}} {{role}} {{step}}","refId":"A"}],"title":"Step execution duration (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":1},"id":2,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate(onix_onix_step_executions_total[5m])) by (module, role, step) or sum(rate(onix_step_executions_total[5m])) by (module, role, step)","legendFormat":"{{module}} {{role}} {{step}}","refId":"A"}],"title":"Step executions (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":9},"id":3,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate(onix_onix_step_errors_total[5m])) by (module, role, step) or 
sum(rate(onix_step_errors_total[5m])) by (module, role, step)","legendFormat":"{{module}} {{role}} {{step}}","refId":"A"}],"title":"Step errors (rate)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":17},"id":201,"title":"Plugin","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":18},"id":4,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_plugin_execution_duration_seconds_bucket[5m]) or rate(onix_plugin_execution_duration_seconds_bucket[5m])","legendFormat":"{{le}}","refId":"A"}],"title":"Plugin execution duration (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":18},"id":7,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_plugin_errors_total[5m]) or rate(onix_plugin_errors_total[5m])","legendFormat":"plugin errors/s","refId":"A"}],"title":"Plugin errors (rate)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":202,"title":"Handler","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":27},"id":5,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_beckn_signature_validations_total[5m]) or rate(onix_onix_beckn_signature_validations_total[5m])","legendFormat":"signature 
validations/s","refId":"A"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate(onix_beckn_schema_validations_total[5m])) by (schema_version, status) or sum(rate(onix_onix_beckn_schema_validations_total[5m])) by (schema_version, status)","legendFormat":"{{schema_version}} {{status}}","refId":"B"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_routing_decisions_total[5m]) or rate(onix_routing_decisions_total[5m])","legendFormat":"routing decisions/s","refId":"C"}],"title":"Handler (validations & routing)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":35},"id":203,"title":"Cache","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":36},"id":6,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_cache_operations_total[5m]) or rate(onix_cache_operations_total[5m])","legendFormat":"operations/s","refId":"A"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_cache_hits_total[5m]) or rate(onix_cache_hits_total[5m])","legendFormat":"hits/s","refId":"B"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_cache_misses_total[5m]) or rate(onix_cache_misses_total[5m])","legendFormat":"misses/s","refId":"C"}],"title":"Cache (operations, hits, 
misses)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":44},"id":100,"title":"HTTP","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":45},"id":101,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m]))","legendFormat":"Total","refId":"A"}],"title":"HTTP request rate (total)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":45},"id":102,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (http_status_code)","legendFormat":"{{http_status_code}}","refId":"A"}],"title":"HTTP request rate by status class","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":53},"id":103,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"HTTP request rate by 
path","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":53},"id":105,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (caller)","legendFormat":"{{caller}}","refId":"A"}],"title":"HTTP request rate by caller","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":61},"id":106,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\",http_status_code=\"2xx\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"2xx request rate by path","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":61},"id":107,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\",http_status_code=\"4xx\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"4xx request rate by 
path","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":69},"id":108,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\",http_status_code=\"5xx\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"5xx request rate by path","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":77},"id":204,"title":"Go runtime","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":78},"id":9,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_used or onix_onix_go_memory_used or onix_go_memory_used_bytes or onix_onix_go_memory_used_bytes","legendFormat":"{{go_memory_type}}","refId":"A"}],"title":"Go runtime Memory used","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":78},"id":10,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_limit or onix_onix_go_memory_limit","legendFormat":"memory limit","refId":"A"}],"title":"Go runtime – Memory 
limit","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":86},"id":11,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_allocated or onix_onix_go_memory_allocated or rate(onix_go_memory_allocated_bytes_total[5m]) or rate(onix_onix_go_memory_allocated_bytes_total[5m])","legendFormat":"allocated","refId":"A"}],"title":"Go runtime – Memory allocated (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":86},"id":12,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_allocations or onix_onix_go_memory_allocations or rate(onix_go_memory_allocations_total[5m]) or rate(onix_onix_go_memory_allocations_total[5m])","legendFormat":"allocations/s","refId":"A"}],"title":"Go runtime – Memory allocations (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":94},"id":13,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_gc_goal or onix_onix_go_memory_gc_goal or onix_go_memory_gc_goal_bytes or onix_onix_go_memory_gc_goal_bytes","legendFormat":"GC goal","refId":"A"}],"title":"Go runtime – GC 
goal","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":94},"id":14,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_goroutine_count or onix_onix_go_goroutine_count","legendFormat":"goroutines","refId":"A"}],"title":"Go runtime – Goroutine count","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":102},"id":15,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_processor_limit or onix_onix_go_processor_limit","legendFormat":"GOMAXPROCS","refId":"A"}],"title":"Go runtime – Processor limit","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto"},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":102},"id":16,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"histogram_quantile(0.99, sum(rate(onix_go_schedule_duration_bucket[5m])) by (le)) or histogram_quantile(0.99, sum(rate(onix_onix_go_schedule_duration_bucket[5m])) by (le))","legendFormat":"p99","refId":"A"}],"title":"Go runtime – Schedule duration 
(p99)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":110},"id":17,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_config_gogc or onix_onix_go_config_gogc or onix_go_config_gogc_percent or onix_onix_go_config_gogc_percent","legendFormat":"GOGC","refId":"A"}],"title":"Go runtime – GOGC","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":118},"id":205,"title":"Redis ","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":119},"id":301,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_max or onix_onix_db_client_connections_max","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connections max","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":119},"id":302,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_idle_max or onix_onix_db_client_connections_idle_max","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connections idle 
max","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":127},"id":303,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_idle_min or onix_onix_db_client_connections_idle_min","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connections idle min","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":127},"id":304,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_usage or onix_onix_db_client_connections_usage","legendFormat":"{{pool_name}} {{state}}","refId":"A"}],"title":"Redis client connections usage by state","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":135},"id":305,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_waits or onix_onix_db_client_connections_waits","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection waits","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"ns"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":135},"id":306,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_waits_duration or 
onix_onix_db_client_connections_waits_duration","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection waits duration","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":143},"id":307,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_timeouts or onix_onix_db_client_connections_timeouts","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection timeouts","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":143},"id":308,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_hits or onix_onix_db_client_connections_hits","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection pool hits","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":151},"id":309,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_misses or onix_onix_db_client_connections_misses","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection pool misses","type":"timeseries"}],"refresh":"10s","schemaVersion":38,"style":"dark","tags":["onix","metrics"],"templating":{"list":[]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Onix Metrics","uid":"onix-metrics","version":1,"weekStart":""} diff --git 
a/install/network-observability/grafana/provisioning/dashboards/json/application/traces-dashboard.json b/install/network-observability/grafana/provisioning/dashboards/json/application/traces-dashboard.json new file mode 100644 index 0000000..a858a0d --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/json/application/traces-dashboard.json @@ -0,0 +1 @@ +{"annotations":{"list":[]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"id":null,"links":[],"liveNow":false,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":8,"panels":[],"title":"Search by Trace ID","type":"row"},{"datasource":{"type":"jaeger","uid":"jaeger"},"gridPos":{"h":10,"w":24,"x":0,"y":1},"id":11,"options":{"dedupStrategy":"none","enableLogDetails":true,"showCommonLabels":false,"showProcess":false,"sortOrder":"Descend"},"targets":[{"datasource":{"type":"jaeger","uid":"jaeger"},"queryType":"traceId","query":"${traceID}","refId":"A"}],"title":"Trace by ID","type":"traces"},{"gridPos":{"h":4,"w":24,"x":0,"y":11},"id":9,"options":{"content":"**Search traces**: Use **Jaeger UI** at [http://localhost:16686](http://localhost:16686). Select service **onix-ev-charging-bap** and click **Find Traces**. Alternatively, paste a Trace ID in the panel above.","mode":"markdown"},"title":"Search traces (Jaeger UI)","type":"text"},{"datasource":{"type":"jaeger","uid":"jaeger"},"gridPos":{"h":4,"w":24,"x":0,"y":15},"id":10,"options":{"content":"**Trace ID**: Use the **full 32-character hex** from adapter logs (e.g. `7a385394ee77d4451a1c655c236422fc`). Paste above and refresh. **If you see \"No data\"**: (1) Wait 10–15 s after the request — the adapter batches spans before export. (2) Ensure time range (top right) includes the request time. 
(3) Use **Jaeger UI** at [http://localhost:16686](http://localhost:16686) (service: **onix-ev-charging-bap**) to search; check the `otel-collector-bap` / `otel-collector-bpp` container logs for export errors if spans are missing.","mode":"markdown"},"title":"How to use","type":"text"}],"refresh":"30s","schemaVersion":38,"style":"dark","tags":["onix","traces","jaeger"],"templating":{"list":[{"current":{"selected":false,"text":"","value":""},"hide":0,"label":"Trace ID (full 32 hex chars)","name":"traceID","options":[{"selected":true,"text":"","value":""}],"query":"","skipUrlSync":false,"type":"textbox"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Onix Traces","uid":"onix-traces","version":1,"weekStart":""} diff --git a/install/network-observability/grafana/provisioning/dashboards/json/network/network-observability-dashboard.json b/install/network-observability/grafana/provisioning/dashboards/json/network/network-observability-dashboard.json new file mode 100644 index 0000000..bcf3edf --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/json/network/network-observability-dashboard.json @@ -0,0 +1,199 @@ +{ + "annotations": { "list": [] }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": true, + "tags": [], + "targetBlank": true, + "title": "Zipkin UI", + "tooltip": "Open Zipkin UI for network API traces", + "type": "link", + "url": "http://localhost:9411" + } + ], + "liveNow": false, + "panels": [ + { + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 1, + "title": "Network Metrics", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, + "id": 2, + "options": { 
"legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (action)", + "legendFormat": "{{action}}", + "refId": "A" + } + ], + "title": "HTTP request rate by action", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, + "id": 3, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (http_status_code)", + "legendFormat": "{{http_status_code}}", + "refId": "A" + } + ], + "title": "HTTP request rate by status", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, + "id": 4, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (role)", + "legendFormat": "{{role}}", + "refId": "A" + } + ], + "title": "HTTP request rate by role", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, + "id": 5, + "options": { 
"legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (caller)", + "legendFormat": "{{caller}}", + "refId": "A" + } + ], + "title": "HTTP request rate by caller", + "type": "timeseries" + }, + { + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 17 }, + "id": 6, + "title": "Network Logs (Beckn Audit)", + "type": "row" + }, + { + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 12, "w": 24, "x": 0, "y": 18 }, + "id": 7, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descend", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { "type": "loki", "uid": "loki" }, + "expr": "{service_name=~\"onix.*|beckn.*\"}", + "refId": "A" + } + ], + "title": "Beckn audit logs (onix/beckn)", + "type": "logs" + }, + { + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 }, + "id": 71, + "options": { "dedupStrategy": "none", "enableLogDetails": true, "showCommonLabels": true, "showLabels": true, "showTime": true, "sortOrder": "Descend" }, + "targets": [{ "datasource": { "type": "loki", "uid": "loki" }, "expr": "{}", "refId": "A" }], + "title": "All logs (debug: matches any)", + "type": "logs" + }, + { + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, + "id": 8, + "title": "Network Traces", + "type": "row" + }, + { + "datasource": { "type": "zipkin", "uid": "zipkin" }, + "gridPos": { "h": 12, "w": 24, "x": 0, "y": 33 }, + "id": 9, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "showCommonLabels": false, + "showProcess": false, + "sortOrder": "Descend" + }, + "targets": [ + { + "datasource": { "type": "zipkin", "uid": "zipkin" }, + "query": "", + "queryType": "traceqlSearch", + 
"refId": "A" + } + ], + "title": "Network API spans (Zipkin)", + "type": "traces" + }, + { + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 45 }, + "id": 10, + "options": { + "content": "**Network-level observability**: Beckn API spans, audit logs, and HTTP request metrics from the network pipeline.\n\n**No Loki/Zipkin data?** 1) Restart stack after config changes: `docker compose -f install/network-observability/docker-compose.yml up -d --force-recreate`. 2) Trigger requests to generate audit logs (EmitAuditLogs runs on each request). 3) Use [Zipkin UI](http://localhost:9411) to search traces. 4) In Grafana Explore (Loki), try `{}` or `{service_name=~\".+\"}` to see all logs.", + "mode": "markdown" + }, + "title": "About", + "type": "text" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": ["onix", "network", "observability"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Network Observability", + "uid": "network-observability", + "version": 1, + "weekStart": "" +} diff --git a/install/network-observability/grafana/provisioning/datasources/datasources.yml b/install/network-observability/grafana/provisioning/datasources/datasources.yml new file mode 100644 index 0000000..ec26b4b --- /dev/null +++ b/install/network-observability/grafana/provisioning/datasources/datasources.yml @@ -0,0 +1,32 @@ +# Grafana datasources - provisioned on startup +apiVersion: 1 + +datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + + - name: Jaeger + uid: jaeger + type: jaeger + access: proxy + url: http://jaeger:16686 + editable: false + + - name: Loki + uid: loki + type: loki + access: proxy + url: http://loki:3100 + editable: false + + - name: Zipkin + uid: zipkin + type: zipkin + access: proxy + url: http://zipkin:9411 + editable: false diff --git
a/install/network-observability/loki/loki-config.yml b/install/network-observability/loki/loki-config.yml new file mode 100644 index 0000000..fb3fd28 --- /dev/null +++ b/install/network-observability/loki/loki-config.yml @@ -0,0 +1,35 @@ +# Loki config for network-level audit logs (OTLP ingestion) +# OTLP requires allow_structured_metadata for Loki 3.x + +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +limits_config: + allow_structured_metadata: true + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +ruler: + alertmanager_url: http://localhost:9093 diff --git a/install/network-observability/otel-collector-bap/config.yaml b/install/network-observability/otel-collector-bap/config.yaml new file mode 100644 index 0000000..995fac6 --- /dev/null +++ b/install/network-observability/otel-collector-bap/config.yaml @@ -0,0 +1,83 @@ +# OpenTelemetry Collector BAP - receives OTLP from BAP adapter (local-beckn-one-bap.yaml) +# App-level: all signals to Prometheus and Jaeger. Network-level: filtered to otel-collector-network. 
+ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +processors: + batch: + send_batch_size: 1024 + timeout: 10s + batch/traces: + send_batch_size: 1024 + timeout: 2s + + filter/network_metrics: + error_mode: ignore + metrics: + metric: + - 'name != "onix_http_request_count"' + + filter/network_traces: + error_mode: ignore + traces: + span: + - 'attributes["sender.id"] == nil' + +exporters: + prometheus: + endpoint: "0.0.0.0:8889" + namespace: onix + const_labels: + observability: otel-collector-bap + service_name: beckn-one-bap + + otlp_grpc/jaeger: + endpoint: jaeger:4317 + tls: + insecure: true + + otlp_http/collector2: + endpoint: http://otel-collector-network:4318 + compression: gzip + +extensions: + health_check: + endpoint: 0.0.0.0:13133 + zpages: + endpoint: 0.0.0.0:55679 + +service: + extensions: [health_check, zpages] + pipelines: + metrics/app: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + + metrics/network: + receivers: [otlp] + processors: [filter/network_metrics, batch] + exporters: [otlp_http/collector2] + + traces/app: + receivers: [otlp] + processors: [batch/traces] + exporters: [otlp_grpc/jaeger] + + traces/network: + receivers: [otlp] + processors: [filter/network_traces, batch/traces] + exporters: [otlp_http/collector2] + + logs/network: + receivers: [otlp] + processors: [batch] + exporters: [otlp_http/collector2] + + telemetry: + logs: + level: info diff --git a/install/network-observability/otel-collector-bpp/config.yaml b/install/network-observability/otel-collector-bpp/config.yaml new file mode 100644 index 0000000..65b0383 --- /dev/null +++ b/install/network-observability/otel-collector-bpp/config.yaml @@ -0,0 +1,83 @@ +# OpenTelemetry Collector BPP - receives OTLP from BPP adapter (local-beckn-one-bpp.yaml) +# App-level: all signals to Prometheus and Jaeger. Network-level: filtered to otel-collector-network. 
+ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +processors: + batch: + send_batch_size: 1024 + timeout: 10s + batch/traces: + send_batch_size: 1024 + timeout: 2s + + filter/network_metrics: + error_mode: ignore + metrics: + metric: + - 'name != "onix_http_request_count"' + + filter/network_traces: + error_mode: ignore + traces: + span: + - 'attributes["sender.id"] == nil' + +exporters: + prometheus: + endpoint: "0.0.0.0:8891" + namespace: onix + const_labels: + observability: otel-collector-bpp + service_name: beckn-one-bpp + + otlp_grpc/jaeger: + endpoint: jaeger:4317 + tls: + insecure: true + + otlp_http/collector2: + endpoint: http://otel-collector-network:4318 + compression: gzip + +extensions: + health_check: + endpoint: 0.0.0.0:13133 + zpages: + endpoint: 0.0.0.0:55679 + +service: + extensions: [health_check, zpages] + pipelines: + metrics/app: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + + metrics/network: + receivers: [otlp] + processors: [filter/network_metrics, batch] + exporters: [otlp_http/collector2] + + traces/app: + receivers: [otlp] + processors: [batch/traces] + exporters: [otlp_grpc/jaeger] + + traces/network: + receivers: [otlp] + processors: [filter/network_traces, batch/traces] + exporters: [otlp_http/collector2] + + logs/network: + receivers: [otlp] + processors: [batch] + exporters: [otlp_http/collector2] + + telemetry: + logs: + level: info diff --git a/install/network-observability/otel-collector-network/config.yaml b/install/network-observability/otel-collector-network/config.yaml new file mode 100644 index 0000000..0a5ab69 --- /dev/null +++ b/install/network-observability/otel-collector-network/config.yaml @@ -0,0 +1,60 @@ +# Collector 2 - receives network-level OTLP from Collector 1, exports to Loki, Zipkin, Prometheus + +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + grpc: + endpoint: 0.0.0.0:4317 + +processors: + # Map Beckn transaction_id -> trace_id and message_id -> 
span_id for UI correlation. + # UUID format: remove hyphens for trace_id (32 hex chars); first 16 hex chars for span_id. + transform/beckn_ids: + error_mode: ignore + trace_statements: + - set(span.attributes["_beckn_tx"], span.attributes["transaction_id"]) where span.attributes["transaction_id"] != nil + - replace_pattern(span.attributes["_beckn_tx"], "-", "") where span.attributes["_beckn_tx"] != nil + - set(span.trace_id, TraceID(span.attributes["_beckn_tx"])) where span.attributes["_beckn_tx"] != nil + + + batch: + send_batch_size: 1024 + timeout: 10s + +exporters: + prometheus: + endpoint: "0.0.0.0:8890" + namespace: onix_network + const_labels: + observability: network-level + + zipkin: + endpoint: http://zipkin:9411/api/v2/spans + format: json + + otlphttp/loki: + endpoint: http://loki:3100/otlp + compression: gzip + +service: + pipelines: + metrics: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + + traces: + receivers: [otlp] + processors: [transform/beckn_ids, batch] + exporters: [zipkin] + + logs: + receivers: [otlp] + processors: [batch] + exporters: [otlphttp/loki] + + telemetry: + logs: + level: info diff --git a/install/network-observability/prometheus/prometheus.yml b/install/network-observability/prometheus/prometheus.yml new file mode 100644 index 0000000..773f90a --- /dev/null +++ b/install/network-observability/prometheus/prometheus.yml @@ -0,0 +1,20 @@ +# Prometheus - scrapes metrics from OTEL Collectors (BAP, BPP, network) +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "otel-collector-bap" + static_configs: + - targets: ["otel-collector-bap:8889"] + metrics_path: /metrics + + - job_name: "otel-collector-bpp" + static_configs: + - targets: ["otel-collector-bpp:8891"] + metrics_path: /metrics + + - job_name: "otel-collector-network" + static_configs: + - targets: ["otel-collector-network:8890"] + metrics_path: /metrics diff --git a/pkg/model/model.go b/pkg/model/model.go index 
ac3dc5b..c75c021 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -57,8 +57,8 @@ const ( // ContextKeyParentID is the context key for storing and retrieving the parent ID from a request context ContextKeyParentID ContextKey = "parent_id" - // ContextKeyCallerID is the context key for the caller who is calling the bap/bpp - ContextKeyCallerID ContextKey = "caller_id" + // ContextKeyRemoteID is the context key for the caller who is calling the bap/bpp + ContextKeyRemoteID ContextKey = "caller_id" ) var contextKeys = map[string]ContextKey{ @@ -67,7 +67,7 @@ var contextKeys = map[string]ContextKey{ "subscriber_id": ContextKeySubscriberID, "module_id": ContextKeyModuleID, "parent_id": ContextKeyParentID, - "caller_id": ContextKeyCallerID, + "caller_id": ContextKeyRemoteID, } // ParseContextKey converts a string into a valid ContextKey. diff --git a/pkg/plugin/implementation/otelsetup/cmd/plugin.go b/pkg/plugin/implementation/otelsetup/cmd/plugin.go index a0407d7..b8a92a4 100644 --- a/pkg/plugin/implementation/otelsetup/cmd/plugin.go +++ b/pkg/plugin/implementation/otelsetup/cmd/plugin.go @@ -41,9 +41,18 @@ func (m metricsProvider) New(ctx context.Context, config map[string]string) (*te if v := ctx.Value(model.ContextKeyParentID); v != nil { parentID := v.(string) p := strings.Split(parentID, ":") - deviceId = p[len(p)-1] - producerType = p[0] - producer = p[1] + if len(p) >= 3 { + producerType = p[0] + producer = p[1] + deviceId = p[len(p)-1] + } else if len(p) >= 2 { + producerType = p[0] + producer = p[1] + deviceId = p[1] + } else if len(p) >= 1 { + producerType = p[0] + deviceId = p[0] + } } if deviceId != "" { @@ -97,7 +106,7 @@ func (m metricsProvider) New(ctx context.Context, config map[string]string) (*te } } - //to set network leval matric frequency and granularity + //to set network level metric frequency and granularity if v, ok := config["networkMetricsGranularity"]; ok && v != "" { telemetry.SetNetworkMetricsConfig(v, "") } diff --git
a/pkg/plugin/implementation/otelsetup/otelsetup.go b/pkg/plugin/implementation/otelsetup/otelsetup.go index adccc5e..6606437 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup.go @@ -108,7 +108,7 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) }, nil } - //this will be used by both matric and traces + //this will be used by both metric and traces // to build resource with envelope metadata baseAttrs := []attribute.KeyValue{ @@ -123,10 +123,10 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) resMetric, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "METRIC")...)) if err != nil { - return nil, fmt.Errorf("failed to create telemetry resource for matric: %w", err) + return nil, fmt.Errorf("failed to create telemetry resource for metric: %w", err) } - //OTLP matric + //OTLP metric var meterProvider *metric.MeterProvider if cfg.EnableMetrics { metricExpoter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(cfg.OtlpEndpoint), @@ -139,7 +139,7 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) otel.SetMeterProvider(meterProvider) log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s (OTLP endpoint=%s)", cfg.ServiceName, cfg.ServiceVersion, cfg.Environment, cfg.OtlpEndpoint) - // for the go runtime matrics + // for the go runtime metrics if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(runtime.DefaultMinimumReadMemStatsInterval)); err != nil { log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err) } diff --git a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go index 97d74a4..23f1461 100644 --- a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go +++ b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go @@ -75,8 +75,8 @@ func 
NewPreProcessor(cfg *Config) (func(http.Handler) http.Handler, error) { } if callerID != nil { - log.Debugf(ctx, "adding callerID to request:%s, %v", model.ContextKeyCallerID, callerID) - ctx = context.WithValue(ctx, model.ContextKeyCallerID, callerID) + log.Debugf(ctx, "adding callerID to request:%s, %v", model.ContextKeyRemoteID, callerID) + ctx = context.WithValue(ctx, model.ContextKeyRemoteID, callerID) } for _, key := range cfg.ContextKeys { ctxKey, _ := model.ParseContextKey(key) diff --git a/pkg/telemetry/audit_fields.go b/pkg/telemetry/audit_fields.go index d5e5635..be55789 100644 --- a/pkg/telemetry/audit_fields.go +++ b/pkg/telemetry/audit_fields.go @@ -108,41 +108,6 @@ func getFieldForAction(ctx context.Context, action string) []string { return auditRules["default"] } -//func getByPath(root map[string]interface{}, path string) (interface{}, bool) { -// -// parts := strings.Split(path, ".") -// var cur interface{} = root -// -// for _, part := range parts { -// m, ok := cur.(map[string]interface{}) -// if !ok { -// return nil, false -// } -// v, ok := m[part] -// if !ok { -// return nil, false -// } -// cur = v -// } -// return cur, true -//} -// -//func setByPath(root map[string]interface{}, path string, value interface{}) { -// parts := strings.Split(path, ".") -// cur := root -// -// for i := 0; i < len(parts)-1; i++ { -// k := parts[i] -// next, ok := cur[k].(map[string]interface{}) -// if !ok { -// next = map[string]interface{}{} -// cur[k] = next -// } -// cur = next -// } -// cur[parts[len(parts)-1]] = value -//} - func projectPath(cur interface{}, parts []string) (interface{}, bool) { if len(parts) == 0 { return cur, true diff --git a/pkg/telemetry/pluginMetrics.go b/pkg/telemetry/pluginMetrics.go index a26601b..4ef2912 100644 --- a/pkg/telemetry/pluginMetrics.go +++ b/pkg/telemetry/pluginMetrics.go @@ -50,7 +50,7 @@ var ( AttrMetricGranularity = attribute.Key("metric.granularity") AttrMetricFrequency = attribute.Key("metric.frequency") 
AttrObservedTimeUnixNano = attribute.Key("observedTimeUnixNano") - AttrMatricLabels = attribute.Key("metric.labels") + AttrMetricLabels = attribute.Key("metric.labels") AttrSenderID = attribute.Key("sender.id") AttrRecipientID = attribute.Key("recipient.id") ) From 80f1cc0f1072edb299e5130a6c0c86b215b93278 Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Thu, 26 Feb 2026 14:57:38 +0530 Subject: [PATCH 7/9] Fix : update the config --- config/onix/adapter.yaml | 16 +++++++++ config/onix/bapTxnCaller-routing.yaml | 48 +++++++++------------------ 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/config/onix/adapter.yaml b/config/onix/adapter.yaml index 90dc3c3..200388c 100644 --- a/config/onix/adapter.yaml +++ b/config/onix/adapter.yaml @@ -1,3 +1,13 @@ +appName: "onix" +log: + level: debug + destinations: + - type: stdout + contextKeys: + - transaction_id + - message_id + - subscriber_id + - module_id http: port: 8080 timeout: @@ -16,6 +26,9 @@ modules: httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: registry: id: registry config: @@ -63,6 +76,9 @@ modules: httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: registry: id: registry config: diff --git a/config/onix/bapTxnCaller-routing.yaml b/config/onix/bapTxnCaller-routing.yaml index 404d0fe..b1d5a44 100644 --- a/config/onix/bapTxnCaller-routing.yaml +++ b/config/onix/bapTxnCaller-routing.yaml @@ -1,41 +1,25 @@ -# ONIX BAP Caller Routing Configuration - -# Supports Phase 1 (Discover Aggregation) and Phase 2+ (Direct BPP Routing) - -# Phase 1: Discover (Aggregation via CDS) - -# Phase 2+: Other Requests (Direct to BPP, NO CDS involvement) - -# These routes use bpp_uri from context (provided in on_discover aggregated response) - routingRules: - - # Phase 1: Discover to CDS - - - domain: ev_charging_network - version: "1.0.0" - targetType: url + - 
domain: "ONDC:TRV10" + version: "2.0.0" + routingType: "bpp" target: - url: http://mock-cds:8082/csd - excludeAction: false + url: "https://gateway.example.com" endpoints: - - discover - - - - # Phase 2+: Other actions to BPP (via context_endpoint) - - - domain: ev_charging_network - version: "1.0.0" - targetType: bpp - target: {} + - search + - domain: "ONDC:TRV10" + version: "2.0.0" + routingType: "bpp" endpoints: - select - init - confirm - status - - track - cancel - - update - - rating - - support + - domain: "ONDC:TRV12" + version: "2.0.0" + routingType: "bpp" + endpoints: + - select + - init + - confirm + - status \ No newline at end of file From 1293c241f77c80579f04da554827968e4a95cf12 Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Thu, 26 Feb 2026 15:00:28 +0530 Subject: [PATCH 8/9] Fix : update the config --- config/onix/bapTxnReciever-routing.yaml | 29 ++++++++++--------------- config/onix/bppTxnCaller-routing.yaml | 19 +++++++++------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/config/onix/bapTxnReciever-routing.yaml b/config/onix/bapTxnReciever-routing.yaml index b1d5a44..ca4a478 100644 --- a/config/onix/bapTxnReciever-routing.yaml +++ b/config/onix/bapTxnReciever-routing.yaml @@ -1,25 +1,20 @@ routingRules: - domain: "ONDC:TRV10" version: "2.0.0" - routingType: "bpp" + routingType: "url" target: - url: "https://gateway.example.com" + url: "https://services-backend/trv/v1" endpoints: - - search + - on_select + - on_init + - on_confirm + - on_status + - on_update + - on_cancel - domain: "ONDC:TRV10" version: "2.0.0" - routingType: "bpp" + routingType: "msgq" + target: + topic_id: "trv_topic_id1" endpoints: - - select - - init - - confirm - - status - - cancel - - domain: "ONDC:TRV12" - version: "2.0.0" - routingType: "bpp" - endpoints: - - select - - init - - confirm - - status \ No newline at end of file + - on_search \ No newline at end of file diff --git a/config/onix/bppTxnCaller-routing.yaml 
b/config/onix/bppTxnCaller-routing.yaml index ca4a478..3382537 100644 --- a/config/onix/bppTxnCaller-routing.yaml +++ b/config/onix/bppTxnCaller-routing.yaml @@ -1,20 +1,23 @@ routingRules: - domain: "ONDC:TRV10" version: "2.0.0" - routingType: "url" - target: - url: "https://services-backend/trv/v1" + routingType: "bap" endpoints: + - on_search - on_select - on_init - on_confirm - on_status - on_update - on_cancel - - domain: "ONDC:TRV10" + - domain: "ONDC:TRV11" version: "2.0.0" - routingType: "msgq" - target: - topic_id: "trv_topic_id1" + routingType: "bap" endpoints: - - on_search \ No newline at end of file + - on_search + - on_select + - on_init + - on_confirm + - on_status + - on_update + - on_cancel From 59aa0589203a41e868a10e97b7f6c5113c1d59dd Mon Sep 17 00:00:00 2001 From: Manendra Pal Singh Date: Fri, 27 Feb 2026 19:05:09 +0530 Subject: [PATCH 9/9] Feat: update the pr as per comment --- cmd/adapter/main.go | 15 ++-- core/module/handler/handlerMetrics.go | 1 - core/module/handler/http_metric.go | 5 +- core/module/handler/stdHandler.go | 36 +++----- core/module/handler/step.go | 10 +-- .../network-observability/docker-compose.yml | 85 +++++++----------- pkg/log/log_test.go | 10 +-- pkg/model/model.go | 4 +- pkg/plugin/implementation/cache/cache.go | 22 +++-- .../implementation/otelsetup/otelsetup.go | 86 +++++++++---------- .../otelsetup/otelsetup_test.go | 57 +++++++----- pkg/telemetry/audit.go | 2 - pkg/telemetry/pluginMetrics.go | 4 +- 13 files changed, 159 insertions(+), 178 deletions(-) diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 2af4540..3e913e1 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -233,7 +233,7 @@ func addParentIdCtx(ctx context.Context, config *Config) context.Context { log.Infof(ctx, "Adding POD name: %s", p) podName = p } else { - log.Info(ctx, "POD_NAME environment variable not set falling back to hostname") + log.Info(ctx, "POD_NAME environment variable not set, falling back to hostname") if hostname, 
err := os.Hostname(); err == nil { log.Infof(ctx, "Setting POD name as hostname: %s", hostname) podName = hostname @@ -243,16 +243,21 @@ func addParentIdCtx(ctx context.Context, config *Config) context.Context { } for _, m := range config.Modules { - if m.Handler.Role != "" && m.Handler.SubscriberID != "" { - parentID = string(m.Handler.Role) + ":" + m.Handler.SubscriberID + ":" + podName - break + if m.Handler.Role == "" || m.Handler.SubscriberID == "" { + continue + } + candidate := string(m.Handler.Role) + ":" + m.Handler.SubscriberID + ":" + podName + if parentID == "" { + parentID = candidate + } else if candidate != parentID { + log.Warnf(ctx, "Multiple distinct role:subscriberID pairs found in modules (using %q, also saw %q); consider explicit parent_id config", parentID, candidate) } } if parentID != "" { ctx = context.WithValue(ctx, model.ContextKeyParentID, parentID) } else { - log.Warnf(ctx, "Failed to find parent ID in config please add the role and subscriber_id in the handler config ") + log.Warnf(ctx, "Failed to find parent ID in config; add role and subscriber_id to the handler config") } return ctx } diff --git a/core/module/handler/handlerMetrics.go b/core/module/handler/handlerMetrics.go index ccc5932..0ed0618 100644 --- a/core/module/handler/handlerMetrics.go +++ b/core/module/handler/handlerMetrics.go @@ -65,4 +65,3 @@ func newHandlerMetrics() (*HandlerMetrics, error) { return m, nil } - diff --git a/core/module/handler/http_metric.go b/core/module/handler/http_metric.go index 9104978..837d03d 100644 --- a/core/module/handler/http_metric.go +++ b/core/module/handler/http_metric.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/http" + "strings" "sync" "github.com/beckn-one/beckn-onix/pkg/telemetry" @@ -79,10 +80,10 @@ func RecordHTTPRequest(ctx context.Context, statusCode int, action, role, sender metric_code := action + "_api_total_count" category := "NetworkHealth" - if action == "/search" || action == "/discovery" { + if 
strings.HasSuffix(action, "/search") || strings.HasSuffix(action, "/discovery") { category = "Discovery" } - attributes = append(attributes, specHttpMetricAttr(metric_code, category)...) //TODO: need to update as per the furthur discussion + attributes = append(attributes, specHttpMetricAttr(metric_code, category)...) m.HttpRequestCount.Add(ctx, 1, metric.WithAttributes(attributes...)) } diff --git a/core/module/handler/stdHandler.go b/core/module/handler/stdHandler.go index 39bb6f6..63444cc 100644 --- a/core/module/handler/stdHandler.go +++ b/core/module/handler/stdHandler.go @@ -122,19 +122,7 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { record: nil, } - selfID := h.SubscriberID - remoteID := "" - if v, ok := r.Context().Value(model.ContextKeyRemoteID).(string); ok { - remoteID = v - } - var senderID, receiverID string - if strings.Contains(h.moduleName, "Caller") { - senderID = selfID - receiverID = remoteID - } else { - senderID = remoteID - receiverID = selfID - } + senderID, receiverID := h.resolveDirection(r.Context()) httpMeter, _ := GetHTTPMetrics(r.Context()) if httpMeter != nil { recordOnce = func() { @@ -161,7 +149,7 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } body := stepCtx.Body - go telemetry.EmitAuditLogs(r.Context(), body, auditlog.Int("http.response.status_code", wrapped.statusCode), auditlog.String("http.response.error", errString(err))) + telemetry.EmitAuditLogs(r.Context(), body, auditlog.Int("http.response.status_code", wrapped.statusCode), auditlog.String("http.response.error", errString(err))) span.End() }() @@ -385,21 +373,17 @@ func (h *stdHandler) initSteps(ctx context.Context, mgr PluginManager, cfg *Conf return nil } -func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { +func (h *stdHandler) resolveDirection(ctx context.Context) (senderID, receiverID string) { selfID := h.SubscriberID - remoteID := "" - if v, ok := 
r.Context().Value(model.ContextKeyRemoteID).(string); ok { - remoteID = v - } - - var senderID, receiverID string + remoteID, _ := ctx.Value(model.ContextKeyRemoteID).(string) if strings.Contains(h.moduleName, "Caller") { - senderID = selfID - receiverID = remoteID - } else { - senderID = remoteID - receiverID = selfID + return selfID, remoteID } + return remoteID, selfID +} + +func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { + senderID, receiverID := h.resolveDirection(r.Context()) attrs := []attribute.KeyValue{ telemetry.AttrRecipientID.String(receiverID), telemetry.AttrSenderID.String(senderID), diff --git a/core/module/handler/step.go b/core/module/handler/step.go index 2ea061f..04d6536 100644 --- a/core/module/handler/step.go +++ b/core/module/handler/step.go @@ -41,28 +41,25 @@ func (s *signStep) Run(ctx *model.StepContext) error { return model.NewBadReqErr(fmt.Errorf("subscriberID not set")) } - tracer := otel.Tracer("beckn-onix") + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) var keySet *model.Keyset { - // to create span to finding the key set keySetCtx, keySetSpan := tracer.Start(ctx.Context, "keyset") - defer keySetSpan.End() ks, err := s.km.Keyset(keySetCtx, ctx.SubID) + keySetSpan.End() if err != nil { return fmt.Errorf("failed to get signing key: %w", err) } keySet = ks - } { - // to create span for the signa signerCtx, signerSpan := tracer.Start(ctx.Context, "sign") - defer signerSpan.End() createdAt := time.Now().Unix() validTill := time.Now().Add(5 * time.Minute).Unix() sign, err := s.signer.Sign(signerCtx, ctx.Body, keySet.SigningPrivate, createdAt, validTill) + signerSpan.End() if err != nil { return fmt.Errorf("failed to sign request: %w", err) } @@ -73,7 +70,6 @@ func (s *signStep) Run(ctx *model.StepContext) error { header = model.AuthHeaderGateway } ctx.Request.Header.Set(header, authHeader) - } return nil diff --git a/install/network-observability/docker-compose.yml 
b/install/network-observability/docker-compose.yml index 0c63e1b..d354c53 100644 --- a/install/network-observability/docker-compose.yml +++ b/install/network-observability/docker-compose.yml @@ -196,46 +196,41 @@ services: - zipkin - loki - bpp-client: - image: fidedocker/protocol-server - container_name: bpp-client + sandbox-bap: + container_name: sandbox-bap + image: fidedocker/sandbox-2.0:latest platform: linux/amd64 - networks: - - beckn_network - ports: - - "6001:6001" - restart: unless-stopped - volumes: - - bpp_client_config_volume:/usr/src/app/config - - bpp_client_schemas_volume:/usr/src/app/schemas - - bpp_client_logs_volume:/usr/src/app/logs - - bpp-network: - image: fidedocker/protocol-server - container_name: bpp-network - platform: linux/amd64 - networks: - - beckn_network - ports: - - "6002:6002" - restart: unless-stopped - volumes: - - bpp_network_config_volume:/usr/src/app/config - - bpp_network_schemas_volume:/usr/src/app/schemas - - bpp_network_logs_volume:/usr/src/app/logs - - sandbox-api: - image: fidedocker/sandbox-api - container_name: sandbox-api - platform: linux/amd64 - networks: - - beckn_network - ports: - - "4010:4000" - restart: unless-stopped environment: - - PORT=4000 - - WEBHOOK_URL=http://host.docker.internal:3001/webhook + - NODE_ENV=production + - PORT=3001 + ports: + - "3001:3001" + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost:3001/api/health"] + interval: 10s + timeout: 3s + retries: 5 + start_period: 10s + networks: + - beckn_network + + sandbox-bpp: + container_name: sandbox-bpp + image: fidedocker/sandbox-2.0:latest + platform: linux/amd64 + environment: + - NODE_ENV=production + - PORT=3002 + ports: + - "3002:3002" + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost:3002/api/health"] + interval: 10s + timeout: 3s + retries: 5 + start_period: 10s + networks: + - beckn_network networks: observability: @@ -248,17 +243,3 @@ volumes: prometheus_data: grafana_data: loki_data: - 
bpp_client_config_volume: - name: bpp_client_config_volume - external: true - bpp_client_schemas_volume: - name: bpp_client_schemas_volume - bpp_client_logs_volume: - name: bpp_client_logs_volume - bpp_network_config_volume: - name: bpp_network_config_volume - external: true - bpp_network_schemas_volume: - name: bpp_network_schemas_volume - bpp_network_logs_volume: - name: bpp_network_logs_volume diff --git a/pkg/log/log_test.go b/pkg/log/log_test.go index 334788b..12ef241 100644 --- a/pkg/log/log_test.go +++ b/pkg/log/log_test.go @@ -250,11 +250,11 @@ func TestError(t *testing.T) { func TestRequest(t *testing.T) { logPath := setupLogger(t, InfoLevel) ctx := context.WithValue(context.Background(), requestID, "abc-123") - ctx = context.WithValue(context.Background(), transaction_id, "transaction-id-123-") - ctx = context.WithValue(context.Background(), message_id, "message-id-123") - ctx = context.WithValue(context.Background(), subscriber_id, "subscriber-id-123") - ctx = context.WithValue(context.Background(), module_id, "module-id-123") - ctx = context.WithValue(context.Background(), parent_id, "parent-id-123") + ctx = context.WithValue(ctx, transaction_id, "transaction-id-123-") + ctx = context.WithValue(ctx, message_id, "message-id-123") + ctx = context.WithValue(ctx, subscriber_id, "subscriber-id-123") + ctx = context.WithValue(ctx, module_id, "module-id-123") + ctx = context.WithValue(ctx, parent_id, "parent-id-123") req, _ := http.NewRequest("POST", "/api/test", bytes.NewBuffer([]byte(`{"key":"value"}`))) req.RemoteAddr = "127.0.0.1:8080" diff --git a/pkg/model/model.go b/pkg/model/model.go index c75c021..5c7fb7e 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -58,7 +58,7 @@ const ( ContextKeyParentID ContextKey = "parent_id" // ContextKeyRemoteID is the context key for the caller who is calling the bap/bpp - ContextKeyRemoteID ContextKey = "caller_id" + ContextKeyRemoteID ContextKey = "remote_id" ) var contextKeys = map[string]ContextKey{ @@ 
-67,7 +67,7 @@ var contextKeys = map[string]ContextKey{ "subscriber_id": ContextKeySubscriberID, "module_id": ContextKeyModuleID, "parent_id": ContextKeyParentID, - "caller_id": ContextKeyRemoteID, + "remote_id": ContextKeyRemoteID, } // ParseContextKey converts a string into a valid ContextKey. diff --git a/pkg/plugin/implementation/cache/cache.go b/pkg/plugin/implementation/cache/cache.go index 334e0f1..c99ec16 100644 --- a/pkg/plugin/implementation/cache/cache.go +++ b/pkg/plugin/implementation/cache/cache.go @@ -114,22 +114,25 @@ func New(ctx context.Context, cfg *Config) (*Cache, func() error, error) { // Get retrieves the value for the specified key from Redis. func (c *Cache) Get(ctx context.Context, key string) (string, error) { - result, err := c.Client.Get(ctx, key).Result() + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) + spanCtx, span := tracer.Start(ctx, "redis_get") + defer span.End() + result, err := c.Client.Get(spanCtx, key).Result() if c.metrics != nil { attrs := []attribute.KeyValue{ telemetry.AttrOperation.String("get"), } switch { case err == redis.Nil: - c.metrics.CacheMissesTotal.Add(ctx, 1, metric.WithAttributes(attrs...)) - c.metrics.CacheOperationsTotal.Add(ctx, 1, + c.metrics.CacheMissesTotal.Add(spanCtx, 1, metric.WithAttributes(attrs...)) + c.metrics.CacheOperationsTotal.Add(spanCtx, 1, metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("miss"))...)) case err != nil: - c.metrics.CacheOperationsTotal.Add(ctx, 1, + c.metrics.CacheOperationsTotal.Add(spanCtx, 1, metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("error"))...)) default: - c.metrics.CacheHitsTotal.Add(ctx, 1, metric.WithAttributes(attrs...)) - c.metrics.CacheOperationsTotal.Add(ctx, 1, + c.metrics.CacheHitsTotal.Add(spanCtx, 1, metric.WithAttributes(attrs...)) + c.metrics.CacheOperationsTotal.Add(spanCtx, 1, metric.WithAttributes(append(attrs, telemetry.AttrStatus.String("hit"))...)) } } @@ 
-149,8 +152,11 @@ func (c *Cache) Set(ctx context.Context, key, value string, ttl time.Duration) e // Delete removes the specified key from Redis. func (c *Cache) Delete(ctx context.Context, key string) error { - err := c.Client.Del(ctx, key).Err() - c.recordOperation(ctx, "delete", err) + tracer := otel.Tracer(telemetry.ScopeName, trace.WithInstrumentationVersion(telemetry.ScopeVersion)) + spanCtx, span := tracer.Start(ctx, "redis_delete") + defer span.End() + err := c.Client.Del(spanCtx, key).Err() + c.recordOperation(spanCtx, "delete", err) return err } diff --git a/pkg/plugin/implementation/otelsetup/otelsetup.go b/pkg/plugin/implementation/otelsetup/otelsetup.go index 6606437..b9c4703 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup.go @@ -30,18 +30,19 @@ type Setup struct{} // Config represents OpenTelemetry related configuration. type Config struct { - ServiceName string `yaml:"serviceName"` - ServiceVersion string `yaml:"serviceVersion"` - Environment string `yaml:"environment"` - Domain string `yaml:"domain"` - DeviceID string `yaml:"deviceID"` - EnableMetrics bool `yaml:"enableMetrics"` - EnableTracing bool `yaml:"enableTracing"` - EnableLogs bool `yaml:"enableLogs"` - OtlpEndpoint string `yaml:"otlpEndpoint"` - TimeInterval int64 `yaml:"timeInterval"` - Producer string `yaml:"producer"` - ProducerType string `yaml:"producerType"` + ServiceName string `yaml:"serviceName"` + ServiceVersion string `yaml:"serviceVersion"` + Environment string `yaml:"environment"` + Domain string `yaml:"domain"` + DeviceID string `yaml:"deviceID"` + EnableMetrics bool `yaml:"enableMetrics"` + EnableTracing bool `yaml:"enableTracing"` + EnableLogs bool `yaml:"enableLogs"` + OtlpEndpoint string `yaml:"otlpEndpoint"` + TimeInterval int64 `yaml:"timeInterval"` + AuditFieldsConfig string `yaml:"auditFieldsConfig"` + Producer string `yaml:"producer"` + ProducerType string `yaml:"producerType"` } // DefaultConfig 
returns sensible defaults for telemetry configuration. @@ -62,13 +63,17 @@ func ToPluginConfig(cfg *Config) *plugin.Config { return &plugin.Config{ ID: "otelsetup", Config: map[string]string{ - "serviceName": cfg.ServiceName, - "serviceVersion": cfg.ServiceVersion, - "environment": cfg.Environment, - "enableMetrics": fmt.Sprintf("%t", cfg.EnableMetrics), - "enableTracing": fmt.Sprintf("%t", cfg.EnableTracing), - "otelEndpoint": cfg.OtlpEndpoint, - "deviceID": cfg.DeviceID, + "serviceName": cfg.ServiceName, + "serviceVersion": cfg.ServiceVersion, + "environment": cfg.Environment, + "domain": cfg.Domain, + "enableMetrics": fmt.Sprintf("%t", cfg.EnableMetrics), + "enableTracing": fmt.Sprintf("%t", cfg.EnableTracing), + "enableLogs": fmt.Sprintf("%t", cfg.EnableLogs), + "otlpEndpoint": cfg.OtlpEndpoint, + "deviceID": cfg.DeviceID, + "timeInterval": fmt.Sprintf("%d", cfg.TimeInterval), + "auditFieldsConfig": cfg.AuditFieldsConfig, }, } } @@ -101,16 +106,13 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) cfg.TimeInterval = DefaultConfig().TimeInterval } - if !cfg.EnableMetrics && !cfg.EnableTracing { - log.Info(ctx, "OpenTelemetry metrics and tracing are disabled") + if !cfg.EnableMetrics && !cfg.EnableTracing && !cfg.EnableLogs { + log.Info(ctx, "OpenTelemetry metrics, tracing, and logs are all disabled") return &telemetry.Provider{ Shutdown: func(context.Context) error { return nil }, }, nil } - //this will be used by both metric and traces - - // to build resource with envelope metadata baseAttrs := []attribute.KeyValue{ attribute.String("service.name", cfg.ServiceName), attribute.String("service.version", cfg.ServiceVersion), @@ -121,53 +123,49 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) attribute.String("producer", cfg.Producer), } - resMetric, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "METRIC")...)) - if err != nil { - return nil, fmt.Errorf("failed to create 
telemetry resource for metric: %w", err) - } - - //OTLP metric var meterProvider *metric.MeterProvider if cfg.EnableMetrics { - metricExpoter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(cfg.OtlpEndpoint), + resMetric, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "METRIC")...)) + if err != nil { + return nil, fmt.Errorf("failed to create telemetry resource for metric: %w", err) + } + metricExporter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(cfg.OtlpEndpoint), otlpmetricgrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials()))) if err != nil { return nil, fmt.Errorf("failed to create OTLP metric exporter: %w", err) } - reader := metric.NewPeriodicReader(metricExpoter, metric.WithInterval(time.Second*time.Duration(cfg.TimeInterval))) + reader := metric.NewPeriodicReader(metricExporter, metric.WithInterval(time.Second*time.Duration(cfg.TimeInterval))) meterProvider = metric.NewMeterProvider(metric.WithReader(reader), metric.WithResource(resMetric)) otel.SetMeterProvider(meterProvider) log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s (OTLP endpoint=%s)", cfg.ServiceName, cfg.ServiceVersion, cfg.Environment, cfg.OtlpEndpoint) - // for the go runtime metrics if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(runtime.DefaultMinimumReadMemStatsInterval)); err != nil { log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err) } } - //OTLP traces - restrace, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "API")...)) - if err != nil { - return nil, fmt.Errorf("failed to create trace resource: %w", err) - } var traceProvider *trace.TracerProvider if cfg.EnableTracing { - traceExpoter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithEndpoint(cfg.OtlpEndpoint), otlptracegrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials()))) + resTrace, err := resource.New(ctx, 
resource.WithAttributes(buildAtts(baseAttrs, "API")...)) + if err != nil { + return nil, fmt.Errorf("failed to create trace resource: %w", err) + } + traceExporter, err := otlptracegrpc.New(ctx, otlptracegrpc.WithEndpoint(cfg.OtlpEndpoint), otlptracegrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials()))) if err != nil { return nil, fmt.Errorf("failed to create OTLP trace exporter: %w", err) } - traceProvider = trace.NewTracerProvider(trace.WithBatcher(traceExpoter), trace.WithResource(restrace)) //TODO: need to add the trace sampleing rate + traceProvider = trace.NewTracerProvider(trace.WithBatcher(traceExporter), trace.WithResource(resTrace)) otel.SetTracerProvider(traceProvider) log.Infof(ctx, "OpenTelemetry tracing initialized for service=%s (OTLP endpoint=%s)", cfg.ServiceName, cfg.OtlpEndpoint) } - resAudit, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "AUDIT")...)) - if err != nil { - return nil, fmt.Errorf("failed to create audit resource: %w", err) - } var logProvider *logsdk.LoggerProvider if cfg.EnableLogs { + resAudit, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "AUDIT")...)) + if err != nil { + return nil, fmt.Errorf("failed to create audit resource: %w", err) + } logExporter, err := otlploggrpc.New(ctx, otlploggrpc.WithEndpoint(cfg.OtlpEndpoint), otlploggrpc.WithDialOption(grpc.WithTransportCredentials(insecure.NewCredentials()))) if err != nil { return nil, fmt.Errorf("failed to create OTLP logs exporter: %w", err) diff --git a/pkg/plugin/implementation/otelsetup/otelsetup_test.go b/pkg/plugin/implementation/otelsetup/otelsetup_test.go index 81d5afb..4fc3f1b 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup_test.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup_test.go @@ -169,6 +169,7 @@ func TestToPluginConfig_Success(t *testing.T) { ServiceVersion: "1.0.0", EnableMetrics: true, EnableTracing: true, + EnableLogs: true, Environment: "test", Domain: "test-domain", 
DeviceID: "test-device", @@ -177,13 +178,17 @@ func TestToPluginConfig_Success(t *testing.T) { }, expectedID: "otelsetup", expectedConfig: map[string]string{ - "serviceName": "test-service", - "serviceVersion": "1.0.0", - "environment": "test", - "enableMetrics": "true", - "enableTracing": "true", - "otelEndpoint": "localhost:4317", - "deviceID": "test-device", + "serviceName": "test-service", + "serviceVersion": "1.0.0", + "environment": "test", + "domain": "test-domain", + "enableMetrics": "true", + "enableTracing": "true", + "enableLogs": "true", + "otlpEndpoint": "localhost:4317", + "deviceID": "test-device", + "timeInterval": "5", + "auditFieldsConfig": "", }, }, { @@ -197,13 +202,17 @@ func TestToPluginConfig_Success(t *testing.T) { }, expectedID: "otelsetup", expectedConfig: map[string]string{ - "serviceName": "my-service", - "serviceVersion": "2.0.0", - "environment": "production", - "enableMetrics": "false", - "enableTracing": "false", - "otelEndpoint": "", - "deviceID": "", + "serviceName": "my-service", + "serviceVersion": "2.0.0", + "environment": "production", + "domain": "", + "enableMetrics": "false", + "enableTracing": "false", + "enableLogs": "false", + "otlpEndpoint": "", + "deviceID": "", + "timeInterval": "0", + "auditFieldsConfig": "", }, }, { @@ -220,13 +229,17 @@ func TestToPluginConfig_Success(t *testing.T) { }, expectedID: "otelsetup", expectedConfig: map[string]string{ - "serviceName": "", - "serviceVersion": "", - "environment": "", - "enableMetrics": "true", - "enableTracing": "false", - "otelEndpoint": "", - "deviceID": "", + "serviceName": "", + "serviceVersion": "", + "environment": "", + "domain": "", + "enableMetrics": "true", + "enableTracing": "false", + "enableLogs": "false", + "otlpEndpoint": "", + "deviceID": "", + "timeInterval": "0", + "auditFieldsConfig": "", }, }, } @@ -298,7 +311,7 @@ func TestToPluginConfig_BooleanConversion(t *testing.T) { require.NotNil(t, result) assert.Equal(t, tt.expectedMetric, 
result.Config["enableMetrics"], "enableMetrics should be converted to string correctly") assert.Equal(t, tt.expectedTrace, result.Config["enableTracing"], "enableTracing should be converted to string correctly") - assert.Equal(t, "localhost:4317", result.Config["otelEndpoint"], "otelEndpoint should be included") + assert.Equal(t, "localhost:4317", result.Config["otlpEndpoint"], "otlpEndpoint should be included") assert.Equal(t, "test-device", result.Config["deviceID"], "deviceID should be included") }) } diff --git a/pkg/telemetry/audit.go b/pkg/telemetry/audit.go index 2398c3c..25308eb 100644 --- a/pkg/telemetry/audit.go +++ b/pkg/telemetry/audit.go @@ -23,8 +23,6 @@ func EmitAuditLogs(ctx context.Context, body []byte, attrs ...log.KeyValue) { return } - //maskedBody := MaskPIIInAuditBody(body) - sum := sha256.Sum256(body) auditBody := selectAuditPayload(ctx, body) auditlog := provider.Logger(auditLoggerName) diff --git a/pkg/telemetry/pluginMetrics.go b/pkg/telemetry/pluginMetrics.go index 4ef2912..f9843fa 100644 --- a/pkg/telemetry/pluginMetrics.go +++ b/pkg/telemetry/pluginMetrics.go @@ -57,8 +57,8 @@ var ( var ( networkMetricsCfgMu sync.RWMutex - networkMetricsGranularity = "10mim" // default - networkMetricsFrequency = "10mim" // default + networkMetricsGranularity = "10min" // default + networkMetricsFrequency = "10min" // default ) func SetNetworkMetricsConfig(granularity, frequency string) {