diff --git a/CONFIG.md b/CONFIG.md index e04a431..1c62d19 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -199,9 +199,7 @@ log: #### `plugins.otelsetup` **Type**: `object` **Required**: No -**Description**: OpenTelemetry configuration controlling whether the Prometheus exporter is enabled. - -**Important**: This block is optional—omit it to run without telemetry. When present, the `/metrics` endpoint is exposed on a separate port (configurable via `metricsPort`) only if `enableMetrics: true`. +**Description**: OpenTelemetry (OTLP) configuration for metrics, traces, and logs. When configured, telemetry is exported to an OTLP collector endpoint. Omit this block to run without telemetry. ##### Parameters: @@ -215,11 +213,10 @@ log: **Required**: Yes **Description**: Plugin configuration parameters. -###### `config.enableMetrics` -**Type**: `string` (boolean) -**Required**: No -**Default**: `"true"` -**Description**: Enables metrics collection and the `/metrics` endpoint. Must be `"true"` or `"false"` as a string. +###### `config.otlpEndpoint` +**Type**: `string` +**Required**: Yes (when OtelSetup is used) +**Description**: OTLP gRPC endpoint (host:port) for exporting metrics, traces, and logs. Example: `"localhost:4317"`, `"otel-collector-bap:4317"`. ###### `config.serviceName` **Type**: `string` @@ -238,47 +235,114 @@ log: **Default**: `"development"` **Description**: Sets the `deployment.environment` attribute (e.g., `development`, `staging`, `production`). -###### `config.metricsPort` +###### `config.domain` **Type**: `string` **Required**: No **Default**: `"9090"` **Description**: Port on which the metrics HTTP server will listen. The metrics endpoint is hosted on a separate server from the main application. -**Example - Enable Metrics** (matches `config/local-simple.yaml`): +###### `config.enableMetrics` +**Type**: `string` (boolean) +**Required**: No +**Default**: `"false"` +**Description**: Enables metrics collection and OTLP metric export. 
Use `"true"` or `"false"` as a string. + +###### `config.enableTracing` +**Type**: `string` (boolean) +**Required**: No +**Default**: `"false"` +**Description**: Enables trace export via OTLP. Use `"true"` or `"false"` as a string. + +###### `config.enableLogs` +**Type**: `string` (boolean) +**Required**: No +**Default**: `"false"` +**Description**: Enables log export via OTLP (e.g. audit logs). Use `"true"` or `"false"` as a string. + +###### `config.timeInterval` +**Type**: `string` (integer) +**Required**: No +**Default**: `"5"` +**Description**: Time interval in seconds used for periodic metric export or batching. + +###### `config.auditFieldsConfig` +**Type**: `string` +**Required**: No +**Description**: Path to a YAML file that defines which request/response fields are included in audit logs, per action. See [Audit fields configuration](#audit-fields-configuration). Example: `"/app/config/audit-fields.yaml"`. + + +**Example - OTLP export with audit logs** (e.g. `config/local-beckn-one-bap.yaml`): ```yaml plugins: otelsetup: id: otelsetup config: - serviceName: "beckn-onix" + serviceName: "beckn-one-bap" serviceVersion: "1.0.0" - enableMetrics: "true" environment: "development" - metricsPort: "9090" + domain: "ev_charging" + otlpEndpoint: "otel-collector-bap:4317" + enableMetrics: "true" + enableTracing: "true" + enableLogs: "true" + timeInterval: "5" + networkMetricsGranularity: "2min" + networkMetricsFrequency: "4min" + auditFieldsConfig: "/app/config/audit-fields.yaml" ``` -### Accessing Metrics -When `plugins.otelsetup.config.enableMetrics: "true"`, the metrics endpoint is hosted on a separate HTTP server. Scrape metrics at: -``` -http://your-server:9090/metrics +### Audit fields configuration + +When `config.auditFieldsConfig` points to a YAML file, audit logs (emitted via OTLP when `enableLogs: "true"`) include only the fields you list per action. 
The file format: + +```yaml +auditRules: + default: # Optional: fallback for actions without a specific list + - context.transaction_id + - context.message_id + - context.action + - context.domain + - context.bap_id + - context.bpp_id + discover: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.filters + - message.spatial + select: + - context.transaction_id + - context.message_id + - context.action + - message.order.beckn:buyer.beckn:id + # ... more dot-path fields ``` -**Note**: The metrics server runs on the port specified by `config.metricsPort` (default: `9090`), which is separate from the main application port configured in `http.port`. +- **Top-level key**: `auditRules`. +- **Action keys**: Use Beckn action names (e.g. `discover`, `select`, `init`, `confirm`, `update`, `track`, `cancel`, `rating`, `support`). Use `default` for actions that do not have a specific list. +- **Values**: List of dot-path strings into the request/response JSON (e.g. `context.transaction_id`, `message.order.beckn:id`). Namespaced keys use colons (e.g. `beckn:id`). + +See `config/audit-fields.yaml` for a full example. ### Metrics Collected +When OtelSetup is configured with `otlpEndpoint`, metrics and traces are exported via OTLP (no separate metrics HTTP server). Scrape metrics from your OTLP collector. + Metrics are organized by module for better maintainability and encapsulation: #### OTel Setup (from `otelsetup` plugin) -- Prometheus exporter & `/metrics` endpoint on separate HTTP server -- Go runtime instrumentation (`go_*`), resource attributes, and meter provider wiring +- OTLP export for metrics, traces, and logs (gRPC endpoint). +- Go runtime instrumentation (`go_*`), resource attributes, and meter/tracer provider wiring. +- When `enableLogs: "true"` and `auditFieldsConfig` is set, audit logs are emitted via OTLP with fields defined in the audit-fields YAML. 
#### Step Execution Metrics (from `telemetry` package) - `onix_step_executions_total`, `onix_step_execution_duration_seconds`, `onix_step_errors_total` #### Handler Metrics (from `handler` module) +- `onix_http_request_count` – HTTP requests by status class, route, method, role, sender, recipient (and optional network metric attributes). - `beckn_signature_validations_total` - Signature validation attempts - `beckn_schema_validations_total` - Schema validation attempts - `onix_routing_decisions_total` - Routing decisions taken by handler @@ -752,12 +816,12 @@ publisher: middleware: - id: reqpreprocessor config: - uuidKeys: transaction_id,message_id role: bap + contextKeys: transaction_id,message_id,subscriber_id,module_id ``` **Parameters**: -- `uuidKeys`: Comma-separated list of fields to auto-generate UUIDs for if missing +- `contextKeys`: Comma-separated list of fields to auto-generate UUIDs for if missing - `role`: BAP or BPP role for request processing --- diff --git a/README.md b/README.md index 484c0f5..1ca152c 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ The **Beckn Protocol** is an open protocol that enables location-aware, local co - **Decrypter**: AES decryption for encrypted data processing - **ReqPreprocessor**: Request preprocessing (UUID generation, headers) - **ReqMapper**: Middleware to transform payload either between Beckn versions or against other platforms. -- **OtelSetup**: Observability Setup to make metrics, traces and logs available +- **OtelSetup**: Observability setup for metrics, traces, and logs (OTLP). Supports optional audit log configuration via `auditFieldsConfig` (YAML mapping actions to fields). See [CONFIG.md](CONFIG.md) for details. ## Quick Start @@ -330,10 +330,11 @@ modules: ### Deployment Modes 1. **Combined Mode**: Single instance handling both BAP and BPP (`config/onix/`) - Uses `secretskeymanager` (HashiCorp Vault) for production key management -2. 
**BAP-Only Mode**: Dedicated buyer-side deployment (`config/onix-bap/`) -3. **BPP-Only Mode**: Dedicated seller-side deployment (`config/onix-bpp/`) -4. **Local Development Combined Mode**: Simplified configuration (`config/local-simple.yaml`) - Uses `simplekeymanager` with embedded Ed25519 keys, no vault setup needed. -5. **Local Development Combined Mode (Alternative)**: Development configuration (`config/local-dev.yaml`) - Uses `keymanager` vault setup needed +2. **BAP-Only Mode**: Dedicated buyer-side deployment (`config/onix-bap/`) +3. **BPP-Only Mode**: Dedicated seller-side deployment (`config/onix-bpp/`) +4. **Local Development Combined Mode**: Simplified configuration (`config/local-simple.yaml`) - Uses `simplekeymanager` with embedded Ed25519 keys, no vault setup needed +5. **Local Development Combined Mode (Alternative)**: Development configuration (`config/local-dev.yaml`) - Uses `keymanager`, vault setup needed +6. **Local with Observability (BAP/BPP)**: Configs `config/local-beckn-one-bap.yaml` and `config/local-beckn-one-bpp.yaml` include OtelSetup (metrics, traces, audit logs) for use with an OTLP collector. Audit fields are configured via `config/audit-fields.yaml`. For a full stack (collectors, Grafana, Loki), see `install/network-observability/`. ## API Endpoints @@ -359,14 +360,6 @@ modules: | POST | `/bpp/receiver/*` | Receives all BAP requests | | POST | `/bpp/caller/on_*` | Sends responses back to BAP | -### Observability Endpoints - -| Method | Endpoint | Description | -|--------|----------|-------------| -| GET | `/health` | Health check endpoint | -| GET | `/metrics` | Prometheus metrics endpoint (when telemetry is enabled) | - -**Note**: The `/metrics` endpoint is available when `telemetry.enableMetrics: true` in the configuration file. It returns metrics in Prometheus format. 
## Documentation diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 518a419..2af4540 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -53,7 +53,7 @@ var runFunc = run func main() { // Define and parse command-line flags. - flag.StringVar(&configPath, "config", "config/onix/adapter.yaml", "Path to the configuration file") + flag.StringVar(&configPath, "config", "../../config/onix/adapter.yaml", "Path to the configuration file") flag.Parse() // Use custom log for initial setup messages. diff --git a/config/audit-fields.yaml b/config/audit-fields.yaml new file mode 100644 index 0000000..4fe1fce --- /dev/null +++ b/config/audit-fields.yaml @@ -0,0 +1,89 @@ +auditRules: + default: + - context.transaction_id + - context.message_id + - context.action + - context.domain + - context.bap_id + - context.bpp_id + + discover: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.filters + - message.spatial + + select: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:buyer.beckn:id + - message.order.beckn:seller + - message.order.beckn:orderItems.beckn:acceptedOffer.beckn:id + - message.order.beckn:orderAttributes + + init: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:buyer.beckn:id + - message.order.beckn:orderValue.value + - message.order.beckn:payment.beckn:paymentStatus + + confirm: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:orderStatus + - message.order.beckn:buyer.beckn:id + - message.order.beckn:payment.beckn:txnRef + - message.order.beckn:payment.beckn:paymentStatus + + update: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:orderStatus + - 
message.order.beckn:fulfillment.beckn:deliveryAttributes.sessionStatus + + track: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + + cancel: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.order.beckn:id + - message.order.beckn:orderStatus + - message.order.beckn:buyer.beckn:id + + rating: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.id + - message.value + - message.category + + support: + - context.transaction_id + - context.message_id + - context.action + - context.timestamp + - message.ref_id + - message.ref_type diff --git a/config/local-beckn-one-bap.yaml b/config/local-beckn-one-bap.yaml index 67ae76e..77a11e4 100644 --- a/config/local-beckn-one-bap.yaml +++ b/config/local-beckn-one-bap.yaml @@ -16,6 +16,25 @@ http: idle: 30 pluginManager: root: ./plugins + +# OpenTelemetry (OTLP) - metrics and traces sent to OTEL collector +plugins: + otelsetup: + id: otelsetup + config: + serviceName: "beckn-one-bap" + serviceVersion: "1.0.0" + environment: "development" + domain: "ev_charging" + otlpEndpoint: "otel-collector-bap:4317" + enableMetrics: "true" + networkMetricsGranularity: "2min" + networkMetricsFrequency: "4min" + enableTracing: "true" + enableLogs: "true" + timeInterval: "5" + auditFieldsConfig: "/app/config/audit-fields.yaml" + modules: - name: bapTxnReceiver path: /bap/receiver/ diff --git a/config/local-beckn-one-bpp.yaml b/config/local-beckn-one-bpp.yaml index af5ff8f..fa941ea 100644 --- a/config/local-beckn-one-bpp.yaml +++ b/config/local-beckn-one-bpp.yaml @@ -16,6 +16,23 @@ http: idle: 30 pluginManager: root: ./plugins +# OpenTelemetry (OTLP) - metrics and traces sent to OTEL collector, then to Loki/backend +plugins: + otelsetup: + id: otelsetup + config: + serviceName: "beckn-one-bpp" + serviceVersion: "1.0.0" + environment: "development" + domain: "ev_charging" + 
otlpEndpoint: "otel-collector-bpp:4317" + enableMetrics: "true" + networkMetricsGranularity: "2min" + networkMetricsFrequency: "4min" + enableTracing: "true" + enableLogs: "true" + timeInterval: "5" + auditFieldsConfig: "/app/config/audit-fields.yaml" modules: - name: bppTxnReceiver path: /bpp/receiver/ diff --git a/config/onix/adapter.local.yaml b/config/onix/adapter.local.yaml deleted file mode 100644 index ebaff50..0000000 --- a/config/onix/adapter.local.yaml +++ /dev/null @@ -1,221 +0,0 @@ -appName: "onix" -log: - level: debug - destinations: - - type: stdout - contextKeys: - - transaction_id - - message_id - - subscriber_id - - module_id -http: - port: 8080 - timeout: - read: 30 - write: 30 - idle: 30 -pluginManager: - root: ./plugins - remoteRoot: "" -modules: - - name: bapTxnReciever - path: /bap/reciever/ - handler: - type: std - role: bap - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 10.81.192.4:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signValidator: - id: signvalidator - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bapTxnReciever-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bap - steps: - - validateSign - - addRoute - - validateSchema - - name: bapTxnCaller - path: /bap/caller/ - handler: - type: std - role: bap - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: 
http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signer: - id: signer - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bapTxnCaller-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bap - steps: - - validateSchema - - addRoute - - sign - - name: bppTxnReciever - path: /bpp/reciever/ - handler: - type: std - role: bpp - subscriberId: bpp1 - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: - id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signValidator: - id: signvalidator - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bppTxnReciever-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bpp - steps: - - validateSign - - addRoute - - validateSchema - - name: bppTxnCaller - path: /bpp/caller/ - handler: - type: std - role: bpp - httpClientConfig: - maxIdleConns: 1000 - maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: - registry: - id: registry - config: - url: http://localhost:8080/reg - retry_max: 3 - retry_wait_min: 100ms - retry_wait_max: 500ms - keyManager: 
- id: secretskeymanager - config: - projectID: ${projectID} - cache: - id: redis - config: - addr: 192.168.1.1:6379 - schemaValidator: - id: schemavalidator - config: - schemaDir: /mnt/gcs/configs/schemas - signer: - id: signer - publisher: - id: publisher - config: - project: ${projectID} - topic: bapNetworkReciever - router: - id: router - config: - routingConfigPath: /mnt/gcs/configs/bppTxnCaller-routing.yaml - middleware: - - id: reqpreprocessor - config: - contextKeys: transaction_id,message_id - role: bpp - steps: - - validateSchema - - addRoute - - sign diff --git a/config/onix/adapter.yaml b/config/onix/adapter.yaml index 4349e57..90dc3c3 100644 --- a/config/onix/adapter.yaml +++ b/config/onix/adapter.yaml @@ -1,114 +1,113 @@ -appName: "onix-ev-charging" -log: - level: debug - destinations: - - type: stdout - contextKeys: - - transaction_id - - message_id - - subscriber_id - - module_id - - parent_id - - -# OpenTelemetry (OTLP) - metrics and traces sent to OTEL collector, then to Loki/backend -plugins: - otelsetup: - id: otelsetup - config: - serviceName: "onix-ev-charging-bap" - serviceVersion: "1.0.0" - environment: "development" - domain: "ev_charging" - otlpEndpoint: "otel-collector:4317" - enableMetrics: "true" - networkMetricsGranularity: "2min" - networkMetricsFrequency: "4min" - enableTracing: "true" - enableLogs: "true" - timeInterval: "5" - auditFieldsConfig: "/app/config/audit-fields.yaml" - - -# this is the port for the bap plugin where bap app can dump the requests to the plugin http: - port: 8001 + port: 8080 timeout: read: 30 write: 30 idle: 30 - pluginManager: root: /app/plugins - + remoteRoot: /mnt/gcs/plugins/plugins_bundle.zip modules: - # BAP Receiver - Receives callbacks from CDS (Phase 1) and BPPs (Phase 2+) - # Phase 1: Receives on_search from CDS with aggregated catalog - # Phase 2+: Receives callbacks from BPPs (on_select, on_init, on_confirm, etc.) 
- - name: bapTxnReceiver - path: /bap/receiver/ + - name: bapTxnReciever + path: /bap/reciever/ handler: type: std role: bap - subscriberId: ev-charging.sandbox1.com httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 - idleConnTimeout: 300s - responseHeaderTimeout: 5s - plugins: registry: id: registry config: - url: http://mock-registry:3030 + url: http://localhost:8080/reg retry_max: 3 retry_wait_min: 100ms retry_wait_max: 500ms keyManager: - id: simplekeymanager + id: secretskeymanager config: - networkParticipant: example-bap.com - keyId: bap-key-1 - signingPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - signingPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= - encrPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - encrPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= + projectID: ${projectID} cache: - id: cache + id: redis config: - addr: redis-bap:6379 + addr: 10.81.192.4:6379 schemaValidator: - id: schemav2validator + id: schemavalidator config: - type: url - location: https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/api/beckn.yaml - cacheTTL: "3600" + schemaDir: /mnt/gcs/configs/schemas signValidator: id: signvalidator + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever router: id: router config: - routingConfig: /app/config/bapTxnReciever-routing.yaml + routingConfigPath: /mnt/gcs/configs/bapTxnReciever-routing.yaml middleware: - id: reqpreprocessor config: - contextKeys: transaction_id,message_id,parent_id + contextKeys: transaction_id,message_id role: bap steps: - validateSign - addRoute - validateSchema - - # BAP Caller - Entry point for all requests from BAP - # Phase 1: Routes search to external CDS for aggregation - # Phase 2+: Routes other requests directly to BPP (bypasses CDS) - # Uses bpp_uri from context for dynamic routing in Phase 2+ - name: bapTxnCaller path: /bap/caller/ handler: type: std role: bap - subscriberId: 
ev-charging.sandbox1.com + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 192.168.1.1:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signer: + id: signer + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bapTxnCaller-routing.yaml + middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bap + steps: + - validateSchema + - addRoute + - sign + - name: bppTxnReciever + path: /bpp/reciever/ + handler: + type: std + role: bpp + subscriberId: bpp1 httpClientConfig: maxIdleConns: 1000 maxIdleConnsPerHost: 200 @@ -118,41 +117,88 @@ modules: registry: id: registry config: - url: http://mock-registry:3030 + url: http://localhost:8080/reg retry_max: 3 retry_wait_min: 100ms retry_wait_max: 500ms keyManager: - id: simplekeymanager + id: secretskeymanager config: - networkParticipant: example-bap.com - keyId: bap-key-1 - signingPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - signingPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= - encrPrivateKey: xnKF3BIg3Ei+ZEvxBtK0Mm4GRG1Mr0+K9IrxT6CnHEE= - encrPublicKey: MKA6fln8vmU2Qn80Y7dLzagpaPNqQWOlvGglMo5s0IU= + projectID: ${projectID} cache: - id: cache + id: redis config: - addr: redis-bap:6379 + addr: 192.168.1.1:6379 schemaValidator: - id: schemav2validator + id: schemavalidator config: - type: url - location: https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/main/api/beckn.yaml - cacheTTL: "3600" + schemaDir: /mnt/gcs/configs/schemas + signValidator: + id: signvalidator + publisher: + id: publisher + config: + project: 
${projectID} + topic: bapNetworkReciever router: id: router config: - routingConfig: /app/config/bapTxnCaller-routing.yaml - signer: - id: signer + routingConfigPath: /mnt/gcs/configs/bppTxnReciever-routing.yaml middleware: - id: reqpreprocessor config: - contextKeys: transaction_id,message_id,parent_id - role: bap - + contextKeys: transaction_id,message_id + role: bpp + steps: + - validateSign + - addRoute + - validateSchema + - name: bppTxnCaller + path: /bpp/caller/ + handler: + type: std + role: bpp + httpClientConfig: + maxIdleConns: 1000 + maxIdleConnsPerHost: 200 + idleConnTimeout: 300s + responseHeaderTimeout: 5s + plugins: + registry: + id: registry + config: + url: http://localhost:8080/reg + retry_max: 3 + retry_wait_min: 100ms + retry_wait_max: 500ms + keyManager: + id: secretskeymanager + config: + projectID: ${projectID} + cache: + id: redis + config: + addr: 192.168.1.1:6379 + schemaValidator: + id: schemavalidator + config: + schemaDir: /mnt/gcs/configs/schemas + signer: + id: signer + publisher: + id: publisher + config: + project: ${projectID} + topic: bapNetworkReciever + router: + id: router + config: + routingConfigPath: /mnt/gcs/configs/bppTxnCaller-routing.yaml + middleware: + - id: reqpreprocessor + config: + contextKeys: transaction_id,message_id + role: bpp steps: - validateSchema - addRoute diff --git a/config/onix/audit-fields.yaml b/config/onix/audit-fields.yaml deleted file mode 100644 index 3e332a2..0000000 --- a/config/onix/audit-fields.yaml +++ /dev/null @@ -1,24 +0,0 @@ -auditRules: - default: - - context.transaction_id - - context.message_id - - context.action - - context.domain - - context.bap_id - - context.bpp_id - - search: - - context.transaction_id - - context.message_id - - context.action - - context.timestamp - - message.intent - - select: - - context.transaction_id - - context.message_id - - context.action - - context.timestamp - - message.order.beckn:buyer.beckn:id - - 
message.order.beckn:orderItems.beckn:acceptedOffer.beckn:id - diff --git a/config/onix/bapTxnReciever-routing.yaml b/config/onix/bapTxnReciever-routing.yaml index dfdaa81..b1d5a44 100644 --- a/config/onix/bapTxnReciever-routing.yaml +++ b/config/onix/bapTxnReciever-routing.yaml @@ -1,47 +1,25 @@ -# ONIX BAP Receiver Routing Configuration - -# Supports Phase 1 (Discover Aggregation) and Phase 2+ (Direct BPP Callbacks) - - - -# Phase 1: Discover Aggregation - -# Phase 2+: Other Callbacks (Direct from BPPs to BAP, NO CDS involvement) - -# These routes use bap_uri from context to route callbacks back to originating BAP - - - routingRules: - - # Phase 1: on_discover callback to BAP (routed to mock-bap for testing) - - domain: ev_charging_network - version: "1.0.0" - targetType: url + - domain: "ONDC:TRV10" + version: "2.0.0" + routingType: "bpp" target: - url: http://mock-bap:9001 - excludeAction: false + url: "https://gateway.example.com" endpoints: - - on_discover - - - - - # Phase 2+: Other callbacks to BAP (routed to mock-bap for testing) - - - domain: ev_charging_network - version: "1.0.0" - targetType: url - target: - url: http://mock-bap:9001 - excludeAction: false + - search + - domain: "ONDC:TRV10" + version: "2.0.0" + routingType: "bpp" endpoints: - - on_select - - on_init - - on_confirm - - on_status - - on_track - - on_cancel - - on_update - - on_rating - - on_support + - select + - init + - confirm + - status + - cancel + - domain: "ONDC:TRV12" + version: "2.0.0" + routingType: "bpp" + endpoints: + - select + - init + - confirm + - status \ No newline at end of file diff --git a/config/onix/bppTxnCaller-routing.yaml b/config/onix/bppTxnCaller-routing.yaml index 0d9a670..ca4a478 100644 --- a/config/onix/bppTxnCaller-routing.yaml +++ b/config/onix/bppTxnCaller-routing.yaml @@ -1,23 +1,20 @@ routingRules: - domain: "ONDC:TRV10" version: "2.0.0" - routingType: "bap" + routingType: "url" + target: + url: "https://services-backend/trv/v1" endpoints: - - on_search 
- on_select - on_init - on_confirm - on_status - on_update - on_cancel - - domain: "ONDC:TRV11" + - domain: "ONDC:TRV10" version: "2.0.0" - routingType: "bap" + routingType: "msgq" + target: + topic_id: "trv_topic_id1" endpoints: - - on_search - - on_select - - on_init - - on_confirm - - on_status - - on_update - - on_cancel \ No newline at end of file + - on_search \ No newline at end of file diff --git a/core/module/handler/http_metric.go b/core/module/handler/http_metric.go index f52fa2f..9104978 100644 --- a/core/module/handler/http_metric.go +++ b/core/module/handler/http_metric.go @@ -17,7 +17,7 @@ type HTTPMetrics struct { } var ( - httlMetricsInstance *HTTPMetrics + httpMetricsInstance *HTTPMetrics httpMetricsOnce sync.Once httpMetricsErr error ) @@ -31,7 +31,7 @@ func newHTTPMetrics() (*HTTPMetrics, error) { if m.HttpRequestCount, err = meter.Int64Counter( "onix_http_request_count", - metric.WithDescription("Total HTTP requests by status, route, method, role and calle "), + metric.WithDescription("Total HTTP requests by status, route, method, role and caller"), metric.WithUnit("1"), ); err != nil { return nil, fmt.Errorf("onix_http_request_count: %w", err) @@ -42,9 +42,9 @@ func newHTTPMetrics() (*HTTPMetrics, error) { func GetHTTPMetrics(ctx context.Context) (*HTTPMetrics, error) { httpMetricsOnce.Do(func() { - httlMetricsInstance, httpMetricsErr = newHTTPMetrics() + httpMetricsInstance, httpMetricsErr = newHTTPMetrics() }) - return httlMetricsInstance, httpMetricsErr + return httpMetricsInstance, httpMetricsErr } // StatusClass returns the HTTP status class string (e.g. 200 -> "2xx"). 
diff --git a/core/module/handler/stdHandler.go b/core/module/handler/stdHandler.go index 6c2831a..39bb6f6 100644 --- a/core/module/handler/stdHandler.go +++ b/core/module/handler/stdHandler.go @@ -124,7 +124,7 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { selfID := h.SubscriberID remoteID := "" - if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok { + if v, ok := r.Context().Value(model.ContextKeyRemoteID).(string); ok { remoteID = v } var senderID, receiverID string @@ -176,7 +176,7 @@ func (h *stdHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // Restore request body before forwarding or publishing. r.Body = io.NopCloser(bytes.NewReader(stepCtx.Body)) if stepCtx.Route == nil { - response.SendAck(w) + response.SendAck(wrapped) return } @@ -388,7 +388,7 @@ func (h *stdHandler) initSteps(ctx context.Context, mgr PluginManager, cfg *Conf func setBecknAttr(span trace.Span, r *http.Request, h *stdHandler) { selfID := h.SubscriberID remoteID := "" - if v, ok := r.Context().Value(model.ContextKeyCallerID).(string); ok { + if v, ok := r.Context().Value(model.ContextKeyRemoteID).(string); ok { remoteID = v } diff --git a/core/module/handler/step_instrumentor.go b/core/module/handler/step_instrumentor.go index 8b1787f..15eade8 100644 --- a/core/module/handler/step_instrumentor.go +++ b/core/module/handler/step_instrumentor.go @@ -100,5 +100,6 @@ func (is *InstrumentedStep) Run(ctx *model.StepContext) error { if stepCtx.Route != nil { ctx.Route = stepCtx.Route } + ctx.WithContext(stepCtx.Context) return err } diff --git a/install/network-observability/docker-compose.yml b/install/network-observability/docker-compose.yml new file mode 100644 index 0000000..0c63e1b --- /dev/null +++ b/install/network-observability/docker-compose.yml @@ -0,0 +1,264 @@ +# Network observability + BAP/BPP adapters + BPP sandbox +# Builds Onix adapter once; BAP uses config/local-beckn-one-bap.yaml, BPP uses config/local-beckn-one-bpp.yaml. 
+# No mock-registry: BAP/BPP use real registry (api.testnet.beckn.one) from config. +# Run from repo root: docker compose -f install/network-observability/docker-compose.yml up -d +# UIs: Grafana http://localhost:3000 | Jaeger http://localhost:16686 | BAP http://localhost:8081 | BPP http://localhost:8082 + +services: + redis: + image: redis:alpine + pull_policy: always + container_name: redis-onix + ports: + - "6379:6379" + command: redis-server --requirepass your-redis-password + networks: + beckn_network: + aliases: + - redis + healthcheck: + test: ["CMD", "redis-cli", "-a", "your-redis-password", "ping"] + interval: 5s + timeout: 3s + retries: 5 + restart: unless-stopped + + onix-bap: + build: + context: ../.. + dockerfile: Dockerfile.adapter-with-plugins + image: onix-adapter:local + container_name: onix-bap + ports: + - "8081:8081" + volumes: + - ../../config:/app/config:ro + - ../../schemas:/app/schemas:ro + environment: + - CONFIG_FILE=/app/config/local-beckn-one-bap.yaml + - REDIS_PASSWORD=your-redis-password + - OTEL_EXPORTER_OTLP_INSECURE=true + - OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector-bap:4317 + command: ["./server", "--config=/app/config/local-beckn-one-bap.yaml"] + networks: + - beckn_network + - observability + restart: unless-stopped + depends_on: + redis: + condition: service_healthy + otel-collector-bap: + condition: service_started + + onix-bpp: + image: onix-adapter:local + container_name: onix-bpp + ports: + - "8082:8082" + volumes: + - ../../config:/app/config:ro + - ../../schemas:/app/schemas:ro + environment: + - CONFIG_FILE=/app/config/local-beckn-one-bpp.yaml + - REDIS_PASSWORD=your-redis-password + - OTEL_EXPORTER_OTLP_INSECURE=true + - OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector-bpp:4317 + command: ["./server", "--config=/app/config/local-beckn-one-bpp.yaml"] + networks: + - beckn_network + - observability + restart: unless-stopped + depends_on: + redis: + condition: service_healthy + otel-collector-bpp: + condition: service_started + 
+ otel-collector-bap: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-collector-bap + command: ["--config=/etc/otel/config.yaml"] + volumes: + - ./otel-collector-bap/config.yaml:/etc/otel/config.yaml:ro + ports: + - "4317:4317" + - "4318:4318" + - "8889:8889" + networks: + - observability + - beckn_network + restart: unless-stopped + depends_on: + - otel-collector-network + + otel-collector-bpp: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-collector-bpp + command: ["--config=/etc/otel/config.yaml"] + volumes: + - ./otel-collector-bpp/config.yaml:/etc/otel/config.yaml:ro + ports: + - "4321:4317" + - "4322:4318" + - "8891:8891" + networks: + - observability + - beckn_network + restart: unless-stopped + depends_on: + - otel-collector-network + + otel-collector-network: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-collector-network + command: ["--config=/etc/otel/config.yaml"] + volumes: + - ./otel-collector-network/config.yaml:/etc/otel/config.yaml:ro + ports: + - "4319:4317" + - "4320:4318" + - "8890:8890" + networks: + - observability + restart: unless-stopped + + zipkin: + image: openzipkin/zipkin:latest + container_name: zipkin + ports: + - "9411:9411" + networks: + - observability + restart: unless-stopped + + loki: + image: grafana/loki:latest + container_name: loki + command: -config.file=/etc/loki/loki-config.yml + volumes: + - ./loki/loki-config.yml:/etc/loki/loki-config.yml:ro + - loki_data:/loki + ports: + - "3100:3100" + networks: + - observability + restart: unless-stopped + + prometheus: + image: prom/prometheus:latest + container_name: prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-lifecycle + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus_data:/prometheus + ports: + - "9090:9090" + networks: + - observability + restart: unless-stopped + 
depends_on: + - otel-collector-bap + - otel-collector-bpp + + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger + environment: + - COLLECTOR_OTLP_ENABLED=true + ports: + - "16686:16686" + networks: + - observability + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: grafana + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + - GF_SERVER_ROOT_URL=http://localhost:3000 + volumes: + - grafana_data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning:ro + ports: + - "3000:3000" + networks: + - observability + restart: unless-stopped + depends_on: + - prometheus + - jaeger + - zipkin + - loki + + bpp-client: + image: fidedocker/protocol-server + container_name: bpp-client + platform: linux/amd64 + networks: + - beckn_network + ports: + - "6001:6001" + restart: unless-stopped + volumes: + - bpp_client_config_volume:/usr/src/app/config + - bpp_client_schemas_volume:/usr/src/app/schemas + - bpp_client_logs_volume:/usr/src/app/logs + + bpp-network: + image: fidedocker/protocol-server + container_name: bpp-network + platform: linux/amd64 + networks: + - beckn_network + ports: + - "6002:6002" + restart: unless-stopped + volumes: + - bpp_network_config_volume:/usr/src/app/config + - bpp_network_schemas_volume:/usr/src/app/schemas + - bpp_network_logs_volume:/usr/src/app/logs + + sandbox-api: + image: fidedocker/sandbox-api + container_name: sandbox-api + platform: linux/amd64 + networks: + - beckn_network + ports: + - "4010:4000" + restart: unless-stopped + environment: + - PORT=4000 + - WEBHOOK_URL=http://host.docker.internal:3001/webhook + +networks: + observability: + driver: bridge + beckn_network: + name: beckn_network + driver: bridge + +volumes: + prometheus_data: + grafana_data: + loki_data: + bpp_client_config_volume: + name: bpp_client_config_volume + external: true + bpp_client_schemas_volume: + name: 
bpp_client_schemas_volume + bpp_client_logs_volume: + name: bpp_client_logs_volume + bpp_network_config_volume: + name: bpp_network_config_volume + external: true + bpp_network_schemas_volume: + name: bpp_network_schemas_volume + bpp_network_logs_volume: + name: bpp_network_logs_volume diff --git a/install/network-observability/grafana/provisioning/dashboards/dashboards.yml b/install/network-observability/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..169c8aa --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,23 @@ +# Dashboard provisioning - load JSON dashboards from the json folder +apiVersion: 1 + +providers: + - name: 'Application' + orgId: 1 + folder: 'Onix / Application' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards/json/application + + - name: 'Network' + orgId: 1 + folder: 'Onix / Network' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards/json/network diff --git a/install/network-observability/grafana/provisioning/dashboards/json/application/metrics-dashboard.json b/install/network-observability/grafana/provisioning/dashboards/json/application/metrics-dashboard.json new file mode 100644 index 0000000..cb97b3b --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/json/application/metrics-dashboard.json @@ -0,0 +1 @@ 
+{"annotations":{"list":[]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":1,"id":null,"links":[],"liveNow":false,"panels":[{"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":200,"title":"Step","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto"},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":1},"id":1,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"histogram_quantile(0.99, sum(rate(onix_onix_step_execution_duration_seconds_bucket[5m])) by (le, module, role, step)) or histogram_quantile(0.99, sum(rate(onix_step_execution_duration_seconds_bucket[5m])) by (le, module, role, step))","legendFormat":"{{module}} {{role}} {{step}}","refId":"A"}],"title":"Step execution duration (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":1},"id":2,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate(onix_onix_step_executions_total[5m])) by (module, role, step) or sum(rate(onix_step_executions_total[5m])) by (module, role, step)","legendFormat":"{{module}} {{role}} {{step}}","refId":"A"}],"title":"Step executions (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":9},"id":3,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate(onix_onix_step_errors_total[5m])) by (module, role, step) or 
sum(rate(onix_step_errors_total[5m])) by (module, role, step)","legendFormat":"{{module}} {{role}} {{step}}","refId":"A"}],"title":"Step errors (rate)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":17},"id":201,"title":"Plugin","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":18},"id":4,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_plugin_execution_duration_seconds_bucket[5m]) or rate(onix_plugin_execution_duration_seconds_bucket[5m])","legendFormat":"{{le}}","refId":"A"}],"title":"Plugin execution duration (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":18},"id":7,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_plugin_errors_total[5m]) or rate(onix_plugin_errors_total[5m])","legendFormat":"plugin errors/s","refId":"A"}],"title":"Plugin errors (rate)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":202,"title":"Handler","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":27},"id":5,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_beckn_signature_validations_total[5m]) or rate(onix_onix_beckn_signature_validations_total[5m])","legendFormat":"signature 
validations/s","refId":"A"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate(onix_beckn_schema_validations_total[5m])) by (schema_version, status) or sum(rate(onix_onix_beckn_schema_validations_total[5m])) by (schema_version, status)","legendFormat":"{{schema_version}} {{status}}","refId":"B"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_routing_decisions_total[5m]) or rate(onix_routing_decisions_total[5m])","legendFormat":"routing decisions/s","refId":"C"}],"title":"Handler (validations & routing)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":35},"id":203,"title":"Cache","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":36},"id":6,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_cache_operations_total[5m]) or rate(onix_cache_operations_total[5m])","legendFormat":"operations/s","refId":"A"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_cache_hits_total[5m]) or rate(onix_cache_hits_total[5m])","legendFormat":"hits/s","refId":"B"},{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"rate(onix_onix_cache_misses_total[5m]) or rate(onix_cache_misses_total[5m])","legendFormat":"misses/s","refId":"C"}],"title":"Cache (operations, hits, 
misses)","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":44},"id":100,"title":"HTTP","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":45},"id":101,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m]))","legendFormat":"Total","refId":"A"}],"title":"HTTP request rate (total)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":45},"id":102,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (http_status_code)","legendFormat":"{{http_status_code}}","refId":"A"}],"title":"HTTP request rate by status class","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":53},"id":103,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"HTTP request rate by 
path","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":53},"id":105,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (caller)","legendFormat":"{{caller}}","refId":"A"}],"title":"HTTP request rate by caller","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":61},"id":106,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\",http_status_code=\"2xx\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"2xx request rate by path","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":61},"id":107,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\",http_status_code=\"4xx\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"4xx request rate by 
path","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisPlacement":"auto"},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":69},"id":108,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"sum(rate({__name__=~\"onix.*http_request_count(_total)?\",http_status_code=\"5xx\"}[5m])) by (action)","legendFormat":"{{action}}","refId":"A"}],"title":"5xx request rate by path","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":77},"id":204,"title":"Go runtime","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":78},"id":9,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_used or onix_onix_go_memory_used or onix_go_memory_used_bytes or onix_onix_go_memory_used_bytes","legendFormat":"{{go_memory_type}}","refId":"A"}],"title":"Go runtime Memory used","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":78},"id":10,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_limit or onix_onix_go_memory_limit","legendFormat":"memory limit","refId":"A"}],"title":"Go runtime – Memory 
limit","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":86},"id":11,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_allocated or onix_onix_go_memory_allocated or rate(onix_go_memory_allocated_bytes_total[5m]) or rate(onix_onix_go_memory_allocated_bytes_total[5m])","legendFormat":"allocated","refId":"A"}],"title":"Go runtime – Memory allocated (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":86},"id":12,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_allocations or onix_onix_go_memory_allocations or rate(onix_go_memory_allocations_total[5m]) or rate(onix_onix_go_memory_allocations_total[5m])","legendFormat":"allocations/s","refId":"A"}],"title":"Go runtime – Memory allocations (rate)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"bytes"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":94},"id":13,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_memory_gc_goal or onix_onix_go_memory_gc_goal or onix_go_memory_gc_goal_bytes or onix_onix_go_memory_gc_goal_bytes","legendFormat":"GC goal","refId":"A"}],"title":"Go runtime – GC 
goal","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":94},"id":14,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_goroutine_count or onix_onix_go_goroutine_count","legendFormat":"goroutines","refId":"A"}],"title":"Go runtime – Goroutine count","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":102},"id":15,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_processor_limit or onix_onix_go_processor_limit","legendFormat":"GOMAXPROCS","refId":"A"}],"title":"Go runtime – Processor limit","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto"},"unit":"s"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":102},"id":16,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"histogram_quantile(0.99, sum(rate(onix_go_schedule_duration_bucket[5m])) by (le)) or histogram_quantile(0.99, sum(rate(onix_onix_go_schedule_duration_bucket[5m])) by (le))","legendFormat":"p99","refId":"A"}],"title":"Go runtime – Schedule duration 
(p99)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"percent"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":110},"id":17,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_go_config_gogc or onix_onix_go_config_gogc or onix_go_config_gogc_percent or onix_onix_go_config_gogc_percent","legendFormat":"GOGC","refId":"A"}],"title":"Go runtime – GOGC","type":"timeseries"},{"gridPos":{"h":1,"w":24,"x":0,"y":118},"id":205,"title":"Redis ","type":"row"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":119},"id":301,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_max or onix_onix_db_client_connections_max","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connections max","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":119},"id":302,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_idle_max or onix_onix_db_client_connections_idle_max","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connections idle 
max","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":127},"id":303,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_idle_min or onix_onix_db_client_connections_idle_min","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connections idle min","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":127},"id":304,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_usage or onix_onix_db_client_connections_usage","legendFormat":"{{pool_name}} {{state}}","refId":"A"}],"title":"Redis client connections usage by state","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":135},"id":305,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_waits or onix_onix_db_client_connections_waits","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection waits","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"ns"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":135},"id":306,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_waits_duration or 
onix_onix_db_client_connections_waits_duration","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection waits duration","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":143},"id":307,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_timeouts or onix_onix_db_client_connections_timeouts","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection timeouts","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":143},"id":308,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_hits or onix_onix_db_client_connections_hits","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection pool hits","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"prometheus"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"unit":"short"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":151},"id":309,"options":{"legend":{"displayMode":"list","placement":"bottom"}},"targets":[{"datasource":{"type":"prometheus","uid":"prometheus"},"expr":"onix_db_client_connections_misses or onix_onix_db_client_connections_misses","legendFormat":"{{pool_name}}","refId":"A"}],"title":"Redis client connection pool misses","type":"timeseries"}],"refresh":"10s","schemaVersion":38,"style":"dark","tags":["onix","metrics"],"templating":{"list":[]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Onix Metrics","uid":"onix-metrics","version":1,"weekStart":""} diff --git 
a/install/network-observability/grafana/provisioning/dashboards/json/application/traces-dashboard.json b/install/network-observability/grafana/provisioning/dashboards/json/application/traces-dashboard.json new file mode 100644 index 0000000..a858a0d --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/json/application/traces-dashboard.json @@ -0,0 +1 @@ +{"annotations":{"list":[]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"id":null,"links":[],"liveNow":false,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":8,"panels":[],"title":"Search by Trace ID","type":"row"},{"datasource":{"type":"jaeger","uid":"jaeger"},"gridPos":{"h":10,"w":24,"x":0,"y":1},"id":11,"options":{"dedupStrategy":"none","enableLogDetails":true,"showCommonLabels":false,"showProcess":false,"sortOrder":"Descend"},"targets":[{"datasource":{"type":"jaeger","uid":"jaeger"},"queryType":"traceId","query":"${traceID}","refId":"A"}],"title":"Trace by ID","type":"traces"},{"gridPos":{"h":4,"w":24,"x":0,"y":11},"id":9,"options":{"content":"**Search traces**: Use **Jaeger UI** at [http://localhost:16686](http://localhost:16686). Select service **onix-ev-charging-bap** and click **Find Traces**. Alternatively, paste a Trace ID in the panel above.","mode":"markdown"},"title":"Search traces (Jaeger UI)","type":"text"},{"datasource":{"type":"jaeger","uid":"jaeger"},"gridPos":{"h":4,"w":24,"x":0,"y":15},"id":10,"options":{"content":"**Trace ID**: Use the **full 32-character hex** from adapter logs (e.g. `7a385394ee77d4451a1c655c236422fc`). Paste above and refresh. **If you see \"No data\"**: (1) Wait 10–15 s after the request — the adapter batches spans before export. (2) Ensure time range (top right) includes the request time. 
(3) Use **Jaeger UI** at [http://localhost:16686](http://localhost:16686) (service: **onix-ev-charging-bap**) to search; check `otel-collector` logs for export errors if spans are missing.","mode":"markdown"},"title":"How to use","type":"text"}],"refresh":"30s","schemaVersion":38,"style":"dark","tags":["onix","traces","jaeger"],"templating":{"list":[{"current":{"selected":false,"text":"","value":""},"hide":0,"label":"Trace ID (full 32 hex chars)","name":"traceID","options":[{"selected":true,"text":"","value":""}],"query":"","skipUrlSync":false,"type":"textbox"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{},"timezone":"","title":"Onix Traces","uid":"onix-traces","version":1,"weekStart":""} diff --git a/install/network-observability/grafana/provisioning/dashboards/json/network/network-observability-dashboard.json b/install/network-observability/grafana/provisioning/dashboards/json/network/network-observability-dashboard.json new file mode 100644 index 0000000..bcf3edf --- /dev/null +++ b/install/network-observability/grafana/provisioning/dashboards/json/network/network-observability-dashboard.json @@ -0,0 +1,199 @@ +{ + "annotations": { "list": [] }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": true, + "tags": [], + "targetBlank": true, + "title": "Zipkin UI", + "tooltip": "Open Zipkin UI for network API traces", + "type": "link", + "url": "http://localhost:9411" + } + ], + "liveNow": false, + "panels": [ + { + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 1, + "title": "Network Metrics", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, + "id": 2, + "options": { 
"legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (action)", + "legendFormat": "{{action}}", + "refId": "A" + } + ], + "title": "HTTP request rate by action", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, + "id": 3, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (http_status_code)", + "legendFormat": "{{http_status_code}}", + "refId": "A" + } + ], + "title": "HTTP request rate by status", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, + "id": 4, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (role)", + "legendFormat": "{{role}}", + "refId": "A" + } + ], + "title": "HTTP request rate by role", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisPlacement": "auto" }, "unit": "reqps" }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, + "id": 5, + "options": { 
"legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate({__name__=~\"onix.*http_request_count(_total)?\"}[5m])) by (caller)", + "legendFormat": "{{caller}}", + "refId": "A" + } + ], + "title": "HTTP request rate by caller", + "type": "timeseries" + }, + { + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 17 }, + "id": 6, + "title": "Network Logs (Beckn Audit)", + "type": "row" + }, + { + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 12, "w": 24, "x": 0, "y": 18 }, + "id": 7, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descend", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { "type": "loki", "uid": "loki" }, + "expr": "{service_name=~\"onix.*|beckn.*\"}", + "refId": "A" + } + ], + "title": "Beckn audit logs (onix/beckn)", + "type": "logs" + }, + { + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 }, + "id": 71, + "options": { "dedupStrategy": "none", "enableLogDetails": true, "showCommonLabels": true, "showLabels": true, "showTime": true, "sortOrder": "Descend" }, + "targets": [{ "datasource": { "type": "loki", "uid": "loki" }, "expr": "{}", "refId": "A" }], + "title": "All logs (debug: matches any)", + "type": "logs" + }, + { + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, + "id": 8, + "title": "Network Traces", + "type": "row" + }, + { + "datasource": { "type": "zipkin", "uid": "zipkin" }, + "gridPos": { "h": 12, "w": 24, "x": 0, "y": 33 }, + "id": 9, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "showCommonLabels": false, + "showProcess": false, + "sortOrder": "Descend" + }, + "targets": [ + { + "datasource": { "type": "zipkin", "uid": "zipkin" }, + "query": "", + "queryType": "traceqlSearch", + 
"refId": "A" + } + ], + "title": "Network API spans (Zipkin)", + "type": "traces" + }, + { + "gridPos": { "h": 4, "w": 24, "x": 0, "y": 45 }, + "id": 10, + "options": { + "content": "**Network-level observability**: Beckn API spans, audit logs, and HTTP request metrics from the network pipeline.\n\n**No Loki/Zipkin data?** 1) Restart stack after config changes: `docker compose -f network_observability/docker-compose.yml up -d --force-recreate`. 2) Trigger requests to generate audit logs (EmitAuditLogs runs on each request). 3) Use [Zipkin UI](http://localhost:9411) to search traces. 4) In Grafana Explore (Loki), try `{}` or `{service_name=~\".+\"}` to see all logs.", + "mode": "markdown" + }, + "title": "About", + "type": "text" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": ["onix", "network", "observability"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Network Observability", + "uid": "network-observability", + "version": 1, + "weekStart": "" +} diff --git a/install/network-observability/grafana/provisioning/datasources/datasources.yml b/install/network-observability/grafana/provisioning/datasources/datasources.yml new file mode 100644 index 0000000..ec26b4b --- /dev/null +++ b/install/network-observability/grafana/provisioning/datasources/datasources.yml @@ -0,0 +1,32 @@ +# Grafana datasources - provisioned on startup +apiVersion: 1 + +datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + + - name: Jaeger + uid: jaeger + type: jaeger + access: proxy + url: http://jaeger:16686 + editable: false + + - name: Loki + uid: loki + type: loki + access: proxy + url: http://loki:3100 + editable: false + + - name: Zipkin + uid: zipkin + type: zipkin + access: proxy + url: http://zipkin:9411 + editable: false diff --git 
a/install/network-observability/loki/loki-config.yml b/install/network-observability/loki/loki-config.yml new file mode 100644 index 0000000..fb3fd28 --- /dev/null +++ b/install/network-observability/loki/loki-config.yml @@ -0,0 +1,35 @@ +# Loki config for network-level audit logs (OTLP ingestion) +# OTLP requires allow_structured_metadata for Loki 3.x + +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +limits_config: + allow_structured_metadata: true + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +ruler: + alertmanager_url: http://localhost:9093 diff --git a/install/network-observability/otel-collector-bap/config.yaml b/install/network-observability/otel-collector-bap/config.yaml new file mode 100644 index 0000000..995fac6 --- /dev/null +++ b/install/network-observability/otel-collector-bap/config.yaml @@ -0,0 +1,83 @@ +# OpenTelemetry Collector BAP - receives OTLP from BAP adapter (local-beckn-one-bap.yaml) +# App-level: all signals to Prometheus and Jaeger. Network-level: filtered to otel-collector-network. 
+ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +processors: + batch: + send_batch_size: 1024 + timeout: 10s + batch/traces: + send_batch_size: 1024 + timeout: 2s + + filter/network_metrics: + error_mode: ignore + metrics: + metric: + - 'name != "onix_http_request_count"' + + filter/network_traces: + error_mode: ignore + traces: + span: + - 'attributes["sender.id"] == nil' + +exporters: + prometheus: + endpoint: "0.0.0.0:8889" + namespace: onix + const_labels: + observability: otel-collector-bap + service_name: beckn-one-bap + + otlp_grpc/jaeger: + endpoint: jaeger:4317 + tls: + insecure: true + + otlp_http/collector2: + endpoint: http://otel-collector-network:4318 + compression: gzip + +extensions: + health_check: + endpoint: 0.0.0.0:13133 + zpages: + endpoint: 0.0.0.0:55679 + +service: + extensions: [health_check, zpages] + pipelines: + metrics/app: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + + metrics/network: + receivers: [otlp] + processors: [filter/network_metrics, batch] + exporters: [otlp_http/collector2] + + traces/app: + receivers: [otlp] + processors: [batch/traces] + exporters: [otlp_grpc/jaeger] + + traces/network: + receivers: [otlp] + processors: [filter/network_traces, batch/traces] + exporters: [otlp_http/collector2] + + logs/network: + receivers: [otlp] + processors: [batch] + exporters: [otlp_http/collector2] + + telemetry: + logs: + level: info diff --git a/install/network-observability/otel-collector-bpp/config.yaml b/install/network-observability/otel-collector-bpp/config.yaml new file mode 100644 index 0000000..65b0383 --- /dev/null +++ b/install/network-observability/otel-collector-bpp/config.yaml @@ -0,0 +1,83 @@ +# OpenTelemetry Collector BPP - receives OTLP from BPP adapter (local-beckn-one-bpp.yaml) +# App-level: all signals to Prometheus and Jaeger. Network-level: filtered to otel-collector-network. 
+ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +processors: + batch: + send_batch_size: 1024 + timeout: 10s + batch/traces: + send_batch_size: 1024 + timeout: 2s + + filter/network_metrics: + error_mode: ignore + metrics: + metric: + - 'name != "onix_http_request_count"' + + filter/network_traces: + error_mode: ignore + traces: + span: + - 'attributes["sender.id"] == nil' + +exporters: + prometheus: + endpoint: "0.0.0.0:8891" + namespace: onix + const_labels: + observability: otel-collector-bpp + service_name: beckn-one-bpp + + otlp_grpc/jaeger: + endpoint: jaeger:4317 + tls: + insecure: true + + otlp_http/collector2: + endpoint: http://otel-collector-network:4318 + compression: gzip + +extensions: + health_check: + endpoint: 0.0.0.0:13133 + zpages: + endpoint: 0.0.0.0:55679 + +service: + extensions: [health_check, zpages] + pipelines: + metrics/app: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + + metrics/network: + receivers: [otlp] + processors: [filter/network_metrics, batch] + exporters: [otlp_http/collector2] + + traces/app: + receivers: [otlp] + processors: [batch/traces] + exporters: [otlp_grpc/jaeger] + + traces/network: + receivers: [otlp] + processors: [filter/network_traces, batch/traces] + exporters: [otlp_http/collector2] + + logs/network: + receivers: [otlp] + processors: [batch] + exporters: [otlp_http/collector2] + + telemetry: + logs: + level: info diff --git a/install/network-observability/otel-collector-network/config.yaml b/install/network-observability/otel-collector-network/config.yaml new file mode 100644 index 0000000..0a5ab69 --- /dev/null +++ b/install/network-observability/otel-collector-network/config.yaml @@ -0,0 +1,60 @@ +# Collector 2 - receives network-level OTLP from Collector 1, exports to Loki, Zipkin, Prometheus + +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + grpc: + endpoint: 0.0.0.0:4317 + +processors: + # Map Beckn transaction_id -> trace_id and message_id -> 
span_id for UI correlation. + # UUID format: remove hyphens for trace_id (32 hex chars); first 16 hex chars for span_id. NOTE: only the transaction_id -> trace_id mapping is implemented in the statements below; span_id is not set yet. + transform/beckn_ids: + error_mode: ignore + trace_statements: + - set(span.attributes["_beckn_tx"], span.attributes["transaction_id"]) where span.attributes["transaction_id"] != nil + - replace_pattern(span.attributes["_beckn_tx"], "-", "") where span.attributes["_beckn_tx"] != nil + - set(span.trace_id, TraceID(span.attributes["_beckn_tx"])) where span.attributes["_beckn_tx"] != nil + + + batch: + send_batch_size: 1024 + timeout: 10s + +exporters: + prometheus: + endpoint: "0.0.0.0:8890" + namespace: onix_network + const_labels: + observability: network-level + + zipkin: + endpoint: http://zipkin:9411/api/v2/spans + format: json + + otlphttp/loki: + endpoint: http://loki:3100/otlp + compression: gzip + +service: + pipelines: + metrics: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + + traces: + receivers: [otlp] + processors: [transform/beckn_ids, batch] + exporters: [zipkin] + + logs: + receivers: [otlp] + processors: [batch] + exporters: [otlphttp/loki] + + telemetry: + logs: + level: info diff --git a/install/network-observability/prometheus/prometheus.yml b/install/network-observability/prometheus/prometheus.yml new file mode 100644 index 0000000..773f90a --- /dev/null +++ b/install/network-observability/prometheus/prometheus.yml @@ -0,0 +1,20 @@ +# Prometheus - scrapes metrics from OTEL Collectors (BAP, BPP, network) +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "otel-collector-bap" + static_configs: + - targets: ["otel-collector-bap:8889"] + metrics_path: /metrics + + - job_name: "otel-collector-bpp" + static_configs: + - targets: ["otel-collector-bpp:8891"] + metrics_path: /metrics + + - job_name: "otel-collector-network" + static_configs: + - targets: ["otel-collector-network:8890"] + metrics_path: /metrics diff --git a/pkg/model/model.go b/pkg/model/model.go index
ac3dc5b..c75c021 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -57,8 +57,8 @@ const ( // ContextKeyParentID is the context key for storing and retrieving the parent ID from a request context ContextKeyParentID ContextKey = "parent_id" - // ContextKeyCallerID is the context key for the caller who is calling the bap/bpp - ContextKeyCallerID ContextKey = "caller_id" + // ContextKeyRemoteID is the context key for the caller who is calling the bap/bpp + ContextKeyRemoteID ContextKey = "caller_id" ) var contextKeys = map[string]ContextKey{ @@ -67,7 +67,7 @@ var contextKeys = map[string]ContextKey{ "subscriber_id": ContextKeySubscriberID, "module_id": ContextKeyModuleID, "parent_id": ContextKeyParentID, - "caller_id": ContextKeyCallerID, + "caller_id": ContextKeyRemoteID, } // ParseContextKey converts a string into a valid ContextKey. diff --git a/pkg/plugin/implementation/otelsetup/cmd/plugin.go b/pkg/plugin/implementation/otelsetup/cmd/plugin.go index a0407d7..b8a92a4 100644 --- a/pkg/plugin/implementation/otelsetup/cmd/plugin.go +++ b/pkg/plugin/implementation/otelsetup/cmd/plugin.go @@ -41,9 +41,18 @@ func (m metricsProvider) New(ctx context.Context, config map[string]string) (*te if v := ctx.Value(model.ContextKeyParentID); v != nil { parentID := v.(string) p := strings.Split(parentID, ":") - deviceId = p[len(p)-1] - producerType = p[0] - producer = p[1] + if len(p) >= 3 { + producerType = p[0] + producer = p[1] + deviceId = p[len(p)-1] + } else if len(p) >= 2 { + producerType = p[0] + producer = p[1] + deviceId = p[1] + } else if len(p) >= 1 { + producerType = p[0] + deviceId = p[0] + } } if deviceId != "" { @@ -97,7 +106,7 @@ func (m metricsProvider) New(ctx context.Context, config map[string]string) (*te } } - //to set network leval matric frequency and granularity + //to set network level metric frequency and granularity if v, ok := config["networkMetricsGranularity"]; ok && v != "" { telemetry.SetNetworkMetricsConfig(v, "") } diff --git
a/pkg/plugin/implementation/otelsetup/otelsetup.go b/pkg/plugin/implementation/otelsetup/otelsetup.go index adccc5e..6606437 100644 --- a/pkg/plugin/implementation/otelsetup/otelsetup.go +++ b/pkg/plugin/implementation/otelsetup/otelsetup.go @@ -108,7 +108,7 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) }, nil } - //this will be used by both matric and traces + //this will be used by both metric and traces // to build resource with envelope metadata baseAttrs := []attribute.KeyValue{ @@ -123,10 +123,10 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) resMetric, err := resource.New(ctx, resource.WithAttributes(buildAtts(baseAttrs, "METRIC")...)) if err != nil { - return nil, fmt.Errorf("failed to create telemetry resource for matric: %w", err) + return nil, fmt.Errorf("failed to create telemetry resource for metric: %w", err) } - //OTLP matric + //OTLP metric var meterProvider *metric.MeterProvider if cfg.EnableMetrics { metricExpoter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(cfg.OtlpEndpoint), @@ -139,7 +139,7 @@ func (Setup) New(ctx context.Context, cfg *Config) (*telemetry.Provider, error) otel.SetMeterProvider(meterProvider) log.Infof(ctx, "OpenTelemetry metrics initialized for service=%s version=%s env=%s (OTLP endpoint=%s)", cfg.ServiceName, cfg.ServiceVersion, cfg.Environment, cfg.OtlpEndpoint) - // for the go runtime matrics + // for the go runtime metrics if err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(runtime.DefaultMinimumReadMemStatsInterval)); err != nil { log.Warnf(ctx, "Failed to start Go runtime instrumentation: %v", err) } diff --git a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go index 97d74a4..23f1461 100644 --- a/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go +++ b/pkg/plugin/implementation/reqpreprocessor/reqpreprocessor.go @@ -75,8 +75,8 @@ func 
NewPreProcessor(cfg *Config) (func(http.Handler) http.Handler, error) { } if callerID != nil { - log.Debugf(ctx, "adding callerID to request:%s, %v", model.ContextKeyCallerID, callerID) - ctx = context.WithValue(ctx, model.ContextKeyCallerID, callerID) + log.Debugf(ctx, "adding callerID to request:%s, %v", model.ContextKeyRemoteID, callerID) + ctx = context.WithValue(ctx, model.ContextKeyRemoteID, callerID) } for _, key := range cfg.ContextKeys { ctxKey, _ := model.ParseContextKey(key) diff --git a/pkg/telemetry/audit_fields.go b/pkg/telemetry/audit_fields.go index d5e5635..be55789 100644 --- a/pkg/telemetry/audit_fields.go +++ b/pkg/telemetry/audit_fields.go @@ -108,41 +108,6 @@ func getFieldForAction(ctx context.Context, action string) []string { return auditRules["default"] } -//func getByPath(root map[string]interface{}, path string) (interface{}, bool) { -// -// parts := strings.Split(path, ".") -// var cur interface{} = root -// -// for _, part := range parts { -// m, ok := cur.(map[string]interface{}) -// if !ok { -// return nil, false -// } -// v, ok := m[part] -// if !ok { -// return nil, false -// } -// cur = v -// } -// return cur, true -//} -// -//func setByPath(root map[string]interface{}, path string, value interface{}) { -// parts := strings.Split(path, ".") -// cur := root -// -// for i := 0; i < len(parts)-1; i++ { -// k := parts[i] -// next, ok := cur[k].(map[string]interface{}) -// if !ok { -// next = map[string]interface{}{} -// cur[k] = next -// } -// cur = next -// } -// cur[parts[len(parts)-1]] = value -//} - func projectPath(cur interface{}, parts []string) (interface{}, bool) { if len(parts) == 0 { return cur, true diff --git a/pkg/telemetry/pluginMetrics.go b/pkg/telemetry/pluginMetrics.go index a26601b..4ef2912 100644 --- a/pkg/telemetry/pluginMetrics.go +++ b/pkg/telemetry/pluginMetrics.go @@ -50,7 +50,7 @@ var ( AttrMetricGranularity = attribute.Key("metric.granularity") AttrMetricFrequency = attribute.Key("metric.frequency") 
AttrObservedTimeUnixNano = attribute.Key("observedTimeUnixNano") - AttrMatricLabels = attribute.Key("metric.labels") + AttrMetricLabels = attribute.Key("metric.labels") AttrSenderID = attribute.Key("sender.id") AttrRecipientID = attribute.Key("recipient.id") )