From 8642ed70018b835c24948c6b19fad66f2019381c Mon Sep 17 00:00:00 2001 From: Eric FELIXINE Date: Tue, 5 May 2026 13:49:00 -0400 Subject: [PATCH] feat: Add Redpanda Console, Pulsar Distribution Service, and Grafana Dashboards - Add Redpanda Console service (port 28080, Traefik integration) - Add Pulsar Distribution Service (Pulsar -> Brokers) - Create Grafana dashboards for Redpanda, Pulsar, and Smart City Ingestion - Configure Prometheus targets for Pulsar and Redpanda metrics - Fix FROST URL in distribution service - Create session resume for 2026-05-05 --- .../dashboards/pulsar-metrics.json | 118 +++++++++++ .../dashboards/redpanda-metrics.json | 102 ++++++++++ .../dashboards/smart-city-ingeston.json | 103 ++++++++++ prometheus.yml | 14 ++ pulsar/application.properties | 10 + pulsar/docker-compose-simple.yml | 24 +++ pulsar/docker-compose.manager.yml | 45 +++++ pulsar/docker-compose.yml | 76 ++++++- pulsar/supervisord-custom.conf | 18 ++ pulsar/supervisord-fixed.conf | 18 ++ redpanda/console.yaml | 16 ++ redpanda/docker-compose.yml | 30 +++ session_resume_2026-05-05.md | 189 +++++++++++++----- 13 files changed, 710 insertions(+), 53 deletions(-) create mode 100644 grafana/provisioning/dashboards/pulsar-metrics.json create mode 100644 grafana/provisioning/dashboards/redpanda-metrics.json create mode 100644 grafana/provisioning/dashboards/smart-city-ingeston.json create mode 100644 pulsar/application.properties create mode 100644 pulsar/docker-compose-simple.yml create mode 100644 pulsar/docker-compose.manager.yml create mode 100644 pulsar/supervisord-custom.conf create mode 100644 pulsar/supervisord-fixed.conf create mode 100644 redpanda/console.yaml diff --git a/grafana/provisioning/dashboards/pulsar-metrics.json b/grafana/provisioning/dashboards/pulsar-metrics.json new file mode 100644 index 00000000..9356dfa3 --- /dev/null +++ b/grafana/provisioning/dashboards/pulsar-metrics.json @@ -0,0 +1,118 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + 
"fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "title": "Pulsar Overview", + "type": "row", + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0} + }, + { + "title": "JVM Memory Used", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "jvm_memory_used_bytes{area=\"heap\"}", + "legendFormat": "Heap Memory" + }, + { + "expr": "jvm_memory_used_bytes{area=\"nonheap\"}", + "legendFormat": "Non-Heap Memory" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 1} + }, + { + "title": "JVM GC Collection Seconds", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "rate(jvm_gc_collection_seconds_sum[1m])", + "legendFormat": "{{gc}} GC Rate" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1} + }, + { + "title": "Pulsar Message Rates", + "type": "row", + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 9} + }, + { + "title": "Bytes In/Sec", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "rate(pulsar_in_bytes_total[1m])", + "legendFormat": "In Bytes/sec" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 10} + }, + { + "title": "Bytes Out/Sec", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "rate(pulsar_out_bytes_total[1m])", + "legendFormat": "Out Bytes/sec" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 10} + }, + { + "title": "Pulsar Topics", + "type": "stat", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "sum(pulsar_topics_count)", + "legendFormat": "Active Topics" + } + ], + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 18} + }, + { + "title": "Subscriptions", + "type": "stat", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "sum(pulsar_subscriptions_count)", + "legendFormat": 
"Active Subscriptions" + } + ], + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 18} + }, + { + "title": "Producers", + "type": "stat", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "sum(pulsar_producers_count)", + "legendFormat": "Active Producers" + } + ], + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 18} + } + ], + "schemaVersion": 38, + "style": "dark", + "tags": ["pulsar", "smart-city"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "title": "Pulsar Metrics", + "uid": "pulsar-metrics", + "version": 1 +} diff --git a/grafana/provisioning/dashboards/redpanda-metrics.json b/grafana/provisioning/dashboards/redpanda-metrics.json new file mode 100644 index 00000000..c58f76cc --- /dev/null +++ b/grafana/provisioning/dashboards/redpanda-metrics.json @@ -0,0 +1,102 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "title": "Redpanda Overview", + "type": "row", + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0} + }, + { + "title": "Kafka API Requests", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "rate(redpanda_kafka_requests_total[1m])", + "legendFormat": "{{method}} - {{topic}}" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 1} + }, + { + "title": "Under-Replicated Partitions", + "type": "stat", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "redpanda_cluster_under_replicated_partitions", + "legendFormat": "Under-Replicated" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1} + }, + { + "title": "Producer Latency (p99)", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "histogram_quantile(0.99, rate(redpanda_kafka_produce_latency_seconds_bucket[5m]))", + "legendFormat": "p99 Latency" + } + ], + "gridPos": {"h": 8, 
"w": 12, "x": 0, "y": 9} + }, + { + "title": "Consumer Fetch Latency (p99)", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "histogram_quantile(0.99, rate(redpanda_kafka_fetch_latency_seconds_bucket[5m]))", + "legendFormat": "p99 Latency" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 9} + }, + { + "title": "Redpanda Resource Usage", + "type": "row", + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 17} + }, + { + "title": "Memory Usage", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "redpanda_memory_allocated_bytes", + "legendFormat": "Allocated (bytes)" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 18} + }, + { + "title": "CPU Usage", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "rate(redpanda_cpu_busy_seconds_total[1m])", + "legendFormat": "CPU Busy Rate" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 18} + } + ], + "schemaVersion": 38, + "style": "dark", + "tags": ["redpanda", "kafka", "smart-city"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "title": "Redpanda Metrics", + "uid": "redpanda-metrics", + "version": 1 +} diff --git a/grafana/provisioning/dashboards/smart-city-ingeston.json b/grafana/provisioning/dashboards/smart-city-ingeston.json new file mode 100644 index 00000000..05cc207f --- /dev/null +++ b/grafana/provisioning/dashboards/smart-city-ingeston.json @@ -0,0 +1,103 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "title": "Smart City Data Ingestion", + "type": "row", + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0} + }, + { + "title": 
"Messages/sec by Type", + "type": "timeseries", + "datasource": {"type": "influxdb", "uid": "InfluxDB-Simulator"}, + "targets": [ + { + "query": "from(bucket: \"iot_data\") |> range(start: v.timeRangeStart, stop: v.timeRangeStop) |> filter(fn: (r) => r[\"_measurement\"] == \"sensor_data\") |> group(columns: [\"type\"]) |> count()", + "refId": "A" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 1} + }, + { + "title": "Temperature (Weather)", + "type": "timeseries", + "datasource": {"type": "influxdb", "uid": "InfluxDB-Simulator"}, + "targets": [ + { + "query": "from(bucket: \"iot_data\") |> range(start: v.timeRangeStart, stop: v.timeRangeStop) |> filter(fn: (r) => r[\"_measurement\"] == \"sensor_data\" and r[\"type\"] == \"weather\") |> filter(fn: (r) => r[\"_field\"] == \"temperature_celsius\") |> mean()", + "refId": "B" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1} + }, + { + "title": "Air Quality (PM2.5)", + "type": "timeseries", + "datasource": {"type": "influxdb", "uid": "InfluxDB-Simulator"}, + "targets": [ + { + "query": "from(bucket: \"iot_data\") |> range(start: v.timeRangeStart, stop: v.timeRangeStop) |> filter(fn: (r) => r[\"_measurement\"] == \"sensor_data\" and r[\"type\"] == \"airquality\") |> filter(fn: (r) => r[\"_field\"] == \"pm25_ugm3\") |> mean()", + "refId": "C" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 9} + }, + { + "title": "Traffic Count", + "type": "timeseries", + "datasource": {"type": "influxdb", "uid": "InfluxDB-Simulator"}, + "targets": [ + { + "query": "from(bucket: \"iot_data\") |> range(start: v.timeRangeStart, stop: v.timeRangeStop) |> filter(fn: (r) => r[\"_measurement\"] == \"sensor_data\" and r[\"type\"] == \"traffic\") |> filter(fn: (r) => r[\"_field\"] == \"vehicle_count\") |> mean()", + "refId": "D" + } + ], + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 9} + }, + { + "title": "Pulsar Message Rates", + "type": "row", + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 17} + }, + { + "title": "Pulsar In/Out 
Bytes/sec", + "type": "timeseries", + "datasource": {"type": "prometheus", "uid": "prometheus"}, + "targets": [ + { + "expr": "rate(pulsar_in_bytes_total[1m])", + "legendFormat": "In Bytes/sec" + }, + { + "expr": "rate(pulsar_out_bytes_total[1m])", + "legendFormat": "Out Bytes/sec" + } + ], + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 18} + } + ], + "schemaVersion": 38, + "style": "dark", + "tags": ["smart-city", "influxdb", "pulsar"], + "templating": {"list": []}, + "time": {"from": "now-1h", "to": "now"}, + "title": "Smart City - Data Ingestion", + "uid": "smart-city-ingeston", + "version": 1 +} diff --git a/prometheus.yml b/prometheus.yml index 6bca659a..f31952a9 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -29,3 +29,17 @@ scrape_configs: - targets: ['stellio:8080'] metrics_path: '/metrics' scrape_interval: 10s + + # Redpanda Metrics (Admin API) + - job_name: 'redpanda' + static_configs: + - targets: ['smart-city-redpanda:9644'] + metrics_path: '/metrics' + scrape_interval: 10s + + # Pulsar Metrics (Admin API) + - job_name: 'pulsar' + static_configs: + - targets: ['smart-city-pulsar:8080'] + metrics_path: '/metrics' + scrape_interval: 10s diff --git a/pulsar/application.properties b/pulsar/application.properties new file mode 100644 index 00000000..e7ecbb9c --- /dev/null +++ b/pulsar/application.properties @@ -0,0 +1,10 @@ +server.port=7750 +pulsar.cluster=standalone +pulsar.service-url=pulsar://smart-city-pulsar:6650 +pulsar.web-service-url=http://smart-city-pulsar:8080 +spring.datasource.driver-class-name=herddb.jdbc.Driver +spring.datasource.url=jdbc:herddb:server:localhost:7000?server.start=true&server.base.dir=dbdata +spring.datasource.initialization-mode=never +logging.level.org.apache=INFO +redirect.host=localhost +redirect.port=7750 diff --git a/pulsar/docker-compose-simple.yml b/pulsar/docker-compose-simple.yml new file mode 100644 index 00000000..c69af344 --- /dev/null +++ b/pulsar/docker-compose-simple.yml @@ -0,0 +1,24 @@ +version: '3.8' 
+services: + pulsar-manager: + image: apachepulsar/pulsar-manager:v0.4.0 + container_name: smart-city-pulsar-manager + restart: unless-stopped + networks: + - traefik-public + - smartcity-shared + ports: + - "7750:7750" + environment: + - SPRING_APPLICATION_JSON={"server":{"port":7750},"pulsar":{"cluster":"standalone","serviceUrl":"pulsar://smart-city-pulsar:6650","webServiceUrl":"http://smart-city-pulsar:8080"},"spring":{"datasource":{"driverClassName":"herddb.jdbc.Driver","url":"jdbc:herddb:server:localhost:7000?server.start=true&server.base.dir=dbdata","initialization-mode":"never"},"logging":{"level":{"org":{"apache":"INFO"}}},"redirect":{"host":"localhost","port":7750}} + labels: + - "traefik.enable=true" + - "traefik.http.routers.pulsar-manager.rule=Host(`pulsar.digitribe.fr`)" + - "traefik.http.routers.pulsar-manager.entrypoints=websecure" + - "traefik.http.routers.pulsar-manager.tls=true" + - "traefik.http.services.pulsar-manager.loadbalancer.server.port=7750" +networks: + traefik-public: + external: true + smartcity-shared: + external: true diff --git a/pulsar/docker-compose.manager.yml b/pulsar/docker-compose.manager.yml new file mode 100644 index 00000000..e894a2e2 --- /dev/null +++ b/pulsar/docker-compose.manager.yml @@ -0,0 +1,45 @@ +# Pulsar Manager - Web UI for managing Pulsar +# Access: https://pulsar.digitribe.fr +version: '3.8' + +services: + pulsar-manager: + image: apachepulsar/pulsar-manager:v0.4.0 + container_name: smart-city-pulsar-manager + restart: unless-stopped + depends_on: + pulsar: + condition: service_healthy + environment: + - PULSAR_CLUSTER_NAME=standalone + - PULSAR_SERVICE_URL=pulsar://smart-city-pulsar:6650 + - PULSAR_WEB_SERVICE_URL=http://smart-city-pulsar:8080 + - SPRING_APPLICATION_JSON={"server":{"port":7750},"pulsar":{"cluster":"standalone","serviceUrl":"pulsar://smart-city-pulsar:6650","webServiceUrl":"http://smart-city-pulsar:8080"}} + networks: + - traefik-public + - smartcity-shared + ports: + - "7750:7750" + 
healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:7750 || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + labels: + - "traefik.enable=true" + - "traefik.http.routers.pulsar-manager.rule=Host(`pulsar.digitribe.fr`)" + - "traefik.http.routers.pulsar-manager.entrypoints=websecure" + - "traefik.http.routers.pulsar-manager.tls=true" + - "traefik.http.services.pulsar-manager.loadbalancer.server.port=7750" + # Redirect /admin and /ws to Pulsar standalone + - "traefik.http.routers.pulsar.rule=Host(`pulsar.digitribe.fr`) && PathPrefix(`/admin`, `/ws`, `/lookup`)" + - "traefik.http.routers.pulsar.entrypoints=websecure" + - "traefik.http.routers.pulsar.tls=true" + - "traefik.http.services.pulsar.loadbalancer.server.port=8080" + +networks: + traefik-public: + external: true + smartcity-shared: + external: true diff --git a/pulsar/docker-compose.yml b/pulsar/docker-compose.yml index a113b0db..c9d8bd1f 100644 --- a/pulsar/docker-compose.yml +++ b/pulsar/docker-compose.yml @@ -1,8 +1,11 @@ -# Apache Pulsar Standalone - Smart City Digital Twin Martinique -# HTTP Admin UI: https://pulsar.digitribe.fr (via Traefik) -# HTTP API: http://smart-city-pulsar:8080/admin/v2 -# Binary: pulsar://smart-city-pulsar:6650 +# Apache Pulsar Stack - Smart City Digital Twin Martinique +# Includes: Pulsar Standalone + Pulsar Manager +# Pulsar Admin: https://pulsar.digitribe.fr/admin +# Pulsar Manager: https://pulsar.digitribe.fr +version: '3.8' + services: + # Pulsar Standalone pulsar: image: apachepulsar/pulsar:3.2.0 container_name: smart-city-pulsar @@ -28,10 +31,66 @@ services: start_period: 60s labels: - "traefik.enable=true" - - "traefik.http.routers.pulsar.rule=Host(`pulsar.digitribe.fr`)" - - "traefik.http.routers.pulsar.entrypoints=websecure" - - "traefik.http.routers.pulsar.tls=true" - - "traefik.http.services.pulsar.loadbalancer.server.port=8080" + - "traefik.http.routers.pulsar-admin.rule=Host(`pulsar.digitribe.fr`) && PathPrefix(`/admin`, `/ws`, 
`/lookup`)" + - "traefik.http.routers.pulsar-admin.entrypoints=websecure" + - "traefik.http.routers.pulsar-admin.tls=true" + - "traefik.http.services.pulsar-admin.loadbalancer.server.port=8080" + + # Pulsar Manager - Web UI + pulsar-manager: + image: apachepulsar/pulsar-manager:v0.4.0 + container_name: smart-city-pulsar-manager + restart: unless-stopped + depends_on: + pulsar: + condition: service_healthy + environment: + - URL=jdbc:postgresql://127.0.0.1:5432/pulsar_manager + - POSTGRES_PASSWORD=Digitribe972 + networks: + - traefik-public + - smartcity-shared + ports: + - "7750:7750" + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:7750 || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 120s + labels: + - "traefik.enable=true" + - "traefik.http.routers.pulsar-manager.rule=Host(`pulsar.digitribe.fr`)" + - "traefik.http.routers.pulsar-manager.entrypoints=web" + - "traefik.http.services.pulsar-manager.loadbalancer.server.port=7750" + + # Pulsar Distribution Service - Consumer → Republish to Brokers + pulsar-distribution: + build: + context: . 
+ dockerfile: Dockerfile + container_name: smart-city-pulsar-distribution + restart: unless-stopped + depends_on: + - pulsar + environment: + - PULSAR_HOST=smart-city-pulsar + - PULSAR_PORT=6650 + - EMQX_HOST=emqx_emqx_1 + - EMQX_PORT=1883 + - MOSQUITTO_HOST=mosquitto-traefik + - MOSQUITTO_PORT=1883 + - ORION_URL=http://fiware-gis-quickstart-orion-1:1026 + - STELLIO_URL=http://stellio-api-gateway:8080 + - FROST_URL=http://frost-api-8090:8080/FROST-Server/v1.1 + networks: + - smartcity-shared + healthcheck: + test: ["CMD-SHELL", "ps aux | grep -q distribution || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s networks: traefik-public: @@ -41,3 +100,4 @@ networks: volumes: pulsar-data: + pulsar-manager-data: diff --git a/pulsar/supervisord-custom.conf b/pulsar/supervisord-custom.conf new file mode 100644 index 00000000..e4dbd5c2 --- /dev/null +++ b/pulsar/supervisord-custom.conf @@ -0,0 +1,18 @@ +[supervisord] +nodaemon=true +logfile=/pulsar-manager/supervisord.log +pidfile=/pulsar-manager/supervisord.pid + +[program:pulsar-manager-frontend] +command=/usr/sbin/nginx -g "daemon off;" +autostart=true +autorestart=true +stderr_logfile=/tmp/pulsar-manager-frontend-stderr---supervisor-%(host_node_name)s.log +stdout_logfile=/tmp/pulsar-manager-frontend-stdout---supervisor-%(host_node_name)s.log + +[program:pulsar-manager-backend] +command=/pulsar-manager/pulsar-manager/bin/pulsar-manager --redirect.host=%(ENV_REDIRECT_HOST)s --redirect.port=%(ENV_REDIRECT_PORT)s --spring.datasource.driver-class-name=%(ENV_DRIVER_CLASS_NAME)s --spring.datasource.url=%(ENV_URL)s --spring.datasource.initialization-mode=never --logging.level.org.apache=%(ENV_LOG_LEVEL)s +autostart=true +autorestart=true +stderr_logfile=/tmp/pulsar-manager-backend-stderr---supervisor-%(host_node_name)s.log +stdout_logfile=/tmp/pulsar-manager-backend-stdout---supervisor-%(host_node_name)s.log diff --git a/pulsar/supervisord-fixed.conf b/pulsar/supervisord-fixed.conf new file mode 100644 
index 00000000..fe4dc3b4 --- /dev/null +++ b/pulsar/supervisord-fixed.conf @@ -0,0 +1,18 @@ +[supervisord] +nodaemon=true +logfile=/pulsar-manager/supervisord.log +pidfile=/pulsar-manager/supervisord.pid + +[program:pulsar-manager-frontend] +command=/usr/sbin/nginx -g "daemon off;" +autostart=true +autorestart=true +stderr_logfile=/tmp/pulsar-manager-frontend-stderr---supervisor-%(host_node_name)s.log +stdout_logfile=/tmp/pulsar-manager-frontend-stdout---supervisor-%(host_node_name)s.log + +[program:pulsar-manager-backend] +command=/pulsar-manager/pulsar-manager/bin/pulsar-manager --redirect.host=localhost --redirect.port=7750 --spring.datasource.driver-class-name=herddb.jdbc.Driver --spring.datasource.url=jdbc:herddb:server:localhost:7000?server.start=true&server.base.dir=dbdata --spring.datasource.initialization-mode=never --logging.level.org.apache=INFO --pulsar.cluster=standalone --pulsar.service-url=pulsar://smart-city-pulsar:6650 --pulsar.web-service-url=http://smart-city-pulsar:8080 +autostart=true +autorestart=true +stderr_logfile=/tmp/pulsar-manager-backend-stderr---supervisor-%(host_node_name)s.log +stdout_logfile=/tmp/pulsar-manager-backend-stdout---supervisor-%(host_node_name)s.log diff --git a/redpanda/console.yaml b/redpanda/console.yaml new file mode 100644 index 00000000..c4b830c5 --- /dev/null +++ b/redpanda/console.yaml @@ -0,0 +1,16 @@ +console: + server: + listenPort: 8080 + basePath: "/" + kafka: + brokers: + - "smart-city-redpanda:9092" + schemaRegistry: + enabled: false + redpanda: + adminApi: + enabled: true + urls: + - "http://smart-city-redpanda:9644" + console: + baseUrl: "https://redpanda-console.digitribe.fr" diff --git a/redpanda/docker-compose.yml b/redpanda/docker-compose.yml index 2c30c332..ef9dd7a7 100644 --- a/redpanda/docker-compose.yml +++ b/redpanda/docker-compose.yml @@ -47,6 +47,36 @@ services: - "traefik.http.routers.redpanda.tls=true" - "traefik.http.services.redpanda.loadbalancer.server.port=9644" + # Redpanda Console - 
Web UI for Redpanda/Kafka + redpanda-console: + image: docker.redpanda.com/redpandadata/console:v2.5.0 + container_name: smart-city-redpanda-console + restart: unless-stopped + depends_on: + - redpanda + environment: + - KAFKA_BROKERS=smart-city-redpanda:9092 + - CONFIG_FILE=console.yaml + volumes: + - ./console.yaml:/console.yaml:ro + networks: + - traefik-public + - smartcity-shared + ports: + - "28080:8080" + labels: + - "traefik.enable=true" + - "traefik.http.routers.redpanda-console.rule=Host(`redpanda-console.digitribe.fr`)" + - "traefik.http.routers.redpanda-console.entrypoints=websecure" + - "traefik.http.routers.redpanda-console.tls=true" + - "traefik.http.services.redpanda-console.loadbalancer.server.port=8080" + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:8080 || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + networks: traefik-public: external: true diff --git a/session_resume_2026-05-05.md b/session_resume_2026-05-05.md index 3a08cfb9..ebc943e2 100644 --- a/session_resume_2026-05-05.md +++ b/session_resume_2026-05-05.md @@ -1,57 +1,156 @@ -# Session Resume — 05 Mai 2026 (Session de 03h03 - 03h45) +# Session Resume - 2026-05-05 -## ✅ Réalisé dans cette session +## Objectif de la session +Configuration de l'ingestion de données pour le Smart City Digital Twin Martinique : +- Simulateur → Pulsar (port 6650) +- Pulsar → Service de Distribution → Brokers (MQTT, NGSI-LD, FROST) +- Monitoring via Redpanda Console, Prometheus, Grafana -### 1. Corrections critiques du simulateur (simulator.py) -- **ENABLE_PULSAR corrigé** : La comparaison `== "1"` échouait car docker-compose envoyait `"true"`. 
Nouveau code : `.lower() in ("1", "true", "yes", "on")` -- **Intervalle temps réel** : Passé de 10s à **1s** (`INTERVAL = 1`) pour un envoi en temps réel -- **InfluxDB URL** : Corrigé de `digital-twin-influxdb` vers `smart-city-influxdb` -- **Ajout fonctions** : `publish_pulsar()` et `publish_redpanda()` avec threading (asynchrone) -- **Debug ajouté** : Traces pour vérifier l'atteinte du code Pulsar +## Réalisations ✅ -### 2. Docker Compose -- **Création** : `docker-compose.yml` principal avec service simulator -- **Redpanda** : `redpanda/docker-compose.yml` + `redpanda.yaml` + `start.sh` (mais service bloqué OOM) -- **ClickHouse** : `clickhouse/docker-compose.yml` + `config.xml` (Analytique OLAP) -- **RisingWave** : `risingwave/docker-compose.yml` (Streaming DB PostgreSQL-compatible) +### 1. Redpanda Console - OPÉRATIONNEL +- Service `smart-city-redpanda-console` créé dans `redpanda/docker-compose.yml` +- Accessible sur `http://localhost:28080` (200 OK) +- Traefik configuré : `https://redpanda-console.digitribe.fr` +- Connecté à Redpanda (`smart-city-redpanda:9092`) +- API Admin Redpanda activée (`http://smart-city-redpanda:9644`) +- Fichier config : `redpanda/console.yaml` -### 3. Diagramme des flux (data-flow-diagram.md) -- **Mermaid** : Ajout des nœuds ClickHouse, RisingWave, Pulsar, Redpanda -- **Flux** : Ajout des flèches du simulateur vers ces nouveaux services -- **Tableau** : Mise à jour du statut de tous les composants -- **Docs** : Mise à jour des sections "Analytique & Streaming" +### 2. Prometheus - CONFIGURÉ +- Cibles actives ajoutées dans `prometheus.yml` : + - `redpanda` : **up** (métriques port 9644) + - `pulsar` : **up** (métriques port 8080) + - `mosquitto` : up + - `orion-ld` : up + - `frost-server` : down (normal, pas de données) + - `stellio` : down (normal, pas de données) -### 4. 
Git & Sauvegarde -- **Commit** : `01c2be4` — "feat(simulator): real-time (1s), fix ENABLE_PULSAR, add Pulsar/Redpanda publish, fix InfluxDB URL" -- **Push** : Vers Gitea (https://gitea.digitribe.fr/eric/smart-city-digital-twin-martinique) -- **Fichiers commités** : simulator.py, docker-compose.yml, clickhouse/, risingwave/, redpanda/, data-flow-diagram.md +### 3. Grafana Dashboards - CRÉÉS +- **Redpanda Metrics** (`grafana/provisioning/dashboards/redpanda-metrics.json`) +- **Pulsar Metrics** (`grafana/provisioning/dashboards/pulsar-metrics.json`) +- **Smart City Ingestion** (`grafana/provisioning/dashboards/smart-city-ingeston.json`) +- Datasources InfluxDB connectées : InfluxDB, InfluxDB-Simulator, InfluxDB-SmartCity -## 📊 État des services (au moment du crash) +### 4. Simulateur → Pulsar - FONCTIONNEL +- Le simulateur utilise déjà le protocole binaire Pulsar (port 6650) +- Logs confirment les connexions : `Connected to broker pulsar://smart-city-pulsar:6650` +- Topics créés : `smartcity-traffic`, `smartcity-airquality`, `smartcity-parking`, `smartcity-noise`, `smartcity-weather`, `smartcity-light` -| Service | Status | Notes | -|---------|--------|-------| -| Simulateur | ✅ Actif (1s) | MQTT + Pulsar (code OK, réception KO) | -| Pulsar | ⚠️ Debugging | Topics créés manuellement, API /produce renvoie 404 | -| Redpanda | ❌ OOM | Container crash en boucle, service marqué "cancelled" | -| ClickHouse | ✅ Ajouté | Stack créée, pas encore démarrée | -| RisingWave | ✅ Ajouté | Stack créée, pas encore démarrée | -| Grafana | ⚠️ No Data | Dashboard `smartcity-martinique-2026` vide | +### 5. 
Service de Distribution - AJOUTÉ (mais instable) +- Service `pulsar-distribution` ajouté dans `pulsar/docker-compose.yml` +- Code : `pulsar/distribution.py` (consomme depuis Pulsar, republie vers brokers) +- Problème : Erreur docker-compose au redémarrage (`KeyError: 'ContainerConfig'`) +- URL FROST corrigée : `frost-api-8090:8080/FROST-Server/v1.1` -## ⏳ Reste à faire (Todo List) +## Problèmes rencontrés ⚠️ -1. ✅ ~~Corriger ENABLE_PULSAR~~ (Fait) -2. ✅ ~~Ajouter ClickHouse~~ (Fait) -3. ✅ ~~Ajouter RisingWave~~ (Fait) -4. ✅ ~~Modifier diagramme Mermaid~~ (Fait) -5. ⚠️ **Grafana "No Data"** : Vérifier datasources (InfluxDB, FROST) et requêtes Flux -6. ⚠️ **Pulsar** : Résoudre l'erreur 404 sur l'API produce -7. ⚠️ **Redpanda** : Soit le réparer, soit le remplacer par Kafka simple -8. 📋 **Payloads NGSI-LD** : Vérifier/corriger les formats Orion-LD et Stellio +### 1. Pulsar Manager - CRASH RÉCURRENT +- Conteneur `smart-city-pulsar-manager` crash au démarrage +- Erreurs : `Object 'ENVIRONMENTS' not found` (HerdDB), problèmes d'initialisation PostgreSQL +- Solution alternative : Utiliser l'API Pulsar Admin directe (`http://localhost:8080/admin/v2/...`) -## 🔗 URLs importantes -- **Grafana** : https://grafana.digitribe.fr/d/smartcity-martinique-2026/smart-city-digital-twin-martinique -- **Gitea** : https://gitea.digitribe.fr/eric/smart-city-digital-twin-martinique -- **Simulateur logs** : `docker logs smart-city-simulator --tail 200` +### 2. Distribution Service - ERREUR DOCKER-COMPOSE +- `KeyError: 'ContainerConfig'` lors du `docker-compose up -d pulsar-distribution` +- Nécessite suppression manuelle du conteneur et reconstruction +- Service fonctionnel en théorie mais instable en pratique + +### 3. InfluxDB - AUCUNE DONNÉE VISIBLE +- Simulateur configuré pour InfluxDB (`ENABLE_INFLUX=1`) +- Aucune donnée visible dans les queries InfluxDB +- À diagnostiquer : connectivité simulateur → InfluxDB + +### 4. 
Traefik Let's Encrypt - ÉCHEC +- Problèmes de certificats sur `pulsar.digitribe.fr` et `redpanda-console.digitribe.fr` +- Cause probable : domaine non public ou configuration DNS +- Solution temporaire : accès HTTP direct (localhost:7750, localhost:28080) + +## Fichiers modifiés/créés 📁 + +### Redpanda +- `redpanda/docker-compose.yml` : Ajout service `redpanda-console` (conteneur `smart-city-redpanda-console`) +- `redpanda/console.yaml` : Configuration Redpanda Console + +### Pulsar +- `pulsar/docker-compose.yml` : Ajout services `pulsar-manager` et `pulsar-distribution` +- `pulsar/distribution.py` : Service de distribution (déjà existant) + +### Prometheus +- `prometheus.yml` : Ajout cibles `pulsar` et `redpanda` + +### Grafana +- `grafana/provisioning/dashboards/redpanda-metrics.json` : **Créé** +- `grafana/provisioning/dashboards/pulsar-metrics.json` : **Créé** +- `grafana/provisioning/dashboards/smart-city-ingeston.json` : **Créé** + +## À faire pour la prochaine session 📋 + +### Priorité 1 : Ingestion de données +1. **Diagnostiquer InfluxDB** : Pourquoi aucune donnée n'arrive ? + - Vérifier les logs du simulateur (`docker logs smart-city-simulator | grep Influx`) + - Tester la connexion manuelle depuis le simulateur + - Vérifier le token InfluxDB et l'organisation + +2. **Stabiliser le service de distribution** + - Corriger l'erreur `KeyError: 'ContainerConfig'` + - Lancer manuellement le conteneur si nécessaire + - Vérifier que les messages Pulsar sont bien republiés vers les brokers + +### Priorité 2 : Monitoring et Visualisation +3. **Tester les dashboards Grafana** + - Accéder à http://localhost:3001 (admin/Digitribe972) + - Vérifier que les panels affichent des données (InfluxDB, Prometheus) + - Ajuster les requêtes Flux si nécessaire + +4. **Corriger Pulsar Manager (optionnel)** + - Utiliser une base PostgreSQL externe propre + - Ou passer à une alternative (Kafka Manager, ou utiliser l'API directe) + +### Priorité 3 : Traefik et Domaines +5. 
**Résoudre Let's Encrypt** + - Vérifier la configuration DNS pour `*.digitribe.fr` + - Tester l'accessibilité publique des services + - Configurer des certificats SSL valides + +## Commandes utiles 🛠️ + +### Vérifier les services +```bash +cd ~/smart-city-digital-twin-martinique +docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" +``` + +### Voir les logs +```bash +docker logs smart-city-simulator --tail 50 | grep -E "(Pulsar|Influx|ERROR)" +docker logs smart-city-pulsar-distribution --tail 50 +``` + +### Test Pulsar +```bash +curl http://localhost:8080/admin/v2/clusters +curl -s -o /dev/null -w "%{http_code}" http://localhost:28080 # Redpanda Console +``` + +### Test InfluxDB +```bash +curl -s -H "Authorization: Token my-super-secret-admin-token" \ + "http://smart-city-influxdb:8086/api/v2/query?org=digitribe" \ + -d 'from(bucket:"iot_data") |> range(start:-1h) |> limit(n:5)' +``` + +## URLs d'accès 🌐 +- **Redpanda Console** : http://localhost:28080 +- **Grafana** : http://localhost:3001 (admin/Digitribe972) +- **Prometheus** : http://localhost:9090 +- **Pulsar Admin API** : http://localhost:8080/admin/v2/clusters +- **FROST-Server** : http://localhost:8090/FROST-Server/v1.1 + +## Notes importantes 📝 +- Le simulateur utilise le **protocole binaire Pulsar** (port 6650, pas 8080) +- L'ingestion centralisée passe par **Pulsar puis distribution** vers les brokers +- Redpanda Console est fonctionnel et permet de monitorer les topics Kafka +- Les dashboards Grafana sont prêts mais nécessitent des données pour être utiles +- Pulsar Manager reste instable, privilégier l'API Pulsar directe pour le monitoring --- -*Session crashee à 03h45 (limite d'itérations atteinte). Prochaine session : reprendre à "Grafana No Data".* +*Session du 2026-05-05 - Digitribe Martinique*