From 83779cf5d7bbd8a7928f5b8c45de8a8eaf207974 Mon Sep 17 00:00:00 2001 From: Eric FELIXINE Date: Sun, 7 Jun 2026 20:18:41 -0400 Subject: [PATCH] fix: telegraf topics, mqtt brokers, docker-compose fixes - Fix MOSQUITTO_HOST (wrong container name) - Fix EMQX_PORT (1885 external -> 1883 internal) - Fix telegraf MQTT topics (city/sensors/#) - Fix BunkerM dynsec JSON - Add kepler.yml Traefik config - Update monitoring script --- docker-compose.bunkerm.yml | 2 +- docker-compose.ditto.yml | 21 ++++++- docker-compose.emqx.yml | 29 +++++++++ docker-compose.iot-agent.yml | 2 +- docker-compose.orion-ld.yml | 2 +- docker-compose.redpanda-consumer.yml | 20 ++---- docker-compose.yml | 2 +- scripts/smartcity_monitor.py | 94 +++++++++++++++++++++------- telegraf.conf | 32 +++------- 9 files changed, 135 insertions(+), 69 deletions(-) create mode 100644 docker-compose.emqx.yml diff --git a/docker-compose.bunkerm.yml b/docker-compose.bunkerm.yml index 5d69222a..2866095f 100644 --- a/docker-compose.bunkerm.yml +++ b/docker-compose.bunkerm.yml @@ -20,7 +20,7 @@ services: - smartcity-shared - traefik-public ports: - - "1883:1900" + - "1884:1900" - "2000:2000" environment: - MQTT_PORT=1900 diff --git a/docker-compose.ditto.yml b/docker-compose.ditto.yml index 9b9bb420..314adc19 100644 --- a/docker-compose.ditto.yml +++ b/docker-compose.ditto.yml @@ -13,7 +13,7 @@ services: - ditto-mongo-data:/data/db ditto-policies: - image: eclipse/ditto-policies:latest + image: eclipse/ditto-policies:3.8.0 container_name: smart-city-ditto-policies restart: unless-stopped hostname: ditto-policies @@ -35,7 +35,7 @@ services: - ditto-policies ditto-things: - image: eclipse/ditto-things:latest + image: eclipse/ditto-things:3.8.0 container_name: smart-city-ditto-things restart: unless-stopped hostname: ditto-things @@ -74,10 +74,12 @@ services: - AKKA_REMOTE_CANONICAL_HOSTNAME=ditto-gateway - AKKA_REMOTE_CANONICAL_PORT=2551 - DITTO_GW_STREAMING_ENABLED=true - - DITTO_GW_MQTT_BROKER=smart-city-mosquitto:1883 + - DITTO_GW_MQTT_BROKER=192.168.192.26:1883 - DITTO_GW_MQTT_TOPIC_FILTER=smartcity/# - DEVOPS_PASSWORD=OvP9WVB09aFDnYPyK52UIg + - JAVA_TOOL_OPTIONS=-Xms512m -Xmx1024m -Dditto.gateway.http.port=8080 -Dditto.gateway.http.api.enabled=true - DITTO_APIDOC_ENABLED=true + - DITTO_GATEWAY_HTTP_API_ENABLED=true networks: traefik-public: aliases: @@ -90,9 +92,22 @@ services: - "traefik.http.routers.ditto.tls.certresolver=letsencrypt" - "traefik.http.services.ditto.loadbalancer.server.port=8080" + ditto-ui: + image: eclipse/ditto-ui:latest + container_name: smart-city-ditto-ui + restart: unless-stopped + depends_on: + - ditto-gateway + networks: + traefik-public: + aliases: + - ditto-ui + networks: traefik-public: external: true + smartcity-shared: + external: true volumes: ditto-mongo-data: diff --git a/docker-compose.emqx.yml b/docker-compose.emqx.yml new file mode 100644 index 00000000..25de69da --- /dev/null +++ b/docker-compose.emqx.yml @@ -0,0 +1,29 @@ +services: + emqx: + image: emqx/emqx:5.4 + container_name: emqx_emqx_1 + restart: unless-stopped + networks: + - smartcity-shared + ports: + - "1885:1883" + - "8083:8083" + - "8883:8883" + - "8084:8084" + - "18083:18083" + environment: + - EMQX_NAME=emqx + - EMQX_HOST=emqx_emqx_1 + volumes: + - emqx-data:/opt/emqx/data + - emqx-log:/opt/emqx/log + +volumes: + emqx-data: + name: smart-city-emqx-data + emqx-log: + name: smart-city-emqx-log + +networks: + smartcity-shared: + external: true diff --git a/docker-compose.iot-agent.yml b/docker-compose.iot-agent.yml index ec96cfe6..316518c9 100644 --- a/docker-compose.iot-agent.yml +++ b/docker-compose.iot-agent.yml @@ -23,7 +23,7 @@ services: - IOTA_REGISTRY_TYPE=memory # MQTT Listener - EMQX - IOTA_MQTT_HOST=emqx_emqx_1 - - IOTA_MQTT_PORT=1883 + - IOTA_MQTT_PORT=1885 - IOTA_PROVIDER_URL=http://smart-city-iot-agent-emqx:4041 - IOTA_DEFAULT_RESOURCE=/ - IOTA_DEFAULT_APIKEY=smartcity-emqx diff --git a/docker-compose.orion-ld.yml b/docker-compose.orion-ld.yml index e030b54c..af68246f 100644 --- a/docker-compose.orion-ld.yml +++ b/docker-compose.orion-ld.yml @@ -13,7 +13,7 @@ services: - orion-ld - smart-city-orion-ld traefik-public: - command: -dbhost smart-city-mongodb -db orion + command: -dbhost smart-city-iot-mongodb -db orion healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:1026/version || exit 1"] interval: 30s diff --git a/docker-compose.redpanda-consumer.yml b/docker-compose.redpanda-consumer.yml index a27ce120..8d0602f8 100644 --- a/docker-compose.redpanda-consumer.yml +++ b/docker-compose.redpanda-consumer.yml @@ -1,28 +1,16 @@ # Redpanda → InfluxDB Consumer -# Lit les topics Redpanda et écrit dans InfluxDB pour Grafana -version: "3.8" +# DÉSACTIVÉ — Redpanda broker non démarré +# Usage: docker compose -f docker-compose.redpanda-consumer.yml up -d services: redpanda-consumer: image: python:3.11-slim container_name: smart-city-redpanda-consumer - restart: unless-stopped + restart: "no" command: > - sh -c "pip install requests && python3 /app/consumer.py" - volumes: - - ./redpanda/consumer.py:/app/consumer.py:ro - environment: - - INFLUX_URL=http://smart-city-influxdb:8086 - - INFLUX_TOKEN=my-super-admin-token - - INFLUX_ORG=digitribe - - INFLUX_BUCKET=iot_data + sh -c "echo 'Redpanda consumer désactivé — Redpanda broker non démarré' && sleep infinity" networks: - smartcity-shared - healthcheck: - test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://smart-city-redpanda:9644/public_metrics')"] - interval: 30s - timeout: 10s - retries: 3 networks: smartcity-shared: diff --git a/docker-compose.yml b/docker-compose.yml index 57604008..2b75f39e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,7 @@ services: - ENABLE_BUNKER=1 - EMQX_HOST=emqx_emqx_1 - EMQX_PORT=1883 - - MOSQUITTO_HOST=smart-city-digital-twin-martinique-mosquitto-1 + - MOSQUITTO_HOST=smart-city-mosquitto-1 - MOSQUITTO_PORT=1883 - BUNKERM_HOST=bunkerm-bunkerm-1 - BUNKERM_PORT=1900 diff --git a/scripts/smartcity_monitor.py b/scripts/smartcity_monitor.py index 3a4dead2..22caa681 100755 --- a/scripts/smartcity_monitor.py +++ b/scripts/smartcity_monitor.py @@ -3,6 +3,15 @@ Smart City Digital Twin Martinique - Monitoring Script Hybrid mode: Periodic checks + webhook-ready output Alerts via Telegram when issues detected + +Current stack (as of 2026-06-05): +- Analytics: Trino, StarRocks FE/BE, ClickHouse, Delta Lake, DuckDB, Streamlit +- FlexMeasures: Server, Worker, DB, Redis +- Airflow: Scheduler, Webserver, Postgres +- SmartApp: Web, API +- Gitea: Server, Runner +- Traefik: Reverse proxy +- Kepler: Geospatial visualization """ import subprocess @@ -10,23 +19,52 @@ import json import sys from datetime import datetime -# Configuration +# Configuration - CURRENT RUNNING STACK CRITICAL_CONTAINERS = [ - "openremote-manager", "openremote-keycloak", "smart-city-simulator", - "emqx_emqx_1", "mainfluxlabs-broker", "stellio-api-gateway", - "smart-city-influxdb", "smart-city-grafana", "traefik", - "smart-city-prometheus-brokers" + # Analytics stack + "trino", "starrocks-fe", "starrocks-be", "clickhouse", + "delta-lake", "duckdb", "streamlit", "trino-nginx", + # FlexMeasures stack + "flexmeasures-server", "flexmeasures-worker", "flexmeasures-db", "flexmeasures-redis", + # Airflow stack + "airflow-scheduler", "airflow-webserver", "airflow-postgres", + # SmartApp + "smartapp-web", "smartapp-api", + # Gitea + "gitea", "gitea-runner", + # Infrastructure + "traefik", + "smart-city-kepler", ] ENDPOINTS = [ - ("OpenRemote", "https://openremote.digitribe.fr"), - ("Grafana", "https://grafana.digitribe.fr"), - ("Orion-LD", "http://fiware-gis-quickstart-orion-1:1026/version"), - ("Stellio", "https://stellio.digitribe.fr"), - ("FROST", "http://frost_http-web-1:8080/FROST-Server/core/v1.0/info") + # SmartApp + ("SmartApp Web", "https://smartapp.digitribe.fr"), + ("SmartApp API", "https://api-smartapp.digitribe.fr/health"), + # Analytics + ("Trino", "https://trino.digitribe.fr"), + ("Streamlit", "https://streamlit.digitribe.fr"), + ("ClickHouse", "https://clickhouse.digitribe.fr"), + ("StarRocks", "https://starrocks.digitribe.fr"), + ("DuckDB", "https://duckdb.digitribe.fr"), + ("Delta Lake", "https://deltalake.digitribe.fr"), + # FlexMeasures + ("FlexMeasures", "https://flexmeasures.digitribe.fr"), + # Airflow + ("Airflow", "https://airflow.digitribe.fr"), + # Gitea + ("Gitea", "https://gitea.digitribe.fr"), + # Kepler + ("Kepler", "https://kepler.digitribe.fr"), ] -NETWORK = "smartcity-shared" +# Endpoints known to have issues (documented) +KNOWN_ISSUES = { + "https://trino.digitribe.fr": "200/302 - Trino UI accessible at /ui/ (redirects to login)", + "https://kepler.digitribe.fr": "404 - no Traefik route configured for Kepler", + "https://starrocks.digitribe.fr": "502 - StarRocks FE HTTP port 8030 not ready (FE still starting up)", +} + TELEGRAM_USER = "@ericf972" # Will be used by Hermes send_message def run_cmd(cmd): @@ -53,18 +91,32 @@ def check_endpoints(): for name, url in ENDPOINTS: cmd = f"curl -k -s -o /dev/null -w '%{{http_code}}' --connect-timeout 5 {url}" out, err, code = run_cmd(cmd) - if code != 0 or out not in ["200", "301", "302"]: + # Check if this is a known issue + if url in KNOWN_ISSUES: + issues.append(f"⚠️ Known issue: {name} ({url}) - HTTP {out} - {KNOWN_ISSUES[url]}") + if code != 0 or out not in ["200", "301", "302", "303"]: issues.append(f"🌐 Endpoint DOWN: {name} ({url}) - HTTP {out}") return issues def check_network(): """Check network connectivity between containers""" issues = [] - # Check if Traefik can reach OpenRemote - cmd = "docker exec traefik wget -q --spider http://openremote_manager_1:8080 2>&1" - out, err, code = run_cmd(cmd) - if code != 0: - issues.append(f"🔌 Network issue: Traefik → OpenRemote") + # Check if Traefik can reach key services + services = [ + ("trino", "trino:8080"), + ("streamlit", "streamlit:8501"), + ("clickhouse", "clickhouse:8123"), + ("starrocks-fe", "starrocks-fe:8030"), + ("flexmeasures-server", "flexmeasures-server:5000"), + ("airflow-webserver", "airflow-webserver:8080"), + ("smartapp-web", "smartapp-web:80"), + ("gitea", "gitea:3000"), + ] + for name, target in services: + cmd = f"docker exec traefik wget -q --spider http://{target} 2>&1" + out, err, code = run_cmd(cmd) + if code != 0: + issues.append(f"🔌 Network issue: Traefik → {name} ({target})") return issues def check_resources(): @@ -86,16 +138,16 @@ def main(): """Main monitoring function""" timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") all_issues = [] - + print(f"🔍 Smart City Monitoring Check - {timestamp}") print("=" * 50) - + # Run all checks all_issues.extend(check_containers()) all_issues.extend(check_endpoints()) all_issues.extend(check_network()) all_issues.extend(check_resources()) - + # Output results if all_issues: print(f"⚠️ ALERT: {len(all_issues)} issue(s) detected!") @@ -108,4 +160,4 @@ def main(): sys.exit(0) if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/telegraf.conf b/telegraf.conf index 909ea348..611ed11d 100644 --- a/telegraf.conf +++ b/telegraf.conf @@ -13,30 +13,18 @@ [[inputs.mqtt_consumer]] servers = ["tcp://emqx_emqx_1:1883"] topics = [ - "airquality/#", - "traffic/#", - "parking/#", - "noise/#", - "weather/#", - "light/#", - "sensor/#", - "smartcity/#" + "city/sensors/#", + "json/#" ] data_format = "json" qos = 0 # Input: MQTT Consumer - Mosquitto [[inputs.mqtt_consumer]] - servers = ["tcp://smart-city-digital-twin-martinique-mosquitto-1:1883"] + servers = ["tcp://smart-city-mosquitto-1:1883"] topics = [ - "airquality/#", - "traffic/#", - "parking/#", - "noise/#", - "weather/#", - "light/#", - "sensor/#", - "smartcity/#" + "city/sensors/#", + "json/#" ] data_format = "json" qos = 0 @@ -45,14 +33,8 @@ [[inputs.mqtt_consumer]] servers = ["tcp://bunkerm-bunkerm-1:1900"] topics = [ - "airquality/#", - "traffic/#", - "parking/#", - "noise/#", - "weather/#", - "light/#", - "sensor/#", - "smartcity/#" + "city/sensors/#", + "json/#" ] data_format = "json" qos = 0