#!/usr/bin/env python3
"""
Smart City Digital Twin Martinique - Monitoring Script

Hybrid mode: periodic checks + webhook-ready output.
Alerts via Telegram when issues are detected.

The script runs four groups of checks (containers, HTTP endpoints,
container-to-container network, host resources), prints a report on stdout
and exits with status 1 when at least one issue was found, 0 otherwise.
"""

import json
import re
import subprocess
import sys
from datetime import datetime
from typing import Iterable, List, Optional, Sequence, Tuple, Union

# Configuration
CRITICAL_CONTAINERS = [
    "openremote-manager",
    "openremote-keycloak",
    "smart-city-simulator",
    "emqx_emqx_1",
    "mainfluxlabs-broker",
    "stellio-api-gateway",
    "smart-city-influxdb",
    "smart-city-grafana",
    "traefik",
    "smart-city-prometheus-brokers",
]

ENDPOINTS = [
    ("OpenRemote", "https://openremote.digitribe.fr"),
    ("Grafana", "https://grafana.digitribe.fr"),
    ("Orion-LD", "http://fiware-gis-quickstart-orion-1:1026/version"),
    ("Stellio", "https://stellio.digitribe.fr"),
    ("FROST", "http://frost_http-web-1:8080/FROST-Server/core/v1.0/info"),
]

NETWORK = "smartcity-shared"
TELEGRAM_USER = "@ericf972"  # Will be used by Hermes send_message


def run_cmd(
    cmd: Union[str, Sequence[str]], timeout: float = 10
) -> Tuple[str, str, int]:
    """Run a command and return ``(stdout, stderr, returncode)``.

    Args:
        cmd: Either a shell command line (``str``; executed through the
            shell, so pipes and redirections work) or an argument list
            (executed directly, without a shell, so no quoting is needed).
        timeout: Maximum run time in seconds before the command is aborted.

    Returns:
        A tuple of stripped stdout, stripped stderr and the exit status.
        If the command cannot be started, times out, or its output cannot
        be decoded, the tuple is ``("", <error text>, 1)``.
    """
    try:
        result = subprocess.run(
            cmd,
            shell=isinstance(cmd, str),  # only strings need the shell
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except Exception as e:
        # Deliberately broad: a monitoring probe must never crash the run;
        # any failure to execute is reported as a failed command instead.
        return "", str(e), 1
    return result.stdout.strip(), result.stderr.strip(), result.returncode


def _contains_word(word: str, text: str) -> bool:
    """Return True if *word* occurs in *text* as a whole word.

    Mirrors ``grep -w``: the match must not be directly preceded or followed
    by a letter, digit or underscore. *word* is matched literally.
    """
    pattern = rf"(?<!\w){re.escape(word)}(?!\w)"
    return re.search(pattern, text, flags=re.ASCII) is not None


def check_containers(running_names: Optional[Iterable[str]] = None) -> List[str]:
    """Check if critical containers are running.

    Args:
        running_names: Names of the running containers, one per item. When
            omitted, the names are read once from ``docker ps``.

    Returns:
        One "Container DOWN" message per entry of ``CRITICAL_CONTAINERS``
        that does not appear as a whole word in the running-container
        listing. If ``docker ps`` produces no output (for instance because
        it failed), every critical container is reported.
    """
    if running_names is None:
        # One listing for all containers instead of one pipeline per name.
        out, _err, _code = run_cmd(["docker", "ps", "--format", "{{.Names}}"])
        running_names = out.splitlines()

    # Newlines are non-word characters, so a single joined string keeps the
    # per-line whole-word semantics.
    listing = "\n".join(running_names)
    return [
        f"🛑 Container DOWN: {container}"
        for container in CRITICAL_CONTAINERS
        if not _contains_word(container, listing)
    ]


def check_endpoints() -> List[str]:
    """Check if key endpoints are accessible.

    Each URL in ``ENDPOINTS`` is requested with ``curl`` (certificate
    verification disabled, 5 s connect timeout). An endpoint is considered
    up when curl exits with 0 and the HTTP status is 200, 301 or 302.

    Returns:
        One "Endpoint DOWN" message per failing endpoint.
    """
    healthy_codes = ("200", "301", "302")
    issues = []
    for name, url in ENDPOINTS:
        # Argument list: the URL is passed verbatim, no shell quoting issues.
        cmd = [
            "curl", "-k", "-s",
            "-o", "/dev/null",
            "-w", "%{http_code}",
            "--connect-timeout", "5",
            url,
        ]
        out, _err, code = run_cmd(cmd)
        if code != 0 or out not in healthy_codes:
            issues.append(f"🌐 Endpoint DOWN: {name} ({url}) - HTTP {out}")
    return issues


def check_network() -> List[str]:
    """Check network connectivity between containers.

    Runs ``wget --spider`` inside the ``traefik`` container against the
    OpenRemote manager and reports an issue when the command exits non-zero.

    Returns:
        A list with at most one "Network issue" message.
    """
    # Check if Traefik can reach OpenRemote
    cmd = [
        "docker", "exec", "traefik",
        "wget", "-q", "--spider", "http://openremote_manager_1:8080",
    ]
    _out, _err, code = run_cmd(cmd)
    if code != 0:
        return ["🔌 Network issue: Traefik → OpenRemote"]
    return []


def _read_percent(cmd: str) -> Optional[int]:
    """Run shell command *cmd* and parse its output as an integer percentage.

    Returns None when the output is empty or not an integer, so that an
    unreadable metric is skipped instead of aborting the whole run.
    """
    out, _err, _code = run_cmd(cmd)
    try:
        return int(out)
    except ValueError:
        return None


def check_resources() -> List[str]:
    """Check system resources.

    Reports disk usage of ``/`` above 80 % and memory usage above 90 %.
    A metric whose command yields no usable number is skipped.

    Returns:
        A list of "critical" messages, empty when both metrics are fine.
    """
    issues = []

    # Disk space
    disk_used = _read_percent("df -h / | awk 'NR==2 {print $5}' | tr -d '%'")
    if disk_used is not None and disk_used > 80:
        issues.append(f"💾 Disk space critical: {disk_used}% used")

    # Memory
    mem_used = _read_percent("free | awk '/Mem:/ {print int($3/$2 * 100)}'")
    if mem_used is not None and mem_used > 90:
        issues.append(f"🧠 Memory critical: {mem_used}% used")

    return issues


def main() -> None:
    """Main monitoring function.

    Runs every check, prints the report and terminates the process with
    exit status 1 if any issue was detected, 0 otherwise.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    all_issues = []

    print(f"🔍 Smart City Monitoring Check - {timestamp}")
    print("=" * 50)

    # Run all checks
    all_issues.extend(check_containers())
    all_issues.extend(check_endpoints())
    all_issues.extend(check_network())
    all_issues.extend(check_resources())

    # Output results
    if all_issues:
        print(f"⚠️ ALERT: {len(all_issues)} issue(s) detected!")
        for issue in all_issues:
            print(f" - {issue}")
        # This output will be captured by Hermes cron job and sent to Telegram
        sys.exit(1)  # Non-zero exit code indicates issues
    else:
        print("✅ All systems operational")
        sys.exit(0)


if __name__ == "__main__":
    main()