Session 2026-05-06: QuantumLeap+CrateDB, Telegraf debug, MapStore GeoServer fix
This commit is contained in:
111
scripts/smartcity_monitor.py
Executable file
111
scripts/smartcity_monitor.py
Executable file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Smart City Digital Twin Martinique - Monitoring Script
|
||||
Hybrid mode: Periodic checks + webhook-ready output
|
||||
Alerts via Telegram when issues detected
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
# Configuration

# Container names that check_containers() expects to find in `docker ps`.
CRITICAL_CONTAINERS = [
    "openremote_manager_1",
    "openremote_keycloak_1",
    "smart-city-simulator",
    "emqx_emqx_1",
    "mainfluxlabs-broker",
    "stellio-api-gateway",
    "smart-city-influxdb",
    "smart-city-grafana",
    "traefik",
    "smart-city-prometheus-brokers",
]

# (display name, URL) pairs probed with curl by check_endpoints().
ENDPOINTS = [
    ("OpenRemote", "https://openremote.digitribe.fr"),
    ("Grafana", "https://grafana.digitribe.fr"),
    ("Orion-LD", "http://fiware-gis-quickstart-orion-1:1026/version"),
    ("Stellio", "https://stellio.digitribe.fr"),
    ("FROST", "http://frost_http-web-1:8080/FROST-Server/core/v1.0/info"),
]

# NOTE(review): presumably the shared Docker network name; none of the checks
# in this script read it -- confirm whether it is still needed.
NETWORK = "smartcity-shared"
TELEGRAM_USER = "@ericf972"  # Will be used by Hermes send_message
|
||||
|
||||
def run_cmd(cmd, timeout=10):
    """Run a command and return ``(stdout, stderr, returncode)``.

    Args:
        cmd: Either a shell command line (``str``), which is executed through
            the shell so pipes and redirections work, or a sequence of
            arguments, which is executed directly without a shell.
        timeout: Seconds to wait before the command is abandoned.

    Returns:
        A 3-tuple of stripped stdout, stripped stderr and the exit status.
        If the command cannot be started, exceeds *timeout*, or produces
        output that cannot be handled, the tuple is
        ``("", <error description>, 1)`` instead of an exception.
    """
    # Only plain strings need the shell; argv lists bypass it entirely, which
    # avoids quoting problems for callers that do not need pipes.
    use_shell = isinstance(cmd, str)
    try:
        result = subprocess.run(
            cmd,
            shell=use_shell,
            capture_output=True,
            text=True,
            # Undecodable bytes in tool output must not abort a health check.
            errors="replace",
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # OSError: executable missing / not runnable.
        # SubprocessError: includes TimeoutExpired.
        # ValueError: invalid arguments handed to Popen.
        return "", str(exc), 1
    return result.stdout.strip(), result.stderr.strip(), result.returncode
|
||||
|
||||
def check_containers():
    """Report every critical container that is not currently running.

    Returns:
        A list with one ``"🛑 Container DOWN: <name>"`` entry per name in
        ``CRITICAL_CONTAINERS`` that is absent from ``docker ps``. The list is
        empty when all of them are running. If ``docker ps`` itself fails it
        produces no names, so every critical container is reported.
    """
    # List the running containers once and compare names exactly. Matching
    # with `grep -w` is only a word match: "traefik" would also be satisfied
    # by a running "traefik-foo", and "." in a name acts as a regex wildcard.
    out, _err, _code = run_cmd("docker ps --format '{{.Names}}'")
    running = set(out.split())
    return [
        f"🛑 Container DOWN: {container}"
        for container in CRITICAL_CONTAINERS
        if container not in running
    ]
|
||||
|
||||
def check_endpoints():
    """Probe each configured endpoint with curl and report the failing ones.

    An endpoint is considered up when curl exits with status 0 and the HTTP
    status code is 200, 301 or 302.

    Returns:
        A list with one ``"🌐 Endpoint DOWN: ..."`` entry per failing
        ``(name, url)`` pair from ``ENDPOINTS``; empty when all respond.
    """
    import shlex  # local import: only this check builds shell arguments

    accepted_codes = {"200", "301", "302"}
    issues = []
    for name, url in ENDPOINTS:
        # The URL is shell-quoted so characters such as "&", "?" or ";" in a
        # configured URL are passed to curl instead of being interpreted by
        # the shell. --max-time bounds the whole transfer, not only the
        # connection phase.
        # NOTE(review): -k disables TLS certificate verification, so an
        # endpoint serving an invalid/expired certificate is still reported
        # as up -- confirm this is intended.
        cmd = (
            "curl -k -s -o /dev/null -w '%{http_code}' "
            f"--connect-timeout 5 --max-time 8 {shlex.quote(url)}"
        )
        out, _err, code = run_cmd(cmd)
        if code != 0 or out not in accepted_codes:
            issues.append(f"🌐 Endpoint DOWN: {name} ({url}) - HTTP {out}")
    return issues
|
||||
|
||||
def check_network():
    """Verify that the traefik container can reach the OpenRemote manager.

    Runs ``wget --spider`` inside the ``traefik`` container against
    ``http://openremote_manager_1:8080``.

    Returns:
        An empty list when the probe exits with status 0, otherwise a list
        holding a single network-issue message.
    """
    probe = "docker exec traefik wget -q --spider http://openremote_manager_1:8080 2>&1"
    _stdout, _stderr, status = run_cmd(probe)
    if status == 0:
        return []
    return ["🔌 Network issue: Traefik → OpenRemote"]
|
||||
|
||||
def check_resources(disk_threshold=80, memory_threshold=90):
    """Check root-filesystem and memory usage against alert thresholds.

    Args:
        disk_threshold: Percentage of ``/`` in use above which a disk issue
            is reported.
        memory_threshold: Percentage of memory in use above which a memory
            issue is reported.

    Returns:
        A list of issue messages (zero, one or two entries). A measurement
        whose command output is not a plain integer is skipped rather than
        raising, so one unavailable tool cannot abort the whole run.
    """

    def _read_percent(cmd):
        """Return the command's output as an int, or None if not numeric."""
        out, _err, _code = run_cmd(cmd)
        try:
            return int(out)
        except ValueError:
            return None

    issues = []

    # Disk space. -P selects the POSIX output format, which keeps each
    # filesystem on a single line so that `NR==2` is always the data row.
    disk_used = _read_percent("df -P / | awk 'NR==2 {print $5}' | tr -d '%'")
    if disk_used is not None and disk_used > disk_threshold:
        issues.append(f"💾 Disk space critical: {disk_used}% used")

    # Memory. The C locale is forced in case the host locale translates the
    # "Mem:" row label that awk matches on.
    mem_used = _read_percent(
        "LC_ALL=C free | awk '/Mem:/ {print int($3/$2 * 100)}'"
    )
    if mem_used is not None and mem_used > memory_threshold:
        issues.append(f"🧠 Memory critical: {mem_used}% used")

    return issues
|
||||
|
||||
def main():
    """Run every health check, print a report and exit with a status code.

    Exits with status 0 when no check reports an issue and with status 1
    otherwise.
    """
    started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"🔍 Smart City Monitoring Check - {started_at}")
    print("=" * 50)

    # Run all checks, in order, and pool whatever they report.
    checks = (check_containers, check_endpoints, check_network, check_resources)
    found = []
    for check in checks:
        found.extend(check())

    if not found:
        print("✅ All systems operational")
        sys.exit(0)

    print(f"⚠️ ALERT: {len(found)} issue(s) detected!")
    for problem in found:
        print(f" - {problem}")
    # This output will be captured by Hermes cron job and sent to Telegram
    sys.exit(1)  # Non-zero exit code indicates issues


if __name__ == "__main__":
    main()
|
||||
60
scripts/webhook_listener.py
Normal file
60
scripts/webhook_listener.py
Normal file
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Webhook listener for Smart City Digital Twin Alerts
|
||||
Receives Docker events and sends Telegram alerts
|
||||
"""
|
||||
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
import json
|
||||
import subprocess
|
||||
import threading
|
||||
|
||||
TELEGRAM_USER = "@ericf972"  # Will be replaced with actual send_message in production


class WebhookHandler(BaseHTTPRequestHandler):
    """HTTP handler that turns container stop events into Telegram alerts.

    A POST body is expected to be a JSON object carrying ``Type``,
    ``Action``, ``Actor.Attributes.name`` and ``time`` fields. Events whose
    ``Type`` is ``container`` and whose ``Action`` is one of
    ``ALERT_ACTIONS`` are forwarded through the ``hermes send-message``
    command; all other events are acknowledged and ignored.

    Response codes: 200 processed, 400 malformed request, 411 missing
    ``Content-Length``, 413 body too large, 500 processing/alert failure.
    """

    # Largest request body accepted, in bytes; larger requests are rejected
    # without being read.
    MAX_BODY_BYTES = 1_048_576
    # Container actions that trigger an alert.
    ALERT_ACTIONS = ('die', 'destroy', 'stop')

    def do_POST(self):
        """Validate the request, decode the JSON event and process it."""
        raw_length = self.headers.get('Content-Length')
        if raw_length is None:
            self._respond(411)
            return
        try:
            content_length = int(raw_length)
        except ValueError:
            self._respond(400)
            return
        if content_length < 0:
            self._respond(400)
            return
        if content_length > self.MAX_BODY_BYTES:
            self._respond(413)
            return

        post_data = self.rfile.read(content_length)
        try:
            event = json.loads(post_data.decode('utf-8'))
        except ValueError as e:
            # Covers both invalid UTF-8 and invalid JSON: a client error.
            print(f"Error processing webhook: {e}")
            self._respond(400)
            return
        if not isinstance(event, dict):
            print("Error processing webhook: JSON body is not an object")
            self._respond(400)
            return

        try:
            self.process_event(event)
        except Exception as e:
            # Request boundary: report the failure to the sender instead of
            # dropping the connection.
            print(f"Error processing webhook: {e}")
            self._respond(500)
            return
        self._respond(200)

    def process_event(self, event):
        """Send a Telegram alert if *event* is a container stop event.

        Args:
            event: Decoded JSON object of the webhook request.

        Raises:
            OSError: The ``hermes`` command could not be started.
            subprocess.SubprocessError: ``hermes`` timed out or exited with
                a non-zero status.
        """
        event_type = event.get('Type', '')
        event_action = event.get('Action', '')
        if event_type != 'container' or event_action not in self.ALERT_ACTIONS:
            return

        # "Actor" / "Attributes" may be absent or null; fall back to an
        # empty name rather than failing.
        actor = event.get('Actor') or {}
        attributes = actor.get('Attributes') or {}
        event_actor = attributes.get('name', '')

        message = (
            "🚨 Smart City Alert!\n"
            f"Container: {event_actor}\n"
            f"Action: {event_action}\n"
            f"Time: {event.get('time', '')}\n"
        )

        # Send Telegram alert (using subprocess to call Hermes send_message).
        # check=True makes a non-zero exit raise, so "Alert sent" is only
        # printed when the command actually succeeded.
        subprocess.run([
            'hermes', 'send-message',
            '--target', TELEGRAM_USER,
            '--message', message
        ], timeout=10, check=True)
        print(f"Alert sent: {message}")

    def log_message(self, format, *args):
        """Suppress default logging"""
        pass

    def _respond(self, status):
        """Send a header-only response with the given HTTP status code."""
        self.send_response(status)
        self.end_headers()
|
||||
|
||||
def run_webhook_server(port=8089, host='0.0.0.0'):
    """Start the webhook server and serve until interrupted.

    Args:
        port: TCP port to listen on.
        host: Address to bind to. The default listens on all interfaces.
            NOTE(review): the handler performs no authentication, so binding
            to a specific internal address is safer where possible.

    The listening socket is always closed on exit, and Ctrl-C stops the
    server without a traceback.
    """
    server = HTTPServer((host, port), WebhookHandler)
    print(f"Webhook server started on port {port}")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("Webhook server stopped")
    finally:
        # Release the listening socket so the port can be reused at once.
        server.server_close()


if __name__ == '__main__':
    run_webhook_server()
|
||||
Reference in New Issue
Block a user