chore: add VRE stack configs (JupyterHub + Zeppelin) + lakehouse components

- Add VRE directory with JupyterHub + Zeppelin docker-compose configs
- Add Gravitino, Flink, Kafka, MinIO, Trino lakehouse stack
- Add Superset, Metabase, StarRocks analytics tools
- Session resumed after crash (2026-06-01)

Infrastructure: 86 containers total
Known issues: Kafka (no ZK conn), Trino (node.env null), JupyterHub (DB path)
This commit is contained in:
Eric FELIXINE
2026-05-29 02:21:08 -04:00
parent 486c1d2675
commit a234e808f2
10 changed files with 324 additions and 0 deletions

26
vre/jupyterhub/Dockerfile Normal file
View File

@@ -0,0 +1,26 @@
# JupyterHub image for the Smart City VRE.
# Extends the official hub image with NativeAuthenticator, OAuthenticator,
# the idle culler, JupyterLab and the classic Notebook.
FROM jupyterhub/jupyterhub:5.3.0

# Release of jupyterhub-nativeauthenticator to install. Installing a pinned
# PyPI release (instead of the moving `main` branch on GitHub) keeps rebuilds
# reproducible; override with --build-arg NATIVEAUTHENTICATOR_VERSION=x.y.z.
ARG NATIVEAUTHENTICATOR_VERSION=1.3.0

USER root

# git is not required by the pip install below; it is kept so the image
# contents stay unchanged. NOTE(review): presumably useful in user terminals,
# drop it if nobody relies on it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Hub add-ons, one per line and sorted for easy diffing.
# NOTE(review): the remaining packages are unpinned; consider pinning them too.
RUN pip install --no-cache-dir \
        "jupyterhub-nativeauthenticator==${NATIVEAUTHENTICATOR_VERSION}" \
        jupyterhub-idle-culler \
        jupyterlab \
        notebook \
        oauthenticator

# Pre-create the state directory, including the nested srv/jupyterhub path.
# NOTE(review): the nested path looks like a workaround for a relative sqlite
# URL resolved against WORKDIR (/srv/jupyterhub); confirm whether it is still
# needed once the database URL is absolute.
RUN mkdir -p /srv/jupyterhub/srv/jupyterhub && \
    chown -R 1000:1000 /srv/jupyterhub

COPY jupyterhub_config.py /srv/jupyterhub/jupyterhub_config.py

WORKDIR /srv/jupyterhub

# Documentation only: the hub listens on 8000.
EXPOSE 8000

CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"]

View File

@@ -0,0 +1,41 @@
# Compose definition for the Smart City VRE JupyterHub service, published
# through Traefik on the external `smartcity-shared` network.
version: "3.9"

services:
  jupyterhub:
    build:
      context: .
      dockerfile: Dockerfile
    image: smartcity-jupyterhub:latest
    container_name: jupyterhub
    restart: unless-stopped
    networks:
      - smartcity-shared
    # Run as root to avoid UID issues with the mounted volume.
    user: root
    environment:
      # Secret key: never commit a literal value. It must be supplied through
      # the shell environment or an untracked .env file; compose aborts with
      # the message below when it is missing.
      # Generate one with: openssl rand -hex 32
      - "JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY:?JUPYTERHUB_CRYPT_KEY must be set (openssl rand -hex 32)}"
    volumes:
      # NOTE(review): this named volume is mounted over /srv/jupyterhub, the
      # same directory the image copies jupyterhub_config.py into. A named
      # volume is only seeded from the image when it is first created, so a
      # rebuilt image with a changed config may not take effect - confirm.
      - jupyterhub_data:/srv/jupyterhub
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
      - "traefik.http.routers.jupyterhub.entrypoints=websecure"
      - "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
      - "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
      - "traefik.docker.network=smartcity-shared"
    healthcheck:
      # Probes the hub health endpoint from inside the container.
      # NOTE(review): requires curl to be present in the image - confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s

networks:
  # Created outside this file (shared with Traefik and the other stacks).
  smartcity-shared:
    external: true

volumes:
  jupyterhub_data:
    driver: local

View File

@@ -0,0 +1,31 @@
# JupyterHub configuration for Smart City VRE.
# `c` is the configuration object JupyterHub provides when it loads this file
# through `jupyterhub -f <path>`; it is not defined in this file.

# Public (proxy) listener. `bind_url` replaces the deprecated
# `JupyterHub.ip` / `JupyterHub.port` pair with the same address and port.
c.JupyterHub.bind_url = 'http://0.0.0.0:8000'
# Address the internal Hub API listens on.
c.JupyterHub.hub_ip = '0.0.0.0'

# Authenticator: Native (username/password signup + login)
c.JupyterHub.authenticator_class = 'nativeauthenticator.NativeAuthenticator'
# NOTE(review): with self-service signup, whoever registers the name 'admin'
# presumably obtains admin rights - confirm that account is created first.
c.Authenticator.admin_users = {'admin'}
c.Authenticator.allow_all = True

# Spawner
c.JupyterHub.spawner_class = 'simple'
c.Spawner.cmd = ['jupyterhub-singleuser']
c.Spawner.default_url = '/lab'

# Database and cookies: both use absolute paths in the same directory, so the
# state location does not depend on the working directory the hub is started
# from. Four slashes = absolute path in a sqlite URL (three slashes would be
# relative to the current working directory).
c.JupyterHub.cookie_secret_file = '/srv/jupyterhub/jupyterhub_cookie_secret'
c.JupyterHub.db_url = 'sqlite:////srv/jupyterhub/jupyterhub.sqlite'

# Base URL
c.JupyterHub.base_url = '/'

# Extra HTTP response headers: restrict framing of hub pages to the same
# origin via Content-Security-Policy. (This does not configure trust of
# proxy-forwarded headers.)
c.JupyterHub.tornado_settings = {
    'headers': {
        'Content-Security-Policy': "frame-ancestors 'self'"
    }
}

# Keep single-user servers running when the user logs out.
c.JupyterHub.shutdown_on_logout = False

View File

@@ -0,0 +1,27 @@
# Dockerfile for JupyterHub single-user notebooks.
# Adds smart-city data analysis libraries (InfluxDB client, plotting, mapping,
# SQL access, JupyterLab git extension) on top of the scipy-notebook image.

# Base image, overridable at build time. The default is unchanged from the
# previous hard-coded value.
# NOTE(review): `:latest` is not reproducible - pass
# --build-arg BASE_IMAGE=<registry>/scipy-notebook:<dated-tag> to pin it.
ARG BASE_IMAGE=jupyter/scipy-notebook:latest
FROM ${BASE_IMAGE}

# Install as the notebook user rather than root so that no root-owned files
# end up inside the user-writable Python environment.
USER ${NB_UID}

# Additional packages for smart city data analysis, one per line and sorted.
# The duplicated `sqlalchemy` entry has been removed.
# NOTE(review): several of these likely ship with the base image already;
# versions are unpinned - consider a pinned requirements file.
RUN pip install --no-cache-dir \
        folium \
        influxdb-client \
        ipywidgets \
        jupyterlab-git \
        matplotlib \
        numpy \
        pandas \
        plotly \
        psycopg2-binary \
        requests \
        sqlalchemy

# Documentation only: the single-user server port.
EXPOSE 8888

CMD ["jupyterhub-singleuser"]

15
vre/jupyterhub/start.sh Normal file
View File

@@ -0,0 +1,15 @@
#!/bin/bash
# Start JupyterHub with its state kept in /srv/jupyterhub.
# Any arguments given to this script are forwarded to `jupyterhub`.
set -euo pipefail

# Single source of truth for the state directory; the database check, the
# working directory and the config path below are all derived from it.
readonly hub_dir=/srv/jupyterhub
readonly hub_db="${hub_dir}/jupyterhub.sqlite"

# Ensure the data directory exists
mkdir -p "${hub_dir}"

# Announce first-time initialisation. The check now looks in the state
# directory itself (it previously looked under /data, which this script
# never creates or uses).
if [ ! -f "${hub_db}" ]; then
    echo "Initializing JupyterHub database..."
fi

# Run from the state directory so that a relative sqlite URL in the config
# resolves inside it regardless of where this script was invoked from.
cd "${hub_dir}"

# NOTE(review): JUPYTERHUB_DATA is exported as before; confirm something
# actually reads it.
export JUPYTERHUB_DATA="${hub_dir}"

# exec so jupyterhub replaces the shell and receives signals directly.
exec jupyterhub -f "${hub_dir}/jupyterhub_config.py" "$@"