chore: add VRE stack configs (JupyterHub + Zeppelin) + lakehouse components

- Add VRE directory with JupyterHub + Zeppelin docker-compose configs
- Add Gravitino, Flink, Kafka, MinIO, Trino lakehouse stack
- Add Superset, Metabase, StarRocks analytics tools
- Session reprise après crash 2026-06-01

Infrastructure: 86 conteneurs total
Known issues: Kafka (no ZK conn), Trino (node.env null), JupyterHub (DB path)
This commit is contained in:
Eric FELIXINE
2026-05-29 02:21:08 -04:00
parent 486c1d2675
commit a234e808f2
10 changed files with 324 additions and 0 deletions

80
vre/docker-compose.yml Normal file
View File

@@ -0,0 +1,80 @@
# Combined VRE (Virtual Research Environment) Stack
# JupyterHub + Apache Zeppelin behind Traefik
# Usage: docker compose -f vre/docker-compose.yml up -d
version: "3.9"
services:
jupyterhub:
build:
context: ./jupyterhub
dockerfile: Dockerfile
image: smartcity-jupyterhub:latest
container_name: jupyterhub
restart: unless-stopped
networks:
- smartcity-shared
volumes:
- jupyterhub_data:/srv/jupyterhub
labels:
- "traefik.enable=true"
- "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
- "traefik.http.routers.jupyterhub.entrypoints=websecure"
- "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
- "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
- "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
- "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
- "traefik.docker.network=smartcity-shared"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
zeppelin:
image: apache/zeppelin:0.11.2
container_name: zeppelin
restart: unless-stopped
networks:
- smartcity-shared
volumes:
- ./zeppelin/zeppelin-site.xml:/opt/zeppelin/conf/zeppelin-site.xml
- ./zeppelin/shiro.ini:/opt/zeppelin/conf/shiro.ini
- zeppelin_notebooks:/notebook
- zeppelin_logs:/logs
environment:
- ZEPPELIN_ADDR=0.0.0.0
- ZEPPELIN_PORT=8080
- ZEPPELIN_NOTEBOOK_DIR=/notebook
- ZEPPELIN_MEM=-Xmx2g
- ZEPPELIN_INTP_MEM=-Xmx2g
- ZEPPELIN_LOG_DIR=/logs
- ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE=10240000
labels:
- "traefik.enable=true"
- "traefik.http.routers.zeppelin.rule=Host(`zeppelin.digitribe.fr`)"
- "traefik.http.routers.zeppelin.entrypoints=websecure"
- "traefik.http.routers.zeppelin.tls.certresolver=letsencrypt"
- "traefik.http.routers.zeppelin.service=zeppelin-svc"
- "traefik.http.services.zeppelin-svc.loadbalancer.server.port=8080"
- "traefik.http.services.zeppelin-svc.loadbalancer.passhostheader=true"
- "traefik.docker.network=smartcity-shared"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
interval: 30s
timeout: 10s
retries: 5
start_period: 120s
networks:
smartcity-shared:
external: true
volumes:
jupyterhub_data:
driver: local
zeppelin_notebooks:
driver: local
zeppelin_logs:
driver: local

26
vre/jupyterhub/Dockerfile Normal file
View File

@@ -0,0 +1,26 @@
# Dockerfile for JupyterHub with authenticator
FROM jupyterhub/jupyterhub:5.3.0
USER root
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir \
git+https://github.com/jupyterhub/nativeauthenticator.git@main \
oauthenticator \
jupyterhub-idle-culler \
jupyterlab \
notebook
# Create the directory structure JupyterHub expects for DB
# JupyterHub joins data_files_path + dirname(db_path), so we create the composed path
RUN mkdir -p /srv/jupyterhub/srv/jupyterhub && \
chown -R 1000:1000 /srv/jupyterhub
COPY jupyterhub_config.py /srv/jupyterhub/jupyterhub_config.py
WORKDIR /srv/jupyterhub
EXPOSE 8000
CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"]

View File

@@ -0,0 +1,41 @@
version: "3.9"
services:
jupyterhub:
build:
context: .
dockerfile: Dockerfile
image: smartcity-jupyterhub:latest
container_name: jupyterhub
restart: unless-stopped
networks:
- smartcity-shared
# Run as root to avoid UID issues, JupyterHub will drop privs
user: root
environment:
- JUPYTERHUB_CRYPT_KEY=a1b2c3d4-e5f6-7890-abcd-ef1234567890
volumes:
- jupyterhub_data:/srv/jupyterhub
labels:
- "traefik.enable=true"
- "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
- "traefik.http.routers.jupyterhub.entrypoints=websecure"
- "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
- "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
- "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
- "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
- "traefik.docker.network=smartcity-shared"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
networks:
smartcity-shared:
external: true
volumes:
jupyterhub_data:
driver: local

View File

@@ -0,0 +1,31 @@
# JupyterHub configuration for Smart City VRE
c.JupyterHub.ip = '0.0.0.0'
c.JupyterHub.port = 8000
c.JupyterHub.hub_ip = '0.0.0.0'
# Authenticator: Native (username/password signup + login)
c.JupyterHub.authenticator_class = 'nativeauthenticator.NativeAuthenticator'
c.Authenticator.admin_users = {'admin'}
c.Authenticator.allow_all = True
# Spawner
c.JupyterHub.spawner_class = 'simple'
c.Spawner.cmd = ['jupyterhub-singleuser']
c.Spawner.default_url = '/lab'
# Database and cookies
c.JupyterHub.cookie_secret_file = '/srv/jupyterhub/jupyterhub_cookie_secret'
c.JupyterHub.db_url = 'sqlite:///jupyterhub.sqlite'
# Base URL
c.JupyterHub.base_url = '/'
# Trust forwarded headers from Traefik
c.JupyterHub.tornado_settings = {
'headers': {
'Content-Security-Policy': "frame-ancestors 'self'"
}
}
c.JupyterHub.shutdown_on_logout = False

View File

@@ -0,0 +1,27 @@
# Dockerfile for JupyterHub single-user notebooks
# Includes JupyterLab, common data science libs, and InfluxDB client
FROM jupyter/scipy-notebook:latest
USER root
# Install additional packages for smart city data analysis
RUN pip install --no-cache-dir \
influxdb-client \
pandas \
numpy \
matplotlib \
plotly \
folium \
requests \
sqlalchemy \
psycopg2-binary \
sqlalchemy \
ipywidgets \
jupyterlab-git
# Switch back to notebook user
USER ${NB_UID}
EXPOSE 8888
CMD ["jupyterhub-singleuser"]

15
vre/jupyterhub/start.sh Normal file
View File

@@ -0,0 +1,15 @@
#!/bin/bash
set -e
# Ensure the data directory exists
mkdir -p /srv/jupyterhub
# If the DB doesn't exist, initialize it
if [ ! -f /data/jupyterhub.sqlite ]; then
echo "Initializing JupyterHub database..."
fi
# Run JupyterHub with DB in /data volume
export JUPYTERHUB_DATA=/srv/jupyterhub
exec jupyterhub -f /srv/jupyterhub/jupyterhub_config.py

4
vre/zeppelin/.env Normal file
View File

@@ -0,0 +1,4 @@
ZEPPELIN_PORT=8080
ZEPPELIN_NOTEBOOK_DIR=/notebook
ZEPPELIN_LOG_DIR=/logs
ZEPPELIN_MEM=-Xmx1024m

View File

@@ -0,0 +1,48 @@
version: "3.9"
services:
zeppelin:
image: apache/zeppelin:0.11.2
container_name: zeppelin
restart: unless-stopped
networks:
- smartcity-shared
user: root
ports:
- "127.0.0.1:8080:8080"
environment:
- ZEPPELIN_ADDR=0.0.0.0
- ZEPPELIN_PORT=8080
- ZEPPELIN_NOTEBOOK_DIR=/notebook
- ZEPPELIN_MEM=-Xmx2g
- ZEPPELIN_INTP_MEM=-Xmx2g
- ZEPPELIN_LOG_DIR=/logs
- ZEPPELIN_SSL=false
volumes:
- zeppelin_notebooks:/notebook
- zeppelin_logs:/logs
labels:
- "traefik.enable=true"
- "traefik.http.routers.zeppelin.rule=Host(`zeppelin.digitribe.fr`)"
- "traefik.http.routers.zeppelin.entrypoints=websecure"
- "traefik.http.routers.zeppelin.tls.certresolver=letsencrypt"
- "traefik.http.routers.zeppelin.service=zeppelin-svc"
- "traefik.http.services.zeppelin-svc.loadbalancer.server.port=8080"
- "traefik.http.services.zeppelin-svc.loadbalancer.passhostheader=true"
- "traefik.docker.network=smartcity-shared"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
interval: 30s
timeout: 10s
retries: 5
start_period: 120s
networks:
smartcity-shared:
external: true
volumes:
zeppelin_notebooks:
driver: local
zeppelin_logs:
driver: local

20
vre/zeppelin/shiro.ini Normal file
View File

@@ -0,0 +1,20 @@
[main]
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
securityManager.sessionManager = $sessionManager
sessionManager.sessionIdCookieEnabled = true
sessionManager.sessionIdUrlRewritingEnabled = true
shiro.loginUrl = /login
[users]
admin = Digitribe972, admin
[roles]
admin = *
[urls]
/api/version = anon
/api/cluster/address = anon
/login = authc
/** = authc

View File

@@ -0,0 +1,32 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>zeppelin.server.addr</name>
<value>0.0.0.0</value>
</property>
<property>
<name>zeppelin.server.port</name>
<value>8080</value>
</property>
<property>
<name>zeppelin.server.ssl.port</name>
<value>-1</value>
</property>
<property>
<name>zeppelin.server.context.path</name>
<value>/</value>
</property>
<property>
<name>zeppelin.server.strict.transport</name>
<value>max-age=63115200; includeSubDomains</value>
</property>
<property>
<name>zeppelin.websocket.max.text.message.size</name>
<value>10240000</value>
</property>
<property>
<name>zeppelin.server.authorization.header.clear</name>
<value>true</value>
</property>
</configuration>