chore: add VRE stack configs (JupyterHub + Zeppelin) + lakehouse components
- Add VRE directory with JupyterHub + Zeppelin docker-compose configs
- Add Gravitino, Flink, Kafka, MinIO, Trino lakehouse stack
- Add Superset, Metabase, StarRocks analytics tools
- Session resumed after crash on 2026-06-01
Infrastructure: 86 containers total
Known issues: Kafka (no ZK conn), Trino (node.env null), JupyterHub (DB path)
This commit is contained in:
80
vre/docker-compose.yml
Normal file
80
vre/docker-compose.yml
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
# Combined VRE (Virtual Research Environment) stack:
# JupyterHub and Apache Zeppelin, both published through Traefik.
# Usage: docker compose -f vre/docker-compose.yml up -d

version: "3.9"

services:
  # JupyterHub, built from ./jupyterhub; Traefik forwards
  # jupyter.digitribe.fr to container port 8000.
  jupyterhub:
    build:
      context: ./jupyterhub
      dockerfile: Dockerfile
    image: smartcity-jupyterhub:latest
    container_name: jupyterhub
    restart: unless-stopped
    networks:
      - smartcity-shared
    volumes:
      - jupyterhub_data:/srv/jupyterhub
    labels:
      traefik.enable: "true"
      traefik.http.routers.jupyterhub.rule: "Host(`jupyter.digitribe.fr`)"
      traefik.http.routers.jupyterhub.entrypoints: "websecure"
      traefik.http.routers.jupyterhub.tls.certresolver: "letsencrypt"
      traefik.http.routers.jupyterhub.service: "jupyterhub-svc"
      traefik.http.services.jupyterhub-svc.loadbalancer.server.port: "8000"
      traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader: "true"
      traefik.docker.network: "smartcity-shared"
    healthcheck:
      # Probes the hub's health endpoint from inside the container.
      test:
        - CMD
        - curl
        - "-f"
        - http://localhost:8000/hub/health
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s

  # Apache Zeppelin with its site and Shiro configuration bind-mounted from
  # ./zeppelin; Traefik forwards zeppelin.digitribe.fr to container port 8080.
  zeppelin:
    image: apache/zeppelin:0.11.2
    container_name: zeppelin
    restart: unless-stopped
    networks:
      - smartcity-shared
    volumes:
      - ./zeppelin/zeppelin-site.xml:/opt/zeppelin/conf/zeppelin-site.xml
      - ./zeppelin/shiro.ini:/opt/zeppelin/conf/shiro.ini
      - zeppelin_notebooks:/notebook
      - zeppelin_logs:/logs
    environment:
      ZEPPELIN_ADDR: "0.0.0.0"
      ZEPPELIN_PORT: "8080"
      ZEPPELIN_NOTEBOOK_DIR: "/notebook"
      ZEPPELIN_MEM: "-Xmx2g"
      ZEPPELIN_INTP_MEM: "-Xmx2g"
      ZEPPELIN_LOG_DIR: "/logs"
      ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE: "10240000"
    labels:
      traefik.enable: "true"
      traefik.http.routers.zeppelin.rule: "Host(`zeppelin.digitribe.fr`)"
      traefik.http.routers.zeppelin.entrypoints: "websecure"
      traefik.http.routers.zeppelin.tls.certresolver: "letsencrypt"
      traefik.http.routers.zeppelin.service: "zeppelin-svc"
      traefik.http.services.zeppelin-svc.loadbalancer.server.port: "8080"
      traefik.http.services.zeppelin-svc.loadbalancer.passhostheader: "true"
      traefik.docker.network: "smartcity-shared"
    healthcheck:
      # The long start_period gives Zeppelin two minutes before failed
      # probes count against the retry budget.
      test:
        - CMD
        - curl
        - "-f"
        - http://localhost:8080/api/version
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s

# Pre-existing network; it is not created by this file.
networks:
  smartcity-shared:
    external: true

volumes:
  jupyterhub_data:
    driver: local
  zeppelin_notebooks:
    driver: local
  zeppelin_logs:
    driver: local
|
||||||
26
vre/jupyterhub/Dockerfile
Normal file
26
vre/jupyterhub/Dockerfile
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# JupyterHub image for the Smart City VRE: the hub plus authenticators,
# the idle culler, JupyterLab and the classic notebook.
FROM jupyterhub/jupyterhub:5.3.0

USER root

# git stays installed so the image contents match the previous build;
# the pip step below no longer needs it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# NativeAuthenticator is installed from its PyPI release
# (jupyterhub-nativeauthenticator) rather than the moving `main` branch of the
# git repository, so rebuilding the image does not silently pick up
# unreleased changes.
RUN pip install --no-cache-dir \
    jupyterhub-nativeauthenticator \
    oauthenticator \
    jupyterhub-idle-culler \
    jupyterlab \
    notebook

# NOTE(review): the nested srv/jupyterhub directory is the original author's
# workaround for a database-path problem (described as "JupyterHub joins
# data_files_path + dirname(db_path)"); that rationale is unverified - confirm
# whether the nested directory is still needed.
RUN mkdir -p /srv/jupyterhub/srv/jupyterhub && \
    chown -R 1000:1000 /srv/jupyterhub

COPY jupyterhub_config.py /srv/jupyterhub/jupyterhub_config.py

WORKDIR /srv/jupyterhub

EXPOSE 8000

CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"]
|
||||||
41
vre/jupyterhub/docker-compose.yml
Normal file
41
vre/jupyterhub/docker-compose.yml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# Standalone JupyterHub deployment behind Traefik.
# JUPYTERHUB_CRYPT_KEY must be provided through the shell environment or an
# untracked .env file; generate one with: openssl rand -hex 32
version: "3.9"

services:
  jupyterhub:
    build:
      context: .
      dockerfile: Dockerfile
    image: smartcity-jupyterhub:latest
    container_name: jupyterhub
    restart: unless-stopped
    networks:
      - smartcity-shared
    # The hub process runs as root (original note: "to avoid UID issues").
    # NOTE(review): nothing in this stack shows privileges being dropped
    # afterwards - confirm this is intended.
    user: root
    environment:
      # The secret is no longer committed to the repository; compose aborts
      # with the message below when the variable is missing or empty.
      - "JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY:?set JUPYTERHUB_CRYPT_KEY (openssl rand -hex 32)}"
    volumes:
      - jupyterhub_data:/srv/jupyterhub
    labels:
      # Traefik: route jupyter.digitribe.fr (websecure entrypoint, TLS via the
      # letsencrypt resolver) to container port 8000.
      - "traefik.enable=true"
      - "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
      - "traefik.http.routers.jupyterhub.entrypoints=websecure"
      - "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
      - "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
      - "traefik.docker.network=smartcity-shared"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s

# Pre-existing network; it is not created by this file.
networks:
  smartcity-shared:
    external: true

volumes:
  jupyterhub_data:
    driver: local
|
||||||
31
vre/jupyterhub/jupyterhub_config.py
Normal file
31
vre/jupyterhub/jupyterhub_config.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# JupyterHub configuration for Smart City VRE
#
# `c` is not defined in this file; it is expected to be injected by the
# JupyterHub configuration loader when the file is passed with `-f`.

# Listen on all interfaces, public proxy on port 8000.
c.JupyterHub.ip = '0.0.0.0'
c.JupyterHub.port = 8000
c.JupyterHub.hub_ip = '0.0.0.0'

# Authenticator: Native (username/password signup + login)
c.JupyterHub.authenticator_class = 'nativeauthenticator.NativeAuthenticator'
# NOTE(review): with self-service signup, confirm who is able to register the
# 'admin' username, and how allow_all interacts with signup approval.
c.Authenticator.admin_users = {'admin'}
c.Authenticator.allow_all = True

# Spawner
c.JupyterHub.spawner_class = 'simple'
c.Spawner.cmd = ['jupyterhub-singleuser']
c.Spawner.default_url = '/lab'

# Database and cookies
# Both files use absolute paths under /srv/jupyterhub so their location does
# not depend on the working directory the hub is started from (a relative
# sqlite URL resolves against the current directory).
c.JupyterHub.cookie_secret_file = '/srv/jupyterhub/jupyterhub_cookie_secret'
c.JupyterHub.db_url = 'sqlite:////srv/jupyterhub/jupyterhub.sqlite'

# Base URL
c.JupyterHub.base_url = '/'

# Extra HTTP response header: a Content-Security-Policy that restricts framing
# of hub pages to the same origin.
c.JupyterHub.tornado_settings = {
    'headers': {
        'Content-Security-Policy': "frame-ancestors 'self'"
    }
}

c.JupyterHub.shutdown_on_logout = False
|
||||||
27
vre/jupyterhub/singleuser/Dockerfile
Normal file
27
vre/jupyterhub/singleuser/Dockerfile
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Dockerfile for JupyterHub single-user notebooks
# Adds data-analysis libraries and an InfluxDB client on top of the
# scipy-notebook base image.
# NOTE(review): the base image uses the floating `latest` tag; consider
# pinning a dated tag so rebuilds are reproducible.
FROM jupyter/scipy-notebook:latest

USER root

# Install additional packages for smart city data analysis.
# Each package is listed once (sqlalchemy previously appeared twice).
RUN pip install --no-cache-dir \
    influxdb-client \
    pandas \
    numpy \
    matplotlib \
    plotly \
    folium \
    requests \
    sqlalchemy \
    psycopg2-binary \
    ipywidgets \
    jupyterlab-git

# Switch back to notebook user
USER ${NB_UID}

EXPOSE 8888

CMD ["jupyterhub-singleuser"]
|
||||||
15
vre/jupyterhub/start.sh
Normal file
15
vre/jupyterhub/start.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
# Start JupyterHub with its configuration and state under one data directory.
set -e

# Single source of truth for the data directory. The original script checked
# /data for the database while everything else lived in /srv/jupyterhub.
export JUPYTERHUB_DATA=/srv/jupyterhub

# Ensure the data directory exists and make it the working directory, so a
# relative database URL in the config resolves inside it.
mkdir -p "${JUPYTERHUB_DATA}"
cd "${JUPYTERHUB_DATA}"

# Log when no database file is present yet. This script performs no
# initialization itself; it only reports the state before starting the hub.
if [ ! -f "${JUPYTERHUB_DATA}/jupyterhub.sqlite" ]; then
    echo "Initializing JupyterHub database..."
fi

# Replace the shell with the hub process.
exec jupyterhub -f "${JUPYTERHUB_DATA}/jupyterhub_config.py"
|
||||||
4
vre/zeppelin/.env
Normal file
4
vre/zeppelin/.env
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
# Zeppelin settings, mirroring the values the compose files pass to the
# container.
# NOTE(review): neither compose file references these variables or declares
# an env_file - confirm whether this file is still consumed anywhere.
ZEPPELIN_PORT=8080
ZEPPELIN_NOTEBOOK_DIR=/notebook
ZEPPELIN_LOG_DIR=/logs
# Heap size aligned with the -Xmx2g used by both compose files
# (previously -Xmx1024m).
ZEPPELIN_MEM=-Xmx2g
|
||||||
48
vre/zeppelin/docker-compose.yml
Normal file
48
vre/zeppelin/docker-compose.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Standalone Apache Zeppelin deployment behind Traefik.
version: "3.9"

services:
  zeppelin:
    image: apache/zeppelin:0.11.2
    container_name: zeppelin
    restart: unless-stopped
    networks:
      - smartcity-shared
    user: root
    ports:
      # Published on the loopback interface only; external traffic arrives
      # through Traefik.
      - "127.0.0.1:8080:8080"
    environment:
      - ZEPPELIN_ADDR=0.0.0.0
      - ZEPPELIN_PORT=8080
      - ZEPPELIN_NOTEBOOK_DIR=/notebook
      - ZEPPELIN_MEM=-Xmx2g
      - ZEPPELIN_INTP_MEM=-Xmx2g
      - ZEPPELIN_LOG_DIR=/logs
      - ZEPPELIN_SSL=false
    volumes:
      # Same configuration mounts as the combined VRE stack, relative to this
      # directory. Without them the site settings and the Shiro login
      # configuration next to this file are never loaded by the container.
      - ./zeppelin-site.xml:/opt/zeppelin/conf/zeppelin-site.xml
      - ./shiro.ini:/opt/zeppelin/conf/shiro.ini
      - zeppelin_notebooks:/notebook
      - zeppelin_logs:/logs
    labels:
      # Traefik: route zeppelin.digitribe.fr (websecure entrypoint, TLS via the
      # letsencrypt resolver) to container port 8080.
      - "traefik.enable=true"
      - "traefik.http.routers.zeppelin.rule=Host(`zeppelin.digitribe.fr`)"
      - "traefik.http.routers.zeppelin.entrypoints=websecure"
      - "traefik.http.routers.zeppelin.tls.certresolver=letsencrypt"
      - "traefik.http.routers.zeppelin.service=zeppelin-svc"
      - "traefik.http.services.zeppelin-svc.loadbalancer.server.port=8080"
      - "traefik.http.services.zeppelin-svc.loadbalancer.passhostheader=true"
      - "traefik.docker.network=smartcity-shared"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s

# Pre-existing network; it is not created by this file.
networks:
  smartcity-shared:
    external: true

volumes:
  zeppelin_notebooks:
    driver: local
  zeppelin_logs:
    driver: local
|
||||||
20
vre/zeppelin/shiro.ini
Normal file
20
vre/zeppelin/shiro.ini
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Apache Shiro configuration for Zeppelin: one static admin account, with
# every URL requiring authentication except the two anonymous endpoints
# listed under [urls].

[main]
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
securityManager.sessionManager = $sessionManager

sessionManager.sessionIdCookieEnabled = true
# NOTE(review): URL rewriting places the session id in URLs - confirm it is
# needed now that cookies are enabled.
sessionManager.sessionIdUrlRewritingEnabled = true

# Zeppelin's login endpoint lives under /api, like the other rules in [urls];
# the previous value (/login) did not match it.
shiro.loginUrl = /api/login

[users]
# NOTE(review): plaintext credential committed to version control - rotate it
# and consider a hashed password or an external realm.
admin = Digitribe972, admin

[roles]
admin = *

[urls]
# /api/version stays anonymous; the container healthcheck probes it.
/api/version = anon
/api/cluster/address = anon
/api/login = authc
/** = authc
|
||||||
32
vre/zeppelin/zeppelin-site.xml
Normal file
32
vre/zeppelin/zeppelin-site.xml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Zeppelin server settings for the Smart City VRE deployment. -->
<configuration>

  <!-- Listener: all interfaces, port 8080, served from the root context. -->
  <property>
    <name>zeppelin.server.addr</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>zeppelin.server.port</name>
    <value>8080</value>
  </property>
  <property>
    <name>zeppelin.server.ssl.port</name>
    <value>-1</value>
  </property>
  <property>
    <name>zeppelin.server.context.path</name>
    <value>/</value>
  </property>

  <!-- Strict-Transport-Security value. -->
  <property>
    <name>zeppelin.server.strict.transport</name>
    <value>max-age=63115200; includeSubDomains</value>
  </property>

  <!-- Websocket text message size limit. -->
  <property>
    <name>zeppelin.websocket.max.text.message.size</name>
    <value>10240000</value>
  </property>

  <property>
    <name>zeppelin.server.authorization.header.clear</name>
    <value>true</value>
  </property>

</configuration>
|
||||||
Reference in New Issue
Block a user