chore: add VRE stack configs (JupyterHub + Zeppelin) + lakehouse components
- Add VRE directory with JupyterHub + Zeppelin docker-compose configs
- Add Gravitino, Flink, Kafka, MinIO, Trino lakehouse stack
- Add Superset, Metabase, StarRocks analytics tools
- Session resumed after the crash on 2026-06-01

Infrastructure: 86 containers in total.
Known issues: Kafka (no ZooKeeper connection), Trino (node.env null), JupyterHub (DB path).
This commit is contained in:
80
vre/docker-compose.yml
Normal file
80
vre/docker-compose.yml
Normal file
@@ -0,0 +1,80 @@
|
||||
# Combined VRE (Virtual Research Environment) stack:
# JupyterHub + Apache Zeppelin, both published through Traefik.
# Usage: docker compose -f vre/docker-compose.yml up -d
#
# NOTE(review): the per-service compose files under ./jupyterhub and
# ./zeppelin use the same container names, so they cannot run alongside
# this stack on one Docker host.

version: "3.9"

services:
  jupyterhub:
    build:
      context: ./jupyterhub
      dockerfile: Dockerfile
    image: smartcity-jupyterhub:latest
    container_name: jupyterhub
    restart: unless-stopped
    networks:
      - smartcity-shared
    volumes:
      # Hub state (the config file sets the cookie secret path under this
      # directory).
      - jupyterhub_data:/srv/jupyterhub
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
      - "traefik.http.routers.jupyterhub.entrypoints=websecure"
      - "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
      - "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
      - "traefik.docker.network=smartcity-shared"
    healthcheck:
      # Requires curl inside the image.
      test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s

  zeppelin:
    image: apache/zeppelin:0.11.2
    container_name: zeppelin
    restart: unless-stopped
    networks:
      - smartcity-shared
    volumes:
      # Configuration is mounted read-only: the container has no reason to
      # rewrite files that are tracked in the repository.
      - ./zeppelin/zeppelin-site.xml:/opt/zeppelin/conf/zeppelin-site.xml:ro
      - ./zeppelin/shiro.ini:/opt/zeppelin/conf/shiro.ini:ro
      - zeppelin_notebooks:/notebook
      - zeppelin_logs:/logs
    environment:
      - ZEPPELIN_ADDR=0.0.0.0
      - ZEPPELIN_PORT=8080
      - ZEPPELIN_NOTEBOOK_DIR=/notebook
      - ZEPPELIN_MEM=-Xmx2g
      - ZEPPELIN_INTP_MEM=-Xmx2g
      - ZEPPELIN_LOG_DIR=/logs
      - ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE=10240000
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.zeppelin.rule=Host(`zeppelin.digitribe.fr`)"
      - "traefik.http.routers.zeppelin.entrypoints=websecure"
      - "traefik.http.routers.zeppelin.tls.certresolver=letsencrypt"
      - "traefik.http.routers.zeppelin.service=zeppelin-svc"
      - "traefik.http.services.zeppelin-svc.loadbalancer.server.port=8080"
      - "traefik.http.services.zeppelin-svc.loadbalancer.passhostheader=true"
      - "traefik.docker.network=smartcity-shared"
    healthcheck:
      # /api/version is declared anonymous in shiro.ini, so the probe needs
      # no credentials.
      test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s

networks:
  # Declared external, so it must exist before `up`.
  smartcity-shared:
    external: true

volumes:
  jupyterhub_data:
    driver: local
  zeppelin_notebooks:
    driver: local
  zeppelin_logs:
    driver: local
|
||||
26
vre/jupyterhub/Dockerfile
Normal file
26
vre/jupyterhub/Dockerfile
Normal file
@@ -0,0 +1,26 @@
|
||||
# JupyterHub hub image with NativeAuthenticator, OAuthenticator, the idle
# culler and JupyterLab / Notebook installed next to the hub.
FROM jupyterhub/jupyterhub:5.3.0

USER root

# git  - needed by the pip VCS install below.
# curl - needed by the compose healthcheck
#        (curl -f http://localhost:8000/hub/health).
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl git \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): nativeauthenticator is installed from the moving `main`
# branch, so two builds of this file may differ; pin a tag or commit.
RUN pip install --no-cache-dir \
    git+https://github.com/jupyterhub/nativeauthenticator.git@main \
    oauthenticator \
    jupyterhub-idle-culler \
    jupyterlab \
    notebook

# Pre-create the data directory tree and hand it to UID/GID 1000.
# NOTE(review): the nested srv/jupyterhub path looks like a workaround for a
# cwd-relative SQLite URL (sqlite:/// followed by srv/jupyterhub/...); confirm
# whether it is still needed with the db_url in jupyterhub_config.py.
RUN mkdir -p /srv/jupyterhub/srv/jupyterhub && \
    chown -R 1000:1000 /srv/jupyterhub

# NOTE(review): the compose files mount a named volume on /srv/jupyterhub;
# verify that a rebuilt image's config is not shadowed by an older copy
# already stored in that volume.
COPY jupyterhub_config.py /srv/jupyterhub/jupyterhub_config.py

WORKDIR /srv/jupyterhub

EXPOSE 8000

CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"]
|
||||
41
vre/jupyterhub/docker-compose.yml
Normal file
41
vre/jupyterhub/docker-compose.yml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Standalone JupyterHub service (same image and Traefik routing as the
# combined stack in ../docker-compose.yml).
#
# Required environment (shell or a `.env` file next to this compose file):
#   JUPYTERHUB_CRYPT_KEY   generate with: openssl rand -hex 32

version: "3.9"

services:
  jupyterhub:
    build:
      context: .
      dockerfile: Dockerfile
    image: smartcity-jupyterhub:latest
    container_name: jupyterhub
    restart: unless-stopped
    networks:
      - smartcity-shared
    # Run the hub as root to avoid UID/ownership issues on the data volume.
    # NOTE(review): the earlier comment said JupyterHub drops privileges by
    # itself; confirm that against the spawner set in jupyterhub_config.py.
    user: root
    environment:
      # The key is a secret: it is taken from the environment and never
      # committed. Compose aborts with the message below when it is unset
      # or empty.
      - "JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY:?JUPYTERHUB_CRYPT_KEY must be set}"
    volumes:
      - jupyterhub_data:/srv/jupyterhub
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
      - "traefik.http.routers.jupyterhub.entrypoints=websecure"
      - "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
      - "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
      - "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
      - "traefik.docker.network=smartcity-shared"
    healthcheck:
      # Requires curl inside the image.
      test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s

networks:
  # Declared external, so it must exist before `up`.
  smartcity-shared:
    external: true

volumes:
  jupyterhub_data:
    driver: local
|
||||
31
vre/jupyterhub/jupyterhub_config.py
Normal file
31
vre/jupyterhub/jupyterhub_config.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# JupyterHub configuration for the Smart City VRE.
# Loaded through `jupyterhub -f <this file>`; JupyterHub injects get_config().

c = get_config()  # noqa: F821

# --- Network ---------------------------------------------------------------
# Listen on every interface, port 8000 (the port the Traefik labels and the
# container healthcheck point at).
c.JupyterHub.ip = '0.0.0.0'
c.JupyterHub.port = 8000
c.JupyterHub.hub_ip = '0.0.0.0'

# --- Authentication --------------------------------------------------------
# Native authenticator: username/password sign-up and login.
c.JupyterHub.authenticator_class = 'nativeauthenticator.NativeAuthenticator'
c.Authenticator.admin_users = {'admin'}
# NOTE(review): allow_all lets any successfully authenticated user in;
# confirm this is intended for an Internet-facing hub.
c.Authenticator.allow_all = True

# --- Spawner ---------------------------------------------------------------
c.JupyterHub.spawner_class = 'simple'
c.Spawner.cmd = ['jupyterhub-singleuser']
c.Spawner.default_url = '/lab'

# --- Persistent state ------------------------------------------------------
c.JupyterHub.cookie_secret_file = '/srv/jupyterhub/jupyterhub_cookie_secret'
# Absolute SQLite URL (four slashes = "sqlite://" + "/" + absolute path).
# With three slashes the path is relative to the process working directory,
# so the database location silently depended on where the hub was started.
c.JupyterHub.db_url = 'sqlite:////srv/jupyterhub/jupyterhub.sqlite'

# --- URL layout ------------------------------------------------------------
c.JupyterHub.base_url = '/'

# --- HTTP response headers -------------------------------------------------
# Content-Security-Policy sent with hub responses: pages may only be framed
# by the same origin.
c.JupyterHub.tornado_settings = {
    'headers': {
        'Content-Security-Policy': "frame-ancestors 'self'",
    },
}

# Keep user servers running when the user logs out.
c.JupyterHub.shutdown_on_logout = False
|
||||
27
vre/jupyterhub/singleuser/Dockerfile
Normal file
27
vre/jupyterhub/singleuser/Dockerfile
Normal file
@@ -0,0 +1,27 @@
|
||||
# Single-user notebook image for JupyterHub: JupyterLab, common data-science
# libraries and the InfluxDB client.
#
# The base image can be pinned at build time without editing this file:
#   docker build --build-arg BASE_IMAGE=<registry>/<image>:<tag> .
# NOTE(review): the default still tracks `:latest`, which is not
# reproducible; pin a dated tag once one has been validated.
ARG BASE_IMAGE=jupyter/scipy-notebook:latest
FROM ${BASE_IMAGE}

USER root

# Extra packages for smart-city data analysis. Each package is listed once
# (sqlalchemy used to appear twice). Because pip runs as root here, ownership
# of the conda tree and of the user's home is repaired afterwards with the
# fix-permissions helper shipped in the Jupyter Docker Stacks images, so the
# notebook user can still manage packages at runtime.
RUN pip install --no-cache-dir \
        folium \
        influxdb-client \
        ipywidgets \
        jupyterlab-git \
        matplotlib \
        numpy \
        pandas \
        plotly \
        psycopg2-binary \
        requests \
        sqlalchemy \
    && fix-permissions "${CONDA_DIR}" \
    && fix-permissions "/home/${NB_USER}"

# Switch back to the unprivileged notebook user.
USER ${NB_UID}

EXPOSE 8888

# NOTE(review): the jupyterhub package inside this image should match the
# hub's version (the hub image is built from jupyterhub/jupyterhub:5.3.0).
CMD ["jupyterhub-singleuser"]
|
||||
15
vre/jupyterhub/start.sh
Normal file
15
vre/jupyterhub/start.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
# Start JupyterHub with its state kept in a single data directory.
#
# The script previously tested for the database under /data while every
# other path pointed at /srv/jupyterhub; all paths now derive from DATA_DIR.
set -euo pipefail

DATA_DIR=/srv/jupyterhub
CONFIG_FILE="${DATA_DIR}/jupyterhub_config.py"
DB_FILE="${DATA_DIR}/jupyterhub.sqlite"

# Ensure the data directory exists.
mkdir -p "${DATA_DIR}"

# Informational only: this script does not create the database itself.
if [ ! -f "${DB_FILE}" ]; then
    echo "Initializing JupyterHub database..."
fi

# NOTE(review): nothing in this script reads JUPYTERHUB_DATA; it is exported
# unchanged for whatever consumes it downstream.
export JUPYTERHUB_DATA="${DATA_DIR}"

# Run from the data directory so cwd-relative paths in the config (for
# example a relative SQLite URL) resolve inside it.
cd "${DATA_DIR}"

exec jupyterhub -f "${CONFIG_FILE}"
|
||||
4
vre/zeppelin/.env
Normal file
4
vre/zeppelin/.env
Normal file
@@ -0,0 +1,4 @@
|
||||
# Zeppelin settings kept next to vre/zeppelin/docker-compose.yml.
# NOTE(review): that compose file does not reference these variables via
# ${...} or env_file, so they may have no effect on the container; confirm
# how this file is meant to be consumed.

# Paths inside the container
ZEPPELIN_NOTEBOOK_DIR=/notebook
ZEPPELIN_LOG_DIR=/logs

# HTTP port
ZEPPELIN_PORT=8080

# JVM options
# NOTE(review): the sibling docker-compose.yml sets ZEPPELIN_MEM=-Xmx2g in its
# `environment:` section; decide which value is authoritative.
ZEPPELIN_MEM=-Xmx1024m
|
||||
48
vre/zeppelin/docker-compose.yml
Normal file
48
vre/zeppelin/docker-compose.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
# Standalone Apache Zeppelin service (same image and Traefik routing as the
# combined stack in ../docker-compose.yml).

version: "3.9"

services:
  zeppelin:
    image: apache/zeppelin:0.11.2
    container_name: zeppelin
    restart: unless-stopped
    networks:
      - smartcity-shared
    user: root
    ports:
      # Published on the loopback interface only; external traffic is
      # expected to arrive through Traefik.
      - "127.0.0.1:8080:8080"
    environment:
      - ZEPPELIN_ADDR=0.0.0.0
      - ZEPPELIN_PORT=8080
      - ZEPPELIN_NOTEBOOK_DIR=/notebook
      - ZEPPELIN_MEM=-Xmx2g
      - ZEPPELIN_INTP_MEM=-Xmx2g
      - ZEPPELIN_LOG_DIR=/logs
      - ZEPPELIN_SSL=false
    volumes:
      # Same configuration mounts as the combined stack. Without shiro.ini
      # this Traefik-exposed instance would start without the authentication
      # rules kept in this directory. Mounted read-only.
      - ./zeppelin-site.xml:/opt/zeppelin/conf/zeppelin-site.xml:ro
      - ./shiro.ini:/opt/zeppelin/conf/shiro.ini:ro
      - zeppelin_notebooks:/notebook
      - zeppelin_logs:/logs
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.zeppelin.rule=Host(`zeppelin.digitribe.fr`)"
      - "traefik.http.routers.zeppelin.entrypoints=websecure"
      - "traefik.http.routers.zeppelin.tls.certresolver=letsencrypt"
      - "traefik.http.routers.zeppelin.service=zeppelin-svc"
      - "traefik.http.services.zeppelin-svc.loadbalancer.server.port=8080"
      - "traefik.http.services.zeppelin-svc.loadbalancer.passhostheader=true"
      - "traefik.docker.network=smartcity-shared"
    healthcheck:
      # /api/version is declared anonymous in shiro.ini, so the probe needs
      # no credentials.
      test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s

networks:
  # Declared external, so it must exist before `up`.
  smartcity-shared:
    external: true

volumes:
  zeppelin_notebooks:
    driver: local
  zeppelin_logs:
    driver: local
|
||||
20
vre/zeppelin/shiro.ini
Normal file
20
vre/zeppelin/shiro.ini
Normal file
@@ -0,0 +1,20 @@
|
||||
# Apache Shiro configuration for Zeppelin: one local admin account, with
# everything except two status endpoints behind authentication.

[main]
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
securityManager.sessionManager = $sessionManager

sessionManager.sessionIdCookieEnabled = true
# Keep the session id out of URLs, where it would leak through logs,
# browser history and Referer headers; the cookie above carries it instead.
sessionManager.sessionIdUrlRewritingEnabled = false

# Zeppelin serves its login endpoint under the REST API prefix.
shiro.loginUrl = /api/login

[users]
# SECURITY NOTE(review): this password is stored in clear text and has been
# committed to version control. Rotate it and replace it with a hashed entry
# or an external realm before exposing the service.
admin = Digitribe972, admin

[roles]
admin = *

[urls]
# Unauthenticated endpoints (the container healthcheck calls /api/version).
/api/version = anon
/api/cluster/address = anon
# Everything else requires a logged-in user.
/** = authc
|
||||
32
vre/zeppelin/zeppelin-site.xml
Normal file
32
vre/zeppelin/zeppelin-site.xml
Normal file
@@ -0,0 +1,32 @@
|
||||
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Zeppelin server settings for the Smart City VRE. -->
<configuration>

  <!-- Listener: every interface, port 8080, served from the root path. -->
  <property>
    <name>zeppelin.server.addr</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>zeppelin.server.port</name>
    <value>8080</value>
  </property>
  <!-- NOTE(review): -1 presumably means "no SSL listener"; confirm. -->
  <property>
    <name>zeppelin.server.ssl.port</name>
    <value>-1</value>
  </property>
  <property>
    <name>zeppelin.server.context.path</name>
    <value>/</value>
  </property>

  <!-- Strict-Transport-Security value: about two years, sub-domains included. -->
  <property>
    <name>zeppelin.server.strict.transport</name>
    <value>max-age=63115200; includeSubDomains</value>
  </property>

  <!-- Upper bound for a websocket text message (same value as the
       ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE variable in the combined
       compose file). -->
  <property>
    <name>zeppelin.websocket.max.text.message.size</name>
    <value>10240000</value>
  </property>

  <property>
    <name>zeppelin.server.authorization.header.clear</name>
    <value>true</value>
  </property>

</configuration>
|
||||
Reference in New Issue
Block a user