diff --git a/vre/docker-compose.yml b/vre/docker-compose.yml
new file mode 100644
index 00000000..fc8a58ce
--- /dev/null
+++ b/vre/docker-compose.yml
@@ -0,0 +1,80 @@
+# Combined VRE (Virtual Research Environment) Stack
+# JupyterHub + Apache Zeppelin behind Traefik
+# Usage: docker compose -f vre/docker-compose.yml up -d
+
+version: "3.9"
+
+services:
+  jupyterhub:
+    build:
+      context: ./jupyterhub
+      dockerfile: Dockerfile
+    image: smartcity-jupyterhub:latest
+    container_name: jupyterhub
+    restart: unless-stopped
+    networks:
+      - smartcity-shared
+    volumes:
+      - jupyterhub_data:/srv/jupyterhub
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
+      - "traefik.http.routers.jupyterhub.entrypoints=websecure"
+      - "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
+      - "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
+      - "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
+      - "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
+      - "traefik.docker.network=smartcity-shared"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+
+  zeppelin:
+    image: apache/zeppelin:0.11.2
+    container_name: zeppelin
+    restart: unless-stopped
+    networks:
+      - smartcity-shared
+    volumes:
+      - ./zeppelin/zeppelin-site.xml:/opt/zeppelin/conf/zeppelin-site.xml
+      - ./zeppelin/shiro.ini:/opt/zeppelin/conf/shiro.ini
+      - zeppelin_notebooks:/notebook
+      - zeppelin_logs:/logs
+    environment:
+      - ZEPPELIN_ADDR=0.0.0.0
+      - ZEPPELIN_PORT=8080
+      - ZEPPELIN_NOTEBOOK_DIR=/notebook
+      - ZEPPELIN_MEM=-Xmx2g
+      - ZEPPELIN_INTP_MEM=-Xmx2g
+      - ZEPPELIN_LOG_DIR=/logs
+      - ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE=10240000
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.zeppelin.rule=Host(`zeppelin.digitribe.fr`)"
+      - "traefik.http.routers.zeppelin.entrypoints=websecure"
+      - "traefik.http.routers.zeppelin.tls.certresolver=letsencrypt"
+      - "traefik.http.routers.zeppelin.service=zeppelin-svc"
+      - "traefik.http.services.zeppelin-svc.loadbalancer.server.port=8080"
+      - "traefik.http.services.zeppelin-svc.loadbalancer.passhostheader=true"
+      - "traefik.docker.network=smartcity-shared"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 120s
+
+networks:
+  smartcity-shared:
+    external: true
+
+volumes:
+  jupyterhub_data:
+    driver: local
+  zeppelin_notebooks:
+    driver: local
+  zeppelin_logs:
+    driver: local
diff --git a/vre/jupyterhub/Dockerfile b/vre/jupyterhub/Dockerfile
new file mode 100644
index 00000000..e54f11a0
--- /dev/null
+++ b/vre/jupyterhub/Dockerfile
@@ -0,0 +1,28 @@
+# Dockerfile for JupyterHub with authenticator
+FROM jupyterhub/jupyterhub:5.3.0
+
+USER root
+
+RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
+
+# NOTE(review): nativeauthenticator tracks the moving `main` branch, so image
+# builds are not reproducible; pin a release tag or commit SHA once one is chosen.
+RUN pip install --no-cache-dir \
+    git+https://github.com/jupyterhub/nativeauthenticator.git@main \
+    oauthenticator \
+    jupyterhub-idle-culler \
+    jupyterlab \
+    notebook
+
+# Create the directory structure JupyterHub expects for DB
+# JupyterHub joins data_files_path + dirname(db_path), so we create the composed path
+RUN mkdir -p /srv/jupyterhub/srv/jupyterhub && \
+    chown -R 1000:1000 /srv/jupyterhub
+
+COPY jupyterhub_config.py /srv/jupyterhub/jupyterhub_config.py
+
+WORKDIR /srv/jupyterhub
+
+EXPOSE 8000
+
+CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"]
diff --git a/vre/jupyterhub/docker-compose.yml b/vre/jupyterhub/docker-compose.yml
new file mode 100644
index 00000000..01ef7a3e
--- /dev/null
+++ b/vre/jupyterhub/docker-compose.yml
@@ -0,0 +1,45 @@
+version: "3.9"
+
+services:
+  jupyterhub:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: smartcity-jupyterhub:latest
+    container_name: jupyterhub
+    restart: unless-stopped
+    networks:
+      - smartcity-shared
+    # Run as root to avoid UID issues on the data volume. The 'simple' spawner
+    # set in jupyterhub_config.py does not switch users, so single-user servers
+    # run as this same user.
+    user: root
+    environment:
+      # Secret: supplied from the host environment or an untracked .env file,
+      # never committed. Compose aborts with this message when it is missing.
+      - JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY:?JUPYTERHUB_CRYPT_KEY must be set}
+    volumes:
+      - jupyterhub_data:/srv/jupyterhub
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.jupyterhub.rule=Host(`jupyter.digitribe.fr`)"
+      - "traefik.http.routers.jupyterhub.entrypoints=websecure"
+      - "traefik.http.routers.jupyterhub.tls.certresolver=letsencrypt"
+      - "traefik.http.routers.jupyterhub.service=jupyterhub-svc"
+      - "traefik.http.services.jupyterhub-svc.loadbalancer.server.port=8000"
+      - "traefik.http.services.jupyterhub-svc.loadbalancer.passhostheader=true"
+      - "traefik.docker.network=smartcity-shared"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/hub/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+
+networks:
+  smartcity-shared:
+    external: true
+
+volumes:
+  jupyterhub_data:
+    driver: local
diff --git a/vre/jupyterhub/jupyterhub_config.py b/vre/jupyterhub/jupyterhub_config.py
new file mode 100644
index 00000000..424b5745
--- /dev/null
+++ b/vre/jupyterhub/jupyterhub_config.py
@@ -0,0 +1,31 @@
+# JupyterHub configuration for Smart City VRE
+
+c.JupyterHub.ip = '0.0.0.0'
+c.JupyterHub.port = 8000
+c.JupyterHub.hub_ip = '0.0.0.0'
+
+# Authenticator: Native (username/password signup + login)
+c.JupyterHub.authenticator_class = 'nativeauthenticator.NativeAuthenticator'
+c.Authenticator.admin_users = {'admin'}
+c.Authenticator.allow_all = True
+
+# Spawner
+c.JupyterHub.spawner_class = 'simple'
+c.Spawner.cmd = ['jupyterhub-singleuser']
+c.Spawner.default_url = '/lab'
+
+# Database and cookies
+c.JupyterHub.cookie_secret_file = '/srv/jupyterhub/jupyterhub_cookie_secret'
+c.JupyterHub.db_url = 'sqlite:///jupyterhub.sqlite'
+
+# Base URL
+c.JupyterHub.base_url = '/'
+
+# Response headers: restrict framing of Hub pages to same-origin pages (CSP)
+c.JupyterHub.tornado_settings = {
+    'headers': {
+        'Content-Security-Policy': "frame-ancestors 'self'"
+    }
+}
+
+c.JupyterHub.shutdown_on_logout = False
diff --git a/vre/jupyterhub/singleuser/Dockerfile b/vre/jupyterhub/singleuser/Dockerfile
new file mode 100644
index 00000000..82fe0e15
--- /dev/null
+++ b/vre/jupyterhub/singleuser/Dockerfile
@@ -0,0 +1,26 @@
+# Dockerfile for JupyterHub single-user notebooks
+# Includes JupyterLab, common data science libs, and InfluxDB client
+FROM jupyter/scipy-notebook:latest
+
+USER root
+
+# Install additional packages for smart city data analysis
+RUN pip install --no-cache-dir \
+    influxdb-client \
+    pandas \
+    numpy \
+    matplotlib \
+    plotly \
+    folium \
+    requests \
+    sqlalchemy \
+    psycopg2-binary \
+    ipywidgets \
+    jupyterlab-git
+
+# Switch back to notebook user
+USER ${NB_UID}
+
+EXPOSE 8888
+
+CMD ["jupyterhub-singleuser"]
diff --git a/vre/jupyterhub/start.sh b/vre/jupyterhub/start.sh
new file mode 100644
index 00000000..606c081e
--- /dev/null
+++ b/vre/jupyterhub/start.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -e
+
+# Ensure the data directory exists
+mkdir -p /srv/jupyterhub
+
+# jupyterhub_config.py uses a relative SQLite URL, so run from the data
+# directory to keep the database inside it
+cd /srv/jupyterhub
+
+# JupyterHub creates the database itself on first start; only log that here
+if [ ! -f /srv/jupyterhub/jupyterhub.sqlite ]; then
+    echo "Initializing JupyterHub database..."
+fi
+
+# Run JupyterHub with its state in /srv/jupyterhub
+export JUPYTERHUB_DATA=/srv/jupyterhub
+
+exec jupyterhub -f /srv/jupyterhub/jupyterhub_config.py
diff --git a/vre/zeppelin/.env b/vre/zeppelin/.env
new file mode 100644
index 00000000..e7864aa8
--- /dev/null
+++ b/vre/zeppelin/.env
@@ -0,0 +1,4 @@
+ZEPPELIN_PORT=8080
+ZEPPELIN_NOTEBOOK_DIR=/notebook
+ZEPPELIN_LOG_DIR=/logs
+ZEPPELIN_MEM=-Xmx1024m
diff --git a/vre/zeppelin/docker-compose.yml b/vre/zeppelin/docker-compose.yml
new file mode 100644
index 00000000..f96f364e
--- /dev/null
+++ b/vre/zeppelin/docker-compose.yml
@@ -0,0 +1,52 @@
+version: "3.9"
+
+services:
+  zeppelin:
+    image: apache/zeppelin:0.11.2
+    container_name: zeppelin
+    restart: unless-stopped
+    networks:
+      - smartcity-shared
+    user: root
+    ports:
+      - "127.0.0.1:8080:8080"
+    environment:
+      - ZEPPELIN_ADDR=0.0.0.0
+      - ZEPPELIN_PORT=8080
+      - ZEPPELIN_NOTEBOOK_DIR=/notebook
+      - ZEPPELIN_MEM=-Xmx2g
+      - ZEPPELIN_INTP_MEM=-Xmx2g
+      - ZEPPELIN_LOG_DIR=/logs
+      - ZEPPELIN_SSL=false
+    volumes:
+      # Same config mounts as the combined stack; without shiro.ini Zeppelin
+      # would serve notebooks without authentication
+      - ./zeppelin-site.xml:/opt/zeppelin/conf/zeppelin-site.xml
+      - ./shiro.ini:/opt/zeppelin/conf/shiro.ini
+      - zeppelin_notebooks:/notebook
+      - zeppelin_logs:/logs
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.zeppelin.rule=Host(`zeppelin.digitribe.fr`)"
+      - "traefik.http.routers.zeppelin.entrypoints=websecure"
+      - "traefik.http.routers.zeppelin.tls.certresolver=letsencrypt"
+      - "traefik.http.routers.zeppelin.service=zeppelin-svc"
+      - "traefik.http.services.zeppelin-svc.loadbalancer.server.port=8080"
+      - "traefik.http.services.zeppelin-svc.loadbalancer.passhostheader=true"
+      - "traefik.docker.network=smartcity-shared"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 120s
+
+networks:
+  smartcity-shared:
+    external: true
+
+volumes:
+  zeppelin_notebooks:
+    driver: local
+  zeppelin_logs:
+    driver: local
diff --git a/vre/zeppelin/shiro.ini b/vre/zeppelin/shiro.ini
new file mode 100644
index 00000000..de500d0e
--- /dev/null
+++ b/vre/zeppelin/shiro.ini
@@ -0,0 +1,24 @@
+[main]
+sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
+securityManager.sessionManager = $sessionManager
+
+sessionManager.sessionIdCookieEnabled = true
+# Keep session IDs out of URLs; the cookie above carries the session
+sessionManager.sessionIdUrlRewritingEnabled = false
+
+# Zeppelin's login endpoint lives under /api
+shiro.loginUrl = /api/login
+
+[users]
+# NOTE(review): plaintext password committed to the repository - rotate it and
+# store a hashed value (or provision this file at deploy time)
+admin = Digitribe972, admin
+
+[roles]
+admin = *
+
+[urls]
+/api/version = anon
+/api/cluster/address = anon
+/login = authc
+/** = authc
diff --git a/vre/zeppelin/zeppelin-site.xml b/vre/zeppelin/zeppelin-site.xml
new file mode 100644
index 00000000..b6fb7e69
--- /dev/null
+++ b/vre/zeppelin/zeppelin-site.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- NOTE(review): element markup was missing from the reviewed text and has been restored; confirm against the committed file -->
+<configuration>
+  <property>
+    <name>zeppelin.server.addr</name>
+    <value>0.0.0.0</value>
+  </property>
+  <property>
+    <name>zeppelin.server.port</name>
+    <value>8080</value>
+  </property>
+  <property>
+    <name>zeppelin.server.ssl.port</name>
+    <value>-1</value>
+  </property>
+  <property>
+    <name>zeppelin.server.context.path</name>
+    <value>/</value>
+  </property>
+  <property>
+    <name>zeppelin.server.strict.transport</name>
+    <value>max-age=63115200; includeSubDomains</value>
+  </property>
+  <property>
+    <name>zeppelin.websocket.max.text.message.size</name>
+    <value>10240000</value>
+  </property>
+  <property>
+    <name>zeppelin.server.authorization.header.clear</name>
+    <value>true</value>
+  </property>
+</configuration>