chore: create script for selfhosted reflector (#866)

* self hosted with self gpu

* add optional ollama model

* garage ports

* exposes ports and changes curl

* custom domain

* try to fix worker

* build locally

* documentation

* docs format

* precommit
Juan Diego García
2026-02-19 15:11:45 -05:00
committed by GitHub
parent a8ad237d85
commit cdd974b935
11 changed files with 2313 additions and 1 deletion

2 .gitignore vendored

@@ -23,3 +23,5 @@ www/.env.production
docs/pnpm-lock.yaml
.secrets
opencode.json
vibedocs/

25 Caddyfile.selfhosted.example Normal file

@@ -0,0 +1,25 @@
# Reflector self-hosted production — HTTPS via Caddy reverse proxy
# Copy to Caddyfile: cp Caddyfile.selfhosted.example Caddyfile
# Run: ./scripts/setup-selfhosted.sh --ollama-gpu --garage --caddy
#
# DOMAIN defaults to localhost (self-signed cert).
# Set to your real domain for automatic Let's Encrypt:
# export DOMAIN=reflector.example.com
#
# TLS_MODE defaults to "internal" (self-signed).
# Set to "" for automatic Let's Encrypt (requires real domain + ports 80/443 open):
# export TLS_MODE=""
{$DOMAIN:localhost} {
    tls {$TLS_MODE:internal}
    handle /v1/* {
        reverse_proxy server:1250
    }
    handle /health {
        reverse_proxy server:1250
    }
    handle {
        reverse_proxy web:3000
    }
}

315 docker-compose.selfhosted.yml Normal file

@@ -0,0 +1,315 @@
# Self-hosted production Docker Compose — single file for everything.
#
# Usage: ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
# or: docker compose -f docker-compose.selfhosted.yml --profile gpu [--profile ollama-gpu] [--profile garage] [--profile caddy] up -d
#
# Specialized models (pick ONE — required):
# --profile gpu NVIDIA GPU for transcription/diarization/translation
# --profile cpu CPU-only for transcription/diarization/translation
#
# Local LLM (optional — for summarization/topics):
# --profile ollama-gpu Local Ollama with NVIDIA GPU
# --profile ollama-cpu Local Ollama on CPU only
#
# Other optional services:
# --profile garage Local S3-compatible storage (Garage)
# --profile caddy Reverse proxy with auto-SSL
#
# Prerequisites:
# 1. Run ./scripts/setup-selfhosted.sh to generate env files and secrets
# 2. Or manually create server/.env and www/.env from the .selfhosted.example templates
services:
  # ===========================================================
  # Always-on core services (no profile required)
  # ===========================================================
  server:
    build:
      context: ./server
      dockerfile: Dockerfile
    image: monadicalsas/reflector-backend:latest
    restart: unless-stopped
    ports:
      - "127.0.0.1:1250:1250"
      - "50000-50100:50000-50100/udp"
    env_file:
      - ./server/.env
    environment:
      ENTRYPOINT: server
      # Docker-internal overrides (always correct inside compose network)
      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
      REDIS_HOST: redis
      CELERY_BROKER_URL: redis://redis:6379/1
      CELERY_RESULT_BACKEND: redis://redis:6379/1
      HATCHET_CLIENT_SERVER_URL: ""
      HATCHET_CLIENT_HOST_PORT: ""
      # Specialized models via gpu/cpu container (aliased as "transcription")
      TRANSCRIPT_BACKEND: modal
      TRANSCRIPT_URL: http://transcription:8000
      TRANSCRIPT_MODAL_API_KEY: selfhosted
      DIARIZATION_BACKEND: modal
      DIARIZATION_URL: http://transcription:8000
      TRANSLATION_BACKEND: modal
      TRANSLATE_URL: http://transcription:8000
      # WebRTC: fixed UDP port range for ICE candidates (mapped above)
      WEBRTC_PORT_RANGE: "50000-50100"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_started
    volumes:
      - server_data:/app/data
  worker:
    build:
      context: ./server
      dockerfile: Dockerfile
    image: monadicalsas/reflector-backend:latest
    restart: unless-stopped
    env_file:
      - ./server/.env
    environment:
      ENTRYPOINT: worker
      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
      REDIS_HOST: redis
      CELERY_BROKER_URL: redis://redis:6379/1
      CELERY_RESULT_BACKEND: redis://redis:6379/1
      HATCHET_CLIENT_SERVER_URL: ""
      HATCHET_CLIENT_HOST_PORT: ""
      TRANSCRIPT_BACKEND: modal
      TRANSCRIPT_URL: http://transcription:8000
      TRANSCRIPT_MODAL_API_KEY: selfhosted
      DIARIZATION_BACKEND: modal
      DIARIZATION_URL: http://transcription:8000
      TRANSLATION_BACKEND: modal
      TRANSLATE_URL: http://transcription:8000
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_started
    volumes:
      - server_data:/app/data
  beat:
    build:
      context: ./server
      dockerfile: Dockerfile
    image: monadicalsas/reflector-backend:latest
    restart: unless-stopped
    env_file:
      - ./server/.env
    environment:
      ENTRYPOINT: beat
      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
      REDIS_HOST: redis
      CELERY_BROKER_URL: redis://redis:6379/1
      CELERY_RESULT_BACKEND: redis://redis:6379/1
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_started
  web:
    build:
      context: ./www
      dockerfile: Dockerfile
    image: monadicalsas/reflector-frontend:latest
    restart: unless-stopped
    ports:
      - "127.0.0.1:3000:3000"
    env_file:
      - ./www/.env
    environment:
      NODE_ENV: production
      SERVER_API_URL: http://server:1250
      KV_URL: redis://redis:6379
      KV_USE_TLS: "false"
      AUTHENTIK_ISSUER: ""
      AUTHENTIK_REFRESH_TOKEN_URL: ""
    depends_on:
      - redis
  redis:
    image: redis:7.2-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 3s
      retries: 3
    volumes:
      - redis_data:/data
  postgres:
    image: postgres:17-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: reflector
      POSTGRES_PASSWORD: reflector
      POSTGRES_DB: reflector
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U reflector"]
      interval: 30s
      timeout: 3s
      retries: 3
  # ===========================================================
  # Specialized model containers (transcription, diarization, translation)
  # Both gpu and cpu get alias "transcription" so server config never changes.
  # ===========================================================
  gpu:
    build:
      context: ./gpu/self_hosted
      dockerfile: Dockerfile
    profiles: [gpu]
    restart: unless-stopped
    ports:
      - "127.0.0.1:8000:8000"
    environment:
      HF_TOKEN: ${HF_TOKEN:-}
    volumes:
      - gpu_cache:/root/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 120s
    networks:
      default:
        aliases:
          - transcription
  cpu:
    build:
      context: ./gpu/self_hosted
      dockerfile: Dockerfile.cpu
    profiles: [cpu]
    restart: unless-stopped
    ports:
      - "127.0.0.1:8000:8000"
    environment:
      HF_TOKEN: ${HF_TOKEN:-}
    volumes:
      - gpu_cache:/root/.cache
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 120s
    networks:
      default:
        aliases:
          - transcription
  # ===========================================================
  # Ollama — local LLM for summarization & topic detection
  # Only started with --ollama-gpu or --ollama-cpu modes.
  # ===========================================================
  ollama:
    image: ollama/ollama:latest
    profiles: [ollama-gpu]
    restart: unless-stopped
    ports:
      - "127.0.0.1:11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 10s
      timeout: 5s
      retries: 5
  ollama-cpu:
    image: ollama/ollama:latest
    profiles: [ollama-cpu]
    restart: unless-stopped
    ports:
      - "127.0.0.1:11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 10s
      timeout: 5s
      retries: 5
  # ===========================================================
  # Garage — local S3-compatible object storage (optional)
  # ===========================================================
  garage:
    image: dxflrs/garage:v1.1.0
    profiles: [garage]
    restart: unless-stopped
    ports:
      - "3900:3900" # S3 API
      - "3903:3903" # Admin API
    volumes:
      - garage_data:/var/lib/garage/data
      - garage_meta:/var/lib/garage/meta
      - ./data/garage.toml:/etc/garage.toml:ro
    healthcheck:
      test: ["CMD", "/garage", "stats"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 5s
  # ===========================================================
  # Caddy — reverse proxy with automatic SSL (optional)
  # Maps 80:80 and 443:443 — the only ports in the stack intended to be
  # publicly reachable (Garage's 3900/3903 and the WebRTC UDP range also
  # bind to the host; firewall them as needed).
  # ===========================================================
  caddy:
    image: caddy:2-alpine
    profiles: [caddy]
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy_data:/data
      - caddy_config:/config
    depends_on:
      - web
      - server
volumes:
  postgres_data:
  redis_data:
  server_data:
  gpu_cache:
  garage_data:
  garage_meta:
  ollama_data:
  caddy_data:
  caddy_config:
networks:
  default:
    attachable: true

468 docs/selfhosted-architecture.md Normal file

@@ -0,0 +1,468 @@
# How the Self-Hosted Setup Works
This document explains the internals of the self-hosted deployment: how the setup script orchestrates everything, how the Docker Compose profiles work, how services communicate, and how configuration flows from flags to running containers.
> For quick-start instructions and flag reference, see [Self-Hosted Production Deployment](selfhosted-production.md).
## Table of Contents
- [Overview](#overview)
- [The Setup Script Step by Step](#the-setup-script-step-by-step)
- [Docker Compose Profile System](#docker-compose-profile-system)
- [Service Architecture](#service-architecture)
- [Configuration Flow](#configuration-flow)
- [Storage Architecture](#storage-architecture)
- [SSL/TLS and Reverse Proxy](#ssltls-and-reverse-proxy)
- [Build vs Pull Workflow](#build-vs-pull-workflow)
- [Background Task Processing](#background-task-processing)
- [Network and Port Layout](#network-and-port-layout)
- [Diagnostics and Error Handling](#diagnostics-and-error-handling)
---
## Overview
The self-hosted deployment runs the entire Reflector platform on a single server using Docker Compose. A single bash script (`scripts/setup-selfhosted.sh`) handles all configuration and orchestration. The key design principles are:
- **One command to deploy** — flags select which features to enable
- **Idempotent** — safe to re-run without losing existing configuration
- **Profile-based composition** — Docker Compose profiles activate optional services
- **No external dependencies required** — with `--garage` and `--ollama-*`, everything runs locally
## The Setup Script Step by Step
The script (`scripts/setup-selfhosted.sh`) runs eight sequential steps, numbered 0 through 7. Here's what each one does and why.
### Step 0: Prerequisites
Validates the environment before doing anything:
- **Docker Compose V2** — checks `docker compose version` output (not the legacy `docker-compose`)
- **Docker daemon** — verifies `docker info` succeeds
- **NVIDIA GPU** — only checked when `--gpu` or `--ollama-gpu` is used; runs `nvidia-smi` to confirm drivers are installed
- **Compose file** — verifies `docker-compose.selfhosted.yml` exists at the expected path
If any check fails, the script exits with a clear error message and remediation steps.
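The same checks can be run by hand before invoking the script — a quick sketch (standard Docker and NVIDIA CLI usage, not script internals):
```bash
docker compose version                              # must report Compose V2
docker info > /dev/null && echo "daemon ok"         # daemon reachable
nvidia-smi --query-gpu=name --format=csv,noheader   # GPU modes only
ls docker-compose.selfhosted.yml                    # run from the repo root
```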
### Step 1: Generate Secrets
Creates cryptographic secrets needed by the backend and frontend:
- **`SECRET_KEY`** — used by the FastAPI server for session signing (64 hex chars via `openssl rand -hex 32`)
- **`NEXTAUTH_SECRET`** — used by Next.js NextAuth for JWT signing
Secrets are only generated if they don't already exist or are still set to the placeholder value `changeme`. This is what makes the script idempotent for secrets.
### Step 2: Generate `server/.env`
Creates or updates the backend environment file from `server/.env.selfhosted.example`. Sets:
- **Infrastructure** — PostgreSQL URL, Redis host, Celery broker (all pointing to Docker-internal hostnames)
- **Public URLs** — `BASE_URL` and `CORS_ORIGIN` computed from the domain (if `--domain`), IP (if detected on Linux), or `localhost`
- **WebRTC** — `WEBRTC_HOST` set to the server's LAN IP so browsers can reach UDP ICE candidates
- **Specialized models** — always points to `http://transcription:8000` (the Docker network alias shared by GPU and CPU containers)
- **HuggingFace token** — prompts interactively for pyannote model access; writes to root `.env` so Docker Compose can inject it into GPU/CPU containers
- **LLM** — if `--ollama-*` is used, configures `LLM_URL` pointing to the Ollama container. Otherwise, warns that the user needs to configure an external LLM
- **Public mode** — sets `PUBLIC_MODE=true` so the app is accessible without authentication by default
The script uses `env_set` for each variable, which either updates an existing line or appends a new one. This means re-running the script updates values in-place without duplicating keys.
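A minimal sketch of that update-or-append behavior (simplified from the real `env_set`, which also handles the macOS `sed -i ''` variant):
```bash
# Update the key in place if present, append otherwise — no duplicate keys
key=BASE_URL value=https://reflector.example.com file=server/.env
if grep -q "^${key}=" "$file"; then
  sed -i "s|^${key}=.*|${key}=${value}|" "$file"
else
  echo "${key}=${value}" >> "$file"
fi
```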
### Step 3: Generate `www/.env`
Creates or updates the frontend environment file from `www/.env.selfhosted.example`. Sets:
- **`SITE_URL` / `NEXTAUTH_URL` / `API_URL`** — all set to the same public-facing URL (with `https://` if Caddy is enabled)
- **`WEBSOCKET_URL`** — set to `auto`, which tells the frontend to derive the WebSocket URL from the page URL automatically
- **`SERVER_API_URL`** — always `http://server:1250` (Docker-internal, used for server-side rendering)
- **`KV_URL`** — Redis URL for Next.js caching
- **`FEATURE_REQUIRE_LOGIN`** — `false` by default (matches `PUBLIC_MODE=true` on the backend)
### Step 4: Storage Setup
Branches based on whether `--garage` was passed:
**With `--garage` (local S3):**
1. Generates `data/garage.toml` from a template, injecting a random RPC secret
2. Starts only the Garage container (`docker compose --profile garage up -d garage`)
3. Waits for the Garage admin API to respond on port 3903
4. Assigns the node to a storage layout (1GB capacity, zone `dc1`)
5. Creates the `reflector-media` bucket
6. Creates an access key named `reflector` and grants it read/write on the bucket
7. Writes all S3 credentials (`ENDPOINT_URL`, `BUCKET_NAME`, `REGION`, `ACCESS_KEY_ID`, `SECRET_ACCESS_KEY`) to `server/.env`
The Garage endpoint is `http://garage:3900` (Docker-internal), and the region is set to `garage` (arbitrary, Garage ignores it). The boto3 client uses path-style addressing when an endpoint URL is configured, which is required for S3-compatible services like Garage.
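To verify the result, the Garage admin CLI inside the container can list the layout, buckets, and keys (the `/garage` binary path matches the container's healthcheck; subcommands per Garage's standard CLI):
```bash
docker compose -f docker-compose.selfhosted.yml exec garage /garage status
docker compose -f docker-compose.selfhosted.yml exec garage /garage bucket list
docker compose -f docker-compose.selfhosted.yml exec garage /garage key list
```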
**Without `--garage` (external S3):**
1. Checks `server/.env` for the four required S3 variables
2. If any are missing, prompts interactively for each one
3. Optionally prompts for an endpoint URL (for MinIO, Backblaze B2, etc.)
### Step 5: Caddyfile
Only runs when `--caddy` or `--domain` is used. Generates a Caddy configuration file:
**With `--domain`:** Creates a named site block (`reflector.example.com { ... }`). Caddy automatically provisions a Let's Encrypt certificate for this domain. Requires DNS pointing to the server and ports 80/443 open.
**Without `--domain` (IP access):** Creates a catch-all `:443 { tls internal ... }` block. Caddy generates a self-signed certificate. Browsers will show a security warning.
Both configurations route:
- `/v1/*` and `/health` to the backend (`server:1250`)
- Everything else to the frontend (`web:3000`)
### Step 6: Start Services
1. **Always builds the GPU/CPU model image** — these are never prebuilt because they contain ML model download logic specific to the host's hardware
2. **With `--build`:** Also builds backend (server, worker, beat) and frontend (web) images from source
3. **Without `--build`:** Pulls prebuilt images from the Docker registry (`monadicalsas/reflector-backend:latest`, `monadicalsas/reflector-frontend:latest`)
4. **Starts all services** — `docker compose up -d` with the active profiles
5. **Quick sanity check** — after 3 seconds, checks for any containers that exited immediately
### Step 7: Health Checks
Waits for each service in order, with generous timeouts:
| Service | Check | Timeout | Notes |
|---------|-------|---------|-------|
| GPU/CPU models | `curl http://localhost:8000/docs` | 10 min (120 x 5s) | First start downloads ~1GB of models |
| Ollama | `curl http://localhost:11434/api/tags` | 3 min (60 x 3s) | Then pulls the selected model |
| Server API | `curl http://localhost:1250/health` | 7.5 min (90 x 5s) | First start runs database migrations |
| Frontend | `curl http://localhost:3000` | 1.5 min (30 x 3s) | Next.js build on first start |
| Caddy | `curl -k https://localhost` | Quick check | After other services are up |
If the server container exits during the health check, the script dumps diagnostics (container statuses + logs) before exiting.
After the Ollama health check passes, the script checks if the selected model is already pulled. If not, it runs `ollama pull <model>` inside the container.
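The manual equivalent, assuming the `--ollama-gpu` profile (service name `ollama`) and the default model:
```bash
# What the script effectively runs when the model is missing; use service
# name ollama-cpu with --ollama-cpu instead
docker compose -f docker-compose.selfhosted.yml --profile ollama-gpu \
  exec ollama ollama pull qwen2.5:14b
```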
---
## Docker Compose Profile System
The compose file (`docker-compose.selfhosted.yml`) uses Docker Compose profiles to make services optional. Only services whose profiles match the active `--profile` flags are started.
### Always-on Services (no profile)
These start regardless of which flags you pass:
| Service | Role | Image |
|---------|------|-------|
| `server` | FastAPI backend, API endpoints, WebRTC | `monadicalsas/reflector-backend:latest` |
| `worker` | Celery worker for background processing | Same image, `ENTRYPOINT=worker` |
| `beat` | Celery beat scheduler for periodic tasks | Same image, `ENTRYPOINT=beat` |
| `web` | Next.js frontend | `monadicalsas/reflector-frontend:latest` |
| `redis` | Message broker + caching | `redis:7.2-alpine` |
| `postgres` | Primary database | `postgres:17-alpine` |
### Profile-Based Services
| Profile | Service | Role |
|---------|---------|------|
| `gpu` | `gpu` | NVIDIA GPU-accelerated transcription/diarization/translation |
| `cpu` | `cpu` | CPU-only transcription/diarization/translation |
| `ollama-gpu` | `ollama` | Local Ollama LLM with GPU |
| `ollama-cpu` | `ollama-cpu` | Local Ollama LLM on CPU |
| `garage` | `garage` | Local S3-compatible object storage |
| `caddy` | `caddy` | Reverse proxy with SSL |
### The "transcription" Alias
Both the `gpu` and `cpu` services define a Docker network alias of `transcription`. This means the backend always connects to `http://transcription:8000` regardless of which profile is active. The alias is defined in the compose file's `networks.default.aliases` section.
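A quick way to confirm the alias resolves, assuming `curl` is available in the backend image (the debug commands later in this doc rely on the same):
```bash
# Resolves to whichever of gpu/cpu is running; success means the model API is up
docker compose -f docker-compose.selfhosted.yml exec server \
  curl -sf http://transcription:8000/docs > /dev/null && echo "alias OK"
```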
---
## Service Architecture
```
                   ┌─────────────┐
Internet ────────> │    Caddy    │ :80/:443 (profile: caddy)
                   └──────┬──────┘
              ┌───────────┼────────────┐
              │           │            │
              v           v            │
         ┌─────────┐  ┌─────────┐      │
         │   web   │  │ server  │      │
         │  :3000  │  │  :1250  │      │
         └─────────┘  └────┬────┘      │
                           │           │
                      ┌────┴────┐      │
                      │ worker  │      │
                      │  beat   │      │
                      └────┬────┘      │
                           │           │
          ┌────────────────┼───────────┤
          │                │           │
          v                v           v
  ┌───────────────┐   ┌─────────┐  ┌─────────┐
  │ transcription │   │postgres │  │  redis  │
  │   (gpu/cpu)   │   │  :5432  │  │  :6379  │
  │     :8000     │   └─────────┘  └─────────┘
  └───────────────┘
  ┌───────────────┐   ┌─────────┐
  │    ollama     │   │ garage  │
  │  (optional)   │   │(optional│
  │    :11434     │   │   S3)   │
  └───────────────┘   └─────────┘
```
### How Services Interact
1. **User request** hits Caddy (if enabled), which routes to `web` (pages) or `server` (API)
2. **`web`** renders pages server-side using `SERVER_API_URL=http://server:1250` and client-side using the public `API_URL`
3. **`server`** handles API requests, file uploads, WebRTC streaming. Dispatches background work to Celery via Redis
4. **`worker`** picks up Celery tasks (transcription pipelines, audio processing). Calls `transcription:8000` for ML inference and uploads results to S3 storage
5. **`beat`** schedules periodic tasks (cleanup, webhook retries) by pushing them onto the Celery queue
6. **`transcription` (gpu/cpu)** runs Whisper/Parakeet (transcription), Pyannote (diarization), and translation models. Stateless HTTP API
7. **`ollama`** provides an OpenAI-compatible API for summarization and topic detection. Called by the worker during post-processing
8. **`garage`** provides S3-compatible storage for audio files and processed results. Accessed by the worker via boto3
---
## Configuration Flow
Environment variables flow through multiple layers. Understanding this prevents confusion when debugging:
```
Flags (--gpu, --garage, etc.)
  ├── setup-selfhosted.sh interprets flags
  │     │
  │     ├── Writes server/.env  (backend config)
  │     ├── Writes www/.env     (frontend config)
  │     ├── Writes .env         (HF_TOKEN for compose interpolation)
  │     └── Writes Caddyfile    (proxy routes)
  └── docker-compose.selfhosted.yml reads:
        ├── env_file: ./server/.env  (loaded into server, worker, beat)
        ├── env_file: ./www/.env     (loaded into web)
        ├── .env                     (compose variable interpolation, e.g. ${HF_TOKEN})
        └── environment: {...}       (hardcoded overrides, always win over env_file)
```
### Precedence Rules
Docker Compose `environment:` keys **always override** `env_file:` values. This is by design — the compose file hardcodes infrastructure values that must be correct inside the Docker network (like `DATABASE_URL=postgresql+asyncpg://...@postgres:5432/...`) regardless of what's in `server/.env`.
The `server/.env` file is still useful for:
- Values not overridden in the compose file (LLM config, storage credentials, auth settings)
- Running the server outside Docker during development
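To see the merged result — `environment:` keys shadowing `env_file:` values — render the effective config (profile flags required so the service set matches your deployment):
```bash
# Fully-interpolated config for the server service as Compose will apply it
docker compose -f docker-compose.selfhosted.yml --profile gpu config server \
  | grep -A 20 'environment:'
```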
### The Three `.env` Files
| File | Used By | Contains |
|------|---------|----------|
| `server/.env` | server, worker, beat | Backend config: database, Redis, S3, LLM, auth, public URLs |
| `www/.env` | web | Frontend config: site URL, auth, feature flags |
| `.env` (root) | Docker Compose interpolation | Only `HF_TOKEN` — injected into GPU/CPU container env |
---
## Storage Architecture
All audio files and processing results are stored in S3-compatible object storage. The backend uses boto3 (via aioboto3) with automatic path-style addressing when a custom endpoint URL is configured.
### How Garage Works
Garage is a lightweight, self-hosted S3-compatible storage engine. In this deployment:
- Runs as a single-node cluster with 1GB capacity allocation
- Listens on port 3900 (S3 API) and 3903 (admin API)
- Data persists in Docker volumes (`garage_data`, `garage_meta`)
- Accessed by the worker at `http://garage:3900` (Docker-internal)
The setup script creates:
- A bucket called `reflector-media`
- An access key called `reflector` with read/write permissions on that bucket
### Path-Style vs Virtual-Hosted Addressing
AWS S3 uses virtual-hosted addressing by default (`bucket.s3.amazonaws.com`). S3-compatible services like Garage require path-style addressing (`endpoint/bucket`). The `AwsStorage` class detects this automatically: when `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL` is set, it configures boto3 with `addressing_style: "path"`.
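A hypothetical smoke test from the host with the AWS CLI (keys are placeholders; use the values the setup script wrote to `server/.env`):
```bash
export AWS_ACCESS_KEY_ID=GK...                       # from server/.env
export AWS_SECRET_ACCESS_KEY=...                     # from server/.env
aws configure set default.s3.addressing_style path   # Garage needs path-style
aws --endpoint-url http://localhost:3900 --region garage s3 ls s3://reflector-media
```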
---
## SSL/TLS and Reverse Proxy
### With `--domain` (Production)
Caddy automatically obtains and renews a Let's Encrypt certificate. Requirements:
- DNS A record pointing to the server
- Ports 80 (HTTP challenge) and 443 (HTTPS) open to the internet
The generated Caddyfile uses the domain as the site address, which triggers Caddy's automatic HTTPS.
### Without `--domain` (Development/LAN)
Caddy generates a self-signed certificate and listens on `:443` as a catch-all. Browsers will show a security warning that must be accepted manually.
### Without `--caddy` (BYO Proxy)
No ports are exposed to the internet. The services listen on `127.0.0.1` only:
- Frontend: `localhost:3000`
- Backend API: `localhost:1250`
You can point your own reverse proxy (nginx, Traefik, etc.) at these ports.
### WebRTC and UDP
The server exposes UDP ports 50000-50100 for WebRTC ICE candidates. The `WEBRTC_HOST` variable tells the server which IP to advertise in ICE candidates — this must be the server's actual IP address (not a domain), because WebRTC uses UDP which doesn't go through the HTTP reverse proxy.
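To check what will be advertised and that the UDP range is actually mapped (the `port` subcommand is standard Compose V2):
```bash
grep '^WEBRTC_HOST=' server/.env                     # IP advertised in ICE
docker compose -f docker-compose.selfhosted.yml \
  port --protocol udp server 50000                   # first port of the range
```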
---
## Build vs Pull Workflow
### Default (no `--build` flag)
```
GPU/CPU model image: Always built from source (./gpu/self_hosted/)
Backend image: Pulled from monadicalsas/reflector-backend:latest
Frontend image: Pulled from monadicalsas/reflector-frontend:latest
```
The GPU/CPU image is always built because it contains hardware-specific build steps and ML model download logic.
### With `--build`
```
GPU/CPU model image: Built from source (./gpu/self_hosted/)
Backend image: Built from source (./server/)
Frontend image: Built from source (./www/)
```
Use `--build` when:
- You've made local code changes
- The prebuilt registry images are outdated
- You want to verify the build works on your hardware
### Rebuilding Individual Services
```bash
# Rebuild just the backend
docker compose -f docker-compose.selfhosted.yml build server worker beat
# Rebuild just the frontend
docker compose -f docker-compose.selfhosted.yml build web
# Rebuild the GPU model container
docker compose -f docker-compose.selfhosted.yml build gpu
# Force a clean rebuild (no cache)
docker compose -f docker-compose.selfhosted.yml build --no-cache server
```
---
## Background Task Processing
### Celery Architecture
The backend uses Celery for all background work, with Redis as the message broker:
- **`worker`** — picks up tasks from the Redis queue and executes them
- **`beat`** — schedules periodic tasks (cron-like) by pushing them onto the queue
- **`Redis`** — acts as both message broker and result backend
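You can peek at the broker directly from the Redis container — a sketch assuming Celery's default queue name (`celery`) and the broker DB `1` configured above:
```bash
# Pending task count and broker keys
docker compose -f docker-compose.selfhosted.yml exec redis redis-cli -n 1 llen celery
docker compose -f docker-compose.selfhosted.yml exec redis redis-cli -n 1 keys 'celery*'
```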
### The Audio Processing Pipeline
When a file is uploaded, the worker runs a multi-step pipeline:
```
Upload → Extract Audio → Upload to S3
                 │
          ┌──────┼──────┐
          │      │      │
          v      v      v
     Transcribe Diarize Waveform
          │      │      │
          └──────┼──────┘
                 v
             Assemble
                 │
          ┌──────┼──────┐
          v      v      v
       Topics  Title  Summaries
                 │
                 v
               Done
```
Transcription, diarization, and waveform generation run in parallel. After assembly, topic detection, title generation, and summarization also run in parallel. Each step calls the appropriate service (transcription container for ML, Ollama/external LLM for text generation, S3 for storage).
### Event Loop Management
Each Celery task runs in its own `asyncio.run()` call, which creates a fresh event loop. The `asynctask` decorator in `server/reflector/asynctask.py` handles:
1. **Database connections** — resets the connection pool before each task (connections from a previous event loop would cause "Future attached to a different loop" errors)
2. **Redis connections** — resets the WebSocket manager singleton so Redis pub/sub reconnects on the current loop
3. **Cleanup** — disconnects the database and clears the context variable in the `finally` block
---
## Network and Port Layout
All services communicate over Docker's default bridge network. Only specific ports are exposed to the host:
| Port | Service | Binding | Purpose |
|------|---------|---------|---------|
| 80 | Caddy | `0.0.0.0:80` | HTTP (redirect to HTTPS / Let's Encrypt challenge) |
| 443 | Caddy | `0.0.0.0:443` | HTTPS (main entry point) |
| 1250 | Server | `127.0.0.1:1250` | Backend API (localhost only) |
| 3000 | Web | `127.0.0.1:3000` | Frontend (localhost only) |
| 3900 | Garage | `0.0.0.0:3900` | S3 API (for admin/debug access) |
| 3903 | Garage | `0.0.0.0:3903` | Garage admin API |
| 8000 | GPU/CPU | `127.0.0.1:8000` | ML model API (localhost only) |
| 11434 | Ollama | `127.0.0.1:11434` | Ollama API (localhost only) |
| 50000-50100/udp | Server | `0.0.0.0:50000-50100` | WebRTC ICE candidates |
Services bound to `127.0.0.1` are only accessible from the host itself (not from the network). Caddy is the only service exposed to the internet on standard HTTP/HTTPS ports.
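As an illustration (not something the setup script configures), a host firewall matching this layout might look like:
```bash
sudo ufw allow 80/tcp            # Caddy HTTP (Let's Encrypt challenge)
sudo ufw allow 443/tcp           # Caddy HTTPS
sudo ufw allow 50000:50100/udp   # WebRTC ICE
# Garage's 3900/3903 bind 0.0.0.0 — leave them blocked externally unless needed
```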
### Docker-Internal Hostnames
Inside the Docker network, services reach each other by their compose service name:
| Hostname | Resolves To |
|----------|-------------|
| `server` | Backend API container |
| `web` | Frontend container |
| `postgres` | PostgreSQL container |
| `redis` | Redis container |
| `transcription` | GPU or CPU container (network alias) |
| `ollama` / `ollama-cpu` | Ollama container |
| `garage` | Garage S3 container |
---
## Diagnostics and Error Handling
The setup script includes an `ERR` trap that automatically dumps diagnostics when any command fails:
1. Lists all container statuses
2. Shows the last 30 lines of logs for any stopped/exited containers
3. Shows the last 40 lines of the specific failing service
This means if something goes wrong during setup, you'll see the relevant logs immediately without having to run manual debug commands.
### Common Debug Commands
```bash
# Overall status
docker compose -f docker-compose.selfhosted.yml ps
# Logs for a specific service
docker compose -f docker-compose.selfhosted.yml logs server --tail 50
docker compose -f docker-compose.selfhosted.yml logs worker --tail 50
# Check environment inside a container
docker compose -f docker-compose.selfhosted.yml exec server env | grep TRANSCRIPT
# Health check from inside the network
docker compose -f docker-compose.selfhosted.yml exec server curl http://localhost:1250/health
# Check S3 storage connectivity
docker compose -f docker-compose.selfhosted.yml exec server curl http://garage:3900
# Database access
docker compose -f docker-compose.selfhosted.yml exec postgres psql -U reflector -c "SELECT id, status FROM transcript ORDER BY created_at DESC LIMIT 5;"
# List files in server data directory
docker compose -f docker-compose.selfhosted.yml exec server ls -la /app/data/
```

373 docs/selfhosted-production.md Normal file

@@ -0,0 +1,373 @@
# Self-Hosted Production Deployment
Deploy Reflector on a single server with everything running in Docker. Transcription, diarization, and translation use specialized ML models (Whisper/Parakeet, Pyannote); only summarization and topic detection require an LLM.
> For a detailed walkthrough of how the setup script and infrastructure work under the hood, see [How the Self-Hosted Setup Works](selfhosted-architecture.md).
## Prerequisites
### Hardware
- **With GPU**: Linux server with NVIDIA GPU (8GB+ VRAM recommended), 16GB+ RAM, 50GB+ disk
- **CPU-only**: 8+ cores, 32GB+ RAM (transcription is slower but works)
- Disk space for ML models (~2GB on first run) + audio storage
### Software
- Docker Engine 24+ with Compose V2
- NVIDIA drivers + `nvidia-container-toolkit` (GPU modes only)
- `curl`, `openssl` (usually pre-installed)
### Accounts & Credentials (depending on options)
**Always recommended:**
- **HuggingFace token** — For downloading pyannote speaker diarization models. Get one at https://huggingface.co/settings/tokens and accept the model licenses:
- https://huggingface.co/pyannote/speaker-diarization-3.1
- https://huggingface.co/pyannote/segmentation-3.0
- The setup script will prompt for this. If skipped, diarization falls back to a public model bundle (may be less reliable).
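To skip the prompt entirely, seed the token up front (`hf_xxx` is a placeholder); the script reads `HF_TOKEN` from the environment or the root `.env`:
```bash
export HF_TOKEN=hf_xxx
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
```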
**LLM for summarization & topic detection (pick one):**
- **With `--ollama-gpu` or `--ollama-cpu`**: Nothing extra — Ollama runs locally and pulls the model automatically
- **Without `--ollama-*`**: An OpenAI-compatible LLM API key and endpoint. Examples:
- OpenAI: `LLM_URL=https://api.openai.com/v1`, `LLM_API_KEY=sk-...`, `LLM_MODEL=gpt-4o-mini`
- Anthropic, Together, Groq, or any OpenAI-compatible API
- A self-managed vLLM or Ollama instance elsewhere on the network
**Object storage (pick one):**
- **With `--garage`**: Nothing extra — Garage (local S3-compatible storage) is auto-configured by the script
- **Without `--garage`**: S3-compatible storage credentials. The script will prompt for these, or you can pre-fill `server/.env`. Options include:
- **AWS S3**: Access Key ID, Secret Access Key, bucket name, region
- **MinIO**: Same credentials + `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://your-minio:9000`
- **Any S3-compatible provider** (Backblaze B2, Cloudflare R2, DigitalOcean Spaces, etc.): same fields + custom endpoint URL
**Optional add-ons (configure after initial setup):**
- **Daily.co** (live meeting rooms): Requires a Daily.co account (https://www.daily.co/), API key, subdomain, and an AWS S3 bucket + IAM Role for recording storage. See [Enabling Daily.co Live Rooms](#enabling-dailyco-live-rooms) below.
- **Authentik** (user authentication): Requires an Authentik instance with an OAuth2/OIDC application configured for Reflector. See [Enabling Authentication](#enabling-authentication-authentik) below.
## Quick Start
```bash
git clone https://github.com/Monadical-SAS/reflector.git
cd reflector
# GPU + local Ollama LLM + local Garage storage + Caddy SSL (with domain):
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com
# Same but without a domain (self-signed cert, access via IP):
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
# CPU-only (same, but slower):
./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
# Build from source instead of pulling prebuilt images:
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --build
```
That's it. The script generates env files and secrets, starts all containers, waits for health checks, and prints the URL.
## Specialized Models (Required)
Pick `--gpu` or `--cpu`. This determines how **transcription, diarization, and translation** run:
| Flag | What it does | Requires |
|------|-------------|----------|
| `--gpu` | NVIDIA GPU acceleration for ML models | NVIDIA GPU + drivers + `nvidia-container-toolkit` |
| `--cpu` | CPU-only (slower but works without GPU) | 8+ cores, 32GB+ RAM recommended |
## Local LLM (Optional)
Optionally add `--ollama-gpu` or `--ollama-cpu` for a **local Ollama instance** that handles summarization and topic detection. If omitted, configure an external OpenAI-compatible LLM in `server/.env`.
| Flag | What it does | Requires |
|------|-------------|----------|
| `--ollama-gpu` | Local Ollama with NVIDIA GPU acceleration | NVIDIA GPU |
| `--ollama-cpu` | Local Ollama on CPU only | Nothing extra |
| `--llm-model MODEL` | Choose which Ollama model to download (default: `qwen2.5:14b`) | `--ollama-gpu` or `--ollama-cpu` |
| *(omitted)* | User configures external LLM (OpenAI, Anthropic, etc.) | LLM API key |
### Choosing an Ollama model
The default model is `qwen2.5:14b` (~9GB download, good multilingual support and summary quality). Override with `--llm-model`:
```bash
# Default (qwen2.5:14b)
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
# Mistral — good balance of speed and quality (~4.1GB)
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy
# Phi-4 — 14B-class model with strong reasoning (~9.1GB)
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model phi4 --garage --caddy
# Llama 3.3 70B — best quality, needs 48GB+ RAM or GPU VRAM (~43GB)
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model llama3.3:70b --garage --caddy
# Gemma 2 9B (~5.4GB)
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model gemma2 --garage --caddy
# DeepSeek R1 8B — reasoning model, verbose but thorough summaries (~4.9GB)
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model deepseek-r1:8b --garage --caddy
```
Browse all available models at https://ollama.com/library.
### Recommended combinations
- **`--gpu --ollama-gpu`**: Best for servers with NVIDIA GPU. Fully self-contained, no external API keys needed.
- **`--cpu --ollama-cpu`**: No GPU available but want everything self-contained. Slower but works.
- **`--gpu --ollama-cpu`**: GPU for transcription, CPU for LLM. Saves GPU VRAM for ML models.
- **`--gpu`**: Have NVIDIA GPU but prefer a cloud LLM (faster/better summaries with GPT-4, Claude, etc.).
- **`--cpu`**: No GPU, prefer cloud LLM. Slowest transcription but best summary quality.
## Other Optional Flags
| Flag | What it does |
|------|-------------|
| `--garage` | Starts Garage (local S3-compatible storage). Auto-configures bucket, keys, and env vars. |
| `--caddy` | Starts Caddy reverse proxy on ports 80/443 with self-signed cert. |
| `--domain DOMAIN` | Use a real domain with Let's Encrypt auto-HTTPS (implies `--caddy`). Requires DNS A record pointing to this server and ports 80/443 open. |
| `--build` | Build backend (server, worker, beat) and frontend (web) Docker images from source instead of pulling prebuilt images from the registry. Useful for development or when running a version with local changes. |
Without `--garage`, you **must** provide S3-compatible credentials (the script will prompt interactively or you can pre-fill `server/.env`).
Without `--caddy` or `--domain`, no ports are exposed. Point your own reverse proxy at `web:3000` (frontend) and `server:1250` (API).
**Using a domain (recommended for production):** Point a DNS A record at your server's IP, then pass `--domain your.domain.com`. Caddy will automatically obtain and renew a Let's Encrypt certificate. Ports 80 and 443 must be open.
**Without a domain:** `--caddy` alone uses a self-signed certificate. Browsers will show a security warning that must be accepted.
## What the Script Does
1. **Prerequisites check** — Docker, NVIDIA GPU (if needed), compose file exists
2. **Generate secrets** — `SECRET_KEY`, `NEXTAUTH_SECRET` via `openssl rand`
3. **Generate `server/.env`** — From template, sets infrastructure defaults, configures LLM based on mode, enables `PUBLIC_MODE`
4. **Generate `www/.env`** — Auto-detects server IP, sets URLs
5. **Storage setup** — Either initializes Garage (bucket, keys, permissions) or prompts for external S3 credentials
6. **Caddyfile** — Generates domain-specific (Let's Encrypt) or IP-specific (self-signed) configuration
7. **Build & start** — Always builds GPU/CPU model image from source. With `--build`, also builds backend and frontend from source; otherwise pulls prebuilt images from the registry
8. **Health checks** — Waits for each service, pulls Ollama model if needed, warns about missing LLM config
> For a deeper dive into each step, see [How the Self-Hosted Setup Works](selfhosted-architecture.md).
## Configuration Reference
### Server Environment (`server/.env`)
| Variable | Description | Default |
|----------|-------------|---------|
| `DATABASE_URL` | PostgreSQL connection | Auto-set (Docker internal) |
| `REDIS_HOST` | Redis hostname | Auto-set (`redis`) |
| `SECRET_KEY` | App secret | Auto-generated |
| `AUTH_BACKEND` | Authentication method | `none` |
| `PUBLIC_MODE` | Allow unauthenticated access | `true` |
| `WEBRTC_HOST` | IP advertised in WebRTC ICE candidates | Auto-detected (server IP) |
| `TRANSCRIPT_URL` | Specialized model endpoint | `http://transcription:8000` |
| `LLM_URL` | OpenAI-compatible LLM endpoint | Auto-set for Ollama modes |
| `LLM_API_KEY` | LLM API key | `not-needed` for Ollama |
| `LLM_MODEL` | LLM model name | `qwen2.5:14b` for Ollama (override with `--llm-model`) |
| `TRANSCRIPT_STORAGE_BACKEND` | Storage backend | `aws` |
| `TRANSCRIPT_STORAGE_AWS_*` | S3 credentials | Auto-set for Garage |
### Frontend Environment (`www/.env`)
| Variable | Description | Default |
|----------|-------------|---------|
| `SITE_URL` | Public-facing URL | Auto-detected |
| `API_URL` | API URL (browser-side) | Same as SITE_URL |
| `SERVER_API_URL` | API URL (server-side) | `http://server:1250` |
| `NEXTAUTH_SECRET` | Auth secret | Auto-generated |
| `FEATURE_REQUIRE_LOGIN` | Require authentication | `false` |
## Storage Options
### Garage (Recommended for Self-Hosted)
Use `--garage` flag. The script automatically:
- Generates `data/garage.toml` with a random RPC secret
- Starts the Garage container
- Creates the `reflector-media` bucket
- Creates an access key with read/write permissions
- Writes all S3 credentials to `server/.env`
### External S3 (AWS, MinIO, etc.)
Don't use `--garage`. The script will prompt for:
- Access Key ID
- Secret Access Key
- Bucket Name
- Region
- Endpoint URL (for non-AWS like MinIO)
Or pre-fill in `server/.env`:
```env
TRANSCRIPT_STORAGE_BACKEND=aws
TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID=your-key
TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY=your-secret
TRANSCRIPT_STORAGE_AWS_BUCKET_NAME=reflector-media
TRANSCRIPT_STORAGE_AWS_REGION=us-east-1
# For non-AWS S3 (MinIO, etc.):
TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://minio:9000
```
## Enabling Authentication (Authentik)
By default, authentication is disabled (`AUTH_BACKEND=none`, `FEATURE_REQUIRE_LOGIN=false`). To enable:
1. Deploy an Authentik instance (see [Authentik docs](https://goauthentik.io/docs/installation))
2. Create an OAuth2/OIDC application for Reflector
3. Update `server/.env`:
```env
AUTH_BACKEND=jwt
AUTH_JWT_AUDIENCE=your-client-id
```
4. Update `www/.env`:
```env
FEATURE_REQUIRE_LOGIN=true
AUTHENTIK_ISSUER=https://authentik.example.com/application/o/reflector
AUTHENTIK_REFRESH_TOKEN_URL=https://authentik.example.com/application/o/token/
AUTHENTIK_CLIENT_ID=your-client-id
AUTHENTIK_CLIENT_SECRET=your-client-secret
```
5. Restart: `docker compose -f docker-compose.selfhosted.yml down && ./scripts/setup-selfhosted.sh <same-flags>`
## Enabling Daily.co Live Rooms
Daily.co enables real-time meeting rooms with automatic recording and transcription.
1. Create a [Daily.co](https://www.daily.co/) account
2. Add to `server/.env`:
```env
DEFAULT_VIDEO_PLATFORM=daily
DAILY_API_KEY=your-daily-api-key
DAILY_SUBDOMAIN=your-subdomain
DAILY_WEBHOOK_SECRET=your-webhook-secret
DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco
DAILYCO_STORAGE_AWS_REGION=us-east-1
DAILYCO_STORAGE_AWS_ROLE_ARN=arn:aws:iam::role/DailyCoAccess
```
3. Restart the server: `docker compose -f docker-compose.selfhosted.yml restart server worker`
## Enabling Real Domain with Let's Encrypt
By default, Caddy uses self-signed certificates. For a real domain:
1. Point your domain's DNS to your server's IP
2. Ensure ports 80 and 443 are open
3. Edit `Caddyfile`:
```
reflector.example.com {
    handle /v1/* {
        reverse_proxy server:1250
    }
    handle /health {
        reverse_proxy server:1250
    }
    handle {
        reverse_proxy web:3000
    }
}
```
4. Update `www/.env`:
```env
SITE_URL=https://reflector.example.com
NEXTAUTH_URL=https://reflector.example.com
API_URL=https://reflector.example.com
```
5. Restart Caddy: `docker compose -f docker-compose.selfhosted.yml restart caddy web`
## Troubleshooting
### Check service status
```bash
docker compose -f docker-compose.selfhosted.yml ps
```
### View logs for a specific service
```bash
docker compose -f docker-compose.selfhosted.yml logs server --tail 50
docker compose -f docker-compose.selfhosted.yml logs gpu --tail 50
docker compose -f docker-compose.selfhosted.yml logs web --tail 50
```
### GPU service taking too long
First start downloads ~1-2GB of ML models. Check progress:
```bash
docker compose -f docker-compose.selfhosted.yml logs gpu -f
```
### Server exits immediately
Usually a database migration issue. Check:
```bash
docker compose -f docker-compose.selfhosted.yml logs server --tail 50
```
### Caddy certificate issues
For self-signed certs, your browser will warn. Click Advanced > Proceed.
For Let's Encrypt, ensure ports 80/443 are open and DNS is pointed correctly.
### Summaries/topics not generating
Check LLM configuration:
```bash
grep LLM_ server/.env
```
If you didn't use `--ollama-gpu` or `--ollama-cpu`, you must set `LLM_URL`, `LLM_API_KEY`, and `LLM_MODEL`.
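For example, to point at OpenAI (values from the prerequisites section; shown as a heredoc append — edit the keys in place if they already exist rather than appending duplicates):
```bash
cat >> server/.env <<'EOF'
LLM_URL=https://api.openai.com/v1
LLM_API_KEY=sk-...
LLM_MODEL=gpt-4o-mini
EOF
docker compose -f docker-compose.selfhosted.yml restart server worker
```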
### Health check from inside containers
```bash
docker compose -f docker-compose.selfhosted.yml exec server curl http://localhost:1250/health
docker compose -f docker-compose.selfhosted.yml exec gpu curl http://localhost:8000/docs
```
## Updating
```bash
# Option A: Pull latest prebuilt images and restart
docker compose -f docker-compose.selfhosted.yml down
./scripts/setup-selfhosted.sh <same-flags-as-before>
# Option B: Build from source (after git pull) and restart
git pull
docker compose -f docker-compose.selfhosted.yml down
./scripts/setup-selfhosted.sh <same-flags-as-before> --build
# Rebuild only the GPU/CPU model image (picks up model updates)
docker compose -f docker-compose.selfhosted.yml build gpu # or cpu
```
The setup script is idempotent — it won't overwrite existing secrets or env vars that are already set.
## Architecture Overview
```
                   ┌─────────────┐
Internet ────────> │    Caddy    │ :80/:443
                   └──────┬──────┘
              ┌───────────┼────────────┐
              │           │            │
              v           v            │
         ┌─────────┐  ┌─────────┐      │
         │   web   │  │ server  │      │
         │  :3000  │  │  :1250  │      │
         └─────────┘  └────┬────┘      │
                           │           │
                      ┌────┴────┐      │
                      │ worker  │      │
                      │  beat   │      │
                      └────┬────┘      │
                           │           │
          ┌────────────────┼───────────┤
          │                │           │
          v                v           v
  ┌───────────────┐   ┌─────────┐  ┌─────────┐
  │ transcription │   │postgres │  │  redis  │
  │   (gpu/cpu)   │   │  :5432  │  │  :6379  │
  │     :8000     │   └─────────┘  └─────────┘
  └───────────────┘
  ┌───────────────┐   ┌─────────┐
  │    ollama     │   │ garage  │
  │  (optional)   │   │(optional│
  │    :11434     │   │   S3)   │
  └───────────────┘   └─────────┘
```
All services communicate over Docker's internal network. Only Caddy (if enabled) exposes ports to the internet.

10 node_modules/.yarn-integrity generated vendored Normal file

@@ -0,0 +1,10 @@
{
"systemParams": "darwin-x64-83",
"modulesFolders": [],
"flags": [],
"linkedModules": [],
"topLevelPatterns": [],
"lockfileEntries": {},
"files": [],
"artifacts": {}
}

945 scripts/setup-selfhosted.sh Executable file

@@ -0,0 +1,945 @@
#!/usr/bin/env bash
#
# Self-hosted production setup for Reflector.
# Single script to configure and launch everything on one server.
#
# Usage:
# ./scripts/setup-selfhosted.sh <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]
#
# Specialized models (pick ONE — required):
# --gpu NVIDIA GPU for transcription/diarization/translation
# --cpu CPU-only for transcription/diarization/translation (slower)
#
# Local LLM (optional — for summarization & topic detection):
# --ollama-gpu Local Ollama with NVIDIA GPU acceleration
# --ollama-cpu Local Ollama on CPU only
# --llm-model MODEL Ollama model to use (default: qwen2.5:14b)
# (If omitted, configure an external OpenAI-compatible LLM in server/.env)
#
# Optional flags:
# --garage Use Garage for local S3-compatible storage
# --caddy Enable Caddy reverse proxy with auto-SSL
# --domain DOMAIN Use a real domain for Caddy (enables Let's Encrypt auto-HTTPS)
# Requires: DNS pointing to this server + ports 80/443 open
# Without --domain: Caddy uses self-signed cert for IP access
# --build Build backend and frontend images from source instead of pulling
#
# Examples:
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com
# ./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy
# ./scripts/setup-selfhosted.sh --cpu
#
# Idempotent — safe to re-run at any time.
#
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
COMPOSE_FILE="$ROOT_DIR/docker-compose.selfhosted.yml"
SERVER_ENV="$ROOT_DIR/server/.env"
WWW_ENV="$ROOT_DIR/www/.env"
OLLAMA_MODEL="qwen2.5:14b"
OS="$(uname -s)"
# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
info() { echo -e "${CYAN}==>${NC} $*"; }
ok()   { echo -e "${GREEN} ✓${NC} $*"; }
warn() { echo -e "${YELLOW} !${NC} $*"; }
err()  { echo -e "${RED} ✗${NC} $*" >&2; }
# --- Helpers ---
dump_diagnostics() {
  local failed_svc="${1:-}"
  echo ""
  err "========== DIAGNOSTICS =========="
  err "Container status:"
  compose_cmd ps -a --format "table {{.Name}}\t{{.Status}}" 2>/dev/null || true
  echo ""
  local stopped
  stopped=$(compose_cmd ps -a --format '{{.Name}}\t{{.Status}}' 2>/dev/null \
    | grep -iv 'up\|running' | awk -F'\t' '{print $1}' || true)
  for c in $stopped; do
    err "--- Logs for $c (exited/unhealthy) ---"
    docker logs --tail 30 "$c" 2>&1 || true
    echo ""
  done
  if [[ -n "$failed_svc" ]]; then
    err "--- Logs for $failed_svc (last 40) ---"
    compose_cmd logs "$failed_svc" --tail 40 2>&1 || true
  fi
  err "================================="
}
trap 'dump_diagnostics' ERR
detect_lan_ip() {
  case "$OS" in
    Darwin)
      for iface in en0 en1 en2 en3; do
        local ip
        ip=$(ipconfig getifaddr "$iface" 2>/dev/null || true)
        if [[ -n "$ip" ]]; then
          echo "$ip"
          return
        fi
      done
      ;;
    Linux)
      ip route get 1.1.1.1 2>/dev/null | sed -n 's/.*src \([^ ]*\).*/\1/p'
      return
      ;;
  esac
  echo ""
}
wait_for_url() {
  local url="$1" label="$2" retries="${3:-30}" interval="${4:-2}"
  for i in $(seq 1 "$retries"); do
    if curl -sf "$url" > /dev/null 2>&1; then
      return 0
    fi
    echo -ne "\r Waiting for $label... ($i/$retries)"
    sleep "$interval"
  done
  echo ""
  err "$label not responding at $url after $retries attempts"
  return 1
}
env_has_key() {
  local file="$1" key="$2"
  grep -q "^${key}=" "$file" 2>/dev/null
}
env_get() {
  local file="$1" key="$2"
  grep "^${key}=" "$file" 2>/dev/null | head -1 | cut -d= -f2-
}
env_set() {
  local file="$1" key="$2" value="$3"
  if env_has_key "$file" "$key"; then
    if [[ "$OS" == "Darwin" ]]; then
      sed -i '' "s|^${key}=.*|${key}=${value}|" "$file"
    else
      sed -i "s|^${key}=.*|${key}=${value}|" "$file"
    fi
  else
    echo "${key}=${value}" >> "$file"
  fi
}
compose_cmd() {
  local profiles=""
  for p in "${COMPOSE_PROFILES[@]}"; do
    profiles="$profiles --profile $p"
  done
  docker compose -f "$COMPOSE_FILE" $profiles "$@"
}
# Compose command with only garage profile (for garage-only operations before full stack start)
compose_garage_cmd() {
  docker compose -f "$COMPOSE_FILE" --profile garage "$@"
}
# --- Parse arguments ---
MODEL_MODE="" # gpu or cpu (required, mutually exclusive)
OLLAMA_MODE="" # ollama-gpu or ollama-cpu (optional)
USE_GARAGE=false
USE_CADDY=false
CUSTOM_DOMAIN="" # optional domain for Let's Encrypt HTTPS
BUILD_IMAGES=false # build backend/frontend from source
SKIP_NEXT=false
ARGS=("$@")
for i in "${!ARGS[@]}"; do
  if [[ "$SKIP_NEXT" == "true" ]]; then
    SKIP_NEXT=false
    continue
  fi
  arg="${ARGS[$i]}"
  case "$arg" in
    --gpu)
      [[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; }
      MODEL_MODE="gpu" ;;
    --cpu)
      [[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; }
      MODEL_MODE="cpu" ;;
    --ollama-gpu)
      [[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. Pick one."; exit 1; }
      OLLAMA_MODE="ollama-gpu" ;;
    --ollama-cpu)
      [[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. Pick one."; exit 1; }
      OLLAMA_MODE="ollama-cpu" ;;
    --llm-model)
      next_i=$((i + 1))
      if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
        err "--llm-model requires a model name (e.g. --llm-model mistral)"
        exit 1
      fi
      OLLAMA_MODEL="${ARGS[$next_i]}"
      SKIP_NEXT=true ;;
    --garage) USE_GARAGE=true ;;
    --caddy)  USE_CADDY=true ;;
    --build)  BUILD_IMAGES=true ;;
    --domain)
      next_i=$((i + 1))
      if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
        err "--domain requires a domain name (e.g. --domain reflector.example.com)"
        exit 1
      fi
      CUSTOM_DOMAIN="${ARGS[$next_i]}"
      USE_CADDY=true # --domain implies --caddy
      SKIP_NEXT=true ;;
    *)
      err "Unknown argument: $arg"
      err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]"
      exit 1
      ;;
  esac
done
if [[ -z "$MODEL_MODE" ]]; then
err "No model mode specified. You must choose --gpu or --cpu."
err ""
err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]"
err ""
err "Specialized models (required):"
err " --gpu NVIDIA GPU for transcription/diarization/translation"
err " --cpu CPU-only (slower but works without GPU)"
err ""
err "Local LLM (optional):"
err " --ollama-gpu Local Ollama with GPU (for summarization/topics)"
err " --ollama-cpu Local Ollama on CPU (for summarization/topics)"
err " --llm-model MODEL Ollama model to download (default: qwen2.5:14b)"
err " (omit --ollama-* for external OpenAI-compatible LLM)"
err ""
err "Other options:"
err " --garage Local S3-compatible storage (Garage)"
err " --caddy Caddy reverse proxy with self-signed cert"
err " --domain DOMAIN Use a real domain with Let's Encrypt HTTPS (implies --caddy)"
err " --build Build backend/frontend images from source instead of pulling"
exit 1
fi
# Build profiles list — one profile per feature
COMPOSE_PROFILES=("$MODEL_MODE")
[[ -n "$OLLAMA_MODE" ]] && COMPOSE_PROFILES+=("$OLLAMA_MODE")
[[ "$USE_GARAGE" == "true" ]] && COMPOSE_PROFILES+=("garage")
[[ "$USE_CADDY" == "true" ]] && COMPOSE_PROFILES+=("caddy")
# Derived flags
NEEDS_NVIDIA=false
[[ "$MODEL_MODE" == "gpu" ]] && NEEDS_NVIDIA=true
[[ "$OLLAMA_MODE" == "ollama-gpu" ]] && NEEDS_NVIDIA=true
USES_OLLAMA=false
OLLAMA_SVC=""
[[ "$OLLAMA_MODE" == "ollama-gpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama"
[[ "$OLLAMA_MODE" == "ollama-cpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama-cpu"
# Human-readable mode string for display
MODE_DISPLAY="$MODEL_MODE"
[[ -n "$OLLAMA_MODE" ]] && MODE_DISPLAY="$MODEL_MODE + $OLLAMA_MODE"
# =========================================================
# Step 0: Prerequisites
# =========================================================
step_prerequisites() {
  info "Step 0: Checking prerequisites"
  # Docker
  if ! docker compose version 2>/dev/null | grep -qi compose; then
    err "Docker Compose V2 not found."
    err "Install Docker with Compose V2: https://docs.docker.com/engine/install/"
    exit 1
  fi
  if ! docker info &>/dev/null; then
    err "Docker daemon not running."
    exit 1
  fi
  ok "Docker + Compose V2 ready"
  # NVIDIA GPU check
  if [[ "$NEEDS_NVIDIA" == "true" ]]; then
    if ! command -v nvidia-smi &>/dev/null || ! nvidia-smi &>/dev/null; then
      err "NVIDIA GPU required (model=$MODEL_MODE, ollama=$OLLAMA_MODE) but nvidia-smi failed."
      err "Install NVIDIA drivers and nvidia-container-toolkit."
      exit 1
    fi
    ok "NVIDIA GPU detected"
  fi
  # Compose file
  if [[ ! -f "$COMPOSE_FILE" ]]; then
    err "docker-compose.selfhosted.yml not found at $COMPOSE_FILE"
    err "Run this script from the repo root: ./scripts/setup-selfhosted.sh"
    exit 1
  fi
  ok "Prerequisites OK (models=$MODEL_MODE, ollama=$OLLAMA_MODE, garage=$USE_GARAGE, caddy=$USE_CADDY)"
}
# =========================================================
# Step 1: Generate secrets
# =========================================================
step_secrets() {
  info "Step 1: Generating secrets"
  # These are used in later steps — generate once, reuse
  if [[ -f "$SERVER_ENV" ]] && env_has_key "$SERVER_ENV" "SECRET_KEY"; then
    SECRET_KEY=$(env_get "$SERVER_ENV" "SECRET_KEY")
    if [[ "$SECRET_KEY" != "changeme"* ]]; then
      ok "SECRET_KEY already set"
    else
      SECRET_KEY=$(openssl rand -hex 32)
    fi
  else
    SECRET_KEY=$(openssl rand -hex 32)
  fi
  if [[ -f "$WWW_ENV" ]] && env_has_key "$WWW_ENV" "NEXTAUTH_SECRET"; then
    NEXTAUTH_SECRET=$(env_get "$WWW_ENV" "NEXTAUTH_SECRET")
    if [[ "$NEXTAUTH_SECRET" != "changeme"* ]]; then
      ok "NEXTAUTH_SECRET already set"
    else
      NEXTAUTH_SECRET=$(openssl rand -hex 32)
    fi
  else
    NEXTAUTH_SECRET=$(openssl rand -hex 32)
  fi
  ok "Secrets ready"
}
# =========================================================
# Step 2: Generate server/.env
# =========================================================
step_server_env() {
info "Step 2: Generating server/.env"
if [[ -f "$SERVER_ENV" ]]; then
ok "server/.env already exists — ensuring required vars"
else
cp "$ROOT_DIR/server/.env.selfhosted.example" "$SERVER_ENV"
ok "Created server/.env from template"
fi
# Core infrastructure
env_set "$SERVER_ENV" "DATABASE_URL" "postgresql+asyncpg://reflector:reflector@postgres:5432/reflector"
env_set "$SERVER_ENV" "REDIS_HOST" "redis"
env_set "$SERVER_ENV" "CELERY_BROKER_URL" "redis://redis:6379/1"
env_set "$SERVER_ENV" "CELERY_RESULT_BACKEND" "redis://redis:6379/1"
env_set "$SERVER_ENV" "SECRET_KEY" "$SECRET_KEY"
env_set "$SERVER_ENV" "AUTH_BACKEND" "none"
env_set "$SERVER_ENV" "PUBLIC_MODE" "true"
# Public-facing URLs
local server_base_url
if [[ -n "$CUSTOM_DOMAIN" ]]; then
server_base_url="https://$CUSTOM_DOMAIN"
elif [[ "$USE_CADDY" == "true" ]]; then
if [[ -n "$PRIMARY_IP" ]]; then
server_base_url="https://$PRIMARY_IP"
else
server_base_url="https://localhost"
fi
else
if [[ -n "$PRIMARY_IP" ]]; then
server_base_url="http://$PRIMARY_IP"
else
server_base_url="http://localhost:1250"
fi
fi
env_set "$SERVER_ENV" "BASE_URL" "$server_base_url"
env_set "$SERVER_ENV" "CORS_ORIGIN" "$server_base_url"
# WebRTC: advertise host IP in ICE candidates so browsers can reach the server
if [[ -n "$PRIMARY_IP" ]]; then
env_set "$SERVER_ENV" "WEBRTC_HOST" "$PRIMARY_IP"
fi
# Specialized models (always via gpu/cpu container aliased as "transcription")
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
env_set "$SERVER_ENV" "TRANSCRIPT_URL" "http://transcription:8000"
env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "selfhosted"
env_set "$SERVER_ENV" "DIARIZATION_ENABLED" "true"
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
env_set "$SERVER_ENV" "DIARIZATION_URL" "http://transcription:8000"
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
env_set "$SERVER_ENV" "TRANSLATE_URL" "http://transcription:8000"
# HuggingFace token for gated models (pyannote diarization)
# Written to root .env so docker compose picks it up for gpu/cpu containers
local root_env="$ROOT_DIR/.env"
local current_hf_token="${HF_TOKEN:-}"
if [[ -f "$root_env" ]] && env_has_key "$root_env" "HF_TOKEN"; then
current_hf_token=$(env_get "$root_env" "HF_TOKEN")
fi
if [[ -z "$current_hf_token" ]]; then
echo ""
warn "HF_TOKEN not set. Diarization will use a public model fallback."
warn "For best results, get a token at https://huggingface.co/settings/tokens"
warn "and accept pyannote licenses at https://huggingface.co/pyannote/speaker-diarization-3.1"
read -rp " HuggingFace token (or press Enter to skip): " current_hf_token
fi
if [[ -n "$current_hf_token" ]]; then
touch "$root_env"
env_set "$root_env" "HF_TOKEN" "$current_hf_token"
export HF_TOKEN="$current_hf_token"
ok "HF_TOKEN configured"
else
touch "$root_env"
env_set "$root_env" "HF_TOKEN" ""
ok "HF_TOKEN skipped (using public model fallback)"
fi
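  # Optional manual token check (sketch; uses HF's public whoami endpoint):
  #   curl -s -H "Authorization: Bearer $HF_TOKEN" https://huggingface.co/api/whoami-v2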
# LLM configuration
if [[ "$USES_OLLAMA" == "true" ]]; then
local llm_host="$OLLAMA_SVC"
env_set "$SERVER_ENV" "LLM_URL" "http://${llm_host}:11434/v1"
env_set "$SERVER_ENV" "LLM_MODEL" "$OLLAMA_MODEL"
env_set "$SERVER_ENV" "LLM_API_KEY" "not-needed"
ok "LLM configured for local Ollama ($llm_host, model=$OLLAMA_MODEL)"
else
# Check if user already configured LLM
local current_llm_url=""
if env_has_key "$SERVER_ENV" "LLM_URL"; then
current_llm_url=$(env_get "$SERVER_ENV" "LLM_URL")
fi
if [[ -z "$current_llm_url" ]] || [[ "$current_llm_url" == "http://host.docker.internal"* ]]; then
warn "LLM not configured. Summarization and topic detection will NOT work."
warn "Edit server/.env and set LLM_URL, LLM_API_KEY, LLM_MODEL"
warn "Example: LLM_URL=https://api.openai.com/v1 LLM_MODEL=gpt-4o-mini"
else
ok "LLM already configured: $current_llm_url"
fi
fi
ok "server/.env ready"
}
# =========================================================
# Step 3: Generate www/.env
# =========================================================
step_www_env() {
info "Step 3: Generating www/.env"
if [[ -f "$WWW_ENV" ]]; then
ok "www/.env already exists — ensuring required vars"
else
cp "$ROOT_DIR/www/.env.selfhosted.example" "$WWW_ENV"
ok "Created www/.env from template"
fi
# Public-facing URL for frontend
local base_url
if [[ -n "$CUSTOM_DOMAIN" ]]; then
base_url="https://$CUSTOM_DOMAIN"
elif [[ "$USE_CADDY" == "true" ]]; then
if [[ -n "$PRIMARY_IP" ]]; then
base_url="https://$PRIMARY_IP"
else
base_url="https://localhost"
fi
else
# No Caddy — user's proxy handles SSL. Use http for now, they'll override.
if [[ -n "$PRIMARY_IP" ]]; then
base_url="http://$PRIMARY_IP"
else
base_url="http://localhost"
fi
fi
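  # The browser reaches app and API through one origin: the proxy (Caddy or
  # your own) routes /v1/* to the server and the rest to web, so API_URL
  # below matches SITE_URL.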
env_set "$WWW_ENV" "SITE_URL" "$base_url"
env_set "$WWW_ENV" "NEXTAUTH_URL" "$base_url"
env_set "$WWW_ENV" "NEXTAUTH_SECRET" "$NEXTAUTH_SECRET"
env_set "$WWW_ENV" "API_URL" "$base_url"
env_set "$WWW_ENV" "WEBSOCKET_URL" "auto"
env_set "$WWW_ENV" "SERVER_API_URL" "http://server:1250"
env_set "$WWW_ENV" "KV_URL" "redis://redis:6379"
env_set "$WWW_ENV" "FEATURE_REQUIRE_LOGIN" "false"
ok "www/.env ready (URL=$base_url)"
}
# =========================================================
# Step 4: Storage setup
# =========================================================
step_storage() {
info "Step 4: Storage setup"
if [[ "$USE_GARAGE" == "true" ]]; then
step_garage
else
step_external_s3
fi
}
step_garage() {
info "Configuring Garage (local S3)"
# Generate garage.toml from template
local garage_toml="$ROOT_DIR/scripts/garage.toml"
local garage_runtime="$ROOT_DIR/data/garage.toml"
mkdir -p "$ROOT_DIR/data"
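  # If compose ever started before this file existed, Docker will have created
  # a directory at the bind-mount path; remove it so a real file can be written.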
if [[ -d "$garage_runtime" ]]; then
rm -rf "$garage_runtime"
fi
if [[ ! -f "$garage_runtime" ]]; then
local rpc_secret
rpc_secret=$(openssl rand -hex 32)
sed "s|__GARAGE_RPC_SECRET__|${rpc_secret}|" "$garage_toml" > "$garage_runtime"
ok "Generated data/garage.toml"
else
ok "data/garage.toml already exists"
fi
# Start garage container only
compose_garage_cmd up -d garage
# Wait for admin API (port 3903 exposed to host for health checks)
local garage_ready=false
for i in $(seq 1 30); do
if curl -sf http://localhost:3903/metrics > /dev/null 2>&1; then
garage_ready=true
break
fi
echo -ne "\r Waiting for Garage admin API... ($i/30)"
sleep 2
done
echo ""
if [[ "$garage_ready" != "true" ]]; then
err "Garage not responding. Check: docker compose logs garage"
exit 1
fi
# Layout
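  # Single-node layout: register the node with 1G capacity in zone "dc1".
  # Garage will not serve reads or writes until a layout version is applied.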
local node_id
node_id=$(compose_garage_cmd exec -T garage /garage node id -q 2>/dev/null | tr -d '[:space:]')
local layout_status
layout_status=$(compose_garage_cmd exec -T garage /garage layout show 2>&1 || true)
if echo "$layout_status" | grep -q "No nodes"; then
compose_garage_cmd exec -T garage /garage layout assign "$node_id" -c 1G -z dc1
compose_garage_cmd exec -T garage /garage layout apply --version 1
fi
# Bucket
if ! compose_garage_cmd exec -T garage /garage bucket info reflector-media &>/dev/null; then
compose_garage_cmd exec -T garage /garage bucket create reflector-media
fi
# Key
local created_key=false
if compose_garage_cmd exec -T garage /garage key info reflector &>/dev/null; then
ok "Key 'reflector' already exists"
else
KEY_OUTPUT=$(compose_garage_cmd exec -T garage /garage key create reflector)
created_key=true
fi
# Permissions
compose_garage_cmd exec -T garage /garage bucket allow reflector-media --read --write --key reflector
# Write S3 credentials to server/.env
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_BACKEND" "aws"
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL" "http://garage:3900"
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_BUCKET_NAME" "reflector-media"
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_REGION" "garage"
if [[ "$created_key" == "true" ]]; then
local key_id key_secret
key_id=$(echo "$KEY_OUTPUT" | grep -i "key id" | awk '{print $NF}')
key_secret=$(echo "$KEY_OUTPUT" | grep -i "secret key" | awk '{print $NF}')
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID" "$key_id"
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY" "$key_secret"
fi
ok "Garage storage ready"
}
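# Manual bucket check (sketch; assumes the aws CLI is installed, port 3900 is
# published to the host, and the credentials are those written to server/.env):
#   AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... \
#     aws --endpoint-url http://localhost:3900 s3 ls s3://reflector-media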
step_external_s3() {
info "Checking external S3 configuration"
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_BACKEND" "aws"
local s3_vars=("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID" "TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY" "TRANSCRIPT_STORAGE_AWS_BUCKET_NAME" "TRANSCRIPT_STORAGE_AWS_REGION")
local missing=()
for var in "${s3_vars[@]}"; do
if ! env_has_key "$SERVER_ENV" "$var" || [[ -z "$(env_get "$SERVER_ENV" "$var")" ]]; then
missing+=("$var")
fi
done
if [[ ${#missing[@]} -gt 0 ]]; then
warn "S3 storage is REQUIRED. The following vars are missing in server/.env:"
for var in "${missing[@]}"; do
warn " $var"
done
echo ""
info "Enter S3 credentials (or press Ctrl+C to abort and edit server/.env manually):"
echo ""
for var in "${missing[@]}"; do
local prompt_label
case "$var" in
*ACCESS_KEY_ID) prompt_label="Access Key ID" ;;
*SECRET_ACCESS_KEY) prompt_label="Secret Access Key" ;;
*BUCKET_NAME) prompt_label="Bucket Name" ;;
*REGION) prompt_label="Region (e.g. us-east-1)" ;;
esac
local value=""
while [[ -z "$value" ]]; do
read -rp " $prompt_label: " value
done
env_set "$SERVER_ENV" "$var" "$value"
done
# Optional: endpoint URL for non-AWS S3
echo ""
read -rp " S3 Endpoint URL (leave empty for AWS, or enter for MinIO/etc.): " endpoint_url
if [[ -n "$endpoint_url" ]]; then
env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL" "$endpoint_url"
fi
fi
ok "S3 storage configured"
}
# =========================================================
# Step 5: Caddyfile
# =========================================================
step_caddyfile() {
if [[ "$USE_CADDY" != "true" ]]; then
return
fi
info "Step 5: Caddyfile setup"
local caddyfile="$ROOT_DIR/Caddyfile"
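  # Same bind-mount gotcha as garage.toml: an earlier compose run may have left
  # a directory where the Caddyfile should be.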
if [[ -d "$caddyfile" ]]; then
rm -rf "$caddyfile"
fi
if [[ -n "$CUSTOM_DOMAIN" ]]; then
# Real domain: Caddy auto-provisions Let's Encrypt certificate
cat > "$caddyfile" << CADDYEOF
# Generated by setup-selfhosted.sh — Let's Encrypt HTTPS for $CUSTOM_DOMAIN
$CUSTOM_DOMAIN {
handle /v1/* {
reverse_proxy server:1250
}
handle /health {
reverse_proxy server:1250
}
handle {
reverse_proxy web:3000
}
}
CADDYEOF
ok "Created Caddyfile for $CUSTOM_DOMAIN (Let's Encrypt auto-HTTPS)"
elif [[ -n "$PRIMARY_IP" ]]; then
# No domain, IP only: catch-all :443 with self-signed cert
# (IP connections don't send SNI, so we can't match by address)
cat > "$caddyfile" << CADDYEOF
# Generated by setup-selfhosted.sh — self-signed cert for IP access
:443 {
tls internal
handle /v1/* {
reverse_proxy server:1250
}
handle /health {
reverse_proxy server:1250
}
handle {
reverse_proxy web:3000
}
}
CADDYEOF
ok "Created Caddyfile for $PRIMARY_IP (catch-all :443 with self-signed cert)"
elif [[ ! -f "$caddyfile" ]]; then
cp "$ROOT_DIR/Caddyfile.selfhosted.example" "$caddyfile"
ok "Created Caddyfile from template"
else
ok "Caddyfile already exists"
fi
}
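# TLS sanity check once services are up (sketch; -k accepts the self-signed
# certificate produced by "tls internal"):
#   curl -skv https://localhost/health 2>&1 | grep -i 'issuer\|< HTTP'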
# =========================================================
# Step 6: Start services
# =========================================================
step_services() {
info "Step 6: Starting Docker services"
# Build GPU/CPU image from source (always needed — no prebuilt image)
local build_svc="$MODEL_MODE"
info "Building $build_svc image (first build downloads ML models, may take a while)..."
compose_cmd build "$build_svc"
ok "$build_svc image built"
# Build or pull backend and frontend images
if [[ "$BUILD_IMAGES" == "true" ]]; then
info "Building backend image from source (server, worker, beat)..."
compose_cmd build server worker beat
ok "Backend image built"
info "Building frontend image from source..."
compose_cmd build web
ok "Frontend image built"
else
info "Pulling latest backend and frontend images..."
compose_cmd pull server web || warn "Pull failed — using cached images"
fi
# Start all services
compose_cmd up -d
ok "Containers started"
# Quick sanity check
sleep 3
local exited
exited=$(compose_cmd ps -a --format '{{.Name}} {{.Status}}' 2>/dev/null \
| grep -i 'exit' || true)
if [[ -n "$exited" ]]; then
warn "Some containers exited immediately:"
echo "$exited" | while read -r line; do warn " $line"; done
dump_diagnostics
fi
}
# =========================================================
# Step 7: Health checks
# =========================================================
step_health() {
info "Step 7: Health checks"
# Specialized model service (gpu or cpu)
local model_svc="$MODEL_MODE"
info "Waiting for $model_svc service (first start downloads ~1GB of models)..."
local model_ok=false
for i in $(seq 1 120); do
if curl -sf http://localhost:8000/docs > /dev/null 2>&1; then
model_ok=true
break
fi
echo -ne "\r Waiting for $model_svc service... ($i/120)"
sleep 5
done
echo ""
if [[ "$model_ok" == "true" ]]; then
ok "$model_svc service healthy (transcription + diarization)"
else
warn "$model_svc service not ready yet — it will keep loading in the background"
warn "Check with: docker compose -f docker-compose.selfhosted.yml logs $model_svc"
fi
# Ollama (if applicable)
if [[ "$USES_OLLAMA" == "true" ]]; then
info "Waiting for Ollama service..."
local ollama_ok=false
for i in $(seq 1 60); do
if curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; then
ollama_ok=true
break
fi
echo -ne "\r Waiting for Ollama... ($i/60)"
sleep 3
done
echo ""
if [[ "$ollama_ok" == "true" ]]; then
ok "Ollama service healthy"
# Pull model if not present
      # "ollama list" prints names with a tag (e.g. "llama3.1:latest"), so
      # match the model name both with and without an explicit tag.
      if compose_cmd exec -T "$OLLAMA_SVC" ollama list 2>/dev/null | awk '{print $1}' | grep -qxF -e "$OLLAMA_MODEL" -e "$OLLAMA_MODEL:latest"; then
ok "Model $OLLAMA_MODEL already pulled"
else
info "Pulling model $OLLAMA_MODEL (this may take a while)..."
compose_cmd exec -T "$OLLAMA_SVC" ollama pull "$OLLAMA_MODEL"
ok "Model $OLLAMA_MODEL pulled"
fi
else
warn "Ollama not ready yet. Check: docker compose logs $OLLAMA_SVC"
fi
fi
# Server API
info "Waiting for Server API (first run includes database migrations)..."
local server_ok=false
for i in $(seq 1 90); do
local svc_status
svc_status=$(compose_cmd ps server --format '{{.Status}}' 2>/dev/null || true)
if [[ -z "$svc_status" ]] || echo "$svc_status" | grep -qi 'exit'; then
echo ""
err "Server container exited unexpectedly"
dump_diagnostics server
exit 1
fi
if curl -sf http://localhost:1250/health > /dev/null 2>&1; then
server_ok=true
break
fi
echo -ne "\r Waiting for Server API... ($i/90)"
sleep 5
done
echo ""
if [[ "$server_ok" == "true" ]]; then
ok "Server API healthy"
else
err "Server API not ready after ~7 minutes"
dump_diagnostics server
exit 1
fi
# Frontend
info "Waiting for Frontend..."
local web_ok=false
for i in $(seq 1 30); do
if curl -sf http://localhost:3000 > /dev/null 2>&1; then
web_ok=true
break
fi
echo -ne "\r Waiting for Frontend... ($i/30)"
sleep 3
done
echo ""
if [[ "$web_ok" == "true" ]]; then
ok "Frontend healthy"
else
warn "Frontend not responding. Check: docker compose logs web"
fi
# Caddy
if [[ "$USE_CADDY" == "true" ]]; then
sleep 2
if curl -sfk "https://localhost" > /dev/null 2>&1; then
ok "Caddy proxy healthy"
else
warn "Caddy proxy not responding. Check: docker compose logs caddy"
fi
fi
# LLM warning for non-Ollama modes
if [[ "$USES_OLLAMA" == "false" ]]; then
local llm_url=""
if env_has_key "$SERVER_ENV" "LLM_URL"; then
llm_url=$(env_get "$SERVER_ENV" "LLM_URL")
fi
if [[ -z "$llm_url" ]]; then
echo ""
warn "LLM is not configured. Transcription will work, but:"
warn " - Summaries will NOT be generated"
warn " - Topics will NOT be detected"
warn " - Titles will NOT be auto-generated"
warn "Configure in server/.env: LLM_URL, LLM_API_KEY, LLM_MODEL"
fi
fi
}
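# The same probes can be run by hand at any time:
#   curl -sf http://localhost:1250/health && echo "server ok"
#   curl -sf -o /dev/null http://localhost:3000 && echo "web ok"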
# =========================================================
# Main
# =========================================================
main() {
echo ""
echo "=========================================="
echo " Reflector — Self-Hosted Production Setup"
echo "=========================================="
echo ""
echo " Models: $MODEL_MODE"
echo " LLM: ${OLLAMA_MODE:-external}"
echo " Garage: $USE_GARAGE"
echo " Caddy: $USE_CADDY"
[[ -n "$CUSTOM_DOMAIN" ]] && echo " Domain: $CUSTOM_DOMAIN"
[[ "$BUILD_IMAGES" == "true" ]] && echo " Build: from source"
echo ""
# Detect primary IP
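  # hostname -I can return nothing (or only loopback) on some hosts; the
  # fallback asks the kernel for the source address of a default outbound route.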
PRIMARY_IP=""
if [[ "$OS" == "Linux" ]]; then
PRIMARY_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
if [[ "$PRIMARY_IP" == "127."* ]] || [[ -z "$PRIMARY_IP" ]]; then
PRIMARY_IP=$(ip -4 route get 1 2>/dev/null | sed -n 's/.*src \([0-9.]*\).*/\1/p' || true)
fi
fi
# Touch env files so compose doesn't complain about missing env_file
mkdir -p "$ROOT_DIR/data"
touch "$SERVER_ENV" "$WWW_ENV"
# Ensure garage.toml exists if garage profile is active (compose needs it for volume mount)
if [[ "$USE_GARAGE" == "true" ]]; then
local garage_runtime="$ROOT_DIR/data/garage.toml"
if [[ ! -f "$garage_runtime" ]]; then
local rpc_secret
rpc_secret=$(openssl rand -hex 32)
sed "s|__GARAGE_RPC_SECRET__|${rpc_secret}|" "$ROOT_DIR/scripts/garage.toml" > "$garage_runtime"
fi
fi
step_prerequisites
echo ""
step_secrets
echo ""
step_server_env
echo ""
step_www_env
echo ""
step_storage
echo ""
step_caddyfile
echo ""
step_services
echo ""
step_health
echo ""
echo "=========================================="
echo -e " ${GREEN}Reflector is running!${NC}"
echo "=========================================="
echo ""
if [[ "$USE_CADDY" == "true" ]]; then
if [[ -n "$CUSTOM_DOMAIN" ]]; then
echo " App: https://$CUSTOM_DOMAIN"
echo " API: https://$CUSTOM_DOMAIN/v1/"
elif [[ -n "$PRIMARY_IP" ]]; then
echo " App: https://$PRIMARY_IP (accept self-signed cert in browser)"
echo " API: https://$PRIMARY_IP/v1/"
echo " Local: https://localhost"
else
echo " App: https://localhost (accept self-signed cert in browser)"
echo " API: https://localhost/v1/"
fi
else
echo " No Caddy — point your reverse proxy at:"
echo " Frontend: web:3000 (or localhost:3000 from host)"
echo " API: server:1250 (or localhost:1250 from host)"
fi
echo ""
echo " Models: $MODEL_MODE (transcription/diarization/translation)"
[[ "$USE_GARAGE" == "true" ]] && echo " Storage: Garage (local S3)"
[[ "$USE_GARAGE" != "true" ]] && echo " Storage: External S3"
[[ "$USES_OLLAMA" == "true" ]] && echo " LLM: Ollama ($OLLAMA_MODEL) for summarization/topics"
[[ "$USES_OLLAMA" != "true" ]] && echo " LLM: External (configure in server/.env)"
echo ""
echo " To stop: docker compose -f docker-compose.selfhosted.yml down"
echo " To re-run: ./scripts/setup-selfhosted.sh $*"
echo ""
}
main "$@"

112
server/.env.selfhosted.example Normal file
View File

@@ -0,0 +1,112 @@
# =======================================================
# Reflector Self-Hosted Production — Backend Configuration
# Generated by: ./scripts/setup-selfhosted.sh
# Reference: server/reflector/settings.py
# =======================================================
# =======================================================
# Database & Infrastructure
# Pre-filled for Docker internal networking (docker-compose.selfhosted.yml)
# =======================================================
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
REDIS_HOST=redis
REDIS_PORT=6379
CELERY_BROKER_URL=redis://redis:6379/1
CELERY_RESULT_BACKEND=redis://redis:6379/1
# Secret key — auto-generated by setup script
# Generate manually with: openssl rand -hex 32
SECRET_KEY=changeme-generate-a-secure-random-string
# =======================================================
# Authentication
# Disabled by default. Enable Authentik for multi-user access.
# See docsv2/selfhosted-production.md for setup instructions.
# =======================================================
AUTH_BACKEND=none
# AUTH_BACKEND=jwt
# AUTH_JWT_AUDIENCE=
# =======================================================
# Specialized Models (Transcription, Diarization, Translation)
# These run in the gpu/cpu container — NOT an LLM.
# The "modal" backend means "HTTP API client" — it talks to
# the self-hosted container, not Modal.com cloud.
# =======================================================
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=http://transcription:8000
TRANSCRIPT_MODAL_API_KEY=selfhosted
DIARIZATION_ENABLED=true
DIARIZATION_BACKEND=modal
DIARIZATION_URL=http://transcription:8000
TRANSLATION_BACKEND=modal
TRANSLATE_URL=http://transcription:8000
# HuggingFace token — optional, for gated models (e.g. pyannote).
# Falls back to a public S3 model bundle if not set.
# HF_TOKEN=hf_xxxxx
# =======================================================
# LLM for Summarization & Topic Detection
# Only summaries and topics use an LLM. Everything else
# (transcription, diarization, translation) uses specialized models above.
#
# Supports any OpenAI-compatible endpoint.
# Auto-configured by setup script if using --ollama-gpu or --ollama-cpu.
# For --gpu or --cpu modes, you MUST configure an external LLM.
# =======================================================
# --- Option A: External OpenAI-compatible API ---
# LLM_URL=https://api.openai.com/v1
# LLM_API_KEY=sk-your-api-key
# LLM_MODEL=gpt-4o-mini
# --- Option B: Local Ollama (auto-set by --ollama-gpu/--ollama-cpu) ---
# LLM_URL=http://ollama:11434/v1
# LLM_API_KEY=not-needed
# LLM_MODEL=llama3.1
LLM_CONTEXT_WINDOW=16000
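# Quick sanity check for any OpenAI-compatible endpoint (illustrative; GET
# /models is part of the OpenAI API surface and Ollama's compatibility layer):
#   curl -s "$LLM_URL/models" -H "Authorization: Bearer $LLM_API_KEY"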
# =======================================================
# S3 Storage (REQUIRED)
# Where to store audio files and transcripts.
#
# Option A: Use --garage flag (auto-configured by setup script)
# Option B: Any S3-compatible endpoint (AWS, MinIO, etc.)
# Set TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL for non-AWS endpoints.
# =======================================================
TRANSCRIPT_STORAGE_BACKEND=aws
TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID=
TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY=
TRANSCRIPT_STORAGE_AWS_BUCKET_NAME=reflector-media
TRANSCRIPT_STORAGE_AWS_REGION=us-east-1
# For non-AWS S3-compatible endpoints (Garage, MinIO, etc.):
# TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://garage:3900
# =======================================================
# Daily.co Live Rooms (Optional)
# Enable real-time meeting rooms with Daily.co integration.
# Requires a Daily.co account: https://www.daily.co/
# =======================================================
# DEFAULT_VIDEO_PLATFORM=daily
# DAILY_API_KEY=your-daily-api-key
# DAILY_SUBDOMAIN=your-subdomain
# DAILY_WEBHOOK_SECRET=your-daily-webhook-secret
# DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco
# DAILYCO_STORAGE_AWS_REGION=us-east-1
# DAILYCO_STORAGE_AWS_ROLE_ARN=arn:aws:iam::123456789012:role/DailyCoAccess
# =======================================================
# Feature Flags
# =======================================================
PUBLIC_MODE=true
# FEATURE_ROOMS=true
# =======================================================
# Sentry (Optional)
# =======================================================
# SENTRY_DSN=

View File

@@ -4,8 +4,9 @@ from uuid import uuid4
 
 from celery import current_task
 
-from reflector.db import get_database
+from reflector.db import _database_context, get_database
 from reflector.llm import llm_session_id
+from reflector.ws_manager import reset_ws_manager
 
 
 def asynctask(f):
@@ -20,6 +21,14 @@ def asynctask(f):
             return await f(*args, **kwargs)
         finally:
             await database.disconnect()
+            _database_context.set(None)
+
+        if current_task:
+            # Reset cached connections before each Celery task.
+            # Each asyncio.run() creates a new event loop, making connections
+            # from previous tasks stale ("Future attached to a different loop").
+            _database_context.set(None)
+            reset_ws_manager()
 
         coro = run_with_db()
         if current_task:
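
The reset above guards against a real asyncio pitfall: objects created under one
event loop go stale once a later asyncio.run() starts a fresh loop. A minimal
sketch of the failure mode (illustrative only, not Reflector's actual db layer;
here the stale loop surfaces as "Event loop is closed", a sibling of the
"attached to a different loop" error quoted in the comment):

    import asyncio

    cache = {}

    class FakeConnection:
        # Simulates a driver that remembers the loop it was created on,
        # as cached connections in a long-lived Celery worker do.
        def __init__(self):
            self._loop = asyncio.get_running_loop()

        async def execute(self):
            fut = self._loop.create_future()             # tied to the original loop
            self._loop.call_soon(fut.set_result, "row")  # completes on that loop
            return await fut

    async def task_body():
        conn = cache.setdefault("conn", FakeConnection())
        return await conn.execute()

    print(asyncio.run(task_body()))  # "row": connection and loop agree
    print(asyncio.run(task_body()))  # RuntimeError: the cached connection still
                                     # points at the first loop, now closed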

49
www/.env.selfhosted.example Normal file
View File

@@ -0,0 +1,49 @@
# =======================================================
# Reflector Self-Hosted Production — Frontend Configuration
# Generated by: ./scripts/setup-selfhosted.sh
# =======================================================
# Site URL — set to your domain or server IP
# The setup script auto-detects this on Linux.
SITE_URL=https://localhost
NEXTAUTH_URL=https://localhost
NEXTAUTH_SECRET=changeme-generate-a-secure-random-string
# API URLs
# Public-facing (what the browser uses):
API_URL=https://localhost
WEBSOCKET_URL=auto
# Internal Docker network (server-side rendering):
SERVER_API_URL=http://server:1250
KV_URL=redis://redis:6379
# Authentication
# Set to true when Authentik is configured
FEATURE_REQUIRE_LOGIN=false
# Nullify auth vars when not using Authentik
AUTHENTIK_ISSUER=
AUTHENTIK_REFRESH_TOKEN_URL=
# =======================================================
# Authentik OAuth/OIDC (Optional)
# Uncomment and configure when enabling authentication.
# See docsv2/selfhosted-production.md for setup instructions.
# =======================================================
# FEATURE_REQUIRE_LOGIN=true
# AUTHENTIK_ISSUER=https://authentik.example.com/application/o/reflector
# AUTHENTIK_REFRESH_TOKEN_URL=https://authentik.example.com/application/o/token/
# AUTHENTIK_CLIENT_ID=your-client-id
# AUTHENTIK_CLIENT_SECRET=your-client-secret
# =======================================================
# Feature Flags
# =======================================================
# FEATURE_ROOMS=true
# FEATURE_BROWSE=true
# =======================================================
# Sentry (Optional)
# =======================================================
# SENTRY_DSN=

4
yarn.lock Normal file
View File

@@ -0,0 +1,4 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1