feat: reduce Hatchet payload size by removing words from topic chunk workflows

Remove ~6.5MB of redundant Word data from Hatchet task boundaries:
- Remove words from TopicChunkInput/TopicChunkResult (child workflow I/O)
- detect_topics maps words from local chunks by chunk_index instead
- TopicsResult carries empty transcript words (persisted to DB already)
- extract_subjects refetches topics from DB instead of task output
- Clear topics at detect_topics start for retry idempotency
2026-05-06 19:25:18 +00:00 · 2026-02-12 09:50:26 -05:00
47 changed files with 496 additions and 4106 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -23,5 +23,3 @@ www/.env.production
 docs/pnpm-lock.yaml
 .secrets
 opencode.json
-
-vibedocs/
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,28 +1,5 @@
 # Changelog

-## [0.34.0](https://github.com/Monadical-SAS/reflector/compare/v0.33.0...v0.34.0) (2026-02-20)
-
-
-### Features
-
-* add Caddy reverse proxy with auto HTTPS for LAN access and auto-derive WebSocket URL ([#863](https://github.com/Monadical-SAS/reflector/issues/863)) ([7f2a401](https://github.com/Monadical-SAS/reflector/commit/7f2a4013cbb3d3ee3e76885f28d73331dcaf325c))
-* add change_seq to transcripts for ingestion support ([#868](https://github.com/Monadical-SAS/reflector/issues/868)) ([d4cc6be](https://github.com/Monadical-SAS/reflector/commit/d4cc6be1fed56ea7fba06acb8d50c9de43b26b07))
-* local llm support + standalone-script doc/draft ([#856](https://github.com/Monadical-SAS/reflector/issues/856)) ([b468427](https://github.com/Monadical-SAS/reflector/commit/b468427f1bb12634f5840990e9d64b2c145d7c1a))
-* remove network_mode host for standalone WebRTC ([#864](https://github.com/Monadical-SAS/reflector/issues/864)) ([9dbf155](https://github.com/Monadical-SAS/reflector/commit/9dbf155be4de7c059035a75f90c7bf0845344b74))
-* standalone frontend uses production build instead of dev server ([#862](https://github.com/Monadical-SAS/reflector/issues/862)) ([5bca925](https://github.com/Monadical-SAS/reflector/commit/5bca92510a5c33f8baeeaac2c346fb1978366ac8))
-
-
-### Bug Fixes
-
-* auto-rebuild standalone images and blank Hatchet vars ([3d13e5d](https://github.com/Monadical-SAS/reflector/commit/3d13e5d42fc53ce3c005841265ed1e8735a61518))
-* check compose version output, not just exit code ([e57c618](https://github.com/Monadical-SAS/reflector/commit/e57c6186f92d66e4525786e56b018c08cf792d2f))
-* check for Docker BuildKit (buildx) before building images ([14a8b58](https://github.com/Monadical-SAS/reflector/commit/14a8b5808e5aed860e55aaed35a0fdf8b2f4afa3))
-* check for Docker Compose plugin before running standalone setup ([36a8dae](https://github.com/Monadical-SAS/reflector/commit/36a8daee61c2b7a0937fd0914d51fb4ea8212ae7))
-* live flow real-time updates during processing ([#861](https://github.com/Monadical-SAS/reflector/issues/861)) ([972a52d](https://github.com/Monadical-SAS/reflector/commit/972a52d22f989f9e2c6f52362b3f1a4e17773663))
-* remove max_tokens cap to support thinking models (Kimi-K2.5) ([#869](https://github.com/Monadical-SAS/reflector/issues/869)) ([527a069](https://github.com/Monadical-SAS/reflector/commit/527a069ba9eff6717ccd4bb1e839674edebffceb))
-* standalone on ubuntu ([#865](https://github.com/Monadical-SAS/reflector/issues/865)) ([a8ad237](https://github.com/Monadical-SAS/reflector/commit/a8ad237d8571d5ef5c78fb4427c538592d6a7b43))
-* standalone server networking and setup diagnostics ([695f3c4](https://github.com/Monadical-SAS/reflector/commit/695f3c49285254869f6a6cbd5f860d1169fa4daa))
-
 ## [0.33.0](https://github.com/Monadical-SAS/reflector/compare/v0.32.2...v0.33.0) (2026-02-05)


--- a/Caddyfile.selfhosted.example
+++ b/Caddyfile.selfhosted.example
@@ -1,25 +0,0 @@
-# Reflector self-hosted production — HTTPS via Caddy reverse proxy
-# Copy to Caddyfile: cp Caddyfile.selfhosted.example Caddyfile
-# Run: ./scripts/setup-selfhosted.sh --ollama-gpu --garage --caddy
-#
-# DOMAIN defaults to localhost (self-signed cert).
-# Set to your real domain for automatic Let's Encrypt:
-#   export DOMAIN=reflector.example.com
-#
-# TLS_MODE defaults to "internal" (self-signed).
-# Set to "" for automatic Let's Encrypt (requires real domain + ports 80/443 open):
-#   export TLS_MODE=""
-
-{$DOMAIN:localhost} {
-    tls {$TLS_MODE:internal}
-
-    handle /v1/* {
-        reverse_proxy server:1250
-    }
-    handle /health {
-        reverse_proxy server:1250
-    }
-    handle {
-        reverse_proxy web:3000
-    }
-}
--- a/Caddyfile.standalone.example
+++ b/Caddyfile.standalone.example
@@ -1,42 +0,0 @@
-# Reflector standalone — HTTPS via Caddy (droplet / IP access)
-# Copy to Caddyfile: cp Caddyfile.standalone.example Caddyfile
-# Run: docker compose -f docker-compose.standalone.yml --profile ollama-cpu up -d
-#
-# :443 = catch-all inside container; Docker maps host port 3043 → container 443
-# on_demand = generate self-signed cert for IP/SNI on first request (required for bare IP access)
-# Browser will warn. Click Advanced → Proceed.
-# Access at https://localhost:3043 (or https://YOUR_IP:3043 on droplet)
-# Update www/.env.local with: API_URL=https://YOUR_IP:3043, WEBSOCKET_URL=wss://YOUR_IP:3043, SITE_URL=https://YOUR_IP:3043, NEXTAUTH_URL=https://YOUR_IP:3043
-
-:443 {
-    tls internal {
-        on_demand
-    }
-    handle /v1/* {
-        reverse_proxy server:1250
-    }
-    handle /health {
-        reverse_proxy server:1250
-    }
-    handle {
-        reverse_proxy web:3000
-    }
-}
-
-# Option B: localhost (comment Option A, uncomment this)
-# app.localhost {
-#     tls internal
-#     reverse_proxy web:3000
-# }
-# api.localhost {
-#     tls internal
-#     reverse_proxy server:1250
-# }
-
-# Option C: Real domain (uncomment and replace example.com)
-# app.example.com {
-#     reverse_proxy web:3000
-# }
-# api.example.com {
-#     reverse_proxy server:1250
-# }
--- a/docker-compose.selfhosted.yml
+++ b/docker-compose.selfhosted.yml
@@ -1,315 +0,0 @@
-# Self-hosted production Docker Compose — single file for everything.
-#
-# Usage: ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
-#   or:  docker compose -f docker-compose.selfhosted.yml --profile gpu [--profile ollama-gpu] [--profile garage] [--profile caddy] up -d
-#
-# Specialized models (pick ONE — required):
-#   --profile gpu          NVIDIA GPU for transcription/diarization/translation
-#   --profile cpu          CPU-only for transcription/diarization/translation
-#
-# Local LLM (optional — for summarization/topics):
-#   --profile ollama-gpu   Local Ollama with NVIDIA GPU
-#   --profile ollama-cpu   Local Ollama on CPU only
-#
-# Other optional services:
-#   --profile garage       Local S3-compatible storage (Garage)
-#   --profile caddy        Reverse proxy with auto-SSL
-#
-# Prerequisites:
-#   1. Run ./scripts/setup-selfhosted.sh to generate env files and secrets
-#   2. Or manually create server/.env and www/.env from the .selfhosted.example templates
-
-services:
-  # ===========================================================
-  # Always-on core services (no profile required)
-  # ===========================================================
-
-  server:
-    build:
-      context: ./server
-      dockerfile: Dockerfile
-    image: monadicalsas/reflector-backend:latest
-    restart: unless-stopped
-    ports:
-      - "127.0.0.1:1250:1250"
-      - "50000-50100:50000-50100/udp"
-    env_file:
-      - ./server/.env
-    environment:
-      ENTRYPOINT: server
-      # Docker-internal overrides (always correct inside compose network)
-      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
-      REDIS_HOST: redis
-      CELERY_BROKER_URL: redis://redis:6379/1
-      CELERY_RESULT_BACKEND: redis://redis:6379/1
-      HATCHET_CLIENT_SERVER_URL: ""
-      HATCHET_CLIENT_HOST_PORT: ""
-      # Specialized models via gpu/cpu container (aliased as "transcription")
-      TRANSCRIPT_BACKEND: modal
-      TRANSCRIPT_URL: http://transcription:8000
-      TRANSCRIPT_MODAL_API_KEY: selfhosted
-      DIARIZATION_BACKEND: modal
-      DIARIZATION_URL: http://transcription:8000
-      TRANSLATION_BACKEND: modal
-      TRANSLATE_URL: http://transcription:8000
-      # WebRTC: fixed UDP port range for ICE candidates (mapped above)
-      WEBRTC_PORT_RANGE: "50000-50100"
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_started
-    volumes:
-      - server_data:/app/data
-
-  worker:
-    build:
-      context: ./server
-      dockerfile: Dockerfile
-    image: monadicalsas/reflector-backend:latest
-    restart: unless-stopped
-    env_file:
-      - ./server/.env
-    environment:
-      ENTRYPOINT: worker
-      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
-      REDIS_HOST: redis
-      CELERY_BROKER_URL: redis://redis:6379/1
-      CELERY_RESULT_BACKEND: redis://redis:6379/1
-      HATCHET_CLIENT_SERVER_URL: ""
-      HATCHET_CLIENT_HOST_PORT: ""
-      TRANSCRIPT_BACKEND: modal
-      TRANSCRIPT_URL: http://transcription:8000
-      TRANSCRIPT_MODAL_API_KEY: selfhosted
-      DIARIZATION_BACKEND: modal
-      DIARIZATION_URL: http://transcription:8000
-      TRANSLATION_BACKEND: modal
-      TRANSLATE_URL: http://transcription:8000
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_started
-    volumes:
-      - server_data:/app/data
-
-  beat:
-    build:
-      context: ./server
-      dockerfile: Dockerfile
-    image: monadicalsas/reflector-backend:latest
-    restart: unless-stopped
-    env_file:
-      - ./server/.env
-    environment:
-      ENTRYPOINT: beat
-      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
-      REDIS_HOST: redis
-      CELERY_BROKER_URL: redis://redis:6379/1
-      CELERY_RESULT_BACKEND: redis://redis:6379/1
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_started
-
-  web:
-    build:
-      context: ./www
-      dockerfile: Dockerfile
-    image: monadicalsas/reflector-frontend:latest
-    restart: unless-stopped
-    ports:
-      - "127.0.0.1:3000:3000"
-    env_file:
-      - ./www/.env
-    environment:
-      NODE_ENV: production
-      SERVER_API_URL: http://server:1250
-      KV_URL: redis://redis:6379
-      KV_USE_TLS: "false"
-      AUTHENTIK_ISSUER: ""
-      AUTHENTIK_REFRESH_TOKEN_URL: ""
-    depends_on:
-      - redis
-
-  redis:
-    image: redis:7.2-alpine
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 30s
-      timeout: 3s
-      retries: 3
-    volumes:
-      - redis_data:/data
-
-  postgres:
-    image: postgres:17-alpine
-    restart: unless-stopped
-    environment:
-      POSTGRES_USER: reflector
-      POSTGRES_PASSWORD: reflector
-      POSTGRES_DB: reflector
-    volumes:
-      - postgres_data:/var/lib/postgresql/data
-    healthcheck:
-      test: ["CMD-SHELL", "pg_isready -U reflector"]
-      interval: 30s
-      timeout: 3s
-      retries: 3
-
-  # ===========================================================
-  # Specialized model containers (transcription, diarization, translation)
-  # Both gpu and cpu get alias "transcription" so server config never changes.
-  # ===========================================================
-
-  gpu:
-    build:
-      context: ./gpu/self_hosted
-      dockerfile: Dockerfile
-    profiles: [gpu]
-    restart: unless-stopped
-    ports:
-      - "127.0.0.1:8000:8000"
-    environment:
-      HF_TOKEN: ${HF_TOKEN:-}
-    volumes:
-      - gpu_cache:/root/.cache
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
-      interval: 15s
-      timeout: 5s
-      retries: 10
-      start_period: 120s
-    networks:
-      default:
-        aliases:
-          - transcription
-
-  cpu:
-    build:
-      context: ./gpu/self_hosted
-      dockerfile: Dockerfile.cpu
-    profiles: [cpu]
-    restart: unless-stopped
-    ports:
-      - "127.0.0.1:8000:8000"
-    environment:
-      HF_TOKEN: ${HF_TOKEN:-}
-    volumes:
-      - gpu_cache:/root/.cache
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
-      interval: 15s
-      timeout: 5s
-      retries: 10
-      start_period: 120s
-    networks:
-      default:
-        aliases:
-          - transcription
-
-  # ===========================================================
-  # Ollama — local LLM for summarization & topic detection
-  # Only started with --ollama-gpu or --ollama-cpu modes.
-  # ===========================================================
-
-  ollama:
-    image: ollama/ollama:latest
-    profiles: [ollama-gpu]
-    restart: unless-stopped
-    ports:
-      - "127.0.0.1:11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  ollama-cpu:
-    image: ollama/ollama:latest
-    profiles: [ollama-cpu]
-    restart: unless-stopped
-    ports:
-      - "127.0.0.1:11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  # ===========================================================
-  # Garage — local S3-compatible object storage (optional)
-  # ===========================================================
-
-  garage:
-    image: dxflrs/garage:v1.1.0
-    profiles: [garage]
-    restart: unless-stopped
-    ports:
-      - "3900:3900"   # S3 API
-      - "3903:3903"   # Admin API
-    volumes:
-      - garage_data:/var/lib/garage/data
-      - garage_meta:/var/lib/garage/meta
-      - ./data/garage.toml:/etc/garage.toml:ro
-    healthcheck:
-      test: ["CMD", "/garage", "stats"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 5s
-
-  # ===========================================================
-  # Caddy — reverse proxy with automatic SSL (optional)
-  # Maps 80:80 and 443:443 — only exposed ports in the stack.
-  # ===========================================================
-
-  caddy:
-    image: caddy:2-alpine
-    profiles: [caddy]
-    restart: unless-stopped
-    ports:
-      - "80:80"
-      - "443:443"
-    volumes:
-      - ./Caddyfile:/etc/caddy/Caddyfile:ro
-      - caddy_data:/data
-      - caddy_config:/config
-    depends_on:
-      - web
-      - server
-
-volumes:
-  postgres_data:
-  redis_data:
-  server_data:
-  gpu_cache:
-  garage_data:
-  garage_meta:
-  ollama_data:
-  caddy_data:
-  caddy_config:
-
-networks:
-  default:
-    attachable: true
--- a/docker-compose.standalone.yml
+++ b/docker-compose.standalone.yml
@@ -1,148 +1,11 @@
-# Self-contained standalone compose for fully local deployment (no external dependencies).
-# Usage: docker compose -f docker-compose.standalone.yml up -d
+# Standalone services for fully local deployment (no external dependencies).
+# Usage: docker compose -f docker-compose.yml -f docker-compose.standalone.yml up -d
 #
 # On Linux with NVIDIA GPU, also pass: --profile ollama-gpu
 # On Linux without GPU:                --profile ollama-cpu
 # On Mac: Ollama runs natively (Metal GPU) — no profile needed, services here unused.

 services:
-  caddy:
-    image: caddy:2-alpine
-    restart: unless-stopped
-    ports:
-      - "3043:443"
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    volumes:
-      - ./Caddyfile:/etc/caddy/Caddyfile:ro
-      - caddy_data:/data
-      - caddy_config:/config
-    depends_on:
-      - web
-      - server
-
-  server:
-    build:
-      context: server
-    ports:
-      - "1250:1250"
-      - "50000-50100:50000-50100/udp"
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    volumes:
-      - ./server/:/app/
-      - /app/.venv
-    env_file:
-      - ./server/.env
-    environment:
-      ENTRYPOINT: server
-      # Docker DNS names instead of localhost
-      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
-      REDIS_HOST: redis
-      CELERY_BROKER_URL: redis://redis:6379/1
-      CELERY_RESULT_BACKEND: redis://redis:6379/1
-      # Standalone doesn't run Hatchet
-      HATCHET_CLIENT_SERVER_URL: ""
-      HATCHET_CLIENT_HOST_PORT: ""
-      # Self-hosted transcription/diarization via CPU service
-      TRANSCRIPT_BACKEND: modal
-      TRANSCRIPT_URL: http://cpu:8000
-      TRANSCRIPT_MODAL_API_KEY: local
-      DIARIZATION_BACKEND: modal
-      DIARIZATION_URL: http://cpu:8000
-      # Caddy reverse proxy prefix
-      ROOT_PATH: /server-api
-      # WebRTC: fixed UDP port range for ICE candidates (mapped above).
-      # WEBRTC_HOST is set by setup-standalone.sh in server/.env (LAN IP detection).
-      WEBRTC_PORT_RANGE: "50000-50100"
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_started
-
-  worker:
-    build:
-      context: server
-    volumes:
-      - ./server/:/app/
-      - /app/.venv
-    env_file:
-      - ./server/.env
-    environment:
-      ENTRYPOINT: worker
-      HATCHET_CLIENT_SERVER_URL: ""
-      HATCHET_CLIENT_HOST_PORT: ""
-      TRANSCRIPT_BACKEND: modal
-      TRANSCRIPT_URL: http://cpu:8000
-      TRANSCRIPT_MODAL_API_KEY: local
-      DIARIZATION_BACKEND: modal
-      DIARIZATION_URL: http://cpu:8000
-    depends_on:
-      redis:
-        condition: service_started
-
-  beat:
-    build:
-      context: server
-    volumes:
-      - ./server/:/app/
-      - /app/.venv
-    env_file:
-      - ./server/.env
-    environment:
-      ENTRYPOINT: beat
-    depends_on:
-      redis:
-        condition: service_started
-
-  redis:
-    image: redis:7.2
-    ports:
-      - 6379:6379
-
-  postgres:
-    image: postgres:17
-    command: postgres -c 'max_connections=200'
-    ports:
-      - 5432:5432
-    environment:
-      POSTGRES_USER: reflector
-      POSTGRES_PASSWORD: reflector
-      POSTGRES_DB: reflector
-    volumes:
-      - ./data/postgres:/var/lib/postgresql/data
-    healthcheck:
-      test: ["CMD-SHELL", "pg_isready -d reflector -U reflector"]
-      interval: 5s
-      timeout: 5s
-      retries: 10
-      start_period: 15s
-
-  web:
-    image: reflector-frontend-standalone
-    build:
-      context: ./www
-    ports:
-      - "3000:3000"
-    command: ["node", "server.js"]
-    env_file:
-      - ./www/.env.local
-    environment:
-      NODE_ENV: production
-      # API_URL, WEBSOCKET_URL, SITE_URL, NEXTAUTH_URL from www/.env.local (allows HTTPS)
-      # Server-side URLs (docker-network internal)
-      SERVER_API_URL: http://server:1250
-      KV_URL: redis://redis:6379
-      KV_USE_TLS: "false"
-      # Standalone: no external auth provider
-      FEATURE_REQUIRE_LOGIN: "false"
-      FEATURE_ROOMS: "false"
-      NEXTAUTH_SECRET: standalone-local-secret
-      # Nullify partial auth vars inherited from base env_file
-      AUTHENTIK_ISSUER: ""
-      AUTHENTIK_REFRESH_TOKEN_URL: ""
-
  garage:
    image: dxflrs/garage:v1.1.0
    ports:
@@ -160,6 +23,59 @@ services:
      retries: 5
      start_period: 5s

+  ollama:
+    image: ollama/ollama:latest
+    profiles: ["ollama-gpu"]
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  ollama-cpu:
+    image: ollama/ollama:latest
+    profiles: ["ollama-cpu"]
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  # Override server/worker to use the self-hosted service for transcription+diarization (beat is not overridden here).
+  # compose `environment:` overrides values from `env_file:` — no need to edit server/.env.
+  server:
+    environment:
+      TRANSCRIPT_BACKEND: modal
+      TRANSCRIPT_URL: http://localhost:8100
+      TRANSCRIPT_MODAL_API_KEY: local
+      DIARIZATION_BACKEND: modal
+      DIARIZATION_URL: http://localhost:8100
+
+  worker:
+    environment:
+      TRANSCRIPT_BACKEND: modal
+      TRANSCRIPT_URL: http://cpu:8000
+      TRANSCRIPT_MODAL_API_KEY: local
+      DIARIZATION_BACKEND: modal
+      DIARIZATION_URL: http://cpu:8000
+
  cpu:
    build:
      context: ./gpu/self_hosted
@@ -197,45 +113,8 @@ services:
      retries: 10
      start_period: 120s

-  ollama:
-    image: ollama/ollama:latest
-    profiles: ["ollama-gpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  ollama-cpu:
-    image: ollama/ollama:latest
-    profiles: ["ollama-cpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
 volumes:
  garage_data:
  garage_meta:
  ollama_data:
  gpu_cache:
-  caddy_data:
-  caddy_config:
--- a/docs/docs/installation/setup-standalone.md
+++ b/docs/docs/installation/setup-standalone.md
@@ -13,27 +13,15 @@ cd reflector
 ./scripts/setup-standalone.sh
 ```

-On Ubuntu, the setup script installs Docker automatically if missing.
-
 The script is idempotent — safe to re-run at any time. It detects what's already set up and skips completed steps.

 ## Prerequisites

- Docker with Compose V2 plugin (Docker Desktop, OrbStack, or Docker Engine + compose plugin)
+- Docker / OrbStack / Docker Desktop (any)
 - Mac (Apple Silicon) or Linux
 - 16GB+ RAM (32GB recommended for 14B LLM models)
 - **Mac only**: [Ollama](https://ollama.com/download) installed (`brew install ollama`)

-### Installing Docker (if not already installed)
-
-**Ubuntu**: The setup script runs `install-docker-ubuntu.sh` automatically when Docker is missing. Or run it manually:
-
-```bash
-./scripts/install-docker-ubuntu.sh
-```
-
-**Mac**: Install [Docker Desktop](https://www.docker.com/products/docker-desktop/) or [OrbStack](https://orbstack.dev/).
-
 ## What the script does

 ### 1. LLM inference via Ollama
@@ -48,28 +36,28 @@ Generates `server/.env` and `www/.env.local` with standalone defaults:

 **`server/.env`** — key settings:

-| Variable              | Value                                              | Why                                 |
-| --------------------- | -------------------------------------------------- | ----------------------------------- |
-| `DATABASE_URL`        | `postgresql+asyncpg://...@postgres:5432/reflector` | Docker-internal hostname            |
-| `REDIS_HOST`          | `redis`                                            | Docker-internal hostname            |
-| `CELERY_BROKER_URL`   | `redis://redis:6379/1`                             | Docker-internal hostname            |
-| `AUTH_BACKEND`        | `none`                                             | No Authentik in standalone          |
-| `TRANSCRIPT_BACKEND`  | `modal`                                            | HTTP API to self-hosted CPU service |
-| `TRANSCRIPT_URL`      | `http://cpu:8000`                                  | Docker-internal CPU service         |
-| `DIARIZATION_BACKEND` | `modal`                                            | HTTP API to self-hosted CPU service |
-| `DIARIZATION_URL`     | `http://cpu:8000`                                  | Docker-internal CPU service         |
-| `TRANSLATION_BACKEND` | `passthrough`                                      | No Modal                            |
-| `LLM_URL`             | `http://host.docker.internal:11434/v1` (Mac)       | Ollama endpoint                     |
+| Variable | Value | Why |
+|----------|-------|-----|
+| `DATABASE_URL` | `postgresql+asyncpg://...@postgres:5432/reflector` | Docker-internal hostname |
+| `REDIS_HOST` | `redis` | Docker-internal hostname |
+| `CELERY_BROKER_URL` | `redis://redis:6379/1` | Docker-internal hostname |
+| `AUTH_BACKEND` | `none` | No Authentik in standalone |
+| `TRANSCRIPT_BACKEND` | `modal` | HTTP API to self-hosted CPU service |
+| `TRANSCRIPT_URL` | `http://cpu:8000` | Docker-internal CPU service |
+| `DIARIZATION_BACKEND` | `modal` | HTTP API to self-hosted CPU service |
+| `DIARIZATION_URL` | `http://cpu:8000` | Docker-internal CPU service |
+| `TRANSLATION_BACKEND` | `passthrough` | No Modal |
+| `LLM_URL` | `http://host.docker.internal:11434/v1` (Mac) | Ollama endpoint |

 **`www/.env.local`** — key settings:

-| Variable                | Value                                      |
-| ----------------------- | ------------------------------------------ |
-| `API_URL`               | `https://localhost:3043` or `https://YOUR_IP:3043` (Linux) |
-| `SERVER_API_URL`        | `http://server:1250`                       |
-| `WEBSOCKET_URL`         | `auto`                                    |
-| `FEATURE_REQUIRE_LOGIN` | `false`                                    |
-| `NEXTAUTH_SECRET`       | `standalone-dev-secret-not-for-production` |
+| Variable | Value |
+|----------|-------|
+| `API_URL` | `http://localhost:1250` |
+| `SERVER_API_URL` | `http://server:1250` |
+| `WEBSOCKET_URL` | `ws://localhost:1250` |
+| `FEATURE_REQUIRE_LOGIN` | `false` |
+| `NEXTAUTH_SECRET` | `standalone-dev-secret-not-for-production` |

 If env files already exist (including symlinks from worktree setup), the script resolves symlinks and ensures all standalone-critical vars are set. Existing vars not related to standalone are preserved.

@@ -79,14 +67,14 @@ Standalone uses [Garage](https://garagehq.deuxfleurs.fr/) — a lightweight S3-c

 **`server/.env`** — storage settings added by the script:

-| Variable                                   | Value                | Why                                   |
-| ------------------------------------------ | -------------------- | ------------------------------------- |
-| `TRANSCRIPT_STORAGE_BACKEND`               | `aws`                | Uses the S3-compatible storage driver |
-| `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL`      | `http://garage:3900` | Docker-internal Garage S3 API         |
-| `TRANSCRIPT_STORAGE_AWS_BUCKET_NAME`       | `reflector-media`    | Created by the script                 |
-| `TRANSCRIPT_STORAGE_AWS_REGION`            | `garage`             | Must match Garage config              |
-| `TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID`     | _(auto-generated)_   | Created by `garage key create`        |
-| `TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY` | _(auto-generated)_   | Created by `garage key create`        |
+| Variable | Value | Why |
+|----------|-------|-----|
+| `TRANSCRIPT_STORAGE_BACKEND` | `aws` | Uses the S3-compatible storage driver |
+| `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL` | `http://garage:3900` | Docker-internal Garage S3 API |
+| `TRANSCRIPT_STORAGE_AWS_BUCKET_NAME` | `reflector-media` | Created by the script |
+| `TRANSCRIPT_STORAGE_AWS_REGION` | `garage` | Must match Garage config |
+| `TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID` | *(auto-generated)* | Created by `garage key create` |
+| `TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY` | *(auto-generated)* | Created by `garage key create` |

 The `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL` setting enables S3-compatible backends. When set, the storage driver uses path-style addressing and routes all requests to the custom endpoint. When unset (production AWS), behavior is unchanged.

@@ -119,25 +107,23 @@ Run automatically by the `server` container on startup (`runserver.sh` calls `al
 ### 7. Health check

 Verifies:
-
 - CPU service responds (transcription + diarization ready)
 - Server responds at `http://localhost:1250/health`
- Frontend serves at `http://localhost:3000` (or via Caddy at `https://localhost:3043`)
+- Frontend serves at `http://localhost:3000`
 - LLM endpoint reachable from inside containers

 ## Services

-| Service    | Port       | Purpose                                            |
-| ---------- | ---------- | -------------------------------------------------- |
-| `caddy`    | 3043       | Reverse proxy (HTTPS, self-signed cert)            |
-| `server`   | 1250       | FastAPI backend (runs migrations on start)         |
-| `web`      | 3000       | Next.js frontend                                   |
-| `postgres` | 5432       | PostgreSQL database                                |
-| `redis`    | 6379       | Cache + Celery broker                              |
-| `garage`   | 3900, 3903 | S3-compatible object storage (S3 API + admin API)  |
-| `cpu`      | —          | Self-hosted transcription + diarization (CPU-only) |
-| `worker`   | —          | Celery worker (live pipeline post-processing)      |
-| `beat`     | —          | Celery beat (scheduled tasks)                      |
+| Service | Port | Purpose |
+|---------|------|---------|
+| `server` | 1250 | FastAPI backend (runs migrations on start) |
+| `web` | 3000 | Next.js frontend |
+| `postgres` | 5432 | PostgreSQL database |
+| `redis` | 6379 | Cache + Celery broker |
+| `garage` | 3900, 3903 | S3-compatible object storage (S3 API + admin API) |
+| `cpu` | — | Self-hosted transcription + diarization (CPU-only) |
+| `worker` | — | Celery worker (live pipeline post-processing) |
+| `beat` | — | Celery beat (scheduled tasks) |

 ## Testing programmatically

@@ -171,89 +157,8 @@ Expected result: status `ended`, auto-generated `title`, `short_summary`, `long_

 CPU-only processing is slow (~15 min for a 3 min audio file). Diarization finishes in ~3 min, transcription takes the rest.

-## Enabling HTTPS (droplet via IP)
-
-To serve Reflector over HTTPS on a droplet accessed by IP (self-signed certificate):
-
-1. **Copy the Caddyfile** (no edits needed — `:443` catches all HTTPS inside container, mapped to host port 3043):
-   ```bash
-   cp Caddyfile.standalone.example Caddyfile
-   ```
-
-2. **Update `www/.env.local`** with HTTPS URLs (port 3043):
-   ```env
-   API_URL=https://YOUR_IP:3043
-   WEBSOCKET_URL=wss://YOUR_IP:3043
-   SITE_URL=https://YOUR_IP:3043
-   NEXTAUTH_URL=https://YOUR_IP:3043
-   ```
-
-3. **Restart services**:
-   ```bash
-   docker compose -f docker-compose.standalone.yml --profile ollama-cpu up -d
-   ```
-   (Use `ollama-gpu` instead of `ollama-cpu` if you have an NVIDIA GPU.)
-
-4. **Access** at `https://YOUR_IP:3043`. The browser will warn about the self-signed cert — click **Advanced** → **Proceed to YOUR_IP (unsafe)**. All traffic (page, API, WebSocket) uses the same origin, so accepting once is enough.
-
 ## Troubleshooting

-### ERR_SSL_PROTOCOL_ERROR when accessing https://YOUR_IP
-
-You do **not** need a domain — the setup works with an IP address. This error usually means Caddy isn't serving TLS on port 3043. Check in order:
-
-1. **Caddyfile** — must use the `:443` catch-all (container-internal; Docker maps host 3043 → container 443):
-   ```bash
-   cp Caddyfile.standalone.example Caddyfile
-   ```
-
-2. **Firewall** — allow port 3043 (common on DigitalOcean):
-   ```bash
-   sudo ufw allow 3043
-   sudo ufw status
-   ```
-
-3. **Caddy running** — verify and restart:
-   ```bash
-   docker compose -f docker-compose.standalone.yml ps
-   docker compose -f docker-compose.standalone.yml logs caddy --tail 20
-   docker compose -f docker-compose.standalone.yml --profile ollama-cpu up -d
-   ```
-
-4. **Test from the droplet** — if this works, the issue is external (firewall, network):
-   ```bash
-   curl -vk https://localhost:3043
-   ```
-
-5. **localhost works but external IP fails** — Re-run the setup script; it generates a Caddyfile with your droplet IP explicitly, so Caddy provisions the cert at startup:
-   ```bash
-   ./scripts/setup-standalone.sh
-   ```
-   Or manually create `Caddyfile` with your IP (replace 138.197.162.116):
-   ```
-   https://138.197.162.116, localhost {
-       tls internal
-       handle /v1/* { reverse_proxy server:1250 }
-       handle /health { reverse_proxy server:1250 }
-       handle { reverse_proxy web:3000 }
-   }
-   ```
-   Then restart: `docker compose -f docker-compose.standalone.yml --profile ollama-cpu up -d`
-
-6. **Still failing?** Try HTTP (no TLS) — create `Caddyfile`:
-   ```
-   :80 {
-       handle /v1/* { reverse_proxy server:1250 }
-       handle /health { reverse_proxy server:1250 }
-       handle { reverse_proxy web:3000 }
-   }
-   ```
-   Update `www/.env.local`: `API_URL=http://YOUR_IP:3043`, `WEBSOCKET_URL=ws://YOUR_IP:3043`, `SITE_URL=http://YOUR_IP:3043`, `NEXTAUTH_URL=http://YOUR_IP:3043`. Restart, then access `http://YOUR_IP:3043`.
-
-### Docker not ready
-
-If setup fails with "Docker not ready", on Ubuntu run `./scripts/install-docker-ubuntu.sh`. If Docker is installed but you're not root, run `newgrp docker` then run the setup script again.
-
 ### Port conflicts (most common issue)

 If the frontend or backend behaves unexpectedly (e.g., env vars seem ignored, changes don't take effect), **check for port conflicts first**:
@@ -271,7 +176,6 @@ lsof -ti :3000 | xargs kill
 ```

 Common causes:
-
 - A stale `next dev` or `pnpm dev` process from another terminal/worktree
 - Another Docker Compose project (different worktree) with containers on the same ports — the setup script only manages its own project; containers from other projects must be stopped manually (`docker ps` to find them, `docker stop` to kill them)

@@ -287,7 +191,7 @@ Standalone runs without authentication (`FEATURE_REQUIRE_LOGIN=false`, `AUTH_BAC

 1. In `www/.env.local`: set `FEATURE_REQUIRE_LOGIN=true`, uncomment `AUTHENTIK_ISSUER` and `AUTHENTIK_REFRESH_TOKEN_URL`
 2. In `server/.env`: set `AUTH_BACKEND=authentik` (or your backend), configure `AUTH_JWT_AUDIENCE`
-3. Restart: `docker compose -f docker-compose.standalone.yml up -d --force-recreate web server`
+3. Restart: `docker compose -f docker-compose.yml -f docker-compose.standalone.yml up -d --force-recreate web server`

 ## What's NOT covered

--- a/docsv2/selfhosted-architecture.md
+++ b/docsv2/selfhosted-architecture.md
@@ -1,468 +0,0 @@
-# How the Self-Hosted Setup Works
-
-This document explains the internals of the self-hosted deployment: how the setup script orchestrates everything, how the Docker Compose profiles work, how services communicate, and how configuration flows from flags to running containers.
-
-> For quick-start instructions and flag reference, see [Self-Hosted Production Deployment](selfhosted-production.md).
-
-## Table of Contents
-
- [Overview](#overview)
- [The Setup Script Step by Step](#the-setup-script-step-by-step)
- [Docker Compose Profile System](#docker-compose-profile-system)
- [Service Architecture](#service-architecture)
- [Configuration Flow](#configuration-flow)
- [Storage Architecture](#storage-architecture)
- [SSL/TLS and Reverse Proxy](#ssltls-and-reverse-proxy)
- [Build vs Pull Workflow](#build-vs-pull-workflow)
- [Background Task Processing](#background-task-processing)
- [Network and Port Layout](#network-and-port-layout)
-
---
-
-## Overview
-
-The self-hosted deployment runs the entire Reflector platform on a single server using Docker Compose. A single bash script (`scripts/setup-selfhosted.sh`) handles all configuration and orchestration. The key design principles are:
-
- **One command to deploy** — flags select which features to enable
- **Idempotent** — safe to re-run without losing existing configuration
- **Profile-based composition** — Docker Compose profiles activate optional services
- **No external dependencies required** — with `--garage` and `--ollama-*`, everything runs locally
-
-## The Setup Script Step by Step
-
-The script (`scripts/setup-selfhosted.sh`) runs 7 sequential steps. Here's what each one does and why.
-
-### Step 0: Prerequisites
-
-Validates the environment before doing anything:
-
- **Docker Compose V2** — checks `docker compose version` output (not the legacy `docker-compose`)
- **Docker daemon** — verifies `docker info` succeeds
- **NVIDIA GPU** — only checked when `--gpu` or `--ollama-gpu` is used; runs `nvidia-smi` to confirm drivers are installed
- **Compose file** — verifies `docker-compose.selfhosted.yml` exists at the expected path
-
-If any check fails, the script exits with a clear error message and remediation steps.
-
-### Step 1: Generate Secrets
-
-Creates cryptographic secrets needed by the backend and frontend:
-
- **`SECRET_KEY`** — used by the FastAPI server for session signing (64 hex chars via `openssl rand -hex 32`)
- **`NEXTAUTH_SECRET`** — used by Next.js NextAuth for JWT signing
-
-Secrets are only generated if they don't already exist or are still set to the placeholder value `changeme`. This is what makes the script idempotent for secrets.
-
-### Step 2: Generate `server/.env`
-
-Creates or updates the backend environment file from `server/.env.selfhosted.example`. Sets:
-
- **Infrastructure** — PostgreSQL URL, Redis host, Celery broker (all pointing to Docker-internal hostnames)
- **Public URLs** — `BASE_URL` and `CORS_ORIGIN` computed from the domain (if `--domain`), IP (if detected on Linux), or `localhost`
- **WebRTC** — `WEBRTC_HOST` set to the server's LAN IP so browsers can reach UDP ICE candidates
- **Specialized models** — always points to `http://transcription:8000` (the Docker network alias shared by GPU and CPU containers)
- **HuggingFace token** — prompts interactively for pyannote model access; writes to root `.env` so Docker Compose can inject it into GPU/CPU containers
- **LLM** — if `--ollama-*` is used, configures `LLM_URL` pointing to the Ollama container. Otherwise, warns that the user needs to configure an external LLM
- **Public mode** — sets `PUBLIC_MODE=true` so the app is accessible without authentication by default
-
-The script uses `env_set` for each variable, which either updates an existing line or appends a new one. This means re-running the script updates values in-place without duplicating keys.
-
-### Step 3: Generate `www/.env`
-
-Creates or updates the frontend environment file from `www/.env.selfhosted.example`. Sets:
-
- **`SITE_URL` / `NEXTAUTH_URL` / `API_URL`** — all set to the same public-facing URL (with `https://` if Caddy is enabled)
- **`WEBSOCKET_URL`** — set to `auto`, which tells the frontend to derive the WebSocket URL from the page URL automatically
- **`SERVER_API_URL`** — always `http://server:1250` (Docker-internal, used for server-side rendering)
- **`KV_URL`** — Redis URL for Next.js caching
- **`FEATURE_REQUIRE_LOGIN`** — `false` by default (matches `PUBLIC_MODE=true` on the backend)
-
-### Step 4: Storage Setup
-
-Branches based on whether `--garage` was passed:
-
-**With `--garage` (local S3):**
-
-1. Generates `data/garage.toml` from a template, injecting a random RPC secret
-2. Starts only the Garage container (`docker compose --profile garage up -d garage`)
-3. Waits for the Garage admin API to respond on port 3903
-4. Assigns the node to a storage layout (1GB capacity, zone `dc1`)
-5. Creates the `reflector-media` bucket
-6. Creates an access key named `reflector` and grants it read/write on the bucket
-7. Writes all S3 credentials (`ENDPOINT_URL`, `BUCKET_NAME`, `REGION`, `ACCESS_KEY_ID`, `SECRET_ACCESS_KEY`) to `server/.env`
-
-The Garage endpoint is `http://garage:3900` (Docker-internal), and the region is set to `garage` (arbitrary, Garage ignores it). The boto3 client uses path-style addressing when an endpoint URL is configured, which is required for S3-compatible services like Garage.
-
-**Without `--garage` (external S3):**
-
-1. Checks `server/.env` for the four required S3 variables
-2. If any are missing, prompts interactively for each one
-3. Optionally prompts for an endpoint URL (for MinIO, Backblaze B2, etc.)
-
-### Step 5: Caddyfile
-
-Only runs when `--caddy` or `--domain` is used. Generates a Caddy configuration file:
-
-**With `--domain`:** Creates a named site block (`reflector.example.com { ... }`). Caddy automatically provisions a Let's Encrypt certificate for this domain. Requires DNS pointing to the server and ports 80/443 open.
-
-**Without `--domain` (IP access):** Creates a catch-all `:443 { tls internal ... }` block. Caddy generates a self-signed certificate. Browsers will show a security warning.
-
-Both configurations route:
- `/v1/*` and `/health` to the backend (`server:1250`)
- Everything else to the frontend (`web:3000`)
-
-### Step 6: Start Services
-
-1. **Always builds the GPU/CPU model image** — these are never prebuilt because they contain ML model download logic specific to the host's hardware
-2. **With `--build`:** Also builds backend (server, worker, beat) and frontend (web) images from source
-3. **Without `--build`:** Pulls prebuilt images from the Docker registry (`monadicalsas/reflector-backend:latest`, `monadicalsas/reflector-frontend:latest`)
-4. **Starts all services** — `docker compose up -d` with the active profiles
-5. **Quick sanity check** — after 3 seconds, checks for any containers that exited immediately
-
-### Step 7: Health Checks
-
-Waits for each service in order, with generous timeouts:
-
-| Service | Check | Timeout | Notes |
-|---------|-------|---------|-------|
-| GPU/CPU models | `curl http://localhost:8000/docs` | 10 min (120 x 5s) | First start downloads ~1GB of models |
-| Ollama | `curl http://localhost:11434/api/tags` | 3 min (60 x 3s) | Then pulls the selected model |
-| Server API | `curl http://localhost:1250/health` | 7.5 min (90 x 5s) | First start runs database migrations |
-| Frontend | `curl http://localhost:3000` | 1.5 min (30 x 3s) | Next.js build on first start |
-| Caddy | `curl -k https://localhost` | Quick check | After other services are up |
-
-If the server container exits during the health check, the script dumps diagnostics (container statuses + logs) before exiting.
-
-After the Ollama health check passes, the script checks if the selected model is already pulled. If not, it runs `ollama pull <model>` inside the container.
-
---
-
-## Docker Compose Profile System
-
-The compose file (`docker-compose.selfhosted.yml`) uses Docker Compose profiles to make services optional. Only services whose profiles match the active `--profile` flags are started.
-
-### Always-on Services (no profile)
-
-These start regardless of which flags you pass:
-
-| Service | Role | Image |
-|---------|------|-------|
-| `server` | FastAPI backend, API endpoints, WebRTC | `monadicalsas/reflector-backend:latest` |
-| `worker` | Celery worker for background processing | Same image, `ENTRYPOINT=worker` |
-| `beat` | Celery beat scheduler for periodic tasks | Same image, `ENTRYPOINT=beat` |
-| `web` | Next.js frontend | `monadicalsas/reflector-frontend:latest` |
-| `redis` | Message broker + caching | `redis:7.2-alpine` |
-| `postgres` | Primary database | `postgres:17-alpine` |
-
-### Profile-Based Services
-
-| Profile | Service | Role |
-|---------|---------|------|
-| `gpu` | `gpu` | NVIDIA GPU-accelerated transcription/diarization/translation |
-| `cpu` | `cpu` | CPU-only transcription/diarization/translation |
-| `ollama-gpu` | `ollama` | Local Ollama LLM with GPU |
-| `ollama-cpu` | `ollama-cpu` | Local Ollama LLM on CPU |
-| `garage` | `garage` | Local S3-compatible object storage |
-| `caddy` | `caddy` | Reverse proxy with SSL |
-
-### The "transcription" Alias
-
-Both the `gpu` and `cpu` services define a Docker network alias of `transcription`. This means the backend always connects to `http://transcription:8000` regardless of which profile is active. The alias is defined in the compose file's `networks.default.aliases` section.
-
---
-
-## Service Architecture
-
-```
-                    ┌─────────────┐
-  Internet ────────>│    Caddy     │ :80/:443   (profile: caddy)
-                    └──────┬──────┘
-                           │
-              ┌────────────┼────────────┐
-              │            │            │
-              v            v            │
-         ┌─────────┐  ┌─────────┐      │
-         │   web   │  │ server  │      │
-         │ :3000   │  │ :1250   │      │
-         └─────────┘  └────┬────┘      │
-                           │            │
-                      ┌────┴────┐       │
-                      │ worker  │       │
-                      │  beat   │       │
-                      └────┬────┘       │
-                           │            │
-            ┌──────────────┼────────────┤
-            │              │            │
-            v              v            v
-      ┌───────────┐  ┌─────────┐  ┌─────────┐
-      │transcription│ │postgres │  │  redis  │
-      │ (gpu/cpu) │  │ :5432   │  │ :6379   │
-      │ :8000     │  └─────────┘  └─────────┘
-      └───────────┘
-            │
-      ┌─────┴─────┐     ┌─────────┐
-      │  ollama   │     │ garage  │
-      │(optional) │     │(optional│
-      │ :11434    │     │  S3)    │
-      └───────────┘     └─────────┘
-```
-
-### How Services Interact
-
-1. **User request** hits Caddy (if enabled), which routes to `web` (pages) or `server` (API)
-2. **`web`** renders pages server-side using `SERVER_API_URL=http://server:1250` and client-side using the public `API_URL`
-3. **`server`** handles API requests, file uploads, WebRTC streaming. Dispatches background work to Celery via Redis
-4. **`worker`** picks up Celery tasks (transcription pipelines, audio processing). Calls `transcription:8000` for ML inference and uploads results to S3 storage
-5. **`beat`** schedules periodic tasks (cleanup, webhook retries) by pushing them onto the Celery queue
-6. **`transcription` (gpu/cpu)** runs Whisper/Parakeet (transcription), Pyannote (diarization), and translation models. Stateless HTTP API
-7. **`ollama`** provides an OpenAI-compatible API for summarization and topic detection. Called by the worker during post-processing
-8. **`garage`** provides S3-compatible storage for audio files and processed results. Accessed by the worker via boto3
-
---
-
-## Configuration Flow
-
-Environment variables flow through multiple layers. Understanding this prevents confusion when debugging:
-
-```
-Flags (--gpu, --garage, etc.)
-  │
-  ├── setup-selfhosted.sh interprets flags
-  │     │
-  │     ├── Writes server/.env (backend config)
-  │     ├── Writes www/.env (frontend config)
-  │     ├── Writes .env (HF_TOKEN for compose interpolation)
-  │     └── Writes Caddyfile (proxy routes)
-  │
-  └── docker-compose.selfhosted.yml reads:
-        ├── env_file: ./server/.env   (loaded into server, worker, beat)
-        ├── env_file: ./www/.env      (loaded into web)
-        ├── .env                      (compose variable interpolation, e.g. ${HF_TOKEN})
-        └── environment: {...}        (hardcoded overrides, always win over env_file)
-```
-
-### Precedence Rules
-
-Docker Compose `environment:` keys **always override** `env_file:` values. This is by design — the compose file hardcodes infrastructure values that must be correct inside the Docker network (like `DATABASE_URL=postgresql+asyncpg://...@postgres:5432/...`) regardless of what's in `server/.env`.
-
-The `server/.env` file is still useful for:
- Values not overridden in the compose file (LLM config, storage credentials, auth settings)
- Running the server outside Docker during development
-
-### The Three `.env` Files
-
-| File | Used By | Contains |
-|------|---------|----------|
-| `server/.env` | server, worker, beat | Backend config: database, Redis, S3, LLM, auth, public URLs |
-| `www/.env` | web | Frontend config: site URL, auth, feature flags |
-| `.env` (root) | Docker Compose interpolation | Only `HF_TOKEN` — injected into GPU/CPU container env |
-
---
-
-## Storage Architecture
-
-All audio files and processing results are stored in S3-compatible object storage. The backend uses boto3 (via aioboto3) with automatic path-style addressing when a custom endpoint URL is configured.
-
-### How Garage Works
-
-Garage is a lightweight, self-hosted S3-compatible storage engine. In this deployment:
-
- Runs as a single-node cluster with 1GB capacity allocation
- Listens on port 3900 (S3 API) and 3903 (admin API)
- Data persists in Docker volumes (`garage_data`, `garage_meta`)
- Accessed by the worker at `http://garage:3900` (Docker-internal)
-
-The setup script creates:
- A bucket called `reflector-media`
- An access key called `reflector` with read/write permissions on that bucket
-
-### Path-Style vs Virtual-Hosted Addressing
-
-AWS S3 uses virtual-hosted addressing by default (`bucket.s3.amazonaws.com`). S3-compatible services like Garage require path-style addressing (`endpoint/bucket`). The `AwsStorage` class detects this automatically: when `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL` is set, it configures boto3 with `addressing_style: "path"`.
-
---
-
-## SSL/TLS and Reverse Proxy
-
-### With `--domain` (Production)
-
-Caddy automatically obtains and renews a Let's Encrypt certificate. Requirements:
- DNS A record pointing to the server
- Ports 80 (HTTP challenge) and 443 (HTTPS) open to the internet
-
-The generated Caddyfile uses the domain as the site address, which triggers Caddy's automatic HTTPS.
-
-### Without `--domain` (Development/LAN)
-
-Caddy generates a self-signed certificate and listens on `:443` as a catch-all. Browsers will show a security warning that must be accepted manually.
-
-### Without `--caddy` (BYO Proxy)
-
-No ports are exposed to the internet. The services listen on `127.0.0.1` only:
- Frontend: `localhost:3000`
- Backend API: `localhost:1250`
-
-You can point your own reverse proxy (nginx, Traefik, etc.) at these ports.
-
-### WebRTC and UDP
-
-The server exposes UDP ports 50000-50100 for WebRTC ICE candidates. The `WEBRTC_HOST` variable tells the server which IP to advertise in ICE candidates — this must be the server's actual IP address (not a domain), because WebRTC uses UDP which doesn't go through the HTTP reverse proxy.
-
---
-
-## Build vs Pull Workflow
-
-### Default (no `--build` flag)
-
-```
-GPU/CPU model image: Always built from source (./gpu/self_hosted/)
-Backend image:       Pulled from monadicalsas/reflector-backend:latest
-Frontend image:      Pulled from monadicalsas/reflector-frontend:latest
-```
-
-The GPU/CPU image is always built because it contains hardware-specific build steps and ML model download logic.
-
-### With `--build`
-
-```
-GPU/CPU model image: Built from source (./gpu/self_hosted/)
-Backend image:       Built from source (./server/)
-Frontend image:      Built from source (./www/)
-```
-
-Use `--build` when:
- You've made local code changes
- The prebuilt registry images are outdated
- You want to verify the build works on your hardware
-
-### Rebuilding Individual Services
-
-```bash
-# Rebuild just the backend
-docker compose -f docker-compose.selfhosted.yml build server worker beat
-
-# Rebuild just the frontend
-docker compose -f docker-compose.selfhosted.yml build web
-
-# Rebuild the GPU model container
-docker compose -f docker-compose.selfhosted.yml build gpu
-
-# Force a clean rebuild (no cache)
-docker compose -f docker-compose.selfhosted.yml build --no-cache server
-```
-
---
-
-## Background Task Processing
-
-### Celery Architecture
-
-The backend uses Celery for all background work, with Redis as the message broker:
-
- **`worker`** — picks up tasks from the Redis queue and executes them
- **`beat`** — schedules periodic tasks (cron-like) by pushing them onto the queue
- **`Redis`** — acts as both message broker and result backend
-
-### The Audio Processing Pipeline
-
-When a file is uploaded, the worker runs a multi-step pipeline:
-
-```
-Upload → Extract Audio → Upload to S3
-                           │
-                    ┌──────┼──────┐
-                    │      │      │
-                    v      v      v
-              Transcribe  Diarize  Waveform
-                    │      │      │
-                    └──────┼──────┘
-                           │
-                       Assemble
-                           │
-                    ┌──────┼──────┐
-                    v      v      v
-                Topics  Title  Summaries
-                           │
-                         Done
-```
-
-Transcription, diarization, and waveform generation run in parallel. After assembly, topic detection, title generation, and summarization also run in parallel. Each step calls the appropriate service (transcription container for ML, Ollama/external LLM for text generation, S3 for storage).
-
-### Event Loop Management
-
-Each Celery task runs in its own `asyncio.run()` call, which creates a fresh event loop. The `asynctask` decorator in `server/reflector/asynctask.py` handles:
-
-1. **Database connections** — resets the connection pool before each task (connections from a previous event loop would cause "Future attached to a different loop" errors)
-2. **Redis connections** — resets the WebSocket manager singleton so Redis pub/sub reconnects on the current loop
-3. **Cleanup** — disconnects the database and clears the context variable in the `finally` block
-
---
-
-## Network and Port Layout
-
-All services communicate over Docker's default bridge network. Only specific ports are exposed to the host:
-
-| Port | Service | Binding | Purpose |
-|------|---------|---------|---------|
-| 80 | Caddy | `0.0.0.0:80` | HTTP (redirect to HTTPS / Let's Encrypt challenge) |
-| 443 | Caddy | `0.0.0.0:443` | HTTPS (main entry point) |
-| 1250 | Server | `127.0.0.1:1250` | Backend API (localhost only) |
-| 3000 | Web | `127.0.0.1:3000` | Frontend (localhost only) |
-| 3900 | Garage | `0.0.0.0:3900` | S3 API (for admin/debug access) |
-| 3903 | Garage | `0.0.0.0:3903` | Garage admin API |
-| 8000 | GPU/CPU | `127.0.0.1:8000` | ML model API (localhost only) |
-| 11434 | Ollama | `127.0.0.1:11434` | Ollama API (localhost only) |
-| 50000-50100/udp | Server | `0.0.0.0:50000-50100` | WebRTC ICE candidates |
-
-Services bound to `127.0.0.1` are only accessible from the host itself (not from the network). Caddy is the only service exposed to the internet on standard HTTP/HTTPS ports.
-
-### Docker-Internal Hostnames
-
-Inside the Docker network, services reach each other by their compose service name:
-
-| Hostname | Resolves To |
-|----------|-------------|
-| `server` | Backend API container |
-| `web` | Frontend container |
-| `postgres` | PostgreSQL container |
-| `redis` | Redis container |
-| `transcription` | GPU or CPU container (network alias) |
-| `ollama` / `ollama-cpu` | Ollama container |
-| `garage` | Garage S3 container |
-
---
-
-## Diagnostics and Error Handling
-
-The setup script includes an `ERR` trap that automatically dumps diagnostics when any command fails:
-
-1. Lists all container statuses
-2. Shows the last 30 lines of logs for any stopped/exited containers
-3. Shows the last 40 lines of the specific failing service
-
-This means if something goes wrong during setup, you'll see the relevant logs immediately without having to run manual debug commands.
-
-### Common Debug Commands
-
-```bash
-# Overall status
-docker compose -f docker-compose.selfhosted.yml ps
-
-# Logs for a specific service
-docker compose -f docker-compose.selfhosted.yml logs server --tail 50
-docker compose -f docker-compose.selfhosted.yml logs worker --tail 50
-
-# Check environment inside a container
-docker compose -f docker-compose.selfhosted.yml exec server env | grep TRANSCRIPT
-
-# Health check from inside the network
-docker compose -f docker-compose.selfhosted.yml exec server curl http://localhost:1250/health
-
-# Check S3 storage connectivity
-docker compose -f docker-compose.selfhosted.yml exec server curl http://garage:3900
-
-# Database access
-docker compose -f docker-compose.selfhosted.yml exec postgres psql -U reflector -c "SELECT id, status FROM transcript ORDER BY created_at DESC LIMIT 5;"
-
-# List files in server data directory
-docker compose -f docker-compose.selfhosted.yml exec server ls -la /app/data/
-```
--- a/docsv2/selfhosted-production.md
+++ b/docsv2/selfhosted-production.md
@@ -1,373 +0,0 @@
-# Self-Hosted Production Deployment
-
-Deploy Reflector on a single server with everything running in Docker. Transcription, diarization, and translation use specialized ML models (Whisper/Parakeet, Pyannote); only summarization and topic detection require an LLM.
-
-> For a detailed walkthrough of how the setup script and infrastructure work under the hood, see [How the Self-Hosted Setup Works](selfhosted-architecture.md).
-
-## Prerequisites
-
-### Hardware
- **With GPU**: Linux server with NVIDIA GPU (8GB+ VRAM recommended), 16GB+ RAM, 50GB+ disk
- **CPU-only**: 8+ cores, 32GB+ RAM (transcription is slower but works)
- Disk space for ML models (~2GB on first run) + audio storage
-
-### Software
- Docker Engine 24+ with Compose V2
- NVIDIA drivers + `nvidia-container-toolkit` (GPU modes only)
- `curl`, `openssl` (usually pre-installed)
-
-### Accounts & Credentials (depending on options)
-
-**Always recommended:**
- **HuggingFace token** — For downloading pyannote speaker diarization models. Get one at https://huggingface.co/settings/tokens and accept the model licenses:
-  - https://huggingface.co/pyannote/speaker-diarization-3.1
-  - https://huggingface.co/pyannote/segmentation-3.0
-  - The setup script will prompt for this. If skipped, diarization falls back to a public model bundle (may be less reliable).
-
-**LLM for summarization & topic detection (pick one):**
- **With `--ollama-gpu` or `--ollama-cpu`**: Nothing extra — Ollama runs locally and pulls the model automatically
- **Without `--ollama-*`**: An OpenAI-compatible LLM API key and endpoint. Examples:
-  - OpenAI: `LLM_URL=https://api.openai.com/v1`, `LLM_API_KEY=sk-...`, `LLM_MODEL=gpt-4o-mini`
-  - Anthropic, Together, Groq, or any OpenAI-compatible API
-  - A self-managed vLLM or Ollama instance elsewhere on the network
-
-**Object storage (pick one):**
- **With `--garage`**: Nothing extra — Garage (local S3-compatible storage) is auto-configured by the script
- **Without `--garage`**: S3-compatible storage credentials. The script will prompt for these, or you can pre-fill `server/.env`. Options include:
-  - **AWS S3**: Access Key ID, Secret Access Key, bucket name, region
-  - **MinIO**: Same credentials + `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://your-minio:9000`
-  - **Any S3-compatible provider** (Backblaze B2, Cloudflare R2, DigitalOcean Spaces, etc.): same fields + custom endpoint URL
-
-**Optional add-ons (configure after initial setup):**
- **Daily.co** (live meeting rooms): Requires a Daily.co account (https://www.daily.co/), API key, subdomain, and an AWS S3 bucket + IAM Role for recording storage. See [Enabling Daily.co Live Rooms](#enabling-dailyco-live-rooms) below.
- **Authentik** (user authentication): Requires an Authentik instance with an OAuth2/OIDC application configured for Reflector. See [Enabling Authentication](#enabling-authentication-authentik) below.
-
-## Quick Start
-
-```bash
-git clone https://github.com/Monadical-SAS/reflector.git
-cd reflector
-
-# GPU + local Ollama LLM + local Garage storage + Caddy SSL (with domain):
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com
-
-# Same but without a domain (self-signed cert, access via IP):
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
-
-# CPU-only (same, but slower):
-./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
-
-# Build from source instead of pulling prebuilt images:
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --build
-```
-
-That's it. The script generates env files, secrets, starts all containers, waits for health checks, and prints the URL.
-
-## Specialized Models (Required)
-
-Pick `--gpu` or `--cpu`. This determines how **transcription, diarization, and translation** run:
-
-| Flag | What it does | Requires |
-|------|-------------|----------|
-| `--gpu` | NVIDIA GPU acceleration for ML models | NVIDIA GPU + drivers + `nvidia-container-toolkit` |
-| `--cpu` | CPU-only (slower but works without GPU) | 8+ cores, 32GB+ RAM recommended |
-
-## Local LLM (Optional)
-
-Optionally add `--ollama-gpu` or `--ollama-cpu` for a **local Ollama instance** that handles summarization and topic detection. If omitted, configure an external OpenAI-compatible LLM in `server/.env`.
-
-| Flag | What it does | Requires |
-|------|-------------|----------|
-| `--ollama-gpu` | Local Ollama with NVIDIA GPU acceleration | NVIDIA GPU |
-| `--ollama-cpu` | Local Ollama on CPU only | Nothing extra |
-| `--llm-model MODEL` | Choose which Ollama model to download (default: `qwen2.5:14b`) | `--ollama-gpu` or `--ollama-cpu` |
-| *(omitted)* | User configures external LLM (OpenAI, Anthropic, etc.) | LLM API key |
-
-### Choosing an Ollama model
-
-The default model is `qwen2.5:14b` (~9GB download, good multilingual support and summary quality). Override with `--llm-model`:
-
-```bash
-# Default (qwen2.5:14b)
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
-
-# Mistral — good balance of speed and quality (~4.1GB)
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy
-
-# Phi-4 — smaller and faster (~9.1GB)
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model phi4 --garage --caddy
-
-# Llama 3.3 70B — best quality, needs 48GB+ RAM or GPU VRAM (~43GB)
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model llama3.3:70b --garage --caddy
-
-# Gemma 2 9B (~5.4GB)
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model gemma2 --garage --caddy
-
-# DeepSeek R1 8B — reasoning model, verbose but thorough summaries (~4.9GB)
-./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model deepseek-r1:8b --garage --caddy
-```
-
-Browse all available models at https://ollama.com/library.
-
-### Recommended combinations
-
- **`--gpu --ollama-gpu`**: Best for servers with NVIDIA GPU. Fully self-contained, no external API keys needed.
- **`--cpu --ollama-cpu`**: No GPU available but want everything self-contained. Slower but works.
- **`--gpu --ollama-cpu`**: GPU for transcription, CPU for LLM. Saves GPU VRAM for ML models.
- **`--gpu`**: Have NVIDIA GPU but prefer a cloud LLM (faster/better summaries with GPT-4, Claude, etc.).
- **`--cpu`**: No GPU, prefer cloud LLM. Slowest transcription but best summary quality.
-
-## Other Optional Flags
-
-| Flag | What it does |
-|------|-------------|
-| `--garage` | Starts Garage (local S3-compatible storage). Auto-configures bucket, keys, and env vars. |
-| `--caddy` | Starts Caddy reverse proxy on ports 80/443 with self-signed cert. |
-| `--domain DOMAIN` | Use a real domain with Let's Encrypt auto-HTTPS (implies `--caddy`). Requires DNS A record pointing to this server and ports 80/443 open. |
-| `--build` | Build backend (server, worker, beat) and frontend (web) Docker images from source instead of pulling prebuilt images from the registry. Useful for development or when running a version with local changes. |
-
-Without `--garage`, you **must** provide S3-compatible credentials (the script will prompt interactively or you can pre-fill `server/.env`).
-
-Without `--caddy` or `--domain`, no ports are exposed. Point your own reverse proxy at `web:3000` (frontend) and `server:1250` (API).
-
-**Using a domain (recommended for production):** Point a DNS A record at your server's IP, then pass `--domain your.domain.com`. Caddy will automatically obtain and renew a Let's Encrypt certificate. Ports 80 and 443 must be open.
-
-**Without a domain:** `--caddy` alone uses a self-signed certificate. Browsers will show a security warning that must be accepted.
-
-## What the Script Does
-
-1. **Prerequisites check** — Docker, NVIDIA GPU (if needed), compose file exists
-2. **Generate secrets** — `SECRET_KEY`, `NEXTAUTH_SECRET` via `openssl rand`
-3. **Generate `server/.env`** — From template, sets infrastructure defaults, configures LLM based on mode, enables `PUBLIC_MODE`
-4. **Generate `www/.env`** — Auto-detects server IP, sets URLs
-5. **Storage setup** — Either initializes Garage (bucket, keys, permissions) or prompts for external S3 credentials
-6. **Caddyfile** — Generates domain-specific (Let's Encrypt) or IP-specific (self-signed) configuration
-7. **Build & start** — Always builds GPU/CPU model image from source. With `--build`, also builds backend and frontend from source; otherwise pulls prebuilt images from the registry
-8. **Health checks** — Waits for each service, pulls Ollama model if needed, warns about missing LLM config
-
-> For a deeper dive into each step, see [How the Self-Hosted Setup Works](selfhosted-architecture.md).
-
-## Configuration Reference
-
-### Server Environment (`server/.env`)
-
-| Variable | Description | Default |
-|----------|-------------|---------|
-| `DATABASE_URL` | PostgreSQL connection | Auto-set (Docker internal) |
-| `REDIS_HOST` | Redis hostname | Auto-set (`redis`) |
-| `SECRET_KEY` | App secret | Auto-generated |
-| `AUTH_BACKEND` | Authentication method | `none` |
-| `PUBLIC_MODE` | Allow unauthenticated access | `true` |
-| `WEBRTC_HOST` | IP advertised in WebRTC ICE candidates | Auto-detected (server IP) |
-| `TRANSCRIPT_URL` | Specialized model endpoint | `http://transcription:8000` |
-| `LLM_URL` | OpenAI-compatible LLM endpoint | Auto-set for Ollama modes |
-| `LLM_API_KEY` | LLM API key | `not-needed` for Ollama |
-| `LLM_MODEL` | LLM model name | `qwen2.5:14b` for Ollama (override with `--llm-model`) |
-| `TRANSCRIPT_STORAGE_BACKEND` | Storage backend | `aws` |
-| `TRANSCRIPT_STORAGE_AWS_*` | S3 credentials | Auto-set for Garage |
-
-### Frontend Environment (`www/.env`)
-
-| Variable | Description | Default |
-|----------|-------------|---------|
-| `SITE_URL` | Public-facing URL | Auto-detected |
-| `API_URL` | API URL (browser-side) | Same as SITE_URL |
-| `SERVER_API_URL` | API URL (server-side) | `http://server:1250` |
-| `NEXTAUTH_SECRET` | Auth secret | Auto-generated |
-| `FEATURE_REQUIRE_LOGIN` | Require authentication | `false` |
-
-## Storage Options
-
-### Garage (Recommended for Self-Hosted)
-
-Use `--garage` flag. The script automatically:
- Generates `data/garage.toml` with a random RPC secret
- Starts the Garage container
- Creates the `reflector-media` bucket
- Creates an access key with read/write permissions
- Writes all S3 credentials to `server/.env`
-
-### External S3 (AWS, MinIO, etc.)
-
-Don't use `--garage`. The script will prompt for:
- Access Key ID
- Secret Access Key
- Bucket Name
- Region
- Endpoint URL (for non-AWS like MinIO)
-
-Or pre-fill in `server/.env`:
-```env
-TRANSCRIPT_STORAGE_BACKEND=aws
-TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID=your-key
-TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY=your-secret
-TRANSCRIPT_STORAGE_AWS_BUCKET_NAME=reflector-media
-TRANSCRIPT_STORAGE_AWS_REGION=us-east-1
-# For non-AWS S3 (MinIO, etc.):
-TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://minio:9000
-```
-
-## Enabling Authentication (Authentik)
-
-By default, authentication is disabled (`AUTH_BACKEND=none`, `FEATURE_REQUIRE_LOGIN=false`). To enable:
-
-1. Deploy an Authentik instance (see [Authentik docs](https://goauthentik.io/docs/installation))
-2. Create an OAuth2/OIDC application for Reflector
-3. Update `server/.env`:
-   ```env
-   AUTH_BACKEND=jwt
-   AUTH_JWT_AUDIENCE=your-client-id
-   ```
-4. Update `www/.env`:
-   ```env
-   FEATURE_REQUIRE_LOGIN=true
-   AUTHENTIK_ISSUER=https://authentik.example.com/application/o/reflector
-   AUTHENTIK_REFRESH_TOKEN_URL=https://authentik.example.com/application/o/token/
-   AUTHENTIK_CLIENT_ID=your-client-id
-   AUTHENTIK_CLIENT_SECRET=your-client-secret
-   ```
-5. Restart: `docker compose -f docker-compose.selfhosted.yml down && ./scripts/setup-selfhosted.sh <same-flags>`
-
-## Enabling Daily.co Live Rooms
-
-Daily.co enables real-time meeting rooms with automatic recording and transcription.
-
-1. Create a [Daily.co](https://www.daily.co/) account
-2. Add to `server/.env`:
-   ```env
-   DEFAULT_VIDEO_PLATFORM=daily
-   DAILY_API_KEY=your-daily-api-key
-   DAILY_SUBDOMAIN=your-subdomain
-   DAILY_WEBHOOK_SECRET=your-webhook-secret
-   DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco
-   DAILYCO_STORAGE_AWS_REGION=us-east-1
-   DAILYCO_STORAGE_AWS_ROLE_ARN=arn:aws:iam::role/DailyCoAccess
-   ```
-3. Restart the server: `docker compose -f docker-compose.selfhosted.yml restart server worker`
-
-## Enabling Real Domain with Let's Encrypt
-
-By default, Caddy uses self-signed certificates. For a real domain:
-
-1. Point your domain's DNS to your server's IP
-2. Ensure ports 80 and 443 are open
-3. Edit `Caddyfile`:
-   ```
-   reflector.example.com {
-       handle /v1/* {
-           reverse_proxy server:1250
-       }
-       handle /health {
-           reverse_proxy server:1250
-       }
-       handle {
-           reverse_proxy web:3000
-       }
-   }
-   ```
-4. Update `www/.env`:
-   ```env
-   SITE_URL=https://reflector.example.com
-   NEXTAUTH_URL=https://reflector.example.com
-   API_URL=https://reflector.example.com
-   ```
-5. Restart Caddy: `docker compose -f docker-compose.selfhosted.yml restart caddy web`
-
-## Troubleshooting
-
-### Check service status
-```bash
-docker compose -f docker-compose.selfhosted.yml ps
-```
-
-### View logs for a specific service
-```bash
-docker compose -f docker-compose.selfhosted.yml logs server --tail 50
-docker compose -f docker-compose.selfhosted.yml logs gpu --tail 50
-docker compose -f docker-compose.selfhosted.yml logs web --tail 50
-```
-
-### GPU service taking too long
-First start downloads ~1-2GB of ML models. Check progress:
-```bash
-docker compose -f docker-compose.selfhosted.yml logs gpu -f
-```
-
-### Server exits immediately
-Usually a database migration issue. Check:
-```bash
-docker compose -f docker-compose.selfhosted.yml logs server --tail 50
-```
-
-### Caddy certificate issues
-For self-signed certs, your browser will warn. Click Advanced > Proceed.
-For Let's Encrypt, ensure ports 80/443 are open and DNS is pointed correctly.
-
-### Summaries/topics not generating
-Check LLM configuration:
-```bash
-grep LLM_ server/.env
-```
-If you didn't use `--ollama-gpu` or `--ollama-cpu`, you must set `LLM_URL`, `LLM_API_KEY`, and `LLM_MODEL`.
-
-### Health check from inside containers
-```bash
-docker compose -f docker-compose.selfhosted.yml exec server curl http://localhost:1250/health
-docker compose -f docker-compose.selfhosted.yml exec gpu curl http://localhost:8000/docs
-```
-
-## Updating
-
-```bash
-# Option A: Pull latest prebuilt images and restart
-docker compose -f docker-compose.selfhosted.yml down
-./scripts/setup-selfhosted.sh <same-flags-as-before>
-
-# Option B: Build from source (after git pull) and restart
-git pull
-docker compose -f docker-compose.selfhosted.yml down
-./scripts/setup-selfhosted.sh <same-flags-as-before> --build
-
-# Rebuild only the GPU/CPU model image (picks up model updates)
-docker compose -f docker-compose.selfhosted.yml build gpu  # or cpu
-```
-
-The setup script is idempotent — it won't overwrite existing secrets or env vars that are already set.
-
-## Architecture Overview
-
-```
-                    ┌─────────┐
-  Internet ────────>│  Caddy  │ :80/:443
-                    └────┬────┘
-                         │
-            ┌────────────┼────────────┐
-            │            │            │
-            v            v            │
-       ┌─────────┐  ┌─────────┐      │
-       │   web   │  │ server  │      │
-       │ :3000   │  │ :1250   │      │
-       └─────────┘  └────┬────┘      │
-                         │            │
-                    ┌────┴────┐       │
-                    │ worker  │       │
-                    │  beat   │       │
-                    └────┬────┘       │
-                         │            │
-          ┌──────────────┼────────────┤
-          │              │            │
-          v              v            v
-    ┌───────────┐  ┌─────────┐  ┌─────────┐
-    │transcription│  │postgres │  │  redis  │
-    │(gpu/cpu)  │  │ :5432   │  │ :6379   │
-    │ :8000     │  └─────────┘  └─────────┘
-    └───────────┘
-          │
-    ┌─────┴─────┐     ┌─────────┐
-    │  ollama   │     │ garage  │
-    │ (optional)│     │(optional│
-    │ :11434    │     │ S3)     │
-    └───────────┘     └─────────┘
-```
-
-All services communicate over Docker's internal network. Only Caddy (if enabled) exposes ports to the internet.
--- a/node_modules/.yarn-integrity
+++ b/node_modules/.yarn-integrity
@@ -1,10 +0,0 @@
-{
-  "systemParams": "darwin-x64-83",
-  "modulesFolders": [],
-  "flags": [],
-  "linkedModules": [],
-  "topLevelPatterns": [],
-  "lockfileEntries": {},
-  "files": [],
-  "artifacts": {}
-}
--- a/scripts/install-docker-ubuntu.sh
+++ b/scripts/install-docker-ubuntu.sh
@@ -1,87 +0,0 @@
-#!/usr/bin/env bash
-#
-# Install Docker Engine + Compose plugin on Ubuntu.
-# Ubuntu's default repos don't include docker-compose-plugin, so we add Docker's official repo.
-#
-# Usage:
-#   ./scripts/install-docker-ubuntu.sh
-#
-# Requires: root or sudo
-#
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# --- Colors ---
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-CYAN='\033[0;36m'
-NC='\033[0m'
-
-info()  { echo -e "${CYAN}==>${NC} $*"; }
-ok()    { echo -e "${GREEN}  ✓${NC} $*"; }
-warn()  { echo -e "${YELLOW}  !${NC} $*"; }
-err()   { echo -e "${RED}  ✗${NC} $*" >&2; }
-
-# Use sudo if available and not root; otherwise run directly
-if [[ $(id -u) -eq 0 ]]; then
-    MAYBE_SUDO=""
-elif command -v sudo &>/dev/null; then
-    MAYBE_SUDO="sudo "
-else
-    err "Need root. Run as root or install sudo: apt install sudo"
-    exit 1
-fi
-
-# Check Ubuntu
-if [[ ! -f /etc/os-release ]]; then
-    err "Cannot detect OS. This script is for Ubuntu."
-    exit 1
-fi
-source /etc/os-release
-if [[ "${ID:-}" != "ubuntu" ]] && [[ "${ID_LIKE:-}" != *"ubuntu"* ]]; then
-    err "This script is for Ubuntu. Detected: ${ID:-unknown}"
-    exit 1
-fi
-
-info "Adding Docker's official repository..."
-${MAYBE_SUDO}apt update
-${MAYBE_SUDO}apt install -y ca-certificates curl
-${MAYBE_SUDO}install -m 0755 -d /etc/apt/keyrings
-${MAYBE_SUDO}rm -f /etc/apt/sources.list.d/docker.list /etc/apt/sources.list.d/docker.sources
-curl -fsSL https://download.docker.com/linux/ubuntu/gpg | ${MAYBE_SUDO}tee /etc/apt/keyrings/docker.asc > /dev/null
-${MAYBE_SUDO}chmod a+r /etc/apt/keyrings/docker.asc
-CODENAME="$(. /etc/os-release && echo "${UBUNTU_CODENAME:-${VERSION_CODENAME:-}}")"
-[[ -z "$CODENAME" ]] && { err "Could not detect Ubuntu version codename."; exit 1; }
-${MAYBE_SUDO}tee /etc/apt/sources.list.d/docker.sources > /dev/null <<EOF
-Types: deb
-URIs: https://download.docker.com/linux/ubuntu
-Suites: ${CODENAME}
-Components: stable
-Signed-By: /etc/apt/keyrings/docker.asc
-EOF
-
-info "Installing Docker Engine and Compose plugin..."
-${MAYBE_SUDO}apt update
-${MAYBE_SUDO}apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
-
-if [[ -d /run/systemd/system ]]; then
-    info "Enabling and starting Docker..."
-    ${MAYBE_SUDO}systemctl enable --now docker
-else
-    err "No systemd. This script requires Ubuntu with systemd (e.g. DigitalOcean droplet)."
-    exit 1
-fi
-
-DOCKER_USER="${SUDO_USER:-${USER:-root}}"
-if [[ "$DOCKER_USER" != "root" ]]; then
-    info "Adding $DOCKER_USER to docker group..."
-    ${MAYBE_SUDO}usermod -aG docker "$DOCKER_USER"
-fi
-
-ok "Docker installed successfully."
-echo ""
-echo "  Log out and back in (or run: newgrp docker) so the group change takes effect."
-echo "  Then verify with: docker compose version"
-echo ""
--- a/scripts/setup-selfhosted.sh
+++ b/scripts/setup-selfhosted.sh
@@ -1,945 +0,0 @@
-#!/usr/bin/env bash
-#
-# Self-hosted production setup for Reflector.
-# Single script to configure and launch everything on one server.
-#
-# Usage:
-#   ./scripts/setup-selfhosted.sh <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]
-#
-# Specialized models (pick ONE — required):
-#   --gpu              NVIDIA GPU for transcription/diarization/translation
-#   --cpu              CPU-only for transcription/diarization/translation (slower)
-#
-# Local LLM (optional — for summarization & topic detection):
-#   --ollama-gpu       Local Ollama with NVIDIA GPU acceleration
-#   --ollama-cpu       Local Ollama on CPU only
-#   --llm-model MODEL  Ollama model to use (default: qwen2.5:14b)
-#   (If omitted, configure an external OpenAI-compatible LLM in server/.env)
-#
-# Optional flags:
-#   --garage           Use Garage for local S3-compatible storage
-#   --caddy            Enable Caddy reverse proxy with auto-SSL
-#   --domain DOMAIN    Use a real domain for Caddy (enables Let's Encrypt auto-HTTPS)
-#                      Requires: DNS pointing to this server + ports 80/443 open
-#                      Without --domain: Caddy uses self-signed cert for IP access
-#   --build            Build backend and frontend images from source instead of pulling
-#
-# Examples:
-#   ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
-#   ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com
-#   ./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
-#   ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy
-#   ./scripts/setup-selfhosted.sh --gpu --garage --caddy
-#   ./scripts/setup-selfhosted.sh --cpu
-#
-# Idempotent — safe to re-run at any time.
-#
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
-
-COMPOSE_FILE="$ROOT_DIR/docker-compose.selfhosted.yml"
-SERVER_ENV="$ROOT_DIR/server/.env"
-WWW_ENV="$ROOT_DIR/www/.env"
-
-OLLAMA_MODEL="qwen2.5:14b"
-OS="$(uname -s)"
-
-# --- Colors ---
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-CYAN='\033[0;36m'
-NC='\033[0m'
-
-info()  { echo -e "${CYAN}==>${NC} $*"; }
-ok()    { echo -e "${GREEN}  ✓${NC} $*"; }
-warn()  { echo -e "${YELLOW}  !${NC} $*"; }
-err()   { echo -e "${RED}  ✗${NC} $*" >&2; }
-
-# --- Helpers ---
-
-dump_diagnostics() {
-    local failed_svc="${1:-}"
-    echo ""
-    err "========== DIAGNOSTICS =========="
-
-    err "Container status:"
-    compose_cmd ps -a --format "table {{.Name}}\t{{.Status}}" 2>/dev/null || true
-    echo ""
-
-    local stopped
-    stopped=$(compose_cmd ps -a --format '{{.Name}}\t{{.Status}}' 2>/dev/null \
-        | grep -iv 'up\|running' | awk -F'\t' '{print $1}' || true)
-    for c in $stopped; do
-        err "--- Logs for $c (exited/unhealthy) ---"
-        docker logs --tail 30 "$c" 2>&1 || true
-        echo ""
-    done
-
-    if [[ -n "$failed_svc" ]]; then
-        err "--- Logs for $failed_svc (last 40) ---"
-        compose_cmd logs "$failed_svc" --tail 40 2>&1 || true
-    fi
-
-    err "================================="
-}
-
-trap 'dump_diagnostics' ERR
-
-detect_lan_ip() {
-    case "$OS" in
-        Darwin)
-            for iface in en0 en1 en2 en3; do
-                local ip
-                ip=$(ipconfig getifaddr "$iface" 2>/dev/null || true)
-                if [[ -n "$ip" ]]; then
-                    echo "$ip"
-                    return
-                fi
-            done
-            ;;
-        Linux)
-            ip route get 1.1.1.1 2>/dev/null | sed -n 's/.*src \([^ ]*\).*/\1/p'
-            return
-            ;;
-    esac
-    echo ""
-}
-
-wait_for_url() {
-    local url="$1" label="$2" retries="${3:-30}" interval="${4:-2}"
-    for i in $(seq 1 "$retries"); do
-        if curl -sf "$url" > /dev/null 2>&1; then
-            return 0
-        fi
-        echo -ne "\r  Waiting for $label... ($i/$retries)"
-        sleep "$interval"
-    done
-    echo ""
-    err "$label not responding at $url after $retries attempts"
-    return 1
-}
-
-env_has_key() {
-    local file="$1" key="$2"
-    grep -q "^${key}=" "$file" 2>/dev/null
-}
-
-env_get() {
-    local file="$1" key="$2"
-    grep "^${key}=" "$file" 2>/dev/null | head -1 | cut -d= -f2-
-}
-
-env_set() {
-    local file="$1" key="$2" value="$3"
-    if env_has_key "$file" "$key"; then
-        if [[ "$OS" == "Darwin" ]]; then
-            sed -i '' "s|^${key}=.*|${key}=${value}|" "$file"
-        else
-            sed -i "s|^${key}=.*|${key}=${value}|" "$file"
-        fi
-    else
-        echo "${key}=${value}" >> "$file"
-    fi
-}
-
-compose_cmd() {
-    local profiles=""
-    for p in "${COMPOSE_PROFILES[@]}"; do
-        profiles="$profiles --profile $p"
-    done
-    docker compose -f "$COMPOSE_FILE" $profiles "$@"
-}
-
-# Compose command with only garage profile (for garage-only operations before full stack start)
-compose_garage_cmd() {
-    docker compose -f "$COMPOSE_FILE" --profile garage "$@"
-}
-
-# --- Parse arguments ---
-MODEL_MODE=""       # gpu or cpu (required, mutually exclusive)
-OLLAMA_MODE=""      # ollama-gpu or ollama-cpu (optional)
-USE_GARAGE=false
-USE_CADDY=false
-CUSTOM_DOMAIN=""    # optional domain for Let's Encrypt HTTPS
-BUILD_IMAGES=false  # build backend/frontend from source
-
-SKIP_NEXT=false
-ARGS=("$@")
-for i in "${!ARGS[@]}"; do
-    if [[ "$SKIP_NEXT" == "true" ]]; then
-        SKIP_NEXT=false
-        continue
-    fi
-    arg="${ARGS[$i]}"
-    case "$arg" in
-        --gpu)
-            [[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; }
-            MODEL_MODE="gpu" ;;
-        --cpu)
-            [[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; }
-            MODEL_MODE="cpu" ;;
-        --ollama-gpu)
-            [[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. Pick one."; exit 1; }
-            OLLAMA_MODE="ollama-gpu" ;;
-        --ollama-cpu)
-            [[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. Pick one."; exit 1; }
-            OLLAMA_MODE="ollama-cpu" ;;
-        --llm-model)
-            next_i=$((i + 1))
-            if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
-                err "--llm-model requires a model name (e.g. --llm-model mistral)"
-                exit 1
-            fi
-            OLLAMA_MODEL="${ARGS[$next_i]}"
-            SKIP_NEXT=true ;;
-        --garage)       USE_GARAGE=true ;;
-        --caddy)        USE_CADDY=true ;;
-        --build)        BUILD_IMAGES=true ;;
-        --domain)
-            next_i=$((i + 1))
-            if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
-                err "--domain requires a domain name (e.g. --domain reflector.example.com)"
-                exit 1
-            fi
-            CUSTOM_DOMAIN="${ARGS[$next_i]}"
-            USE_CADDY=true  # --domain implies --caddy
-            SKIP_NEXT=true ;;
-        *)
-            err "Unknown argument: $arg"
-            err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]"
-            exit 1
-            ;;
-    esac
-done
-
-if [[ -z "$MODEL_MODE" ]]; then
-    err "No model mode specified. You must choose --gpu or --cpu."
-    err ""
-    err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]"
-    err ""
-    err "Specialized models (required):"
-    err "  --gpu              NVIDIA GPU for transcription/diarization/translation"
-    err "  --cpu              CPU-only (slower but works without GPU)"
-    err ""
-    err "Local LLM (optional):"
-    err "  --ollama-gpu       Local Ollama with GPU (for summarization/topics)"
-    err "  --ollama-cpu       Local Ollama on CPU (for summarization/topics)"
-    err "  --llm-model MODEL  Ollama model to download (default: qwen2.5:14b)"
-    err "  (omit --ollama-* for external OpenAI-compatible LLM)"
-    err ""
-    err "Other options:"
-    err "  --garage           Local S3-compatible storage (Garage)"
-    err "  --caddy            Caddy reverse proxy with self-signed cert"
-    err "  --domain DOMAIN    Use a real domain with Let's Encrypt HTTPS (implies --caddy)"
-    err "  --build            Build backend/frontend images from source instead of pulling"
-    exit 1
-fi
-
-# Build profiles list — one profile per feature
-COMPOSE_PROFILES=("$MODEL_MODE")
-[[ -n "$OLLAMA_MODE" ]] && COMPOSE_PROFILES+=("$OLLAMA_MODE")
-[[ "$USE_GARAGE" == "true" ]] && COMPOSE_PROFILES+=("garage")
-[[ "$USE_CADDY" == "true" ]] && COMPOSE_PROFILES+=("caddy")
-
-# Derived flags
-NEEDS_NVIDIA=false
-[[ "$MODEL_MODE" == "gpu" ]] && NEEDS_NVIDIA=true
-[[ "$OLLAMA_MODE" == "ollama-gpu" ]] && NEEDS_NVIDIA=true
-
-USES_OLLAMA=false
-OLLAMA_SVC=""
-[[ "$OLLAMA_MODE" == "ollama-gpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama"
-[[ "$OLLAMA_MODE" == "ollama-cpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama-cpu"
-
-# Human-readable mode string for display
-MODE_DISPLAY="$MODEL_MODE"
-[[ -n "$OLLAMA_MODE" ]] && MODE_DISPLAY="$MODEL_MODE + $OLLAMA_MODE"
-
-# =========================================================
-# Step 0: Prerequisites
-# =========================================================
-step_prerequisites() {
-    info "Step 0: Checking prerequisites"
-
-    # Docker
-    if ! docker compose version 2>/dev/null | grep -qi compose; then
-        err "Docker Compose V2 not found."
-        err "Install Docker with Compose V2: https://docs.docker.com/engine/install/"
-        exit 1
-    fi
-    if ! docker info &>/dev/null; then
-        err "Docker daemon not running."
-        exit 1
-    fi
-    ok "Docker + Compose V2 ready"
-
-    # NVIDIA GPU check
-    if [[ "$NEEDS_NVIDIA" == "true" ]]; then
-        if ! command -v nvidia-smi &>/dev/null || ! nvidia-smi &>/dev/null; then
-            err "NVIDIA GPU required (model=$MODEL_MODE, ollama=$OLLAMA_MODE) but nvidia-smi failed."
-            err "Install NVIDIA drivers and nvidia-container-toolkit."
-            exit 1
-        fi
-        ok "NVIDIA GPU detected"
-    fi
-
-    # Compose file
-    if [[ ! -f "$COMPOSE_FILE" ]]; then
-        err "docker-compose.selfhosted.yml not found at $COMPOSE_FILE"
-        err "Run this script from the repo root: ./scripts/setup-selfhosted.sh"
-        exit 1
-    fi
-
-    ok "Prerequisites OK (models=$MODEL_MODE, ollama=$OLLAMA_MODE, garage=$USE_GARAGE, caddy=$USE_CADDY)"
-}
-
-# =========================================================
-# Step 1: Generate secrets
-# =========================================================
-step_secrets() {
-    info "Step 1: Generating secrets"
-
-    # These are used in later steps — generate once, reuse
-    if [[ -f "$SERVER_ENV" ]] && env_has_key "$SERVER_ENV" "SECRET_KEY"; then
-        SECRET_KEY=$(env_get "$SERVER_ENV" "SECRET_KEY")
-        if [[ "$SECRET_KEY" != "changeme"* ]]; then
-            ok "SECRET_KEY already set"
-        else
-            SECRET_KEY=$(openssl rand -hex 32)
-        fi
-    else
-        SECRET_KEY=$(openssl rand -hex 32)
-    fi
-
-    if [[ -f "$WWW_ENV" ]] && env_has_key "$WWW_ENV" "NEXTAUTH_SECRET"; then
-        NEXTAUTH_SECRET=$(env_get "$WWW_ENV" "NEXTAUTH_SECRET")
-        if [[ "$NEXTAUTH_SECRET" != "changeme"* ]]; then
-            ok "NEXTAUTH_SECRET already set"
-        else
-            NEXTAUTH_SECRET=$(openssl rand -hex 32)
-        fi
-    else
-        NEXTAUTH_SECRET=$(openssl rand -hex 32)
-    fi
-
-    ok "Secrets ready"
-}
-
-# =========================================================
-# Step 2: Generate server/.env
-# =========================================================
-step_server_env() {
-    info "Step 2: Generating server/.env"
-
-    if [[ -f "$SERVER_ENV" ]]; then
-        ok "server/.env already exists — ensuring required vars"
-    else
-        cp "$ROOT_DIR/server/.env.selfhosted.example" "$SERVER_ENV"
-        ok "Created server/.env from template"
-    fi
-
-    # Core infrastructure
-    env_set "$SERVER_ENV" "DATABASE_URL" "postgresql+asyncpg://reflector:reflector@postgres:5432/reflector"
-    env_set "$SERVER_ENV" "REDIS_HOST" "redis"
-    env_set "$SERVER_ENV" "CELERY_BROKER_URL" "redis://redis:6379/1"
-    env_set "$SERVER_ENV" "CELERY_RESULT_BACKEND" "redis://redis:6379/1"
-    env_set "$SERVER_ENV" "SECRET_KEY" "$SECRET_KEY"
-    env_set "$SERVER_ENV" "AUTH_BACKEND" "none"
-    env_set "$SERVER_ENV" "PUBLIC_MODE" "true"
-
-    # Public-facing URLs
-    local server_base_url
-    if [[ -n "$CUSTOM_DOMAIN" ]]; then
-        server_base_url="https://$CUSTOM_DOMAIN"
-    elif [[ "$USE_CADDY" == "true" ]]; then
-        if [[ -n "$PRIMARY_IP" ]]; then
-            server_base_url="https://$PRIMARY_IP"
-        else
-            server_base_url="https://localhost"
-        fi
-    else
-        if [[ -n "$PRIMARY_IP" ]]; then
-            server_base_url="http://$PRIMARY_IP"
-        else
-            server_base_url="http://localhost:1250"
-        fi
-    fi
-    env_set "$SERVER_ENV" "BASE_URL" "$server_base_url"
-    env_set "$SERVER_ENV" "CORS_ORIGIN" "$server_base_url"
-
-    # WebRTC: advertise host IP in ICE candidates so browsers can reach the server
-    if [[ -n "$PRIMARY_IP" ]]; then
-        env_set "$SERVER_ENV" "WEBRTC_HOST" "$PRIMARY_IP"
-    fi
-
-    # Specialized models (always via gpu/cpu container aliased as "transcription")
-    env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
-    env_set "$SERVER_ENV" "TRANSCRIPT_URL" "http://transcription:8000"
-    env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "selfhosted"
-    env_set "$SERVER_ENV" "DIARIZATION_ENABLED" "true"
-    env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
-    env_set "$SERVER_ENV" "DIARIZATION_URL" "http://transcription:8000"
-    env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
-    env_set "$SERVER_ENV" "TRANSLATE_URL" "http://transcription:8000"
-
-    # HuggingFace token for gated models (pyannote diarization)
-    # Written to root .env so docker compose picks it up for gpu/cpu containers
-    local root_env="$ROOT_DIR/.env"
-    local current_hf_token="${HF_TOKEN:-}"
-    if [[ -f "$root_env" ]] && env_has_key "$root_env" "HF_TOKEN"; then
-        current_hf_token=$(env_get "$root_env" "HF_TOKEN")
-    fi
-    if [[ -z "$current_hf_token" ]]; then
-        echo ""
-        warn "HF_TOKEN not set. Diarization will use a public model fallback."
-        warn "For best results, get a token at https://huggingface.co/settings/tokens"
-        warn "and accept pyannote licenses at https://huggingface.co/pyannote/speaker-diarization-3.1"
-        read -rp "  HuggingFace token (or press Enter to skip): " current_hf_token
-    fi
-    if [[ -n "$current_hf_token" ]]; then
-        touch "$root_env"
-        env_set "$root_env" "HF_TOKEN" "$current_hf_token"
-        export HF_TOKEN="$current_hf_token"
-        ok "HF_TOKEN configured"
-    else
-        touch "$root_env"
-        env_set "$root_env" "HF_TOKEN" ""
-        ok "HF_TOKEN skipped (using public model fallback)"
-    fi
-
-    # LLM configuration
-    if [[ "$USES_OLLAMA" == "true" ]]; then
-        local llm_host="$OLLAMA_SVC"
-        env_set "$SERVER_ENV" "LLM_URL" "http://${llm_host}:11434/v1"
-        env_set "$SERVER_ENV" "LLM_MODEL" "$OLLAMA_MODEL"
-        env_set "$SERVER_ENV" "LLM_API_KEY" "not-needed"
-        ok "LLM configured for local Ollama ($llm_host, model=$OLLAMA_MODEL)"
-    else
-        # Check if user already configured LLM
-        local current_llm_url=""
-        if env_has_key "$SERVER_ENV" "LLM_URL"; then
-            current_llm_url=$(env_get "$SERVER_ENV" "LLM_URL")
-        fi
-        if [[ -z "$current_llm_url" ]] || [[ "$current_llm_url" == "http://host.docker.internal"* ]]; then
-            warn "LLM not configured. Summarization and topic detection will NOT work."
-            warn "Edit server/.env and set LLM_URL, LLM_API_KEY, LLM_MODEL"
-            warn "Example: LLM_URL=https://api.openai.com/v1  LLM_MODEL=gpt-4o-mini"
-        else
-            ok "LLM already configured: $current_llm_url"
-        fi
-    fi
-
-    ok "server/.env ready"
-}
-
-# =========================================================
-# Step 3: Generate www/.env
-# =========================================================
-step_www_env() {
-    info "Step 3: Generating www/.env"
-
-    if [[ -f "$WWW_ENV" ]]; then
-        ok "www/.env already exists — ensuring required vars"
-    else
-        cp "$ROOT_DIR/www/.env.selfhosted.example" "$WWW_ENV"
-        ok "Created www/.env from template"
-    fi
-
-    # Public-facing URL for frontend
-    local base_url
-    if [[ -n "$CUSTOM_DOMAIN" ]]; then
-        base_url="https://$CUSTOM_DOMAIN"
-    elif [[ "$USE_CADDY" == "true" ]]; then
-        if [[ -n "$PRIMARY_IP" ]]; then
-            base_url="https://$PRIMARY_IP"
-        else
-            base_url="https://localhost"
-        fi
-    else
-        # No Caddy — user's proxy handles SSL. Use http for now, they'll override.
-        if [[ -n "$PRIMARY_IP" ]]; then
-            base_url="http://$PRIMARY_IP"
-        else
-            base_url="http://localhost"
-        fi
-    fi
-
-    env_set "$WWW_ENV" "SITE_URL" "$base_url"
-    env_set "$WWW_ENV" "NEXTAUTH_URL" "$base_url"
-    env_set "$WWW_ENV" "NEXTAUTH_SECRET" "$NEXTAUTH_SECRET"
-    env_set "$WWW_ENV" "API_URL" "$base_url"
-    env_set "$WWW_ENV" "WEBSOCKET_URL" "auto"
-    env_set "$WWW_ENV" "SERVER_API_URL" "http://server:1250"
-    env_set "$WWW_ENV" "KV_URL" "redis://redis:6379"
-    env_set "$WWW_ENV" "FEATURE_REQUIRE_LOGIN" "false"
-
-    ok "www/.env ready (URL=$base_url)"
-}
-
-# =========================================================
-# Step 4: Storage setup
-# =========================================================
-step_storage() {
-    info "Step 4: Storage setup"
-
-    if [[ "$USE_GARAGE" == "true" ]]; then
-        step_garage
-    else
-        step_external_s3
-    fi
-}
-
-step_garage() {
-    info "Configuring Garage (local S3)"
-
-    # Generate garage.toml from template
-    local garage_toml="$ROOT_DIR/scripts/garage.toml"
-    local garage_runtime="$ROOT_DIR/data/garage.toml"
-    mkdir -p "$ROOT_DIR/data"
-
-    if [[ -d "$garage_runtime" ]]; then
-        rm -rf "$garage_runtime"
-    fi
-    if [[ ! -f "$garage_runtime" ]]; then
-        local rpc_secret
-        rpc_secret=$(openssl rand -hex 32)
-        sed "s|__GARAGE_RPC_SECRET__|${rpc_secret}|" "$garage_toml" > "$garage_runtime"
-        ok "Generated data/garage.toml"
-    else
-        ok "data/garage.toml already exists"
-    fi
-
-    # Start garage container only
-    compose_garage_cmd up -d garage
-
-    # Wait for admin API (port 3903 exposed to host for health checks)
-    local garage_ready=false
-    for i in $(seq 1 30); do
-        if curl -sf http://localhost:3903/metrics > /dev/null 2>&1; then
-            garage_ready=true
-            break
-        fi
-        echo -ne "\r  Waiting for Garage admin API... ($i/30)"
-        sleep 2
-    done
-    echo ""
-    if [[ "$garage_ready" != "true" ]]; then
-        err "Garage not responding. Check: docker compose logs garage"
-        exit 1
-    fi
-
-    # Layout
-    local node_id
-    node_id=$(compose_garage_cmd exec -T garage /garage node id -q 2>/dev/null | tr -d '[:space:]')
-    local layout_status
-    layout_status=$(compose_garage_cmd exec -T garage /garage layout show 2>&1 || true)
-    if echo "$layout_status" | grep -q "No nodes"; then
-        compose_garage_cmd exec -T garage /garage layout assign "$node_id" -c 1G -z dc1
-        compose_garage_cmd exec -T garage /garage layout apply --version 1
-    fi
-
-    # Bucket
-    if ! compose_garage_cmd exec -T garage /garage bucket info reflector-media &>/dev/null; then
-        compose_garage_cmd exec -T garage /garage bucket create reflector-media
-    fi
-
-    # Key
-    local created_key=false
-    if compose_garage_cmd exec -T garage /garage key info reflector &>/dev/null; then
-        ok "Key 'reflector' already exists"
-    else
-        KEY_OUTPUT=$(compose_garage_cmd exec -T garage /garage key create reflector)
-        created_key=true
-    fi
-
-    # Permissions
-    compose_garage_cmd exec -T garage /garage bucket allow reflector-media --read --write --key reflector
-
-    # Write S3 credentials to server/.env
-    env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_BACKEND" "aws"
-    env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL" "http://garage:3900"
-    env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_BUCKET_NAME" "reflector-media"
-    env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_REGION" "garage"
-    if [[ "$created_key" == "true" ]]; then
-        local key_id key_secret
-        key_id=$(echo "$KEY_OUTPUT" | grep -i "key id" | awk '{print $NF}')
-        key_secret=$(echo "$KEY_OUTPUT" | grep -i "secret key" | awk '{print $NF}')
-        env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID" "$key_id"
-        env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY" "$key_secret"
-    fi
-
-    ok "Garage storage ready"
-}
-
-step_external_s3() {
-    info "Checking external S3 configuration"
-
-    env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_BACKEND" "aws"
-
-    local s3_vars=("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID" "TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY" "TRANSCRIPT_STORAGE_AWS_BUCKET_NAME" "TRANSCRIPT_STORAGE_AWS_REGION")
-    local missing=()
-
-    for var in "${s3_vars[@]}"; do
-        if ! env_has_key "$SERVER_ENV" "$var" || [[ -z "$(env_get "$SERVER_ENV" "$var")" ]]; then
-            missing+=("$var")
-        fi
-    done
-
-    if [[ ${#missing[@]} -gt 0 ]]; then
-        warn "S3 storage is REQUIRED. The following vars are missing in server/.env:"
-        for var in "${missing[@]}"; do
-            warn "  $var"
-        done
-        echo ""
-        info "Enter S3 credentials (or press Ctrl+C to abort and edit server/.env manually):"
-        echo ""
-
-        for var in "${missing[@]}"; do
-            local prompt_label
-            case "$var" in
-                *ACCESS_KEY_ID)      prompt_label="Access Key ID" ;;
-                *SECRET_ACCESS_KEY)  prompt_label="Secret Access Key" ;;
-                *BUCKET_NAME)        prompt_label="Bucket Name" ;;
-                *REGION)             prompt_label="Region (e.g. us-east-1)" ;;
-            esac
-            local value=""
-            while [[ -z "$value" ]]; do
-                read -rp "  $prompt_label: " value
-            done
-            env_set "$SERVER_ENV" "$var" "$value"
-        done
-
-        # Optional: endpoint URL for non-AWS S3
-        echo ""
-        read -rp "  S3 Endpoint URL (leave empty for AWS, or enter for MinIO/etc.): " endpoint_url
-        if [[ -n "$endpoint_url" ]]; then
-            env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL" "$endpoint_url"
-        fi
-    fi
-
-    ok "S3 storage configured"
-}
-
-# =========================================================
-# Step 5: Caddyfile
-# =========================================================
-step_caddyfile() {
-    if [[ "$USE_CADDY" != "true" ]]; then
-        return
-    fi
-
-    info "Step 5: Caddyfile setup"
-
-    local caddyfile="$ROOT_DIR/Caddyfile"
-    if [[ -d "$caddyfile" ]]; then
-        rm -rf "$caddyfile"
-    fi
-
-    if [[ -n "$CUSTOM_DOMAIN" ]]; then
-        # Real domain: Caddy auto-provisions Let's Encrypt certificate
-        cat > "$caddyfile" << CADDYEOF
-# Generated by setup-selfhosted.sh — Let's Encrypt HTTPS for $CUSTOM_DOMAIN
-$CUSTOM_DOMAIN {
-    handle /v1/* {
-        reverse_proxy server:1250
-    }
-    handle /health {
-        reverse_proxy server:1250
-    }
-    handle {
-        reverse_proxy web:3000
-    }
-}
-CADDYEOF
-        ok "Created Caddyfile for $CUSTOM_DOMAIN (Let's Encrypt auto-HTTPS)"
-    elif [[ -n "$PRIMARY_IP" ]]; then
-        # No domain, IP only: catch-all :443 with self-signed cert
-        # (IP connections don't send SNI, so we can't match by address)
-        cat > "$caddyfile" << CADDYEOF
-# Generated by setup-selfhosted.sh — self-signed cert for IP access
-:443 {
-    tls internal
-    handle /v1/* {
-        reverse_proxy server:1250
-    }
-    handle /health {
-        reverse_proxy server:1250
-    }
-    handle {
-        reverse_proxy web:3000
-    }
-}
-CADDYEOF
-        ok "Created Caddyfile for $PRIMARY_IP (catch-all :443 with self-signed cert)"
-    elif [[ ! -f "$caddyfile" ]]; then
-        cp "$ROOT_DIR/Caddyfile.selfhosted.example" "$caddyfile"
-        ok "Created Caddyfile from template"
-    else
-        ok "Caddyfile already exists"
-    fi
-}
-
-# =========================================================
-# Step 6: Start services
-# =========================================================
-step_services() {
-    info "Step 6: Starting Docker services"
-
-    # Build GPU/CPU image from source (always needed — no prebuilt image)
-    local build_svc="$MODEL_MODE"
-    info "Building $build_svc image (first build downloads ML models, may take a while)..."
-    compose_cmd build "$build_svc"
-    ok "$build_svc image built"
-
-    # Build or pull backend and frontend images
-    if [[ "$BUILD_IMAGES" == "true" ]]; then
-        info "Building backend image from source (server, worker, beat)..."
-        compose_cmd build server worker beat
-        ok "Backend image built"
-        info "Building frontend image from source..."
-        compose_cmd build web
-        ok "Frontend image built"
-    else
-        info "Pulling latest backend and frontend images..."
-        compose_cmd pull server web || warn "Pull failed — using cached images"
-    fi
-
-    # Start all services
-    compose_cmd up -d
-    ok "Containers started"
-
-    # Quick sanity check
-    sleep 3
-    local exited
-    exited=$(compose_cmd ps -a --format '{{.Name}} {{.Status}}' 2>/dev/null \
-        | grep -i 'exit' || true)
-    if [[ -n "$exited" ]]; then
-        warn "Some containers exited immediately:"
-        echo "$exited" | while read -r line; do warn "  $line"; done
-        dump_diagnostics
-    fi
-}
-
-# =========================================================
-# Step 7: Health checks
-# =========================================================
-step_health() {
-    info "Step 7: Health checks"
-
-    # Specialized model service (gpu or cpu)
-    local model_svc="$MODEL_MODE"
-
-    info "Waiting for $model_svc service (first start downloads ~1GB of models)..."
-    local model_ok=false
-    for i in $(seq 1 120); do
-        if curl -sf http://localhost:8000/docs > /dev/null 2>&1; then
-            model_ok=true
-            break
-        fi
-        echo -ne "\r  Waiting for $model_svc service... ($i/120)"
-        sleep 5
-    done
-    echo ""
-    if [[ "$model_ok" == "true" ]]; then
-        ok "$model_svc service healthy (transcription + diarization)"
-    else
-        warn "$model_svc service not ready yet — it will keep loading in the background"
-        warn "Check with: docker compose -f docker-compose.selfhosted.yml logs $model_svc"
-    fi
-
-    # Ollama (if applicable)
-    if [[ "$USES_OLLAMA" == "true" ]]; then
-        info "Waiting for Ollama service..."
-        local ollama_ok=false
-        for i in $(seq 1 60); do
-            if curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; then
-                ollama_ok=true
-                break
-            fi
-            echo -ne "\r  Waiting for Ollama... ($i/60)"
-            sleep 3
-        done
-        echo ""
-        if [[ "$ollama_ok" == "true" ]]; then
-            ok "Ollama service healthy"
-
-            # Pull model if not present
-            if compose_cmd exec -T "$OLLAMA_SVC" ollama list 2>/dev/null | awk '{print $1}' | grep -qxF "$OLLAMA_MODEL"; then
-                ok "Model $OLLAMA_MODEL already pulled"
-            else
-                info "Pulling model $OLLAMA_MODEL (this may take a while)..."
-                compose_cmd exec -T "$OLLAMA_SVC" ollama pull "$OLLAMA_MODEL"
-                ok "Model $OLLAMA_MODEL pulled"
-            fi
-        else
-            warn "Ollama not ready yet. Check: docker compose logs $OLLAMA_SVC"
-        fi
-    fi
-
-    # Server API
-    info "Waiting for Server API (first run includes database migrations)..."
-    local server_ok=false
-    for i in $(seq 1 90); do
-        local svc_status
-        svc_status=$(compose_cmd ps server --format '{{.Status}}' 2>/dev/null || true)
-        if [[ -z "$svc_status" ]] || echo "$svc_status" | grep -qi 'exit'; then
-            echo ""
-            err "Server container exited unexpectedly"
-            dump_diagnostics server
-            exit 1
-        fi
-        if curl -sf http://localhost:1250/health > /dev/null 2>&1; then
-            server_ok=true
-            break
-        fi
-        echo -ne "\r  Waiting for Server API... ($i/90)"
-        sleep 5
-    done
-    echo ""
-    if [[ "$server_ok" == "true" ]]; then
-        ok "Server API healthy"
-    else
-        err "Server API not ready after ~7 minutes"
-        dump_diagnostics server
-        exit 1
-    fi
-
-    # Frontend
-    info "Waiting for Frontend..."
-    local web_ok=false
-    for i in $(seq 1 30); do
-        if curl -sf http://localhost:3000 > /dev/null 2>&1; then
-            web_ok=true
-            break
-        fi
-        echo -ne "\r  Waiting for Frontend... ($i/30)"
-        sleep 3
-    done
-    echo ""
-    if [[ "$web_ok" == "true" ]]; then
-        ok "Frontend healthy"
-    else
-        warn "Frontend not responding. Check: docker compose logs web"
-    fi
-
-    # Caddy
-    if [[ "$USE_CADDY" == "true" ]]; then
-        sleep 2
-        if curl -sfk "https://localhost" > /dev/null 2>&1; then
-            ok "Caddy proxy healthy"
-        else
-            warn "Caddy proxy not responding. Check: docker compose logs caddy"
-        fi
-    fi
-
-    # LLM warning for non-Ollama modes
-    if [[ "$USES_OLLAMA" == "false" ]]; then
-        local llm_url=""
-        if env_has_key "$SERVER_ENV" "LLM_URL"; then
-            llm_url=$(env_get "$SERVER_ENV" "LLM_URL")
-        fi
-        if [[ -z "$llm_url" ]]; then
-            echo ""
-            warn "LLM is not configured. Transcription will work, but:"
-            warn "  - Summaries will NOT be generated"
-            warn "  - Topics will NOT be detected"
-            warn "  - Titles will NOT be auto-generated"
-            warn "Configure in server/.env: LLM_URL, LLM_API_KEY, LLM_MODEL"
-        fi
-    fi
-}
-
-# =========================================================
-# Main
-# =========================================================
-main() {
-    echo ""
-    echo "=========================================="
-    echo " Reflector — Self-Hosted Production Setup"
-    echo "=========================================="
-    echo ""
-    echo "  Models:  $MODEL_MODE"
-    echo "  LLM:     ${OLLAMA_MODE:-external}"
-    echo "  Garage:  $USE_GARAGE"
-    echo "  Caddy:   $USE_CADDY"
-    [[ -n "$CUSTOM_DOMAIN" ]] && echo "  Domain:  $CUSTOM_DOMAIN"
-    [[ "$BUILD_IMAGES" == "true" ]] && echo "  Build:   from source"
-    echo ""
-
-    # Detect primary IP
-    PRIMARY_IP=""
-    if [[ "$OS" == "Linux" ]]; then
-        PRIMARY_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
-        if [[ "$PRIMARY_IP" == "127."* ]] || [[ -z "$PRIMARY_IP" ]]; then
-            PRIMARY_IP=$(ip -4 route get 1 2>/dev/null | sed -n 's/.*src \([0-9.]*\).*/\1/p' || true)
-        fi
-    fi
-
-    # Touch env files so compose doesn't complain about missing env_file
-    mkdir -p "$ROOT_DIR/data"
-    touch "$SERVER_ENV" "$WWW_ENV"
-
-    # Ensure garage.toml exists if garage profile is active (compose needs it for volume mount)
-    if [[ "$USE_GARAGE" == "true" ]]; then
-        local garage_runtime="$ROOT_DIR/data/garage.toml"
-        if [[ ! -f "$garage_runtime" ]]; then
-            local rpc_secret
-            rpc_secret=$(openssl rand -hex 32)
-            sed "s|__GARAGE_RPC_SECRET__|${rpc_secret}|" "$ROOT_DIR/scripts/garage.toml" > "$garage_runtime"
-        fi
-    fi
-
-    step_prerequisites
-    echo ""
-    step_secrets
-    echo ""
-    step_server_env
-    echo ""
-    step_www_env
-    echo ""
-    step_storage
-    echo ""
-    step_caddyfile
-    echo ""
-    step_services
-    echo ""
-    step_health
-
-    echo ""
-    echo "=========================================="
-    echo -e " ${GREEN}Reflector is running!${NC}"
-    echo "=========================================="
-    echo ""
-    if [[ "$USE_CADDY" == "true" ]]; then
-        if [[ -n "$CUSTOM_DOMAIN" ]]; then
-            echo "  App:   https://$CUSTOM_DOMAIN"
-            echo "  API:   https://$CUSTOM_DOMAIN/v1/"
-        elif [[ -n "$PRIMARY_IP" ]]; then
-            echo "  App:   https://$PRIMARY_IP  (accept self-signed cert in browser)"
-            echo "  API:   https://$PRIMARY_IP/v1/"
-            echo "  Local: https://localhost"
-        else
-            echo "  App:   https://localhost  (accept self-signed cert in browser)"
-            echo "  API:   https://localhost/v1/"
-        fi
-    else
-        echo "  No Caddy — point your reverse proxy at:"
-        echo "    Frontend:  web:3000   (or localhost:3000 from host)"
-        echo "    API:       server:1250 (or localhost:1250 from host)"
-    fi
-    echo ""
-    echo "  Models:  $MODEL_MODE (transcription/diarization/translation)"
-    [[ "$USE_GARAGE" == "true" ]] && echo "  Storage: Garage (local S3)"
-    [[ "$USE_GARAGE" != "true" ]] && echo "  Storage: External S3"
-    [[ "$USES_OLLAMA" == "true" ]] && echo "  LLM:     Ollama ($OLLAMA_MODEL) for summarization/topics"
-    [[ "$USES_OLLAMA" != "true" ]] && echo "  LLM:     External (configure in server/.env)"
-    echo ""
-    echo "  To stop:   docker compose -f docker-compose.selfhosted.yml down"
-    echo "  To re-run: ./scripts/setup-selfhosted.sh $*"
-    echo ""
-}
-
-main "$@"
--- a/scripts/setup-standalone.sh
+++ b/scripts/setup-standalone.sh
@@ -35,98 +35,6 @@ err()   { echo -e "${RED}  ✗${NC} $*" >&2; }

 # --- Helpers ---

-dump_diagnostics() {
-    local failed_svc="${1:-}"
-    echo ""
-    err "========== DIAGNOSTICS =========="
-
-    err "Container status:"
-    compose_cmd ps -a --format "table {{.Name}}\t{{.Status}}" 2>/dev/null || true
-    echo ""
-
-    # Show logs for any container that exited
-    local stopped
-    stopped=$(compose_cmd ps -a --format '{{.Name}}\t{{.Status}}' 2>/dev/null \
-        | grep -iv 'up\|running' | awk -F'\t' '{print $1}' || true)
-    for c in $stopped; do
-        err "--- Logs for $c (exited/unhealthy) ---"
-        docker logs --tail 30 "$c" 2>&1 || true
-        echo ""
-    done
-
-    # If a specific service failed, always show its logs
-    if [[ -n "$failed_svc" ]]; then
-        err "--- Logs for $failed_svc (last 40) ---"
-        compose_cmd logs "$failed_svc" --tail 40 2>&1 || true
-        echo ""
-        # Try health check from inside the container as extra signal
-        err "--- Internal health check ($failed_svc) ---"
-        compose_cmd exec -T "$failed_svc" \
-            curl -sf http://localhost:1250/health 2>&1 || echo "(not reachable internally either)"
-    fi
-
-    err "================================="
-}
-
-trap 'dump_diagnostics' ERR
-
-# Get the image ID for a compose service (works even when containers are not running).
-svc_image_id() {
-    local svc="$1"
-    # Extract image name from compose config YAML, fall back to <project>-<service>
-    local img_name
-    img_name=$(compose_cmd config 2>/dev/null \
-        | sed -n "/^  ${svc}:/,/^  [a-z]/p" | grep '^\s*image:' | awk '{print $2}')
-    img_name="${img_name:-reflector-$svc}"
-    docker images -q "$img_name" 2>/dev/null | head -1
-}
-
-# Ensure images with build contexts are up-to-date.
-# Docker layer cache makes this fast (~seconds) when source hasn't changed.
-rebuild_images() {
-    local svc
-    for svc in web cpu; do
-        local old_id
-        old_id=$(svc_image_id "$svc")
-        old_id="${old_id:-<none>}"
-
-        info "Building $svc..."
-        compose_cmd build "$svc"
-
-        local new_id
-        new_id=$(svc_image_id "$svc")
-
-        if [[ "$old_id" == "$new_id" ]]; then
-            ok "$svc unchanged (${new_id:0:12})"
-        else
-            ok "$svc rebuilt (${old_id:0:12} -> ${new_id:0:12})"
-        fi
-    done
-}
-
-detect_lan_ip() {
-    # Returns the host's LAN IP — used for WebRTC ICE candidate rewriting.
-    case "$OS" in
-        Darwin)
-            # Try common interfaces: en0 (Wi-Fi), en1 (Ethernet)
-            for iface in en0 en1 en2 en3; do
-                local ip
-                ip=$(ipconfig getifaddr "$iface" 2>/dev/null || true)
-                if [[ -n "$ip" ]]; then
-                    echo "$ip"
-                    return
-                fi
-            done
-            ;;
-        Linux)
-            ip route get 1.1.1.1 2>/dev/null | sed -n 's/.*src \([^ ]*\).*/\1/p'
-            return
-            ;;
-    esac
-    # Fallback — empty means "not detected"
-    echo ""
-}
-
 wait_for_url() {
    local url="$1" label="$2" retries="${3:-30}" interval="${4:-2}"
    for i in $(seq 1 "$retries"); do
@@ -171,7 +79,7 @@ resolve_symlink() {
 }

 compose_cmd() {
-    local compose_files="-f $ROOT_DIR/docker-compose.standalone.yml"
+    local compose_files="-f $ROOT_DIR/docker-compose.yml -f $ROOT_DIR/docker-compose.standalone.yml"
    if [[ "$OS" == "Linux" ]] && [[ -n "${OLLAMA_PROFILE:-}" ]]; then
        docker compose $compose_files --profile "$OLLAMA_PROFILE" "$@"
    else
@@ -205,7 +113,7 @@ step_llm() {
            echo ""

            # Pull model if not already present
-            if ollama list 2>/dev/null | awk '{print $1}' | grep -qxF "$MODEL"; then
+            if ollama list 2>/dev/null | awk '{print $1}' | grep -qx "$MODEL"; then
                ok "Model $MODEL already pulled"
            else
                info "Pulling model $MODEL (this may take a while)..."
@@ -235,7 +143,7 @@ step_llm() {
            echo ""

            # Pull model inside container
-            if compose_cmd exec "$OLLAMA_SVC" ollama list 2>/dev/null | awk '{print $1}' | grep -qxF "$MODEL"; then
+            if compose_cmd exec "$OLLAMA_SVC" ollama list 2>/dev/null | awk '{print $1}' | grep -qx "$MODEL"; then
                ok "Model $MODEL already pulled"
            else
                info "Pulling model $MODEL inside container (this may take a while)..."
@@ -285,17 +193,6 @@ ENVEOF
    env_set "$SERVER_ENV" "LLM_MODEL" "$MODEL"
    env_set "$SERVER_ENV" "LLM_API_KEY" "not-needed"

-    # WebRTC: detect LAN IP for ICE candidate rewriting (bridge networking)
-    local lan_ip
-    lan_ip=$(detect_lan_ip)
-    if [[ -n "$lan_ip" ]]; then
-        env_set "$SERVER_ENV" "WEBRTC_HOST" "$lan_ip"
-        ok "WebRTC host IP: $lan_ip"
-    else
-        warn "Could not detect LAN IP — WebRTC recording from other devices may not work"
-        warn "Set WEBRTC_HOST=<your-lan-ip> in server/.env manually"
-    fi
-
    ok "Standalone vars set (LLM_URL=$LLM_URL_VALUE)"
 }

@@ -308,24 +205,15 @@ step_storage() {
    # Generate garage.toml from template (fill in RPC secret)
    GARAGE_TOML="$ROOT_DIR/scripts/garage.toml"
    GARAGE_TOML_RUNTIME="$ROOT_DIR/data/garage.toml"
-    mkdir -p "$ROOT_DIR/data"
-    if [[ -d "$GARAGE_TOML_RUNTIME" ]]; then
-        rm -rf "$GARAGE_TOML_RUNTIME"
-    fi
    if [[ ! -f "$GARAGE_TOML_RUNTIME" ]]; then
+        mkdir -p "$ROOT_DIR/data"
        RPC_SECRET=$(openssl rand -hex 32)
        sed "s|__GARAGE_RPC_SECRET__|${RPC_SECRET}|" "$GARAGE_TOML" > "$GARAGE_TOML_RUNTIME"
    fi

    compose_cmd up -d garage

-    # Use /metrics for readiness — /health returns 503 until layout is applied
-    if ! wait_for_url "http://localhost:3903/metrics" "Garage admin API"; then
-        echo ""
-        err "Garage container logs:"
-        compose_cmd logs garage --tail 30 2>&1 || true
-        exit 1
-    fi
+    wait_for_url "http://localhost:3903/health" "Garage admin API"
    echo ""

    # Layout: get node ID, assign, apply (skip if already applied)
@@ -385,17 +273,11 @@ ENVEOF
        ok "Created www/.env.local"
    fi

-    # Caddyfile.standalone.example serves API at /v1, /health — use base URL
-    if [[ -n "${PRIMARY_IP:-}" ]]; then
-        BASE_URL="https://$PRIMARY_IP:3043"
-    else
-        BASE_URL="https://localhost:3043"
-    fi
-    env_set "$WWW_ENV" "SITE_URL" "$BASE_URL"
-    env_set "$WWW_ENV" "NEXTAUTH_URL" "$BASE_URL"
+    env_set "$WWW_ENV" "SITE_URL" "http://localhost:3000"
+    env_set "$WWW_ENV" "NEXTAUTH_URL" "http://localhost:3000"
    env_set "$WWW_ENV" "NEXTAUTH_SECRET" "standalone-dev-secret-not-for-production"
-    env_set "$WWW_ENV" "API_URL" "$BASE_URL"
-    env_set "$WWW_ENV" "WEBSOCKET_URL" "auto"
+    env_set "$WWW_ENV" "API_URL" "http://localhost:1250"
+    env_set "$WWW_ENV" "WEBSOCKET_URL" "ws://localhost:1250"
    env_set "$WWW_ENV" "SERVER_API_URL" "http://server:1250"
    env_set "$WWW_ENV" "FEATURE_REQUIRE_LOGIN" "false"

@@ -411,7 +293,7 @@ step_services() {
    # Check for port conflicts — stale processes silently shadow Docker port mappings.
    # OrbStack/Docker Desktop bind ports for forwarding; ignore those PIDs.
    local ports_ok=true
-    for port in 3043 3000 1250 5432 6379 3900 3903; do
+    for port in 3000 1250 5432 6379 3900 3903; do
        local pids
        pids=$(lsof -ti :"$port" 2>/dev/null || true)
        for pid in $pids; do
@@ -431,24 +313,9 @@ step_services() {
        warn "Continuing anyway (services will start but may be shadowed)"
    fi

-    # Rebuild images if source has changed (Docker layer cache makes this fast when unchanged)
-    rebuild_images
-
    # server runs alembic migrations on startup automatically (see runserver.sh)
-    compose_cmd up -d postgres redis garage cpu server worker beat web caddy
+    compose_cmd up -d postgres redis garage cpu server worker beat web
    ok "Containers started"
-
-    # Quick sanity check — catch containers that exit immediately (bad image, missing file, etc.)
-    sleep 3
-    local exited
-    exited=$(compose_cmd ps -a --format '{{.Name}} {{.Status}}' 2>/dev/null \
-        | grep -i 'exit' || true)
-    if [[ -n "$exited" ]]; then
-        warn "Some containers exited immediately:"
-        echo "$exited" | while read -r line; do warn "  $line"; done
-        dump_diagnostics
-    fi
-
    info "Server is running migrations (alembic upgrade head)..."
 }

@@ -478,49 +345,14 @@ step_health() {
        warn "Check with: docker compose logs cpu"
    fi

-    # Server may take a long time on first run — alembic migrations run before uvicorn starts.
-    # Use docker exec so this works regardless of network_mode or port mapping.
-    info "Waiting for Server API (first run includes database migrations)..."
-    local server_ok=false
-    for i in $(seq 1 90); do
-        # Check if container is still running
-        local svc_status
-        svc_status=$(compose_cmd ps server --format '{{.Status}}' 2>/dev/null || true)
-        if [[ -z "$svc_status" ]] || echo "$svc_status" | grep -qi 'exit'; then
-            echo ""
-            err "Server container exited unexpectedly"
-            dump_diagnostics server
-            exit 1
-        fi
-        # Health check from inside container (avoids host networking issues)
-        if compose_cmd exec -T server curl -sf http://localhost:1250/health > /dev/null 2>&1; then
-            server_ok=true
-            break
-        fi
-        echo -ne "\r  Waiting for Server API... ($i/90)"
-        sleep 5
-    done
+    wait_for_url "http://localhost:1250/health" "Server API" 60 3
    echo ""
-    if [[ "$server_ok" == "true" ]]; then
-        ok "Server API healthy"
-    else
-        err "Server API not ready after ~7 minutes"
-        dump_diagnostics server
-        exit 1
-    fi
+    ok "Server API healthy"

    wait_for_url "http://localhost:3000" "Frontend" 90 3
    echo ""
    ok "Frontend responding"

-    # Caddy reverse proxy (self-signed TLS — curl needs -k)
-    if curl -sfk "https://localhost:3043" > /dev/null 2>&1; then
-        ok "Caddy proxy healthy (https://localhost:3043)"
-    else
-        warn "Caddy proxy not responding on https://localhost:3043"
-        warn "Check with: docker compose logs caddy"
-    fi
-
    # Check LLM reachability from inside a container
    if compose_cmd exec -T server \
        curl -sf "$LLM_URL_VALUE/models" > /dev/null 2>&1; then
@@ -548,39 +380,6 @@ main() {
        exit 1
    fi

-    # Docker: Compose plugin, buildx, and daemon. On Ubuntu, auto-install if missing.
-    docker_ready() {
-        docker compose version 2>/dev/null | grep -qi compose \
-            && docker buildx version &>/dev/null \
-            && docker info &>/dev/null
-    }
-
-    if ! docker_ready; then
-        RAN_INSTALL=false
-        if [[ "$OS" == "Linux" ]] && [[ -f /etc/os-release ]] && (source /etc/os-release 2>/dev/null; [[ "${ID:-}" == "ubuntu" || "${ID_LIKE:-}" == *"ubuntu"* ]]); then
-            info "Docker not ready. Running install-docker-ubuntu.sh..."
-            "$SCRIPT_DIR/install-docker-ubuntu.sh" || true
-            RAN_INSTALL=true
-            [[ -d /run/systemd/system ]] && command -v systemctl &>/dev/null && systemctl start docker 2>/dev/null || true
-            sleep 2
-        fi
-        if ! docker_ready; then
-            # Docker may be installed but current shell lacks docker group (needs newgrp)
-            if [[ "$RAN_INSTALL" == "true" ]] && [[ $(id -u) -ne 0 ]] && command -v sg &>/dev/null && getent group docker &>/dev/null; then
-                info "Re-running with docker group..."
-                exec sg docker -c "$(printf '%q' "$0" && printf ' %q' "$@")"
-            fi
-            if [[ "$OS" == "Darwin" ]]; then
-                err "Docker not ready. Install Docker Desktop or OrbStack."
-            elif [[ "$OS" == "Linux" ]]; then
-                err "Docker not ready. Run: ./scripts/install-docker-ubuntu.sh"
-                err "Then run: newgrp docker   (or log out and back in), then run this script again."
-            else
-                err "Docker not ready. Install Docker with Compose V2 and buildx."
-            fi
-            exit 1
-        fi
-    fi

    # LLM_URL_VALUE is set by step_llm, used by later steps
    LLM_URL_VALUE=""
@@ -590,57 +389,6 @@ main() {
    # touch them so compose_cmd works before the steps that populate them.
    touch "$SERVER_ENV" "$WWW_ENV"

-    # Ensure garage.toml exists before any compose up (step_llm starts all services including garage)
-    GARAGE_TOML="$ROOT_DIR/scripts/garage.toml"
-    GARAGE_TOML_RUNTIME="$ROOT_DIR/data/garage.toml"
-    mkdir -p "$ROOT_DIR/data"
-    if [[ -d "$GARAGE_TOML_RUNTIME" ]]; then
-        rm -rf "$GARAGE_TOML_RUNTIME"
-    fi
-    if [[ ! -f "$GARAGE_TOML_RUNTIME" ]]; then
-        RPC_SECRET=$(openssl rand -hex 32)
-        sed "s|__GARAGE_RPC_SECRET__|${RPC_SECRET}|" "$GARAGE_TOML" > "$GARAGE_TOML_RUNTIME"
-    fi
-
-    # Remove containers that may have bad mounts (was directory); force recreate
-    compose_cmd rm -f -s garage caddy 2>/dev/null || true
-
-    # Detect primary IP for droplet (used for Caddyfile, step_www_env, success message)
-    PRIMARY_IP=""
-    if [[ "$OS" == "Linux" ]]; then
-        PRIMARY_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
-        if [[ "$PRIMARY_IP" == "127."* ]] || [[ -z "$PRIMARY_IP" ]]; then
-            PRIMARY_IP=$(ip -4 route get 1 2>/dev/null | sed -n 's/.*src \([0-9.]*\).*/\1/p' || true)
-        fi
-    fi
-
-    # Ensure Caddyfile exists before any compose up (step_llm starts caddy)
-    # On droplet: explicit IP + localhost so Caddy provisions cert at startup (avoids on_demand/SNI issues)
-    CADDYFILE="$ROOT_DIR/Caddyfile"
-    if [[ -d "$CADDYFILE" ]]; then
-        rm -rf "$CADDYFILE"
-    fi
-    if [[ -n "$PRIMARY_IP" ]]; then
-        cat > "$CADDYFILE" << CADDYEOF
-# Generated by setup-standalone.sh — explicit IP for droplet (provisions cert at startup)
-https://$PRIMARY_IP, localhost {
-    tls internal
-    handle /v1/* {
-        reverse_proxy server:1250
-    }
-    handle /health {
-        reverse_proxy server:1250
-    }
-    handle {
-        reverse_proxy web:3000
-    }
-}
-CADDYEOF
-        ok "Created Caddyfile for $PRIMARY_IP and localhost"
-    elif [[ ! -f "$CADDYFILE" ]]; then
-        cp "$ROOT_DIR/Caddyfile.standalone.example" "$CADDYFILE"
-    fi
-
    step_llm
    echo ""
    step_server_env
@@ -658,14 +406,8 @@ CADDYEOF
    echo -e " ${GREEN}Reflector is running!${NC}"
    echo "=========================================="
    echo ""
-    if [[ -n "$PRIMARY_IP" ]]; then
-        echo "  App:       https://$PRIMARY_IP:3043  (accept self-signed cert in browser)"
-        echo "  API:       https://$PRIMARY_IP:3043/v1/"
-        echo "  Local:     https://localhost:3043"
-    else
-        echo "  App:       https://localhost:3043  (accept self-signed cert in browser)"
-        echo "  API:       https://localhost:3043/v1/"
-    fi
+    echo "  Frontend:  http://localhost:3000"
+    echo "  API:       http://localhost:1250"
    echo ""
    echo "  To stop:   docker compose down"
    echo "  To re-run: ./scripts/setup-standalone.sh"
--- a/server/.env.selfhosted.example
+++ b/server/.env.selfhosted.example
@@ -1,112 +0,0 @@
-# =======================================================
-# Reflector Self-Hosted Production — Backend Configuration
-# Generated by: ./scripts/setup-selfhosted.sh
-# Reference: server/reflector/settings.py
-# =======================================================
-
-# =======================================================
-# Database & Infrastructure
-# Pre-filled for Docker internal networking (docker-compose.selfhosted.yml)
-# =======================================================
-DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
-REDIS_HOST=redis
-REDIS_PORT=6379
-CELERY_BROKER_URL=redis://redis:6379/1
-CELERY_RESULT_BACKEND=redis://redis:6379/1
-
-# Secret key — auto-generated by setup script
-# Generate manually with: openssl rand -hex 32
-SECRET_KEY=changeme-generate-a-secure-random-string
-
-# =======================================================
-# Authentication
-# Disabled by default. Enable Authentik for multi-user access.
-# See docsv2/selfhosted-production.md for setup instructions.
-# =======================================================
-AUTH_BACKEND=none
-# AUTH_BACKEND=jwt
-# AUTH_JWT_AUDIENCE=
-
-# =======================================================
-# Specialized Models (Transcription, Diarization, Translation)
-# These run in the gpu/cpu container — NOT an LLM.
-# The "modal" backend means "HTTP API client" — it talks to
-# the self-hosted container, not Modal.com cloud.
-# =======================================================
-TRANSCRIPT_BACKEND=modal
-TRANSCRIPT_URL=http://transcription:8000
-TRANSCRIPT_MODAL_API_KEY=selfhosted
-
-DIARIZATION_ENABLED=true
-DIARIZATION_BACKEND=modal
-DIARIZATION_URL=http://transcription:8000
-
-TRANSLATION_BACKEND=modal
-TRANSLATE_URL=http://transcription:8000
-
-# HuggingFace token — optional, for gated models (e.g. pyannote).
-# Falls back to public S3 model bundle if not set.
-# HF_TOKEN=hf_xxxxx
-
-# =======================================================
-# LLM for Summarization & Topic Detection
-# Only summaries and topics use an LLM. Everything else
-# (transcription, diarization, translation) uses specialized models above.
-#
-# Supports any OpenAI-compatible endpoint.
-# Auto-configured by setup script if using --ollama-gpu or --ollama-cpu.
-# For --gpu or --cpu modes, you MUST configure an external LLM.
-# =======================================================
-
-# --- Option A: External OpenAI-compatible API ---
-# LLM_URL=https://api.openai.com/v1
-# LLM_API_KEY=sk-your-api-key
-# LLM_MODEL=gpt-4o-mini
-
-# --- Option B: Local Ollama (auto-set by --ollama-gpu/--ollama-cpu) ---
-# LLM_URL=http://ollama:11434/v1
-# LLM_API_KEY=not-needed
-# LLM_MODEL=llama3.1
-
-LLM_CONTEXT_WINDOW=16000
-
-# =======================================================
-# S3 Storage (REQUIRED)
-# Where to store audio files and transcripts.
-#
-# Option A: Use --garage flag (auto-configured by setup script)
-# Option B: Any S3-compatible endpoint (AWS, MinIO, etc.)
-#           Set TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL for non-AWS endpoints.
-# =======================================================
-TRANSCRIPT_STORAGE_BACKEND=aws
-TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID=
-TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY=
-TRANSCRIPT_STORAGE_AWS_BUCKET_NAME=reflector-media
-TRANSCRIPT_STORAGE_AWS_REGION=us-east-1
-
-# For non-AWS S3-compatible endpoints (Garage, MinIO, etc.):
-# TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://garage:3900
-
-# =======================================================
-# Daily.co Live Rooms (Optional)
-# Enable real-time meeting rooms with Daily.co integration.
-# Requires a Daily.co account: https://www.daily.co/
-# =======================================================
-# DEFAULT_VIDEO_PLATFORM=daily
-# DAILY_API_KEY=your-daily-api-key
-# DAILY_SUBDOMAIN=your-subdomain
-# DAILY_WEBHOOK_SECRET=your-daily-webhook-secret
-# DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco
-# DAILYCO_STORAGE_AWS_REGION=us-east-1
-# DAILYCO_STORAGE_AWS_ROLE_ARN=arn:aws:iam::role/DailyCoAccess
-
-# =======================================================
-# Feature Flags
-# =======================================================
-PUBLIC_MODE=true
-# FEATURE_ROOMS=true
-
-# =======================================================
-# Sentry (Optional)
-# =======================================================
-# SENTRY_DSN=
--- a/server/migrations/versions/623af934249a_add_change_seq_to_transcript.py
+++ b/server/migrations/versions/623af934249a_add_change_seq_to_transcript.py
@@ -1,74 +0,0 @@
-"""add_change_seq_to_transcript
-
-Revision ID: 623af934249a
-Revises: 3aa20b96d963
-Create Date: 2026-02-19 18:53:12.315440
-
-"""
-
-from typing import Sequence, Union
-
-import sqlalchemy as sa
-from alembic import op
-
-# revision identifiers, used by Alembic.
-revision: str = "623af934249a"
-down_revision: Union[str, None] = "3aa20b96d963"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
-    # Sequence
-    op.execute("CREATE SEQUENCE IF NOT EXISTS transcript_change_seq;")
-
-    # Column (nullable first for backfill)
-    op.add_column("transcript", sa.Column("change_seq", sa.BigInteger(), nullable=True))
-
-    # Backfill existing rows with sequential values (ordered by created_at for determinism)
-    op.execute("""
-        UPDATE transcript SET change_seq = sub.seq FROM (
-            SELECT id, nextval('transcript_change_seq') AS seq
-            FROM transcript ORDER BY created_at ASC
-        ) sub WHERE transcript.id = sub.id;
-    """)
-
-    # Now make NOT NULL
-    op.alter_column("transcript", "change_seq", nullable=False)
-
-    # Default for any inserts between now and trigger creation
-    op.alter_column(
-        "transcript",
-        "change_seq",
-        server_default=sa.text("nextval('transcript_change_seq')"),
-    )
-
-    # Trigger function
-    op.execute("""
-        CREATE OR REPLACE FUNCTION set_transcript_change_seq()
-        RETURNS TRIGGER AS $$
-        BEGIN
-            NEW.change_seq := nextval('transcript_change_seq');
-            RETURN NEW;
-        END;
-        $$ LANGUAGE plpgsql;
-    """)
-
-    # Trigger (fires on every INSERT or UPDATE)
-    op.execute("""
-        CREATE TRIGGER trigger_transcript_change_seq
-            BEFORE INSERT OR UPDATE ON transcript
-            FOR EACH ROW
-            EXECUTE FUNCTION set_transcript_change_seq();
-    """)
-
-    # Index for efficient polling
-    op.create_index("idx_transcript_change_seq", "transcript", ["change_seq"])
-
-
-def downgrade() -> None:
-    op.execute("DROP TRIGGER IF EXISTS trigger_transcript_change_seq ON transcript;")
-    op.execute("DROP FUNCTION IF EXISTS set_transcript_change_seq();")
-    op.drop_index("idx_transcript_change_seq", table_name="transcript")
-    op.drop_column("transcript", "change_seq")
-    op.execute("DROP SEQUENCE IF EXISTS transcript_change_seq;")
--- a/server/reflector/app.py
+++ b/server/reflector/app.py
@@ -37,13 +37,6 @@ try:
 except ImportError:
    sentry_sdk = None

-# Patch aioice port range if configured (must happen before any RTCPeerConnection)
-if settings.WEBRTC_PORT_RANGE:
-    from reflector.webrtc_ports import parse_port_range, patch_aioice_port_range
-
-    _min, _max = parse_port_range(settings.WEBRTC_PORT_RANGE)
-    patch_aioice_port_range(_min, _max)
-

 # lifespan events
@asynccontextmanager
@@ -66,7 +59,7 @@ else:
    logger.info("Sentry disabled")

 # build app
-app = FastAPI(lifespan=lifespan, root_path=settings.ROOT_PATH)
+app = FastAPI(lifespan=lifespan)
 app.add_middleware(
    CORSMiddleware,
    allow_credentials=settings.CORS_ALLOW_CREDENTIALS or False,
--- a/server/reflector/asynctask.py
+++ b/server/reflector/asynctask.py
@@ -4,9 +4,8 @@ from uuid import uuid4

 from celery import current_task

-from reflector.db import _database_context, get_database
+from reflector.db import get_database
 from reflector.llm import llm_session_id
-from reflector.ws_manager import reset_ws_manager


 def asynctask(f):
@@ -21,14 +20,6 @@ def asynctask(f):
                return await f(*args, **kwargs)
            finally:
                await database.disconnect()
-                _database_context.set(None)
-
-        if current_task:
-            # Reset cached connections before each Celery task.
-            # Each asyncio.run() creates a new event loop, making connections
-            # from previous tasks stale ("Future attached to a different loop").
-            _database_context.set(None)
-            reset_ws_manager()

        coro = run_with_db()
        if current_task:
--- a/server/reflector/auth/init.py
+++ b/server/reflector/auth/init.py
@@ -12,5 +12,3 @@ AccessTokenInfo = auth_module.AccessTokenInfo
 authenticated = auth_module.authenticated
 current_user = auth_module.current_user
 current_user_optional = auth_module.current_user_optional
-parse_ws_bearer_token = auth_module.parse_ws_bearer_token
-current_user_ws_optional = auth_module.current_user_ws_optional
--- a/server/reflector/auth/auth_jwt.py
+++ b/server/reflector/auth/auth_jwt.py
@@ -1,9 +1,6 @@
-from typing import TYPE_CHECKING, Annotated, List, Optional
+from typing import Annotated, List, Optional

 from fastapi import Depends, HTTPException
-
-if TYPE_CHECKING:
-    from fastapi import WebSocket
 from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
 from jose import JWTError, jwt
 from pydantic import BaseModel
@@ -127,20 +124,3 @@ async def current_user_optional(
    jwtauth: JWTAuth = Depends(),
 ):
    return await _authenticate_user(jwt_token, api_key, jwtauth)
-
-
-def parse_ws_bearer_token(
-    websocket: "WebSocket",
-) -> tuple[Optional[str], Optional[str]]:
-    raw = websocket.headers.get("sec-websocket-protocol") or ""
-    parts = [p.strip() for p in raw.split(",") if p.strip()]
-    if len(parts) >= 2 and parts[0].lower() == "bearer":
-        return parts[1], "bearer"
-    return None, None
-
-
-async def current_user_ws_optional(websocket: "WebSocket") -> Optional[UserInfo]:
-    token, _ = parse_ws_bearer_token(websocket)
-    if not token:
-        return None
-    return await _authenticate_user(token, None, JWTAuth())
--- a/server/reflector/auth/auth_none.py
+++ b/server/reflector/auth/auth_none.py
@@ -19,11 +19,3 @@ def current_user():

 def current_user_optional():
    return None
-
-
-def parse_ws_bearer_token(websocket):
-    return None, None
-
-
-async def current_user_ws_optional(websocket):
-    return None
--- a/server/reflector/db/search.py
+++ b/server/reflector/db/search.py
@@ -151,7 +151,6 @@ class SearchResultDB(BaseModel):
    title: str | None = None
    source_kind: SourceKind
    room_id: str | None = None
-    change_seq: int | None = None
    rank: float = Field(..., ge=0, le=1)


@@ -174,7 +173,6 @@ class SearchResult(BaseModel):
    total_match_count: NonNegativeInt = Field(
        default=0, description="Total number of matches found in the transcript"
    )
-    change_seq: int | None = None

    @field_serializer("created_at", when_used="json")
    def serialize_datetime(self, dt: datetime) -> str:
@@ -358,7 +356,6 @@ class SearchController:
            transcripts.c.user_id,
            transcripts.c.room_id,
            transcripts.c.source_kind,
-            transcripts.c.change_seq,
            transcripts.c.webvtt,
            transcripts.c.long_summary,
            sqlalchemy.case(
--- a/server/reflector/db/transcripts.py
+++ b/server/reflector/db/transcripts.py
@@ -5,10 +5,7 @@ import shutil
 from contextlib import asynccontextmanager
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal, Sequence
-
-if TYPE_CHECKING:
-    from reflector.ws_events import TranscriptEventName
+from typing import Any, Literal, Sequence

 import sqlalchemy
 from fastapi import HTTPException
@@ -35,8 +32,6 @@ class SourceKind(enum.StrEnum):
    FILE = enum.auto()


-transcript_change_seq = sqlalchemy.Sequence("transcript_change_seq", metadata=metadata)
-
 transcripts = sqlalchemy.Table(
    "transcript",
    metadata,
@@ -91,12 +86,6 @@ transcripts = sqlalchemy.Table(
    sqlalchemy.Column("webvtt", sqlalchemy.Text),
    # Hatchet workflow run ID for resumption of failed workflows
    sqlalchemy.Column("workflow_run_id", sqlalchemy.String),
-    sqlalchemy.Column(
-        "change_seq",
-        sqlalchemy.BigInteger,
-        transcript_change_seq,
-        server_default=transcript_change_seq.next_value(),
-    ),
    sqlalchemy.Index("idx_transcript_recording_id", "recording_id"),
    sqlalchemy.Index("idx_transcript_user_id", "user_id"),
    sqlalchemy.Index("idx_transcript_created_at", "created_at"),
@@ -195,7 +184,7 @@ class TranscriptWaveform(BaseModel):


 class TranscriptEvent(BaseModel):
-    event: str  # Typed at call sites via ws_events.TranscriptEventName; str here for DB compat
+    event: str
    data: dict


@@ -237,7 +226,6 @@ class Transcript(BaseModel):
    audio_deleted: bool | None = None
    webvtt: str | None = None
    workflow_run_id: str | None = None  # Hatchet workflow run ID for resumption
-    change_seq: int | None = None

    @field_serializer("created_at", when_used="json")
    def serialize_datetime(self, dt: datetime) -> str:
@@ -245,9 +233,7 @@ class Transcript(BaseModel):
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.isoformat()

-    def add_event(
-        self, event: "TranscriptEventName", data: BaseModel
-    ) -> TranscriptEvent:
+    def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
        ev = TranscriptEvent(event=event, data=data.model_dump())
        self.events.append(ev)
        return ev
@@ -390,7 +376,6 @@ class TranscriptController:
        source_kind: SourceKind | None = None,
        room_id: str | None = None,
        search_term: str | None = None,
-        change_seq_from: int | None = None,
        return_query: bool = False,
        exclude_columns: list[str] = [
            "topics",
@@ -411,7 +396,6 @@ class TranscriptController:
        - `filter_recording`: filter out transcripts that are currently recording
        - `room_id`: filter transcripts by room ID
        - `search_term`: filter transcripts by search term
-        - `change_seq_from`: filter transcripts with change_seq > this value
        """

        query = transcripts.select().join(
@@ -434,9 +418,6 @@ class TranscriptController:
        if search_term:
            query = query.where(transcripts.c.title.ilike(f"%{search_term}%"))

-        if change_seq_from is not None:
-            query = query.where(transcripts.c.change_seq > change_seq_from)
-
        # Exclude heavy JSON columns from list queries
        transcript_columns = [
            col for col in transcripts.c if col.name not in exclude_columns
@@ -450,10 +431,9 @@ class TranscriptController:
        )

        if order_by is not None:
+            field = getattr(transcripts.c, order_by.removeprefix("-"))
            if order_by.startswith("-"):
-                field = getattr(transcripts.c, order_by[1:]).desc()
-            else:
-                field = getattr(transcripts.c, order_by)
+                field = field.desc()
            query = query.order_by(field)

        if filter_empty:
@@ -708,7 +688,7 @@ class TranscriptController:
    async def append_event(
        self,
        transcript: Transcript,
-        event: "TranscriptEventName",
+        event: str,
        data: Any,
    ) -> TranscriptEvent:
        """
--- a/server/reflector/hatchet/broadcast.py
+++ b/server/reflector/hatchet/broadcast.py
@@ -12,11 +12,10 @@ import structlog

 from reflector.db.transcripts import Transcript, TranscriptEvent, transcripts_controller
 from reflector.utils.string import NonEmptyString
-from reflector.ws_events import TranscriptEventName
 from reflector.ws_manager import get_ws_manager

 # Events that should also be sent to user room (matches Celery behavior)
-USER_ROOM_EVENTS: set[TranscriptEventName] = {"STATUS", "FINAL_TITLE", "DURATION"}
+USER_ROOM_EVENTS = {"STATUS", "FINAL_TITLE", "DURATION"}


 async def broadcast_event(
@@ -82,7 +81,8 @@ async def set_status_and_broadcast(
 async def append_event_and_broadcast(
    transcript_id: NonEmptyString,
    transcript: Transcript,
-    event_name: TranscriptEventName,
+    event_name: NonEmptyString,
+    # TODO: replace Any with a proper event name => payload type mapping
    data: Any,
    logger: structlog.BoundLogger,
 ) -> TranscriptEvent:
--- a/server/reflector/hatchet/workflows/daily_multitrack_pipeline.py
+++ b/server/reflector/hatchet/workflows/daily_multitrack_pipeline.py
@@ -720,7 +720,6 @@ async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
                chunk_text=chunk["text"],
                timestamp=chunk["timestamp"],
                duration=chunk["duration"],
-                words=chunk["words"],
            )
        )
        for chunk in chunks
@@ -732,31 +731,41 @@ async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
        TopicChunkResult(**result[TaskName.DETECT_CHUNK_TOPIC]) for result in results
    ]

+    # Build index-to-words map from local chunks (words not in child workflow results)
+    chunks_by_index = {chunk["index"]: chunk["words"] for chunk in chunks}
+
    async with fresh_db_connection():
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        if not transcript:
            raise ValueError(f"Transcript {input.transcript_id} not found")

+        # Clear topics for idempotency on retry (each topic gets a fresh UUID,
+        # so upsert_topic would append duplicates without this)
+        await transcripts_controller.update(transcript, {"topics": []})
+
        for chunk in topic_chunks:
+            chunk_words = chunks_by_index[chunk.chunk_index]
            topic = TranscriptTopic(
                title=chunk.title,
                summary=chunk.summary,
                timestamp=chunk.timestamp,
-                transcript=" ".join(w.text for w in chunk.words),
-                words=chunk.words,
+                transcript=" ".join(w.text for w in chunk_words),
+                words=chunk_words,
            )
            await transcripts_controller.upsert_topic(transcript, topic)
            await append_event_and_broadcast(
                input.transcript_id, transcript, "TOPIC", topic, logger=logger
            )

+    # Words omitted from TopicsResult — already persisted to DB above.
+    # Downstream tasks that need words refetch from DB.
    topics_list = [
        TitleSummary(
            title=chunk.title,
            summary=chunk.summary,
            timestamp=chunk.timestamp,
            duration=chunk.duration,
-            transcript=TranscriptType(words=chunk.words),
+            transcript=TranscriptType(words=[]),
        )
        for chunk in topic_chunks
    ]
@@ -842,9 +851,8 @@ async def extract_subjects(input: PipelineInput, ctx: Context) -> SubjectsResult
    ctx.log(f"extract_subjects: starting for transcript_id={input.transcript_id}")

    topics_result = ctx.task_output(detect_topics)
-    topics = topics_result.topics

-    if not topics:
+    if not topics_result.topics:
        ctx.log("extract_subjects: no topics, returning empty subjects")
        return SubjectsResult(
            subjects=[],
@@ -857,11 +865,13 @@ async def extract_subjects(input: PipelineInput, ctx: Context) -> SubjectsResult
    # sharing DB connections and LLM HTTP pools across forks
    from reflector.db.transcripts import transcripts_controller  # noqa: PLC0415
    from reflector.llm import LLM  # noqa: PLC0415
+    from reflector.processors.types import words_to_segments  # noqa: PLC0415

    async with fresh_db_connection():
        transcript = await transcripts_controller.get_by_id(input.transcript_id)

-        # Build transcript text from topics (same logic as TranscriptFinalSummaryProcessor)
+        # Build transcript text from DB topics (words omitted from task output
+        # to reduce Hatchet payload size — refetch from DB where they were persisted)
        speakermap = {}
        if transcript and transcript.participants:
            speakermap = {
@@ -871,8 +881,12 @@ async def extract_subjects(input: PipelineInput, ctx: Context) -> SubjectsResult
            }

        text_lines = []
-        for topic in topics:
-            for segment in topic.transcript.as_segments():
+        # get_by_id can return None (the speakermap code already allows for
+        # it); fail clearly rather than with AttributeError on .topics.
+        if not transcript:
+            raise ValueError(f"Transcript {input.transcript_id} not found")
+        for db_topic in transcript.topics:
+            for segment in words_to_segments(db_topic.words):
                name = speakermap.get(segment.speaker, f"Speaker {segment.speaker}")
                text_lines.append(f"{name}: {segment.text}")

--- a/server/reflector/hatchet/workflows/models.py
+++ b/server/reflector/hatchet/workflows/models.py
@@ -95,7 +95,6 @@ class TopicChunkResult(BaseModel):
    summary: str
    timestamp: float
    duration: float
-    words: list[Word]


 class TopicsResult(BaseModel):
--- a/server/reflector/hatchet/workflows/topic_chunk_processing.py
+++ b/server/reflector/hatchet/workflows/topic_chunk_processing.py
@@ -20,7 +20,6 @@ from reflector.hatchet.constants import LLM_RATE_LIMIT_KEY, TIMEOUT_MEDIUM
 from reflector.hatchet.workflows.models import TopicChunkResult
 from reflector.logger import logger
 from reflector.processors.prompts import TOPIC_PROMPT
-from reflector.processors.types import Word


 class TopicChunkInput(BaseModel):
@@ -30,7 +29,6 @@ class TopicChunkInput(BaseModel):
    chunk_text: str
    timestamp: float
    duration: float
-    words: list[Word]


 hatchet = HatchetClientManager.get_client()
@@ -71,7 +69,7 @@ async def detect_chunk_topic(input: TopicChunkInput, ctx: Context) -> TopicChunk
    from reflector.settings import settings  # noqa: PLC0415
    from reflector.utils.text import clean_title  # noqa: PLC0415

-    llm = LLM(settings=settings, temperature=0.9)
+    llm = LLM(settings=settings, temperature=0.9, max_tokens=500)

    prompt = TOPIC_PROMPT.format(text=input.chunk_text)
    response = await llm.get_structured_response(
@@ -99,5 +97,4 @@ async def detect_chunk_topic(input: TopicChunkInput, ctx: Context) -> TopicChunk
        summary=response.summary,
        timestamp=input.timestamp,
        duration=input.duration,
-        words=input.words,
    )
--- a/server/reflector/llm.py
+++ b/server/reflector/llm.py
@@ -202,9 +202,7 @@ class StructuredOutputWorkflow(Workflow, Generic[OutputT]):


 class LLM:
-    def __init__(
-        self, settings, temperature: float = 0.4, max_tokens: int | None = None
-    ):
+    def __init__(self, settings, temperature: float = 0.4, max_tokens: int = 2048):
        self.settings_obj = settings
        self.model_name = settings.LLM_MODEL
        self.url = settings.LLM_URL
--- a/server/reflector/pipelines/main_live_pipeline.py
+++ b/server/reflector/pipelines/main_live_pipeline.py
@@ -62,8 +62,6 @@ from reflector.processors.types import (
 from reflector.processors.types import Transcript as TranscriptProcessorType
 from reflector.settings import settings
 from reflector.storage import get_transcripts_storage
-from reflector.views.transcripts import GetTranscriptTopic
-from reflector.ws_events import TranscriptEventName
 from reflector.ws_manager import WebsocketManager, get_ws_manager
 from reflector.zulip import (
    get_zulip_message,
@@ -91,11 +89,7 @@ def broadcast_to_sockets(func):
        if transcript and transcript.user_id:
            # Emit only relevant events to the user room to avoid noisy updates.
            # Allowed: STATUS, FINAL_TITLE, DURATION. All are prefixed with TRANSCRIPT_
-            allowed_user_events: set[TranscriptEventName] = {
-                "STATUS",
-                "FINAL_TITLE",
-                "DURATION",
-            }
+            allowed_user_events = {"STATUS", "FINAL_TITLE", "DURATION"}
            if resp.event in allowed_user_events:
                await self.ws_manager.send_json(
                    room_id=f"user:{transcript.user_id}",
@@ -250,14 +244,13 @@ class PipelineMainBase(PipelineRunner[PipelineMessage], Generic[PipelineMessage]
        )
        if isinstance(data, TitleSummaryWithIdProcessorType):
            topic.id = data.id
-        get_topic = GetTranscriptTopic.from_transcript_topic(topic)
        async with self.transaction():
            transcript = await self.get_transcript()
            await transcripts_controller.upsert_topic(transcript, topic)
            return await transcripts_controller.append_event(
                transcript=transcript,
                event="TOPIC",
-                data=get_topic,
+                data=topic,
            )

    @broadcast_to_sockets
--- a/server/reflector/processors/transcript_final_title.py
+++ b/server/reflector/processors/transcript_final_title.py
@@ -39,7 +39,7 @@ class TranscriptFinalTitleProcessor(Processor):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.chunks: list[TitleSummary] = []
-        self.llm = LLM(settings=settings, temperature=0.5)
+        self.llm = LLM(settings=settings, temperature=0.5, max_tokens=200)

    async def _push(self, data: TitleSummary):
        self.chunks.append(data)
--- a/server/reflector/processors/transcript_topic_detector.py
+++ b/server/reflector/processors/transcript_topic_detector.py
@@ -35,7 +35,7 @@ class TranscriptTopicDetectorProcessor(Processor):
        super().__init__(**kwargs)
        self.transcript = None
        self.min_transcript_length = min_transcript_length
-        self.llm = LLM(settings=settings, temperature=0.9)
+        self.llm = LLM(settings=settings, temperature=0.9, max_tokens=500)

    async def _push(self, data: Transcript):
        if self.transcript is None:
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -12,17 +12,6 @@ class Settings(BaseSettings):
        extra="ignore",
    )

-    ROOT_PATH: str = "/"
-
-    # WebRTC port range for ICE candidates (e.g. "50000-50100").
-    # When set, monkey-patches aioice to bind UDP sockets within this range,
-    # allowing Docker port mapping instead of network_mode: host.
-    WEBRTC_PORT_RANGE: str | None = None
-    # Host IP or hostname to advertise in ICE candidates instead of the
-    # container's internal IP. Use "host.docker.internal" in Docker with
-    # extra_hosts, or a specific LAN IP. Resolved at connection time.
-    WEBRTC_HOST: str | None = None
-
    # CORS
    UI_BASE_URL: str = "http://localhost:3000"
    CORS_ORIGIN: str = "*"
--- a/server/reflector/tools/process.py
+++ b/server/reflector/tools/process.py
@@ -24,9 +24,6 @@ from reflector.pipelines.main_live_pipeline import (
    pipeline_process as live_pipeline_process,
 )
 from reflector.storage import Storage
-from reflector.worker.app import (
-    app as celery_app,  # noqa: F401 - ensure Celery uses Redis broker
-)


 def validate_s3_bucket_name(bucket: str) -> None:
--- a/server/reflector/views/rtc_offer.py
+++ b/server/reflector/views/rtc_offer.py
@@ -10,7 +10,6 @@ from pydantic import BaseModel
 from reflector.events import subscribers_shutdown
 from reflector.logger import logger
 from reflector.pipelines.runner import PipelineRunner
-from reflector.settings import settings

 sessions = []
 router = APIRouter()
@@ -124,16 +123,7 @@ async def rtc_offer_base(
    # update metrics
    m_rtc_sessions.inc()

-    sdp = pc.localDescription.sdp
-
-    # Rewrite ICE candidate IPs when running behind Docker bridge networking
-    if settings.WEBRTC_HOST:
-        from reflector.webrtc_ports import resolve_webrtc_host, rewrite_sdp_host
-
-        host_ip = resolve_webrtc_host(settings.WEBRTC_HOST)
-        sdp = rewrite_sdp_host(sdp, host_ip)
-
-    return RtcOffer(sdp=sdp, type=pc.localDescription.type)
+    return RtcOffer(sdp=pc.localDescription.sdp, type=pc.localDescription.type)


@subscribers_shutdown.append
--- a/server/reflector/views/transcripts.py
+++ b/server/reflector/views/transcripts.py
@@ -111,7 +111,6 @@ class GetTranscriptMinimal(BaseModel):
    room_id: str | None = None
    room_name: str | None = None
    audio_deleted: bool | None = None
-    change_seq: int | None = None


 class TranscriptParticipantWithEmail(TranscriptParticipant):
@@ -267,22 +266,12 @@ async def transcripts_list(
    source_kind: SourceKind | None = None,
    room_id: str | None = None,
    search_term: str | None = None,
-    change_seq_from: int | None = None,
-    sort_by: Literal["created_at", "change_seq"] | None = None,
 ):
    if not user and not settings.PUBLIC_MODE:
        raise HTTPException(status_code=401, detail="Not authenticated")

    user_id = user["sub"] if user else None

-    # Default behavior preserved: sort_by=None → "-created_at"
-    if sort_by == "change_seq":
-        order_by = "change_seq"  # ASC (ascending for checkpoint-based polling)
-    elif sort_by == "created_at":
-        order_by = "-created_at"  # DESC (newest first, same as current default)
-    else:
-        order_by = "-created_at"  # default, backward compatible
-
    return await apaginate(
        get_database(),
        await transcripts_controller.get_all(
@@ -290,8 +279,7 @@ async def transcripts_list(
            source_kind=SourceKind(source_kind) if source_kind else None,
            room_id=room_id,
            search_term=search_term,
-            order_by=order_by,
-            change_seq_from=change_seq_from,
+            order_by="-created_at",
            return_query=True,
        ),
    )
@@ -524,7 +512,6 @@ async def transcript_get(
        "room_id": transcript.room_id,
        "room_name": room_name,
        "audio_deleted": transcript.audio_deleted,
-        "change_seq": transcript.change_seq,
        "participants": participants,
    }

--- a/server/reflector/views/transcripts_websocket.py
+++ b/server/reflector/views/transcripts_websocket.py
@@ -4,22 +4,18 @@ Transcripts websocket API

 """

-from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
+from typing import Optional
+
+from fastapi import APIRouter, Depends, HTTPException, WebSocket, WebSocketDisconnect

 import reflector.auth as auth
 from reflector.db.transcripts import transcripts_controller
-from reflector.ws_events import TranscriptWsEvent
 from reflector.ws_manager import get_ws_manager

 router = APIRouter()


-@router.get(
-    "/transcripts/{transcript_id}/events",
-    response_model=TranscriptWsEvent,
-    summary="Transcript WebSocket event schema",
-    description="Stub exposing the discriminated union of all transcript-level WS events for OpenAPI type generation. Real events are delivered over the WebSocket at the same path.",
-)
+@router.get("/transcripts/{transcript_id}/events")
 async def transcript_get_websocket_events(transcript_id: str):
    pass

@@ -28,9 +24,8 @@ async def transcript_get_websocket_events(transcript_id: str):
 async def transcript_events_websocket(
    transcript_id: str,
    websocket: WebSocket,
+    user: Optional[auth.UserInfo] = Depends(auth.current_user_optional),
 ):
-    _, negotiated_subprotocol = auth.parse_ws_bearer_token(websocket)
-    user = await auth.current_user_ws_optional(websocket)
    user_id = user["sub"] if user else None
    transcript = await transcripts_controller.get_by_id_for_http(
        transcript_id, user_id=user_id
@@ -42,9 +37,7 @@ async def transcript_events_websocket(
    # use ts:transcript_id as room id
    room_id = f"ts:{transcript_id}"
    ws_manager = get_ws_manager()
-    await ws_manager.add_user_to_room(
-        room_id, websocket, subprotocol=negotiated_subprotocol
-    )
+    await ws_manager.add_user_to_room(room_id, websocket)

    try:
        # on first connection, send all events only to the current user
--- a/server/reflector/views/user_websocket.py
+++ b/server/reflector/views/user_websocket.py
@@ -4,22 +4,10 @@ from fastapi import APIRouter, WebSocket, WebSocketDisconnect

 from reflector.auth.auth_jwt import JWTAuth  # type: ignore
 from reflector.db.users import user_controller
-from reflector.ws_events import UserWsEvent
 from reflector.ws_manager import get_ws_manager

 router = APIRouter()

-
-@router.get(
-    "/events",
-    response_model=UserWsEvent,
-    summary="User WebSocket event schema",
-    description="Stub exposing the discriminated union of all user-level WS events for OpenAPI type generation. Real events are delivered over the WebSocket at the same path.",
-)
-async def user_get_websocket_events():
-    pass
-
-
 # Close code for unauthorized WebSocket connections
 UNAUTHORISED = 4401

--- a/server/reflector/webrtc_ports.py
+++ b/server/reflector/webrtc_ports.py
@@ -1,111 +0,0 @@
-"""
-Monkey-patch aioice to use a fixed UDP port range for ICE candidates,
-and optionally rewrite SDP to advertise a different host IP.
-
-This allows running the server in Docker with bridge networking
-(no network_mode: host) by:
-  1. Restricting ICE UDP ports to a known range that can be mapped in Docker
-  2. Replacing container-internal IPs with the Docker host IP in SDP answers
-"""
-
-import asyncio
-import socket
-
-from reflector.logger import logger
-
-
-def parse_port_range(range_str: str) -> tuple[int, int]:
-    """Parse a 'min-max' string into (min_port, max_port)."""
-    parts = range_str.split("-")
-    if len(parts) != 2:
-        raise ValueError(f"WEBRTC_PORT_RANGE must be 'min-max', got: {range_str!r}")
-    min_port, max_port = int(parts[0]), int(parts[1])
-    if not (1024 <= min_port <= max_port <= 65535):
-        raise ValueError(
-            f"Invalid port range: {min_port}-{max_port} "
-            "(must be 1024-65535 with min <= max)"
-        )
-    return min_port, max_port
-
-
-def patch_aioice_port_range(min_port: int, max_port: int) -> None:
-    """
-    Monkey-patch aioice so that ICE candidate UDP sockets bind to ports
-    within [min_port, max_port] instead of OS-assigned ephemeral ports.
-
-    Works by temporarily wrapping loop.create_datagram_endpoint() during
-    aioice's get_component_candidates() to intercept bind(addr, 0) calls.
-    """
-    import aioice.ice as _ice
-
-    _original = _ice.Connection.get_component_candidates
-    _state = {"next_port": min_port}
-
-    async def _patched_get_component_candidates(self, component, addresses, timeout=5):
-        loop = asyncio.get_event_loop()
-        _orig_create = loop.create_datagram_endpoint
-
-        async def _create_with_port_range(*args, **kwargs):
-            local_addr = kwargs.get("local_addr")
-            if local_addr and local_addr[1] == 0:
-                addr = local_addr[0]
-                # Try each port in the range (wrapping around)
-                attempts = max_port - min_port + 1
-                for _ in range(attempts):
-                    port = _state["next_port"]
-                    _state["next_port"] = (
-                        min_port
-                        if _state["next_port"] >= max_port
-                        else _state["next_port"] + 1
-                    )
-                    try:
-                        kwargs["local_addr"] = (addr, port)
-                        return await _orig_create(*args, **kwargs)
-                    except OSError:
-                        continue
-                # All ports exhausted, fall back to OS assignment
-                logger.warning(
-                    "All WebRTC ports in range exhausted, falling back to OS",
-                    min_port=min_port,
-                    max_port=max_port,
-                )
-                kwargs["local_addr"] = (addr, 0)
-            return await _orig_create(*args, **kwargs)
-
-        loop.create_datagram_endpoint = _create_with_port_range
-        try:
-            return await _original(self, component, addresses, timeout)
-        finally:
-            loop.create_datagram_endpoint = _orig_create
-
-    _ice.Connection.get_component_candidates = _patched_get_component_candidates
-    logger.info(
-        "aioice patched for WebRTC port range",
-        min_port=min_port,
-        max_port=max_port,
-    )
-
-
-def resolve_webrtc_host(host: str) -> str:
-    """Resolve a hostname or IP to an IP address for ICE candidate rewriting."""
-    try:
-        ip = socket.gethostbyname(host)
-        logger.info("Resolved WEBRTC_HOST", host=host, ip=ip)
-        return ip
-    except socket.gaierror:
-        logger.warning("Could not resolve WEBRTC_HOST, using as-is", host=host)
-        return host
-
-
-def rewrite_sdp_host(sdp: str, target_ip: str) -> str:
-    """
-    Replace container-internal IPs in SDP with target_ip so that
-    ICE candidates advertise a routable address.
-    """
-    import aioice.ice
-
-    container_ips = aioice.ice.get_host_addresses(use_ipv4=True, use_ipv6=False)
-    for ip in container_ips:
-        if ip != "127.0.0.1" and ip != target_ip:
-            sdp = sdp.replace(ip, target_ip)
-    return sdp
--- a/server/reflector/ws_events.py
+++ b/server/reflector/ws_events.py
@@ -1,188 +0,0 @@
-"""Typed WebSocket event models.
-
-Defines Pydantic models with Literal discriminators for all WS events.
-Exposed via stub GET endpoints so ``pnpm openapi`` generates TS discriminated unions.
-"""
-
-from typing import Annotated, Literal, Union
-
-from pydantic import BaseModel, Discriminator
-
-from reflector.db.transcripts import (
-    TranscriptActionItems,
-    TranscriptDuration,
-    TranscriptFinalLongSummary,
-    TranscriptFinalShortSummary,
-    TranscriptFinalTitle,
-    TranscriptStatus,
-    TranscriptText,
-    TranscriptWaveform,
-)
-from reflector.utils.string import NonEmptyString
-from reflector.views.transcripts import GetTranscriptTopic
-
-# ---------------------------------------------------------------------------
-# Transcript-level event name literal
-# ---------------------------------------------------------------------------
-
-TranscriptEventName = Literal[
-    "TRANSCRIPT",
-    "TOPIC",
-    "STATUS",
-    "FINAL_TITLE",
-    "FINAL_LONG_SUMMARY",
-    "FINAL_SHORT_SUMMARY",
-    "ACTION_ITEMS",
-    "DURATION",
-    "WAVEFORM",
-]
-
-# ---------------------------------------------------------------------------
-# Transcript-level WS event wrappers
-# ---------------------------------------------------------------------------
-
-
-class TranscriptWsTranscript(BaseModel):
-    event: Literal["TRANSCRIPT"] = "TRANSCRIPT"
-    data: TranscriptText
-
-
-class TranscriptWsTopic(BaseModel):
-    event: Literal["TOPIC"] = "TOPIC"
-    data: GetTranscriptTopic
-
-
-class TranscriptWsStatusData(BaseModel):
-    value: TranscriptStatus
-
-
-class TranscriptWsStatus(BaseModel):
-    event: Literal["STATUS"] = "STATUS"
-    data: TranscriptWsStatusData
-
-
-class TranscriptWsFinalTitle(BaseModel):
-    event: Literal["FINAL_TITLE"] = "FINAL_TITLE"
-    data: TranscriptFinalTitle
-
-
-class TranscriptWsFinalLongSummary(BaseModel):
-    event: Literal["FINAL_LONG_SUMMARY"] = "FINAL_LONG_SUMMARY"
-    data: TranscriptFinalLongSummary
-
-
-class TranscriptWsFinalShortSummary(BaseModel):
-    event: Literal["FINAL_SHORT_SUMMARY"] = "FINAL_SHORT_SUMMARY"
-    data: TranscriptFinalShortSummary
-
-
-class TranscriptWsActionItems(BaseModel):
-    event: Literal["ACTION_ITEMS"] = "ACTION_ITEMS"
-    data: TranscriptActionItems
-
-
-class TranscriptWsDuration(BaseModel):
-    event: Literal["DURATION"] = "DURATION"
-    data: TranscriptDuration
-
-
-class TranscriptWsWaveform(BaseModel):
-    event: Literal["WAVEFORM"] = "WAVEFORM"
-    data: TranscriptWaveform
-
-
-TranscriptWsEvent = Annotated[
-    Union[
-        TranscriptWsTranscript,
-        TranscriptWsTopic,
-        TranscriptWsStatus,
-        TranscriptWsFinalTitle,
-        TranscriptWsFinalLongSummary,
-        TranscriptWsFinalShortSummary,
-        TranscriptWsActionItems,
-        TranscriptWsDuration,
-        TranscriptWsWaveform,
-    ],
-    Discriminator("event"),
-]
-
-# ---------------------------------------------------------------------------
-# User-level event name literal
-# ---------------------------------------------------------------------------
-
-UserEventName = Literal[
-    "TRANSCRIPT_CREATED",
-    "TRANSCRIPT_DELETED",
-    "TRANSCRIPT_STATUS",
-    "TRANSCRIPT_FINAL_TITLE",
-    "TRANSCRIPT_DURATION",
-]
-
-# ---------------------------------------------------------------------------
-# User-level WS event data models
-# ---------------------------------------------------------------------------
-
-
-class UserTranscriptCreatedData(BaseModel):
-    id: NonEmptyString
-
-
-class UserTranscriptDeletedData(BaseModel):
-    id: NonEmptyString
-
-
-class UserTranscriptStatusData(BaseModel):
-    id: NonEmptyString
-    value: TranscriptStatus
-
-
-class UserTranscriptFinalTitleData(BaseModel):
-    id: NonEmptyString
-    title: NonEmptyString
-
-
-class UserTranscriptDurationData(BaseModel):
-    id: NonEmptyString
-    duration: float
-
-
-# ---------------------------------------------------------------------------
-# User-level WS event wrappers
-# ---------------------------------------------------------------------------
-
-
-class UserWsTranscriptCreated(BaseModel):
-    event: Literal["TRANSCRIPT_CREATED"] = "TRANSCRIPT_CREATED"
-    data: UserTranscriptCreatedData
-
-
-class UserWsTranscriptDeleted(BaseModel):
-    event: Literal["TRANSCRIPT_DELETED"] = "TRANSCRIPT_DELETED"
-    data: UserTranscriptDeletedData
-
-
-class UserWsTranscriptStatus(BaseModel):
-    event: Literal["TRANSCRIPT_STATUS"] = "TRANSCRIPT_STATUS"
-    data: UserTranscriptStatusData
-
-
-class UserWsTranscriptFinalTitle(BaseModel):
-    event: Literal["TRANSCRIPT_FINAL_TITLE"] = "TRANSCRIPT_FINAL_TITLE"
-    data: UserTranscriptFinalTitleData
-
-
-class UserWsTranscriptDuration(BaseModel):
-    event: Literal["TRANSCRIPT_DURATION"] = "TRANSCRIPT_DURATION"
-    data: UserTranscriptDurationData
-
-
-UserWsEvent = Annotated[
-    Union[
-        UserWsTranscriptCreated,
-        UserWsTranscriptDeleted,
-        UserWsTranscriptStatus,
-        UserWsTranscriptFinalTitle,
-        UserWsTranscriptDuration,
-    ],
-    Discriminator("event"),
-]
--- a/server/tests/test_hatchet_payload_thinning.py
+++ b/server/tests/test_hatchet_payload_thinning.py
@@ -0,0 +1,185 @@
+"""
+Tests for Hatchet payload thinning optimizations.
+
+Verifies that:
+1. TopicChunkInput no longer carries words
+2. TopicChunkResult no longer carries words
+3. words_to_segments() matches Transcript.as_segments(is_multitrack=False) — behavioral equivalence
+   for the extract_subjects refactoring
+4. TopicsResult can be constructed with empty transcript words
+"""
+
+from reflector.hatchet.workflows.models import TopicChunkResult
+from reflector.hatchet.workflows.topic_chunk_processing import TopicChunkInput
+from reflector.processors.types import Word
+
+
+def _make_words(speaker: int = 0, start: float = 0.0) -> list[Word]:
+    """Build a two-word, one-second utterance for `speaker` beginning at `start`."""
+    mid, stop = start + 0.5, start + 1.0
+    hello = Word(text="Hello", start=start, end=mid, speaker=speaker)
+    return [hello, Word(text=" world.", start=mid, end=stop, speaker=speaker)]
+
+
+class TestTopicChunkInputNoWords:
+    """TopicChunkInput must not have a words field."""
+
+    def test_no_words_field(self):
+        assert "words" not in TopicChunkInput.model_fields
+
+    def test_construction_without_words(self):
+        inp = TopicChunkInput(
+            chunk_index=0, chunk_text="Hello world.", timestamp=0.0, duration=1.0
+        )
+        assert inp.chunk_index == 0
+        assert inp.chunk_text == "Hello world."
+
+    def test_rejects_words_kwarg(self):
+        """Passing words= must either be rejected or never reach the payload."""
+        import pydantic
+
+        try:
+            inp = TopicChunkInput(
+                chunk_index=0,
+                chunk_text="text",
+                timestamp=0.0,
+                duration=1.0,
+                words=_make_words(),
+            )
+        except pydantic.ValidationError:
+            return  # extra="forbid": the kwarg is rejected outright
+        # Extra kwargs tolerated: they must still be dropped, not serialized
+        # (extra="allow" would silently put the words back into the payload).
+        assert "words" not in inp.model_dump()
+
+
+class TestTopicChunkResultNoWords:
+    """TopicChunkResult is expected to expose no words field."""
+
+    def test_no_words_field(self):
+        assert "words" not in TopicChunkResult.model_fields
+
+    def test_construction_without_words(self):
+        fields = {
+            "chunk_index": 0,
+            "title": "Test",
+            "summary": "Summary",
+            "timestamp": 0.0,
+            "duration": 1.0,
+        }
+        built = TopicChunkResult(**fields)
+        assert (built.title, built.chunk_index) == ("Test", 0)
+
+    def test_serialization_roundtrip(self):
+        """The dumped model has no words key and rebuilds into an equal model."""
+        original = TopicChunkResult(
+            chunk_index=0,
+            title="Test",
+            summary="Summary",
+            timestamp=0.0,
+            duration=1.0,
+        )
+        dumped = original.model_dump()
+        assert "words" not in dumped
+        rebuilt = TopicChunkResult(**dumped)
+        assert rebuilt == original
+
+
+class TestWordsToSegmentsBehavioralEquivalence:
+    """words_to_segments() must produce same output as Transcript.as_segments(is_multitrack=False).
+
+    This ensures the extract_subjects refactoring (from task output topic.transcript.as_segments()
+    to words_to_segments(db_topic.words)) preserves identical behavior.
+    """
+
+    def test_single_speaker(self):
+        from reflector.processors.types import Transcript as TranscriptType
+        from reflector.processors.types import words_to_segments
+
+        words = _make_words(speaker=0)
+        direct = words_to_segments(words)
+        via_transcript = TranscriptType(words=words).as_segments(is_multitrack=False)
+
+        assert len(direct) == len(via_transcript)
+        for d, v in zip(direct, via_transcript):
+            assert d.text == v.text
+            assert d.speaker == v.speaker
+            assert d.start == v.start
+            assert d.end == v.end
+
+    def test_multiple_speakers(self):
+        """Both paths must agree on text, speaker and timing for a two-speaker input."""
+        from reflector.processors.types import Transcript as TranscriptType
+        from reflector.processors.types import words_to_segments
+
+        words = [
+            Word(text="Hello", start=0.0, end=0.5, speaker=0),
+            Word(text=" world.", start=0.5, end=1.0, speaker=0),
+            Word(text=" How", start=1.0, end=1.5, speaker=1),
+            Word(text=" are", start=1.5, end=2.0, speaker=1),
+            Word(text=" you?", start=2.0, end=2.5, speaker=1),
+        ]
+        direct = words_to_segments(words)
+        via_transcript = TranscriptType(words=words).as_segments(is_multitrack=False)
+
+        assert len(direct) == len(via_transcript)
+        for d, v in zip(direct, via_transcript):
+            assert (d.text, d.speaker) == (v.text, v.speaker)
+            assert (d.start, d.end) == (v.start, v.end)
+
+    def test_empty_words(self):
+        from reflector.processors.types import Transcript as TranscriptType
+        from reflector.processors.types import words_to_segments
+
+        assert words_to_segments([]) == []
+        assert TranscriptType(words=[]).as_segments(is_multitrack=False) == []
+
+
+class TestTopicsResultEmptyWords:
+    """TopicsResult accepts topics whose transcripts hold no words."""
+
+    def test_construction_with_empty_words(self):
+        from reflector.hatchet.workflows.models import TopicsResult
+        from reflector.processors.types import TitleSummary
+        from reflector.processors.types import Transcript as TranscriptType
+
+        first = TitleSummary(
+            title="Topic A",
+            summary="Summary A",
+            timestamp=0.0,
+            duration=5.0,
+            transcript=TranscriptType(words=[]),
+        )
+        second = TitleSummary(
+            title="Topic B",
+            summary="Summary B",
+            timestamp=5.0,
+            duration=5.0,
+            transcript=TranscriptType(words=[]),
+        )
+        result = TopicsResult(topics=[first, second])
+
+        assert len(result.topics) == 2
+        # Each topic must still report an empty word list once wrapped
+        word_lists = [topic.transcript.words for topic in result.topics]
+        assert word_lists == [[], []]
+
+    def test_serialization_roundtrip(self):
+        from reflector.hatchet.workflows.models import TopicsResult
+        from reflector.processors.types import TitleSummary
+        from reflector.processors.types import Transcript as TranscriptType
+
+        topic = TitleSummary(
+            title="Topic",
+            summary="Summary",
+            timestamp=0.0,
+            duration=1.0,
+            transcript=TranscriptType(words=[]),
+        )
+        dumped = TopicsResult(topics=[topic]).model_dump()
+        # Rebuild the model from its dumped form
+        reconstructed = TopicsResult(**dumped)
+
+        assert len(reconstructed.topics) == 1
+        words_after = reconstructed.topics[0].transcript.words
+        assert words_after == []
--- a/www/.env.selfhosted.example
+++ b/www/.env.selfhosted.example
@@ -1,49 +0,0 @@
-# =======================================================
-# Reflector Self-Hosted Production — Frontend Configuration
-# Generated by: ./scripts/setup-selfhosted.sh
-# =======================================================
-
-# Site URL — set to your domain or server IP
-# The setup script auto-detects this on Linux.
-SITE_URL=https://localhost
-NEXTAUTH_URL=https://localhost
-NEXTAUTH_SECRET=changeme-generate-a-secure-random-string
-
-# API URLs
-# Public-facing (what the browser uses):
-API_URL=https://localhost
-WEBSOCKET_URL=auto
-
-# Internal Docker network (server-side rendering):
-SERVER_API_URL=http://server:1250
-KV_URL=redis://redis:6379
-
-# Authentication
-# Set to true when Authentik is configured
-FEATURE_REQUIRE_LOGIN=false
-
-# Nullify auth vars when not using Authentik
-AUTHENTIK_ISSUER=
-AUTHENTIK_REFRESH_TOKEN_URL=
-
-# =======================================================
-# Authentik OAuth/OIDC (Optional)
-# Uncomment and configure when enabling authentication.
-# See docsv2/selfhosted-production.md for setup instructions.
-# =======================================================
-# FEATURE_REQUIRE_LOGIN=true
-# AUTHENTIK_ISSUER=https://authentik.example.com/application/o/reflector
-# AUTHENTIK_REFRESH_TOKEN_URL=https://authentik.example.com/application/o/token/
-# AUTHENTIK_CLIENT_ID=your-client-id
-# AUTHENTIK_CLIENT_SECRET=your-client-secret
-
-# =======================================================
-# Feature Flags
-# =======================================================
-# FEATURE_ROOMS=true
-# FEATURE_BROWSE=true
-
-# =======================================================
-# Sentry (Optional)
-# =======================================================
-# SENTRY_DSN=
--- a/www/app/(app)/transcripts/useWebSockets.ts
+++ b/www/app/(app)/transcripts/useWebSockets.ts
@@ -1,22 +1,18 @@
 import { useEffect, useState } from "react";
 import { Topic, FinalSummary, Status } from "./webSocketTypes";
 import { useError } from "../../(errors)/errorContext";
-import type { components, operations } from "../../reflector-api";
+import type { components } from "../../reflector-api";
 type AudioWaveform = components["schemas"]["AudioWaveform"];
 type GetTranscriptSegmentTopic =
  components["schemas"]["GetTranscriptSegmentTopic"];
 import { useQueryClient } from "@tanstack/react-query";
-import { WEBSOCKET_URL } from "../../lib/apiClient";
+import { $api, WEBSOCKET_URL } from "../../lib/apiClient";
 import {
  invalidateTranscript,
  invalidateTranscriptTopics,
  invalidateTranscriptWaveform,
 } from "../../lib/apiHooks";
-import { useAuth } from "../../lib/AuthProvider";
-import { parseNonEmptyString } from "../../lib/utils";
-
-type TranscriptWsEvent =
-  operations["v1_transcript_get_websocket_events"]["responses"][200]["content"]["application/json"];
+import { NonEmptyString } from "../../lib/utils";

 export type UseWebSockets = {
  transcriptTextLive: string;
@@ -31,7 +27,6 @@ export type UseWebSockets = {
 };

 export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
-  const auth = useAuth();
  const [transcriptTextLive, setTranscriptTextLive] = useState<string>("");
  const [translateText, setTranslateText] = useState<string>("");
  const [title, setTitle] = useState<string>("");
@@ -336,168 +331,156 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
    };

    if (!transcriptId) return;
-    const tsId = parseNonEmptyString(transcriptId);

-    const MAX_RETRIES = 10;
    const url = `${WEBSOCKET_URL}/v1/transcripts/${transcriptId}/events`;
-    let ws: WebSocket | null = null;
-    let retryCount = 0;
-    let retryTimeout: ReturnType<typeof setTimeout> | null = null;
-    let intentionalClose = false;
+    let ws = new WebSocket(url);

-    const connect = () => {
-      const subprotocols = auth.accessToken
-        ? ["bearer", auth.accessToken]
-        : undefined;
-      ws = new WebSocket(url, subprotocols);
+    ws.onopen = () => {
+      console.debug("WebSocket connection opened");
+    };

-      ws.onopen = () => {
-        console.debug("WebSocket connection opened");
-        retryCount = 0;
-      };
+    ws.onmessage = (event) => {
+      const message = JSON.parse(event.data);

-      ws.onmessage = (event) => {
-        const message: TranscriptWsEvent = JSON.parse(event.data);
+      try {
+        switch (message.event) {
+          case "TRANSCRIPT":
+            const newText = (message.data.text ?? "").trim();
+            const newTranslation = (message.data.translation ?? "").trim();

-        try {
-          switch (message.event) {
-            case "TRANSCRIPT": {
-              const newText = (message.data.text ?? "").trim();
-              const newTranslation = (message.data.translation ?? "").trim();
+            if (!newText) break;

-              if (!newText) break;
+            console.debug("TRANSCRIPT event:", newText);
+            setTextQueue((prevQueue) => [...prevQueue, newText]);
+            setTranslationQueue((prevQueue) => [...prevQueue, newTranslation]);

-              console.debug("TRANSCRIPT event:", newText);
-              setTextQueue((prevQueue) => [...prevQueue, newText]);
-              setTranslationQueue((prevQueue) => [
-                ...prevQueue,
-                newTranslation,
-              ]);
+            setAccumulatedText((prevText) => prevText + " " + newText);
+            break;

-              setAccumulatedText((prevText) => prevText + " " + newText);
-              break;
-            }
-
-            case "TOPIC":
-              setTopics((prevTopics) => {
-                const topic = message.data;
-                const index = prevTopics.findIndex(
-                  (prevTopic) => prevTopic.id === topic.id,
-                );
-                if (index >= 0) {
-                  prevTopics[index] = topic;
-                  return prevTopics;
-                }
-                setAccumulatedText((prevText) =>
-                  prevText.slice(topic.transcript?.length ?? 0),
-                );
-                return [...prevTopics, topic];
-              });
-              console.debug("TOPIC event:", message.data);
-              invalidateTranscriptTopics(queryClient, tsId);
-              break;
-
-            case "FINAL_SHORT_SUMMARY":
-              console.debug("FINAL_SHORT_SUMMARY event:", message.data);
-              break;
-
-            case "FINAL_LONG_SUMMARY":
-              setFinalSummary({ summary: message.data.long_summary });
-              invalidateTranscript(queryClient, tsId);
-              break;
-
-            case "FINAL_TITLE":
-              console.debug("FINAL_TITLE event:", message.data);
-              setTitle(message.data.title);
-              invalidateTranscript(queryClient, tsId);
-              break;
-
-            case "WAVEFORM":
-              console.debug(
-                "WAVEFORM event length:",
-                message.data.waveform.length,
+          case "TOPIC":
+            setTopics((prevTopics) => {
+              const topic = message.data as Topic;
+              const index = prevTopics.findIndex(
+                (prevTopic) => prevTopic.id === topic.id,
              );
-              setWaveForm({ data: message.data.waveform });
-              invalidateTranscriptWaveform(queryClient, tsId);
-              break;
-
-            case "DURATION":
-              console.debug("DURATION event:", message.data);
-              setDuration(message.data.duration);
-              break;
-
-            case "STATUS":
-              console.log("STATUS event:", message.data);
-              if (message.data.value === "error") {
-                setError(
-                  Error("Websocket error status"),
-                  "There was an error processing this meeting.",
-                );
+              if (index >= 0) {
+                prevTopics[index] = topic;
+                return prevTopics;
              }
-              setStatus(message.data);
-              invalidateTranscript(queryClient, tsId);
-              if (message.data.value === "ended") {
-                intentionalClose = true;
-                ws?.close();
-              }
-              break;
-
-            case "ACTION_ITEMS":
-              console.debug("ACTION_ITEMS event:", message.data);
-              invalidateTranscript(queryClient, tsId);
-              break;
-
-            default: {
-              const _exhaustive: never = message;
-              console.warn(
-                `Received unknown WebSocket event: ${(_exhaustive as TranscriptWsEvent).event}`,
+              setAccumulatedText((prevText) =>
+                prevText.slice(topic.transcript.length),
              );
-            }
-          }
-        } catch (error) {
-          setError(error);
-        }
-      };

-      ws.onerror = (error) => {
-        console.error("WebSocket error:", error);
-      };
-
-      ws.onclose = (event) => {
-        console.debug("WebSocket connection closed, code:", event.code);
-        if (intentionalClose) return;
-
-        const normalCodes = [1000, 1001, 1005];
-        if (normalCodes.includes(event.code)) return;
-
-        if (retryCount < MAX_RETRIES) {
-          const delay = Math.min(1000 * Math.pow(2, retryCount), 30000);
-          console.log(
-            `WebSocket reconnecting in ${delay}ms (attempt ${retryCount + 1}/${MAX_RETRIES})`,
-          );
-          if (retryCount === 0) {
-            setError(
-              new Error("WebSocket connection lost"),
-              "Connection lost. Reconnecting...",
+              return [...prevTopics, topic];
+            });
+            console.debug("TOPIC event:", message.data);
+            // Invalidate topics query to sync with WebSocket data
+            invalidateTranscriptTopics(
+              queryClient,
+              transcriptId as NonEmptyString,
            );
-          }
-          retryCount++;
-          retryTimeout = setTimeout(connect, delay);
-        } else {
+            break;
+
+          case "FINAL_SHORT_SUMMARY":
+            console.debug("FINAL_SHORT_SUMMARY event:", message.data);
+            break;
+
+          case "FINAL_LONG_SUMMARY":
+            if (message.data) {
+              setFinalSummary(message.data);
+              // Invalidate transcript query to sync summary
+              invalidateTranscript(queryClient, transcriptId as NonEmptyString);
+            }
+            break;
+
+          case "FINAL_TITLE":
+            console.debug("FINAL_TITLE event:", message.data);
+            if (message.data) {
+              setTitle(message.data.title);
+              // Invalidate transcript query to sync title
+              invalidateTranscript(queryClient, transcriptId as NonEmptyString);
+            }
+            break;
+
+          case "WAVEFORM":
+            console.debug(
+              "WAVEFORM event length:",
+              message.data.waveform.length,
+            );
+            if (message.data) {
+              setWaveForm(message.data.waveform);
+              invalidateTranscriptWaveform(
+                queryClient,
+                transcriptId as NonEmptyString,
+              );
+            }
+            break;
+          case "DURATION":
+            console.debug("DURATION event:", message.data);
+            if (message.data) {
+              setDuration(message.data.duration);
+            }
+            break;
+
+          case "STATUS":
+            console.log("STATUS event:", message.data);
+            if (message.data.value === "error") {
+              setError(
+                Error("Websocket error status"),
+                "There was an error processing this meeting.",
+              );
+            }
+            setStatus(message.data);
+            invalidateTranscript(queryClient, transcriptId as NonEmptyString);
+            if (message.data.value === "ended") {
+              ws.close();
+            }
+            break;
+
+          default:
+            setError(
+              new Error(`Received unknown WebSocket event: ${message.event}`),
+            );
+        }
+      } catch (error) {
+        setError(error);
+      }
+    };
+
+    ws.onerror = (error) => {
+      console.error("WebSocket error:", error);
+      setError(new Error("A WebSocket error occurred."));
+    };
+
+    ws.onclose = (event) => {
+      console.debug("WebSocket connection closed");
+      switch (event.code) {
+        case 1000: // Normal Closure:
+          break;
+        case 1005: // Closure by client FF
+          break;
+        case 1001: // Navigate away
+          break;
+        case 1006: // Closed by client Chrome
+          console.warn(
+            "WebSocket closed by client, likely duplicated connection in react dev mode",
+          );
+          break;
+        default:
          setError(
            new Error(`WebSocket closed unexpectedly with code: ${event.code}`),
            "Disconnected from the server. Please refresh the page.",
          );
-        }
-      };
+          console.log(
+            "Socket is closed. Reconnect will be attempted in 1 second.",
+            event.reason,
+          );
+        // todo handle reconnect with socket.io
+      }
    };

-    connect();
-
    return () => {
-      intentionalClose = true;
-      if (retryTimeout) clearTimeout(retryTimeout);
-      ws?.close();
+      ws.close();
    };
  }, [transcriptId]);

--- a/www/app/lib/UserEventsProvider.tsx
+++ b/www/app/lib/UserEventsProvider.tsx
@@ -4,12 +4,14 @@ import React, { useEffect, useRef } from "react";
 import { useQueryClient } from "@tanstack/react-query";
 import { WEBSOCKET_URL } from "./apiClient";
 import { useAuth } from "./AuthProvider";
-import { invalidateTranscript, invalidateTranscriptLists } from "./apiHooks";
-import { parseNonEmptyString } from "./utils";
-import type { operations } from "../reflector-api";
+import { z } from "zod";
+import { invalidateTranscriptLists, TRANSCRIPT_SEARCH_URL } from "./apiHooks";

-type UserWsEvent =
-  operations["v1_user_get_websocket_events"]["responses"][200]["content"]["application/json"];
+const UserEvent = z.object({
+  event: z.string(), // only the event name is validated; the schema declares no other field
+});
+
+type UserEvent = z.TypeOf<typeof UserEvent>;

 class UserEventsStore {
  private socket: WebSocket | null = null;
@@ -131,26 +133,23 @@ export function UserEventsProvider({
    if (!detachRef.current) {
      const onMessage = (event: MessageEvent) => {
        try {
-          const msg: UserWsEvent = JSON.parse(event.data);
+          const msg = UserEvent.parse(JSON.parse(event.data));
+          const eventName = msg.event;

-          switch (msg.event) {
+          const invalidateList = () => invalidateTranscriptLists(queryClient);
+
+          switch (eventName) {
            case "TRANSCRIPT_CREATED":
            case "TRANSCRIPT_DELETED":
            case "TRANSCRIPT_STATUS":
            case "TRANSCRIPT_FINAL_TITLE":
            case "TRANSCRIPT_DURATION":
-              invalidateTranscriptLists(queryClient).then(() => {});
-              invalidateTranscript(
-                queryClient,
-                parseNonEmptyString(msg.data.id),
-              ).then(() => {});
+              invalidateList().then(() => {});
+              break;
+
+            default:
+              // Ignore other content events for list updates
              break;
-            default: {
-              const _exhaustive: never = msg;
-              console.warn(
-                `Unknown user event: ${(_exhaustive as UserWsEvent).event}`,
-              );
-            }
          }
        } catch (err) {
          console.warn("Invalid user event message", event.data);
--- a/www/app/lib/apiClient.tsx
+++ b/www/app/lib/apiClient.tsx
@@ -13,33 +13,9 @@ export const API_URL = !isBuildPhase
  ? getClientEnv().API_URL
  : "http://localhost";

-/**
- * Derive a WebSocket URL from the API_URL.
- * Handles full URLs (http://host/api, https://host/api) and relative paths (/api).
- * For full URLs, ws/wss is derived from the URL's own protocol.
- * For relative URLs, ws/wss is derived from window.location.protocol.
- */
-const deriveWebSocketUrl = (apiUrl: string): string => {
-  if (typeof window === "undefined") {
-    return "ws://localhost";
-  }
-  const parsed = new URL(apiUrl, window.location.origin);
-  const wsProtocol = parsed.protocol === "https:" ? "wss:" : "ws:";
-  // Normalize: remove trailing slash from pathname
-  const pathname = parsed.pathname.replace(/\/+$/, "");
-  return `${wsProtocol}//${parsed.host}${pathname}`;
-};
-
-const resolveWebSocketUrl = (): string => {
-  if (isBuildPhase) return "ws://localhost";
-  const raw = getClientEnv().WEBSOCKET_URL;
-  if (!raw || raw === "auto") {
-    return deriveWebSocketUrl(API_URL);
-  }
-  return raw;
-};
-
-export const WEBSOCKET_URL = resolveWebSocketUrl();
+export const WEBSOCKET_URL = isBuildPhase
+  ? "ws://localhost" // build-phase placeholder; the client env is not read in this branch
+  : getClientEnv().WEBSOCKET_URL || "ws://127.0.0.1:1250"; // local default when unset or empty

 export const client = createClient<paths>({
  baseUrl: API_URL,
--- a/www/app/lib/apiHooks.ts
+++ b/www/app/lib/apiHooks.ts
@@ -7,7 +7,6 @@ import type { components } from "../reflector-api";
 import { useAuth } from "./AuthProvider";
 import { MeetingId } from "./types";
 import { NonEmptyString } from "./utils";
-import type { TranscriptStatus } from "./transcript";

 /*
 * XXX error types returned from the hooks are not always correct; declared types are ValidationError but real type could be string or any other
@@ -105,12 +104,6 @@ export function useTranscriptProcess() {
  });
 }

-const ACTIVE_TRANSCRIPT_STATUSES = new Set<TranscriptStatus>([
-  "processing",
-  "uploaded",
-  "recording",
-]);
-
 export function useTranscriptGet(transcriptId: NonEmptyString | null) {
  return $api.useQuery(
    "get",
@@ -124,10 +117,6 @@ export function useTranscriptGet(transcriptId: NonEmptyString | null) {
    },
    {
      enabled: !!transcriptId,
-      refetchInterval: (query) => {
-        const status = query.state.data?.status;
-        return status && ACTIVE_TRANSCRIPT_STATUSES.has(status) ? 5000 : false;
-      },
    },
  );
 }
--- a/www/app/lib/clientEnv.ts
+++ b/www/app/lib/clientEnv.ts
@@ -2,7 +2,6 @@ import {
  assertExists,
  assertExistsAndNonEmptyString,
  NonEmptyString,
-  parseMaybeNonEmptyString,
  parseNonEmptyString,
 } from "./utils";
 import { isBuildPhase } from "./next";
@@ -75,14 +74,14 @@ export const getClientEnvServer = (): ClientEnvCommon => {
  if (isBuildPhase) {
    return {
      API_URL: getNextEnvVar("API_URL"),
-      WEBSOCKET_URL: parseMaybeNonEmptyString(process.env.WEBSOCKET_URL ?? ""),
+      WEBSOCKET_URL: getNextEnvVar("WEBSOCKET_URL"),
      ...features,
    };
  }

  clientEnv = {
    API_URL: getNextEnvVar("API_URL"),
-    WEBSOCKET_URL: parseMaybeNonEmptyString(process.env.WEBSOCKET_URL ?? ""),
+    WEBSOCKET_URL: getNextEnvVar("WEBSOCKET_URL"),
    ...features,
  };
  return clientEnv;
--- a/www/app/reflector-api.d.ts
+++ b/www/app/reflector-api.d.ts
@@ -568,10 +568,7 @@ export interface paths {
      path?: never;
      cookie?: never;
    };
-    /**
-     * Transcript WebSocket event schema
-     * @description Stub exposing the discriminated union of all transcript-level WS events for OpenAPI type generation. Real events are delivered over the WebSocket at the same path.
-     */
+    /** Transcript Get Websocket Events */
    get: operations["v1_transcript_get_websocket_events"];
    put?: never;
    post?: never;
@@ -667,26 +664,6 @@ export interface paths {
    patch?: never;
    trace?: never;
  };
-  "/v1/events": {
-    parameters: {
-      query?: never;
-      header?: never;
-      path?: never;
-      cookie?: never;
-    };
-    /**
-     * User WebSocket event schema
-     * @description Stub exposing the discriminated union of all user-level WS events for OpenAPI type generation. Real events are delivered over the WebSocket at the same path.
-     */
-    get: operations["v1_user_get_websocket_events"];
-    put?: never;
-    post?: never;
-    delete?: never;
-    options?: never;
-    head?: never;
-    patch?: never;
-    trace?: never;
-  };
  "/v1/zulip/streams": {
    parameters: {
      query?: never;
@@ -1032,8 +1009,6 @@ export interface components {
      room_name?: string | null;
      /** Audio Deleted */
      audio_deleted?: boolean | null;
-      /** Change Seq */
-      change_seq?: number | null;
    };
    /** GetTranscriptSegmentTopic */
    GetTranscriptSegmentTopic: {
@@ -1180,8 +1155,6 @@ export interface components {
      room_name?: string | null;
      /** Audio Deleted */
      audio_deleted?: boolean | null;
-      /** Change Seq */
-      change_seq?: number | null;
      /** Participants */
      participants:
        | components["schemas"]["TranscriptParticipantWithEmail"][]
@@ -1245,8 +1218,6 @@ export interface components {
      room_name?: string | null;
      /** Audio Deleted */
      audio_deleted?: boolean | null;
-      /** Change Seq */
-      change_seq?: number | null;
      /** Participants */
      participants:
        | components["schemas"]["TranscriptParticipantWithEmail"][]
@@ -1311,8 +1282,6 @@ export interface components {
      room_name?: string | null;
      /** Audio Deleted */
      audio_deleted?: boolean | null;
-      /** Change Seq */
-      change_seq?: number | null;
      /** Participants */
      participants:
        | components["schemas"]["TranscriptParticipantWithEmail"][]
@@ -1384,8 +1353,6 @@ export interface components {
      room_name?: string | null;
      /** Audio Deleted */
      audio_deleted?: boolean | null;
-      /** Change Seq */
-      change_seq?: number | null;
      /** Participants */
      participants:
        | components["schemas"]["TranscriptParticipantWithEmail"][]
@@ -1459,8 +1426,6 @@ export interface components {
      room_name?: string | null;
      /** Audio Deleted */
      audio_deleted?: boolean | null;
-      /** Change Seq */
-      change_seq?: number | null;
      /** Participants */
      participants:
        | components["schemas"]["TranscriptParticipantWithEmail"][]
@@ -1846,8 +1811,6 @@ export interface components {
       * @default 0
       */
      total_match_count: number;
-      /** Change Seq */
-      change_seq?: number | null;
    };
    /**
     * SourceKind
@@ -1914,33 +1877,6 @@ export interface components {
      /** Name */
      name: string;
    };
-    /** TranscriptActionItems */
-    TranscriptActionItems: {
-      /** Action Items */
-      action_items: {
-        [key: string]: unknown;
-      };
-    };
-    /** TranscriptDuration */
-    TranscriptDuration: {
-      /** Duration */
-      duration: number;
-    };
-    /** TranscriptFinalLongSummary */
-    TranscriptFinalLongSummary: {
-      /** Long Summary */
-      long_summary: string;
-    };
-    /** TranscriptFinalShortSummary */
-    TranscriptFinalShortSummary: {
-      /** Short Summary */
-      short_summary: string;
-    };
-    /** TranscriptFinalTitle */
-    TranscriptFinalTitle: {
-      /** Title */
-      title: string;
-    };
    /** TranscriptParticipant */
    TranscriptParticipant: {
      /** Id */
@@ -1981,113 +1917,6 @@ export interface components {
      /** End */
      end: number;
    };
-    /** TranscriptText */
-    TranscriptText: {
-      /** Text */
-      text: string;
-      /** Translation */
-      translation: string | null;
-    };
-    /** TranscriptWaveform */
-    TranscriptWaveform: {
-      /** Waveform */
-      waveform: number[];
-    };
-    /** TranscriptWsActionItems */
-    TranscriptWsActionItems: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "ACTION_ITEMS";
-      data: components["schemas"]["TranscriptActionItems"];
-    };
-    /** TranscriptWsDuration */
-    TranscriptWsDuration: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "DURATION";
-      data: components["schemas"]["TranscriptDuration"];
-    };
-    /** TranscriptWsFinalLongSummary */
-    TranscriptWsFinalLongSummary: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "FINAL_LONG_SUMMARY";
-      data: components["schemas"]["TranscriptFinalLongSummary"];
-    };
-    /** TranscriptWsFinalShortSummary */
-    TranscriptWsFinalShortSummary: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "FINAL_SHORT_SUMMARY";
-      data: components["schemas"]["TranscriptFinalShortSummary"];
-    };
-    /** TranscriptWsFinalTitle */
-    TranscriptWsFinalTitle: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "FINAL_TITLE";
-      data: components["schemas"]["TranscriptFinalTitle"];
-    };
-    /** TranscriptWsStatus */
-    TranscriptWsStatus: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "STATUS";
-      data: components["schemas"]["TranscriptWsStatusData"];
-    };
-    /** TranscriptWsStatusData */
-    TranscriptWsStatusData: {
-      /**
-       * Value
-       * @enum {string}
-       */
-      value:
-        | "idle"
-        | "uploaded"
-        | "recording"
-        | "processing"
-        | "error"
-        | "ended";
-    };
-    /** TranscriptWsTopic */
-    TranscriptWsTopic: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "TOPIC";
-      data: components["schemas"]["GetTranscriptTopic"];
-    };
-    /** TranscriptWsTranscript */
-    TranscriptWsTranscript: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "TRANSCRIPT";
-      data: components["schemas"]["TranscriptText"];
-    };
-    /** TranscriptWsWaveform */
-    TranscriptWsWaveform: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "WAVEFORM";
-      data: components["schemas"]["TranscriptWaveform"];
-    };
    /** UpdateParticipant */
    UpdateParticipant: {
      /** Speaker */
@@ -2158,109 +1987,6 @@ export interface components {
      /** Email */
      email: string | null;
    };
-    /** UserTranscriptCreatedData */
-    UserTranscriptCreatedData: {
-      /**
-       * Id
-       * @description A non-empty string
-       */
-      id: string;
-    };
-    /** UserTranscriptDeletedData */
-    UserTranscriptDeletedData: {
-      /**
-       * Id
-       * @description A non-empty string
-       */
-      id: string;
-    };
-    /** UserTranscriptDurationData */
-    UserTranscriptDurationData: {
-      /**
-       * Id
-       * @description A non-empty string
-       */
-      id: string;
-      /** Duration */
-      duration: number;
-    };
-    /** UserTranscriptFinalTitleData */
-    UserTranscriptFinalTitleData: {
-      /**
-       * Id
-       * @description A non-empty string
-       */
-      id: string;
-      /**
-       * Title
-       * @description A non-empty string
-       */
-      title: string;
-    };
-    /** UserTranscriptStatusData */
-    UserTranscriptStatusData: {
-      /**
-       * Id
-       * @description A non-empty string
-       */
-      id: string;
-      /**
-       * Value
-       * @enum {string}
-       */
-      value:
-        | "idle"
-        | "uploaded"
-        | "recording"
-        | "processing"
-        | "error"
-        | "ended";
-    };
-    /** UserWsTranscriptCreated */
-    UserWsTranscriptCreated: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "TRANSCRIPT_CREATED";
-      data: components["schemas"]["UserTranscriptCreatedData"];
-    };
-    /** UserWsTranscriptDeleted */
-    UserWsTranscriptDeleted: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "TRANSCRIPT_DELETED";
-      data: components["schemas"]["UserTranscriptDeletedData"];
-    };
-    /** UserWsTranscriptDuration */
-    UserWsTranscriptDuration: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "TRANSCRIPT_DURATION";
-      data: components["schemas"]["UserTranscriptDurationData"];
-    };
-    /** UserWsTranscriptFinalTitle */
-    UserWsTranscriptFinalTitle: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "TRANSCRIPT_FINAL_TITLE";
-      data: components["schemas"]["UserTranscriptFinalTitleData"];
-    };
-    /** UserWsTranscriptStatus */
-    UserWsTranscriptStatus: {
-      /**
-       * @description discriminator enum property added by openapi-typescript
-       * @enum {string}
-       */
-      event: "TRANSCRIPT_STATUS";
-      data: components["schemas"]["UserTranscriptStatusData"];
-    };
    /** ValidationError */
    ValidationError: {
      /** Location */
@@ -2967,8 +2693,6 @@ export interface operations {
        source_kind?: components["schemas"]["SourceKind"] | null;
        room_id?: string | null;
        search_term?: string | null;
-        change_seq_from?: number | null;
-        sort_by?: ("created_at" | "change_seq") | null;
        /** @description Page number */
        page?: number;
        /** @description Page size */
@@ -3699,16 +3423,7 @@ export interface operations {
          [name: string]: unknown;
        };
        content: {
-          "application/json":
-            | components["schemas"]["TranscriptWsTranscript"]
-            | components["schemas"]["TranscriptWsTopic"]
-            | components["schemas"]["TranscriptWsStatus"]
-            | components["schemas"]["TranscriptWsFinalTitle"]
-            | components["schemas"]["TranscriptWsFinalLongSummary"]
-            | components["schemas"]["TranscriptWsFinalShortSummary"]
-            | components["schemas"]["TranscriptWsActionItems"]
-            | components["schemas"]["TranscriptWsDuration"]
-            | components["schemas"]["TranscriptWsWaveform"];
+          "application/json": unknown;
        };
      };
      /** @description Validation Error */
@@ -3892,31 +3607,6 @@ export interface operations {
      };
    };
  };
-  v1_user_get_websocket_events: {
-    parameters: {
-      query?: never;
-      header?: never;
-      path?: never;
-      cookie?: never;
-    };
-    requestBody?: never;
-    responses: {
-      /** @description Successful Response */
-      200: {
-        headers: {
-          [name: string]: unknown;
-        };
-        content: {
-          "application/json":
-            | components["schemas"]["UserWsTranscriptCreated"]
-            | components["schemas"]["UserWsTranscriptDeleted"]
-            | components["schemas"]["UserWsTranscriptStatus"]
-            | components["schemas"]["UserWsTranscriptFinalTitle"]
-            | components["schemas"]["UserWsTranscriptDuration"];
-        };
-      };
-    };
-  };
  v1_zulip_get_streams: {
    parameters: {
      query?: never;
--- a/yarn.lock
+++ b/yarn.lock
@@ -1,4 +0,0 @@
-# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
-# yarn lockfile v1
-
-