diff --git a/.gitignore b/.gitignore index 2cebdf5c..d6532d82 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ www/.env.production docs/pnpm-lock.yaml .secrets opencode.json + +vibedocs/ diff --git a/Caddyfile.selfhosted.example b/Caddyfile.selfhosted.example new file mode 100644 index 00000000..4abb2762 --- /dev/null +++ b/Caddyfile.selfhosted.example @@ -0,0 +1,25 @@ +# Reflector self-hosted production — HTTPS via Caddy reverse proxy +# Copy to Caddyfile: cp Caddyfile.selfhosted.example Caddyfile +# Run: ./scripts/setup-selfhosted.sh --ollama-gpu --garage --caddy +# +# DOMAIN defaults to localhost (self-signed cert). +# Set to your real domain for automatic Let's Encrypt: +# export DOMAIN=reflector.example.com +# +# TLS_MODE defaults to "internal" (self-signed). +# Set to "" for automatic Let's Encrypt (requires real domain + ports 80/443 open): +# export TLS_MODE="" + +{$DOMAIN:localhost} { + tls {$TLS_MODE:internal} + + handle /v1/* { + reverse_proxy server:1250 + } + handle /health { + reverse_proxy server:1250 + } + handle { + reverse_proxy web:3000 + } +} diff --git a/docker-compose.selfhosted.yml b/docker-compose.selfhosted.yml new file mode 100644 index 00000000..a6830b13 --- /dev/null +++ b/docker-compose.selfhosted.yml @@ -0,0 +1,315 @@ +# Self-hosted production Docker Compose — single file for everything. 
+# +# Usage: ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy +# or: docker compose -f docker-compose.selfhosted.yml --profile gpu [--profile ollama-gpu] [--profile garage] [--profile caddy] up -d +# +# Specialized models (pick ONE — required): +# --profile gpu NVIDIA GPU for transcription/diarization/translation +# --profile cpu CPU-only for transcription/diarization/translation +# +# Local LLM (optional — for summarization/topics): +# --profile ollama-gpu Local Ollama with NVIDIA GPU +# --profile ollama-cpu Local Ollama on CPU only +# +# Other optional services: +# --profile garage Local S3-compatible storage (Garage) +# --profile caddy Reverse proxy with auto-SSL +# +# Prerequisites: +# 1. Run ./scripts/setup-selfhosted.sh to generate env files and secrets +# 2. Or manually create server/.env and www/.env from the .selfhosted.example templates + +services: + # =========================================================== + # Always-on core services (no profile required) + # =========================================================== + + server: + build: + context: ./server + dockerfile: Dockerfile + image: monadicalsas/reflector-backend:latest + restart: unless-stopped + ports: + - "127.0.0.1:1250:1250" + - "50000-50100:50000-50100/udp" + env_file: + - ./server/.env + environment: + ENTRYPOINT: server + # Docker-internal overrides (always correct inside compose network) + DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector + REDIS_HOST: redis + CELERY_BROKER_URL: redis://redis:6379/1 + CELERY_RESULT_BACKEND: redis://redis:6379/1 + HATCHET_CLIENT_SERVER_URL: "" + HATCHET_CLIENT_HOST_PORT: "" + # Specialized models via gpu/cpu container (aliased as "transcription") + TRANSCRIPT_BACKEND: modal + TRANSCRIPT_URL: http://transcription:8000 + TRANSCRIPT_MODAL_API_KEY: selfhosted + DIARIZATION_BACKEND: modal + DIARIZATION_URL: http://transcription:8000 + TRANSLATION_BACKEND: modal + TRANSLATE_URL: http://transcription:8000 + # 
WebRTC: fixed UDP port range for ICE candidates (mapped above) + WEBRTC_PORT_RANGE: "50000-50100" + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_started + volumes: + - server_data:/app/data + + worker: + build: + context: ./server + dockerfile: Dockerfile + image: monadicalsas/reflector-backend:latest + restart: unless-stopped + env_file: + - ./server/.env + environment: + ENTRYPOINT: worker + DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector + REDIS_HOST: redis + CELERY_BROKER_URL: redis://redis:6379/1 + CELERY_RESULT_BACKEND: redis://redis:6379/1 + HATCHET_CLIENT_SERVER_URL: "" + HATCHET_CLIENT_HOST_PORT: "" + TRANSCRIPT_BACKEND: modal + TRANSCRIPT_URL: http://transcription:8000 + TRANSCRIPT_MODAL_API_KEY: selfhosted + DIARIZATION_BACKEND: modal + DIARIZATION_URL: http://transcription:8000 + TRANSLATION_BACKEND: modal + TRANSLATE_URL: http://transcription:8000 + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_started + volumes: + - server_data:/app/data + + beat: + build: + context: ./server + dockerfile: Dockerfile + image: monadicalsas/reflector-backend:latest + restart: unless-stopped + env_file: + - ./server/.env + environment: + ENTRYPOINT: beat + DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector + REDIS_HOST: redis + CELERY_BROKER_URL: redis://redis:6379/1 + CELERY_RESULT_BACKEND: redis://redis:6379/1 + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_started + + web: + build: + context: ./www + dockerfile: Dockerfile + image: monadicalsas/reflector-frontend:latest + restart: unless-stopped + ports: + - "127.0.0.1:3000:3000" + env_file: + - ./www/.env + environment: + NODE_ENV: production + SERVER_API_URL: http://server:1250 + KV_URL: redis://redis:6379 + KV_USE_TLS: "false" + AUTHENTIK_ISSUER: "" + AUTHENTIK_REFRESH_TOKEN_URL: "" + depends_on: + - redis + + redis: + image: redis:7.2-alpine + 
restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 30s + timeout: 3s + retries: 3 + volumes: + - redis_data:/data + + postgres: + image: postgres:17-alpine + restart: unless-stopped + environment: + POSTGRES_USER: reflector + POSTGRES_PASSWORD: reflector + POSTGRES_DB: reflector + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U reflector"] + interval: 30s + timeout: 3s + retries: 3 + + # =========================================================== + # Specialized model containers (transcription, diarization, translation) + # Both gpu and cpu get alias "transcription" so server config never changes. + # =========================================================== + + gpu: + build: + context: ./gpu/self_hosted + dockerfile: Dockerfile + profiles: [gpu] + restart: unless-stopped + ports: + - "127.0.0.1:8000:8000" + environment: + HF_TOKEN: ${HF_TOKEN:-} + volumes: + - gpu_cache:/root/.cache + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/docs"] + interval: 15s + timeout: 5s + retries: 10 + start_period: 120s + networks: + default: + aliases: + - transcription + + cpu: + build: + context: ./gpu/self_hosted + dockerfile: Dockerfile.cpu + profiles: [cpu] + restart: unless-stopped + ports: + - "127.0.0.1:8000:8000" + environment: + HF_TOKEN: ${HF_TOKEN:-} + volumes: + - gpu_cache:/root/.cache + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/docs"] + interval: 15s + timeout: 5s + retries: 10 + start_period: 120s + networks: + default: + aliases: + - transcription + + # =========================================================== + # Ollama — local LLM for summarization & topic detection + # Only started with --ollama-gpu or --ollama-cpu modes. 
+ # =========================================================== + + ollama: + image: ollama/ollama:latest + profiles: [ollama-gpu] + restart: unless-stopped + ports: + - "127.0.0.1:11434:11434" + volumes: + - ollama_data:/root/.ollama + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"] + interval: 10s + timeout: 5s + retries: 5 + + ollama-cpu: + image: ollama/ollama:latest + profiles: [ollama-cpu] + restart: unless-stopped + ports: + - "127.0.0.1:11434:11434" + volumes: + - ollama_data:/root/.ollama + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"] + interval: 10s + timeout: 5s + retries: 5 + + # =========================================================== + # Garage — local S3-compatible object storage (optional) + # =========================================================== + + garage: + image: dxflrs/garage:v1.1.0 + profiles: [garage] + restart: unless-stopped + ports: + - "3900:3900" # S3 API + - "3903:3903" # Admin API + volumes: + - garage_data:/var/lib/garage/data + - garage_meta:/var/lib/garage/meta + - ./data/garage.toml:/etc/garage.toml:ro + healthcheck: + test: ["CMD", "/garage", "stats"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 5s + + # =========================================================== + # Caddy — reverse proxy with automatic SSL (optional) + # Maps 80:80 and 443:443 — only exposed ports in the stack. 
+ # =========================================================== + + caddy: + image: caddy:2-alpine + profiles: [caddy] + restart: unless-stopped + ports: + - "80:80" + - "443:443" + volumes: + - ./Caddyfile:/etc/caddy/Caddyfile:ro + - caddy_data:/data + - caddy_config:/config + depends_on: + - web + - server + +volumes: + postgres_data: + redis_data: + server_data: + gpu_cache: + garage_data: + garage_meta: + ollama_data: + caddy_data: + caddy_config: + +networks: + default: + attachable: true diff --git a/docsv2/selfhosted-architecture.md b/docsv2/selfhosted-architecture.md new file mode 100644 index 00000000..0b764d7c --- /dev/null +++ b/docsv2/selfhosted-architecture.md @@ -0,0 +1,468 @@ +# How the Self-Hosted Setup Works + +This document explains the internals of the self-hosted deployment: how the setup script orchestrates everything, how the Docker Compose profiles work, how services communicate, and how configuration flows from flags to running containers. + +> For quick-start instructions and flag reference, see [Self-Hosted Production Deployment](selfhosted-production.md). + +## Table of Contents + +- [Overview](#overview) +- [The Setup Script Step by Step](#the-setup-script-step-by-step) +- [Docker Compose Profile System](#docker-compose-profile-system) +- [Service Architecture](#service-architecture) +- [Configuration Flow](#configuration-flow) +- [Storage Architecture](#storage-architecture) +- [SSL/TLS and Reverse Proxy](#ssltls-and-reverse-proxy) +- [Build vs Pull Workflow](#build-vs-pull-workflow) +- [Background Task Processing](#background-task-processing) +- [Network and Port Layout](#network-and-port-layout) + +--- + +## Overview + +The self-hosted deployment runs the entire Reflector platform on a single server using Docker Compose. A single bash script (`scripts/setup-selfhosted.sh`) handles all configuration and orchestration. 
The key design principles are: + +- **One command to deploy** — flags select which features to enable +- **Idempotent** — safe to re-run without losing existing configuration +- **Profile-based composition** — Docker Compose profiles activate optional services +- **No external dependencies required** — with `--garage` and `--ollama-*`, everything runs locally + +## The Setup Script Step by Step + +The script (`scripts/setup-selfhosted.sh`) runs 8 sequential steps, numbered 0–7. Here's what each one does and why. + +### Step 0: Prerequisites + +Validates the environment before doing anything: + +- **Docker Compose V2** — checks `docker compose version` output (not the legacy `docker-compose`) +- **Docker daemon** — verifies `docker info` succeeds +- **NVIDIA GPU** — only checked when `--gpu` or `--ollama-gpu` is used; runs `nvidia-smi` to confirm drivers are installed +- **Compose file** — verifies `docker-compose.selfhosted.yml` exists at the expected path + +If any check fails, the script exits with a clear error message and remediation steps. + +### Step 1: Generate Secrets + +Creates cryptographic secrets needed by the backend and frontend: + +- **`SECRET_KEY`** — used by the FastAPI server for session signing (64 hex chars via `openssl rand -hex 32`) +- **`NEXTAUTH_SECRET`** — used by Next.js NextAuth for JWT signing + +Secrets are only generated if they don't already exist or are still set to the placeholder value `changeme`. This is what makes the script idempotent for secrets. + +### Step 2: Generate `server/.env` + +Creates or updates the backend environment file from `server/.env.selfhosted.example`. 
Sets: + +- **Infrastructure** — PostgreSQL URL, Redis host, Celery broker (all pointing to Docker-internal hostnames) +- **Public URLs** — `BASE_URL` and `CORS_ORIGIN` computed from the domain (if `--domain`), IP (if detected on Linux), or `localhost` +- **WebRTC** — `WEBRTC_HOST` set to the server's LAN IP so browsers can reach UDP ICE candidates +- **Specialized models** — always points to `http://transcription:8000` (the Docker network alias shared by GPU and CPU containers) +- **HuggingFace token** — prompts interactively for pyannote model access; writes to root `.env` so Docker Compose can inject it into GPU/CPU containers +- **LLM** — if `--ollama-*` is used, configures `LLM_URL` pointing to the Ollama container. Otherwise, warns that the user needs to configure an external LLM +- **Public mode** — sets `PUBLIC_MODE=true` so the app is accessible without authentication by default + +The script uses `env_set` for each variable, which either updates an existing line or appends a new one. This means re-running the script updates values in-place without duplicating keys. + +### Step 3: Generate `www/.env` + +Creates or updates the frontend environment file from `www/.env.selfhosted.example`. Sets: + +- **`SITE_URL` / `NEXTAUTH_URL` / `API_URL`** — all set to the same public-facing URL (with `https://` if Caddy is enabled) +- **`WEBSOCKET_URL`** — set to `auto`, which tells the frontend to derive the WebSocket URL from the page URL automatically +- **`SERVER_API_URL`** — always `http://server:1250` (Docker-internal, used for server-side rendering) +- **`KV_URL`** — Redis URL for Next.js caching +- **`FEATURE_REQUIRE_LOGIN`** — `false` by default (matches `PUBLIC_MODE=true` on the backend) + +### Step 4: Storage Setup + +Branches based on whether `--garage` was passed: + +**With `--garage` (local S3):** + +1. Generates `data/garage.toml` from a template, injecting a random RPC secret +2. 
Starts only the Garage container (`docker compose --profile garage up -d garage`) +3. Waits for the Garage admin API to respond on port 3903 +4. Assigns the node to a storage layout (1GB capacity, zone `dc1`) +5. Creates the `reflector-media` bucket +6. Creates an access key named `reflector` and grants it read/write on the bucket +7. Writes all S3 credentials (`ENDPOINT_URL`, `BUCKET_NAME`, `REGION`, `ACCESS_KEY_ID`, `SECRET_ACCESS_KEY`) to `server/.env` + +The Garage endpoint is `http://garage:3900` (Docker-internal), and the region is set to `garage` (arbitrary, Garage ignores it). The boto3 client uses path-style addressing when an endpoint URL is configured, which is required for S3-compatible services like Garage. + +**Without `--garage` (external S3):** + +1. Checks `server/.env` for the four required S3 variables +2. If any are missing, prompts interactively for each one +3. Optionally prompts for an endpoint URL (for MinIO, Backblaze B2, etc.) + +### Step 5: Caddyfile + +Only runs when `--caddy` or `--domain` is used. Generates a Caddy configuration file: + +**With `--domain`:** Creates a named site block (`reflector.example.com { ... }`). Caddy automatically provisions a Let's Encrypt certificate for this domain. Requires DNS pointing to the server and ports 80/443 open. + +**Without `--domain` (IP access):** Creates a catch-all `:443 { tls internal ... }` block. Caddy generates a self-signed certificate. Browsers will show a security warning. + +Both configurations route: +- `/v1/*` and `/health` to the backend (`server:1250`) +- Everything else to the frontend (`web:3000`) + +### Step 6: Start Services + +1. **Always builds the GPU/CPU model image** — these are never prebuilt because they contain ML model download logic specific to the host's hardware +2. **With `--build`:** Also builds backend (server, worker, beat) and frontend (web) images from source +3. 
**Without `--build`:** Pulls prebuilt images from the Docker registry (`monadicalsas/reflector-backend:latest`, `monadicalsas/reflector-frontend:latest`) +4. **Starts all services** — `docker compose up -d` with the active profiles +5. **Quick sanity check** — after 3 seconds, checks for any containers that exited immediately + +### Step 7: Health Checks + +Waits for each service in order, with generous timeouts: + +| Service | Check | Timeout | Notes | +|---------|-------|---------|-------| +| GPU/CPU models | `curl http://localhost:8000/docs` | 10 min (120 x 5s) | First start downloads ~1GB of models | +| Ollama | `curl http://localhost:11434/api/tags` | 3 min (60 x 3s) | Then pulls the selected model | +| Server API | `curl http://localhost:1250/health` | 7.5 min (90 x 5s) | First start runs database migrations | +| Frontend | `curl http://localhost:3000` | 1.5 min (30 x 3s) | Next.js build on first start | +| Caddy | `curl -k https://localhost` | Quick check | After other services are up | + +If the server container exits during the health check, the script dumps diagnostics (container statuses + logs) before exiting. + +After the Ollama health check passes, the script checks if the selected model is already pulled. If not, it runs `ollama pull <model>` inside the container. + +--- + +## Docker Compose Profile System + +The compose file (`docker-compose.selfhosted.yml`) uses Docker Compose profiles to make services optional. Only services whose profiles match the active `--profile` flags are started. 
+ +### Always-on Services (no profile) + +These start regardless of which flags you pass: + +| Service | Role | Image | +|---------|------|-------| +| `server` | FastAPI backend, API endpoints, WebRTC | `monadicalsas/reflector-backend:latest` | +| `worker` | Celery worker for background processing | Same image, `ENTRYPOINT=worker` | +| `beat` | Celery beat scheduler for periodic tasks | Same image, `ENTRYPOINT=beat` | +| `web` | Next.js frontend | `monadicalsas/reflector-frontend:latest` | +| `redis` | Message broker + caching | `redis:7.2-alpine` | +| `postgres` | Primary database | `postgres:17-alpine` | + +### Profile-Based Services + +| Profile | Service | Role | +|---------|---------|------| +| `gpu` | `gpu` | NVIDIA GPU-accelerated transcription/diarization/translation | +| `cpu` | `cpu` | CPU-only transcription/diarization/translation | +| `ollama-gpu` | `ollama` | Local Ollama LLM with GPU | +| `ollama-cpu` | `ollama-cpu` | Local Ollama LLM on CPU | +| `garage` | `garage` | Local S3-compatible object storage | +| `caddy` | `caddy` | Reverse proxy with SSL | + +### The "transcription" Alias + +Both the `gpu` and `cpu` services define a Docker network alias of `transcription`. This means the backend always connects to `http://transcription:8000` regardless of which profile is active. The alias is defined in the compose file's `networks.default.aliases` section. 
+ +--- + +## Service Architecture + +``` + ┌─────────────┐ + Internet ────────>│ Caddy │ :80/:443 (profile: caddy) + └──────┬──────┘ + │ + ┌────────────┼────────────┐ + │ │ │ + v v │ + ┌─────────┐ ┌─────────┐ │ + │ web │ │ server │ │ + │ :3000 │ │ :1250 │ │ + └─────────┘ └────┬────┘ │ + │ │ + ┌────┴────┐ │ + │ worker │ │ + │ beat │ │ + └────┬────┘ │ + │ │ + ┌──────────────┼────────────┤ + │ │ │ + v v v + ┌───────────┐ ┌─────────┐ ┌─────────┐ + │transcription│ │postgres │ │ redis │ + │ (gpu/cpu) │ │ :5432 │ │ :6379 │ + │ :8000 │ └─────────┘ └─────────┘ + └───────────┘ + │ + ┌─────┴─────┐ ┌─────────┐ + │ ollama │ │ garage │ + │(optional) │ │(optional│ + │ :11434 │ │ S3) │ + └───────────┘ └─────────┘ +``` + +### How Services Interact + +1. **User request** hits Caddy (if enabled), which routes to `web` (pages) or `server` (API) +2. **`web`** renders pages server-side using `SERVER_API_URL=http://server:1250` and client-side using the public `API_URL` +3. **`server`** handles API requests, file uploads, WebRTC streaming. Dispatches background work to Celery via Redis +4. **`worker`** picks up Celery tasks (transcription pipelines, audio processing). Calls `transcription:8000` for ML inference and uploads results to S3 storage +5. **`beat`** schedules periodic tasks (cleanup, webhook retries) by pushing them onto the Celery queue +6. **`transcription` (gpu/cpu)** runs Whisper/Parakeet (transcription), Pyannote (diarization), and translation models. Stateless HTTP API +7. **`ollama`** provides an OpenAI-compatible API for summarization and topic detection. Called by the worker during post-processing +8. **`garage`** provides S3-compatible storage for audio files and processed results. Accessed by the worker via boto3 + +--- + +## Configuration Flow + +Environment variables flow through multiple layers. Understanding this prevents confusion when debugging: + +``` +Flags (--gpu, --garage, etc.) 
+ │ + ├── setup-selfhosted.sh interprets flags + │ │ + │ ├── Writes server/.env (backend config) + │ ├── Writes www/.env (frontend config) + │ ├── Writes .env (HF_TOKEN for compose interpolation) + │ └── Writes Caddyfile (proxy routes) + │ + └── docker-compose.selfhosted.yml reads: + ├── env_file: ./server/.env (loaded into server, worker, beat) + ├── env_file: ./www/.env (loaded into web) + ├── .env (compose variable interpolation, e.g. ${HF_TOKEN}) + └── environment: {...} (hardcoded overrides, always win over env_file) +``` + +### Precedence Rules + +Docker Compose `environment:` keys **always override** `env_file:` values. This is by design — the compose file hardcodes infrastructure values that must be correct inside the Docker network (like `DATABASE_URL=postgresql+asyncpg://...@postgres:5432/...`) regardless of what's in `server/.env`. + +The `server/.env` file is still useful for: +- Values not overridden in the compose file (LLM config, storage credentials, auth settings) +- Running the server outside Docker during development + +### The Three `.env` Files + +| File | Used By | Contains | +|------|---------|----------| +| `server/.env` | server, worker, beat | Backend config: database, Redis, S3, LLM, auth, public URLs | +| `www/.env` | web | Frontend config: site URL, auth, feature flags | +| `.env` (root) | Docker Compose interpolation | Only `HF_TOKEN` — injected into GPU/CPU container env | + +--- + +## Storage Architecture + +All audio files and processing results are stored in S3-compatible object storage. The backend uses boto3 (via aioboto3) with automatic path-style addressing when a custom endpoint URL is configured. + +### How Garage Works + +Garage is a lightweight, self-hosted S3-compatible storage engine. 
In this deployment: + +- Runs as a single-node cluster with 1GB capacity allocation +- Listens on port 3900 (S3 API) and 3903 (admin API) +- Data persists in Docker volumes (`garage_data`, `garage_meta`) +- Accessed by the worker at `http://garage:3900` (Docker-internal) + +The setup script creates: +- A bucket called `reflector-media` +- An access key called `reflector` with read/write permissions on that bucket + +### Path-Style vs Virtual-Hosted Addressing + +AWS S3 uses virtual-hosted addressing by default (`bucket.s3.amazonaws.com`). S3-compatible services like Garage require path-style addressing (`endpoint/bucket`). The `AwsStorage` class detects this automatically: when `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL` is set, it configures boto3 with `addressing_style: "path"`. + +--- + +## SSL/TLS and Reverse Proxy + +### With `--domain` (Production) + +Caddy automatically obtains and renews a Let's Encrypt certificate. Requirements: +- DNS A record pointing to the server +- Ports 80 (HTTP challenge) and 443 (HTTPS) open to the internet + +The generated Caddyfile uses the domain as the site address, which triggers Caddy's automatic HTTPS. + +### Without `--domain` (Development/LAN) + +Caddy generates a self-signed certificate and listens on `:443` as a catch-all. Browsers will show a security warning that must be accepted manually. + +### Without `--caddy` (BYO Proxy) + +No ports are exposed to the internet. The services listen on `127.0.0.1` only: +- Frontend: `localhost:3000` +- Backend API: `localhost:1250` + +You can point your own reverse proxy (nginx, Traefik, etc.) at these ports. + +### WebRTC and UDP + +The server exposes UDP ports 50000-50100 for WebRTC ICE candidates. The `WEBRTC_HOST` variable tells the server which IP to advertise in ICE candidates — this must be the server's actual IP address (not a domain), because WebRTC uses UDP which doesn't go through the HTTP reverse proxy. 
+ +--- + +## Build vs Pull Workflow + +### Default (no `--build` flag) + +``` +GPU/CPU model image: Always built from source (./gpu/self_hosted/) +Backend image: Pulled from monadicalsas/reflector-backend:latest +Frontend image: Pulled from monadicalsas/reflector-frontend:latest +``` + +The GPU/CPU image is always built because it contains hardware-specific build steps and ML model download logic. + +### With `--build` + +``` +GPU/CPU model image: Built from source (./gpu/self_hosted/) +Backend image: Built from source (./server/) +Frontend image: Built from source (./www/) +``` + +Use `--build` when: +- You've made local code changes +- The prebuilt registry images are outdated +- You want to verify the build works on your hardware + +### Rebuilding Individual Services + +```bash +# Rebuild just the backend +docker compose -f docker-compose.selfhosted.yml build server worker beat + +# Rebuild just the frontend +docker compose -f docker-compose.selfhosted.yml build web + +# Rebuild the GPU model container +docker compose -f docker-compose.selfhosted.yml build gpu + +# Force a clean rebuild (no cache) +docker compose -f docker-compose.selfhosted.yml build --no-cache server +``` + +--- + +## Background Task Processing + +### Celery Architecture + +The backend uses Celery for all background work, with Redis as the message broker: + +- **`worker`** — picks up tasks from the Redis queue and executes them +- **`beat`** — schedules periodic tasks (cron-like) by pushing them onto the queue +- **`Redis`** — acts as both message broker and result backend + +### The Audio Processing Pipeline + +When a file is uploaded, the worker runs a multi-step pipeline: + +``` +Upload → Extract Audio → Upload to S3 + │ + ┌──────┼──────┐ + │ │ │ + v v v + Transcribe Diarize Waveform + │ │ │ + └──────┼──────┘ + │ + Assemble + │ + ┌──────┼──────┐ + v v v + Topics Title Summaries + │ + Done +``` + +Transcription, diarization, and waveform generation run in parallel. 
After assembly, topic detection, title generation, and summarization also run in parallel. Each step calls the appropriate service (transcription container for ML, Ollama/external LLM for text generation, S3 for storage). + +### Event Loop Management + +Each Celery task runs in its own `asyncio.run()` call, which creates a fresh event loop. The `asynctask` decorator in `server/reflector/asynctask.py` handles: + +1. **Database connections** — resets the connection pool before each task (connections from a previous event loop would cause "Future attached to a different loop" errors) +2. **Redis connections** — resets the WebSocket manager singleton so Redis pub/sub reconnects on the current loop +3. **Cleanup** — disconnects the database and clears the context variable in the `finally` block + +--- + +## Network and Port Layout + +All services communicate over Docker's default bridge network. Only specific ports are exposed to the host: + +| Port | Service | Binding | Purpose | +|------|---------|---------|---------| +| 80 | Caddy | `0.0.0.0:80` | HTTP (redirect to HTTPS / Let's Encrypt challenge) | +| 443 | Caddy | `0.0.0.0:443` | HTTPS (main entry point) | +| 1250 | Server | `127.0.0.1:1250` | Backend API (localhost only) | +| 3000 | Web | `127.0.0.1:3000` | Frontend (localhost only) | +| 3900 | Garage | `0.0.0.0:3900` | S3 API (for admin/debug access) | +| 3903 | Garage | `0.0.0.0:3903` | Garage admin API | +| 8000 | GPU/CPU | `127.0.0.1:8000` | ML model API (localhost only) | +| 11434 | Ollama | `127.0.0.1:11434` | Ollama API (localhost only) | +| 50000-50100/udp | Server | `0.0.0.0:50000-50100` | WebRTC ICE candidates | + +Services bound to `127.0.0.1` are only accessible from the host itself (not from the network). Caddy is the only service exposed to the internet on standard HTTP/HTTPS ports. 
+ +### Docker-Internal Hostnames + +Inside the Docker network, services reach each other by their compose service name: + +| Hostname | Resolves To | +|----------|-------------| +| `server` | Backend API container | +| `web` | Frontend container | +| `postgres` | PostgreSQL container | +| `redis` | Redis container | +| `transcription` | GPU or CPU container (network alias) | +| `ollama` / `ollama-cpu` | Ollama container | +| `garage` | Garage S3 container | + +--- + +## Diagnostics and Error Handling + +The setup script includes an `ERR` trap that automatically dumps diagnostics when any command fails: + +1. Lists all container statuses +2. Shows the last 30 lines of logs for any stopped/exited containers +3. Shows the last 40 lines of the specific failing service + +This means if something goes wrong during setup, you'll see the relevant logs immediately without having to run manual debug commands. + +### Common Debug Commands + +```bash +# Overall status +docker compose -f docker-compose.selfhosted.yml ps + +# Logs for a specific service +docker compose -f docker-compose.selfhosted.yml logs server --tail 50 +docker compose -f docker-compose.selfhosted.yml logs worker --tail 50 + +# Check environment inside a container +docker compose -f docker-compose.selfhosted.yml exec server env | grep TRANSCRIPT + +# Health check from inside the network +docker compose -f docker-compose.selfhosted.yml exec server curl http://localhost:1250/health + +# Check S3 storage connectivity +docker compose -f docker-compose.selfhosted.yml exec server curl http://garage:3900 + +# Database access +docker compose -f docker-compose.selfhosted.yml exec postgres psql -U reflector -c "SELECT id, status FROM transcript ORDER BY created_at DESC LIMIT 5;" + +# List files in server data directory +docker compose -f docker-compose.selfhosted.yml exec server ls -la /app/data/ +``` diff --git a/docsv2/selfhosted-production.md b/docsv2/selfhosted-production.md new file mode 100644 index 
00000000..d161d5e8 --- /dev/null +++ b/docsv2/selfhosted-production.md @@ -0,0 +1,373 @@ +# Self-Hosted Production Deployment + +Deploy Reflector on a single server with everything running in Docker. Transcription, diarization, and translation use specialized ML models (Whisper/Parakeet, Pyannote); only summarization and topic detection require an LLM. + +> For a detailed walkthrough of how the setup script and infrastructure work under the hood, see [How the Self-Hosted Setup Works](selfhosted-architecture.md). + +## Prerequisites + +### Hardware +- **With GPU**: Linux server with NVIDIA GPU (8GB+ VRAM recommended), 16GB+ RAM, 50GB+ disk +- **CPU-only**: 8+ cores, 32GB+ RAM (transcription is slower but works) +- Disk space for ML models (~2GB on first run) + audio storage + +### Software +- Docker Engine 24+ with Compose V2 +- NVIDIA drivers + `nvidia-container-toolkit` (GPU modes only) +- `curl`, `openssl` (usually pre-installed) + +### Accounts & Credentials (depending on options) + +**Always recommended:** +- **HuggingFace token** — For downloading pyannote speaker diarization models. Get one at https://huggingface.co/settings/tokens and accept the model licenses: + - https://huggingface.co/pyannote/speaker-diarization-3.1 + - https://huggingface.co/pyannote/segmentation-3.0 + - The setup script will prompt for this. If skipped, diarization falls back to a public model bundle (may be less reliable). + +**LLM for summarization & topic detection (pick one):** +- **With `--ollama-gpu` or `--ollama-cpu`**: Nothing extra — Ollama runs locally and pulls the model automatically +- **Without `--ollama-*`**: An OpenAI-compatible LLM API key and endpoint. 
Examples: + - OpenAI: `LLM_URL=https://api.openai.com/v1`, `LLM_API_KEY=sk-...`, `LLM_MODEL=gpt-4o-mini` + - Anthropic, Together, Groq, or any OpenAI-compatible API + - A self-managed vLLM or Ollama instance elsewhere on the network + +**Object storage (pick one):** +- **With `--garage`**: Nothing extra — Garage (local S3-compatible storage) is auto-configured by the script +- **Without `--garage`**: S3-compatible storage credentials. The script will prompt for these, or you can pre-fill `server/.env`. Options include: + - **AWS S3**: Access Key ID, Secret Access Key, bucket name, region + - **MinIO**: Same credentials + `TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://your-minio:9000` + - **Any S3-compatible provider** (Backblaze B2, Cloudflare R2, DigitalOcean Spaces, etc.): same fields + custom endpoint URL + +**Optional add-ons (configure after initial setup):** +- **Daily.co** (live meeting rooms): Requires a Daily.co account (https://www.daily.co/), API key, subdomain, and an AWS S3 bucket + IAM Role for recording storage. See [Enabling Daily.co Live Rooms](#enabling-dailyco-live-rooms) below. +- **Authentik** (user authentication): Requires an Authentik instance with an OAuth2/OIDC application configured for Reflector. See [Enabling Authentication](#enabling-authentication-authentik) below. + +## Quick Start + +```bash +git clone https://github.com/Monadical-SAS/reflector.git +cd reflector + +# GPU + local Ollama LLM + local Garage storage + Caddy SSL (with domain): +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com + +# Same but without a domain (self-signed cert, access via IP): +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy + +# CPU-only (same, but slower): +./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy + +# Build from source instead of pulling prebuilt images: +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --build +``` + +That's it. 
The script generates env files, secrets, starts all containers, waits for health checks, and prints the URL. + +## Specialized Models (Required) + +Pick `--gpu` or `--cpu`. This determines how **transcription, diarization, and translation** run: + +| Flag | What it does | Requires | +|------|-------------|----------| +| `--gpu` | NVIDIA GPU acceleration for ML models | NVIDIA GPU + drivers + `nvidia-container-toolkit` | +| `--cpu` | CPU-only (slower but works without GPU) | 8+ cores, 32GB+ RAM recommended | + +## Local LLM (Optional) + +Optionally add `--ollama-gpu` or `--ollama-cpu` for a **local Ollama instance** that handles summarization and topic detection. If omitted, configure an external OpenAI-compatible LLM in `server/.env`. + +| Flag | What it does | Requires | +|------|-------------|----------| +| `--ollama-gpu` | Local Ollama with NVIDIA GPU acceleration | NVIDIA GPU | +| `--ollama-cpu` | Local Ollama on CPU only | Nothing extra | +| `--llm-model MODEL` | Choose which Ollama model to download (default: `qwen2.5:14b`) | `--ollama-gpu` or `--ollama-cpu` | +| *(omitted)* | User configures external LLM (OpenAI, Anthropic, etc.) | LLM API key | + +### Choosing an Ollama model + +The default model is `qwen2.5:14b` (~9GB download, good multilingual support and summary quality). 
Override with `--llm-model`: + +```bash +# Default (qwen2.5:14b) +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy + +# Mistral — good balance of speed and quality (~4.1GB) +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy + +# Phi-4 — comparable size to the default, strong general quality (~9.1GB) +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model phi4 --garage --caddy + +# Llama 3.3 70B — best quality, needs 48GB+ RAM or GPU VRAM (~43GB) +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model llama3.3:70b --garage --caddy + +# Gemma 2 9B (~5.4GB) +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model gemma2 --garage --caddy + +# DeepSeek R1 8B — reasoning model, verbose but thorough summaries (~4.9GB) +./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model deepseek-r1:8b --garage --caddy +``` + +Browse all available models at https://ollama.com/library. + +### Recommended combinations + +- **`--gpu --ollama-gpu`**: Best for servers with NVIDIA GPU. Fully self-contained, no external API keys needed. +- **`--cpu --ollama-cpu`**: No GPU available but want everything self-contained. Slower but works. +- **`--gpu --ollama-cpu`**: GPU for transcription, CPU for LLM. Saves GPU VRAM for ML models. +- **`--gpu`**: Have NVIDIA GPU but prefer a cloud LLM (faster/better summaries with GPT-4, Claude, etc.). +- **`--cpu`**: No GPU, prefer cloud LLM. Slowest transcription but best summary quality. + +## Other Optional Flags + +| Flag | What it does | +|------|-------------| +| `--garage` | Starts Garage (local S3-compatible storage). Auto-configures bucket, keys, and env vars. | +| `--caddy` | Starts Caddy reverse proxy on ports 80/443 with self-signed cert. | +| `--domain DOMAIN` | Use a real domain with Let's Encrypt auto-HTTPS (implies `--caddy`). Requires DNS A record pointing to this server and ports 80/443 open. 
| +| `--build` | Build backend (server, worker, beat) and frontend (web) Docker images from source instead of pulling prebuilt images from the registry. Useful for development or when running a version with local changes. | + +Without `--garage`, you **must** provide S3-compatible credentials (the script will prompt interactively or you can pre-fill `server/.env`). + +Without `--caddy` or `--domain`, no public HTTP/HTTPS ports are exposed (the backend API remains bound to `127.0.0.1:1250` on the host, and WebRTC UDP ports 50000-50100 are still published). Point your own reverse proxy at `web:3000` (frontend) and `server:1250` (API). + +**Using a domain (recommended for production):** Point a DNS A record at your server's IP, then pass `--domain your.domain.com`. Caddy will automatically obtain and renew a Let's Encrypt certificate. Ports 80 and 443 must be open. + +**Without a domain:** `--caddy` alone uses a self-signed certificate. Browsers will show a security warning that must be accepted. + +## What the Script Does + +1. **Prerequisites check** — Docker, NVIDIA GPU (if needed), compose file exists +2. **Generate secrets** — `SECRET_KEY`, `NEXTAUTH_SECRET` via `openssl rand` +3. **Generate `server/.env`** — From template, sets infrastructure defaults, configures LLM based on mode, enables `PUBLIC_MODE` +4. **Generate `www/.env`** — Auto-detects server IP, sets URLs +5. **Storage setup** — Either initializes Garage (bucket, keys, permissions) or prompts for external S3 credentials +6. **Caddyfile** — Generates domain-specific (Let's Encrypt) or IP-specific (self-signed) configuration +7. **Build & start** — Always builds GPU/CPU model image from source. With `--build`, also builds backend and frontend from source; otherwise pulls prebuilt images from the registry +8. **Health checks** — Waits for each service, pulls Ollama model if needed, warns about missing LLM config + +> For a deeper dive into each step, see [How the Self-Hosted Setup Works](selfhosted-architecture.md). 
+ +## Configuration Reference + +### Server Environment (`server/.env`) + +| Variable | Description | Default | +|----------|-------------|---------| +| `DATABASE_URL` | PostgreSQL connection | Auto-set (Docker internal) | +| `REDIS_HOST` | Redis hostname | Auto-set (`redis`) | +| `SECRET_KEY` | App secret | Auto-generated | +| `AUTH_BACKEND` | Authentication method | `none` | +| `PUBLIC_MODE` | Allow unauthenticated access | `true` | +| `WEBRTC_HOST` | IP advertised in WebRTC ICE candidates | Auto-detected (server IP) | +| `TRANSCRIPT_URL` | Specialized model endpoint | `http://transcription:8000` | +| `LLM_URL` | OpenAI-compatible LLM endpoint | Auto-set for Ollama modes | +| `LLM_API_KEY` | LLM API key | `not-needed` for Ollama | +| `LLM_MODEL` | LLM model name | `qwen2.5:14b` for Ollama (override with `--llm-model`) | +| `TRANSCRIPT_STORAGE_BACKEND` | Storage backend | `aws` | +| `TRANSCRIPT_STORAGE_AWS_*` | S3 credentials | Auto-set for Garage | + +### Frontend Environment (`www/.env`) + +| Variable | Description | Default | +|----------|-------------|---------| +| `SITE_URL` | Public-facing URL | Auto-detected | +| `API_URL` | API URL (browser-side) | Same as SITE_URL | +| `SERVER_API_URL` | API URL (server-side) | `http://server:1250` | +| `NEXTAUTH_SECRET` | Auth secret | Auto-generated | +| `FEATURE_REQUIRE_LOGIN` | Require authentication | `false` | + +## Storage Options + +### Garage (Recommended for Self-Hosted) + +Use `--garage` flag. The script automatically: +- Generates `data/garage.toml` with a random RPC secret +- Starts the Garage container +- Creates the `reflector-media` bucket +- Creates an access key with read/write permissions +- Writes all S3 credentials to `server/.env` + +### External S3 (AWS, MinIO, etc.) + +Don't use `--garage`. 
The script will prompt for: +- Access Key ID +- Secret Access Key +- Bucket Name +- Region +- Endpoint URL (for non-AWS like MinIO) + +Or pre-fill in `server/.env`: +```env +TRANSCRIPT_STORAGE_BACKEND=aws +TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID=your-key +TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY=your-secret +TRANSCRIPT_STORAGE_AWS_BUCKET_NAME=reflector-media +TRANSCRIPT_STORAGE_AWS_REGION=us-east-1 +# For non-AWS S3 (MinIO, etc.): +TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://minio:9000 +``` + +## Enabling Authentication (Authentik) + +By default, authentication is disabled (`AUTH_BACKEND=none`, `FEATURE_REQUIRE_LOGIN=false`). To enable: + +1. Deploy an Authentik instance (see [Authentik docs](https://goauthentik.io/docs/installation)) +2. Create an OAuth2/OIDC application for Reflector +3. Update `server/.env`: + ```env + AUTH_BACKEND=jwt + AUTH_JWT_AUDIENCE=your-client-id + ``` +4. Update `www/.env`: + ```env + FEATURE_REQUIRE_LOGIN=true + AUTHENTIK_ISSUER=https://authentik.example.com/application/o/reflector + AUTHENTIK_REFRESH_TOKEN_URL=https://authentik.example.com/application/o/token/ + AUTHENTIK_CLIENT_ID=your-client-id + AUTHENTIK_CLIENT_SECRET=your-client-secret + ``` +5. Restart: `docker compose -f docker-compose.selfhosted.yml down`, then re-run `./scripts/setup-selfhosted.sh` with the same flags you used originally (e.g. `--gpu --ollama-gpu --garage --caddy`) + +## Enabling Daily.co Live Rooms + +Daily.co enables real-time meeting rooms with automatic recording and transcription. + +1. Create a [Daily.co](https://www.daily.co/) account +2. Add to `server/.env`: + ```env + DEFAULT_VIDEO_PLATFORM=daily + DAILY_API_KEY=your-daily-api-key + DAILY_SUBDOMAIN=your-subdomain + DAILY_WEBHOOK_SECRET=your-webhook-secret + DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco + DAILYCO_STORAGE_AWS_REGION=us-east-1 + DAILYCO_STORAGE_AWS_ROLE_ARN=arn:aws:iam::123456789012:role/DailyCoAccess + ``` +3. 
Restart the server: `docker compose -f docker-compose.selfhosted.yml restart server worker` + +## Enabling Real Domain with Let's Encrypt + +By default, Caddy uses self-signed certificates. For a real domain: + +1. Point your domain's DNS to your server's IP +2. Ensure ports 80 and 443 are open +3. Edit `Caddyfile`: + ``` + reflector.example.com { + handle /v1/* { + reverse_proxy server:1250 + } + handle /health { + reverse_proxy server:1250 + } + handle { + reverse_proxy web:3000 + } + } + ``` +4. Update `www/.env`: + ```env + SITE_URL=https://reflector.example.com + NEXTAUTH_URL=https://reflector.example.com + API_URL=https://reflector.example.com + ``` +5. Restart Caddy: `docker compose -f docker-compose.selfhosted.yml restart caddy web` + +## Troubleshooting + +### Check service status +```bash +docker compose -f docker-compose.selfhosted.yml ps +``` + +### View logs for a specific service +```bash +docker compose -f docker-compose.selfhosted.yml logs server --tail 50 +docker compose -f docker-compose.selfhosted.yml logs gpu --tail 50 +docker compose -f docker-compose.selfhosted.yml logs web --tail 50 +``` + +### GPU service taking too long +First start downloads ~1-2GB of ML models. Check progress: +```bash +docker compose -f docker-compose.selfhosted.yml logs gpu -f +``` + +### Server exits immediately +Usually a database migration issue. Check: +```bash +docker compose -f docker-compose.selfhosted.yml logs server --tail 50 +``` + +### Caddy certificate issues +For self-signed certs, your browser will warn. Click Advanced > Proceed. +For Let's Encrypt, ensure ports 80/443 are open and DNS is pointed correctly. + +### Summaries/topics not generating +Check LLM configuration: +```bash +grep LLM_ server/.env +``` +If you didn't use `--ollama-gpu` or `--ollama-cpu`, you must set `LLM_URL`, `LLM_API_KEY`, and `LLM_MODEL`. 
+ +### Health check from inside containers +```bash +docker compose -f docker-compose.selfhosted.yml exec server curl http://localhost:1250/health +docker compose -f docker-compose.selfhosted.yml exec gpu curl http://localhost:8000/docs +``` + +## Updating + +```bash +# Option A: Pull latest prebuilt images and restart +docker compose -f docker-compose.selfhosted.yml down +./scripts/setup-selfhosted.sh + +# Option B: Build from source (after git pull) and restart +git pull +docker compose -f docker-compose.selfhosted.yml down +./scripts/setup-selfhosted.sh --build + +# Rebuild only the GPU/CPU model image (picks up model updates) +docker compose -f docker-compose.selfhosted.yml build gpu # or cpu +``` + +The setup script is idempotent — it won't overwrite existing secrets or env vars that are already set. + +## Architecture Overview + +``` + ┌─────────┐ + Internet ────────>│ Caddy │ :80/:443 + └────┬────┘ + │ + ┌────────────┼────────────┐ + │ │ │ + v v │ + ┌─────────┐ ┌─────────┐ │ + │ web │ │ server │ │ + │ :3000 │ │ :1250 │ │ + └─────────┘ └────┬────┘ │ + │ │ + ┌────┴────┐ │ + │ worker │ │ + │ beat │ │ + └────┬────┘ │ + │ │ + ┌──────────────┼────────────┤ + │ │ │ + v v v + ┌───────────┐ ┌─────────┐ ┌─────────┐ + │transcription│ │postgres │ │ redis │ + │(gpu/cpu) │ │ :5432 │ │ :6379 │ + │ :8000 │ └─────────┘ └─────────┘ + └───────────┘ + │ + ┌─────┴─────┐ ┌─────────┐ + │ ollama │ │ garage │ + │ (optional)│ │(optional│ + │ :11434 │ │ S3) │ + └───────────┘ └─────────┘ +``` + +All services communicate over Docker's internal network. Only Caddy (if enabled) exposes ports to the internet. 
diff --git a/node_modules/.yarn-integrity b/node_modules/.yarn-integrity new file mode 100644 index 00000000..0f19eb7a --- /dev/null +++ b/node_modules/.yarn-integrity @@ -0,0 +1,10 @@ +{ + "systemParams": "darwin-x64-83", + "modulesFolders": [], + "flags": [], + "linkedModules": [], + "topLevelPatterns": [], + "lockfileEntries": {}, + "files": [], + "artifacts": {} +} \ No newline at end of file diff --git a/scripts/setup-selfhosted.sh b/scripts/setup-selfhosted.sh new file mode 100755 index 00000000..f68518c0 --- /dev/null +++ b/scripts/setup-selfhosted.sh @@ -0,0 +1,945 @@ +#!/usr/bin/env bash +# +# Self-hosted production setup for Reflector. +# Single script to configure and launch everything on one server. +# +# Usage: +# ./scripts/setup-selfhosted.sh <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build] +# +# Specialized models (pick ONE — required): +# --gpu NVIDIA GPU for transcription/diarization/translation +# --cpu CPU-only for transcription/diarization/translation (slower) +# +# Local LLM (optional — for summarization & topic detection): +# --ollama-gpu Local Ollama with NVIDIA GPU acceleration +# --ollama-cpu Local Ollama on CPU only +# --llm-model MODEL Ollama model to use (default: qwen2.5:14b) +# (If omitted, configure an external OpenAI-compatible LLM in server/.env) +# +# Optional flags: +# --garage Use Garage for local S3-compatible storage +# --caddy Enable Caddy reverse proxy with auto-SSL +# --domain DOMAIN Use a real domain for Caddy (enables Let's Encrypt auto-HTTPS) +# Requires: DNS pointing to this server + ports 80/443 open +# Without --domain: Caddy uses self-signed cert for IP access +# --build Build backend and frontend images from source instead of pulling +# +# Examples: +# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy +# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com +# ./scripts/setup-selfhosted.sh --cpu 
--ollama-cpu --garage --caddy +# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy +# ./scripts/setup-selfhosted.sh --gpu --garage --caddy +# ./scripts/setup-selfhosted.sh --cpu +# +# Idempotent — safe to re-run at any time. +# +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +COMPOSE_FILE="$ROOT_DIR/docker-compose.selfhosted.yml" +SERVER_ENV="$ROOT_DIR/server/.env" +WWW_ENV="$ROOT_DIR/www/.env" + +OLLAMA_MODEL="qwen2.5:14b" +OS="$(uname -s)" + +# --- Colors --- +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +info() { echo -e "${CYAN}==>${NC} $*"; } +ok() { echo -e "${GREEN} ✓${NC} $*"; } +warn() { echo -e "${YELLOW} !${NC} $*"; } +err() { echo -e "${RED} ✗${NC} $*" >&2; } + +# --- Helpers --- + +dump_diagnostics() { + local failed_svc="${1:-}" + echo "" + err "========== DIAGNOSTICS ==========" + + err "Container status:" + compose_cmd ps -a --format "table {{.Name}}\t{{.Status}}" 2>/dev/null || true + echo "" + + local stopped + stopped=$(compose_cmd ps -a --format '{{.Name}}\t{{.Status}}' 2>/dev/null \ + | grep -iv 'up\|running' | awk -F'\t' '{print $1}' || true) + for c in $stopped; do + err "--- Logs for $c (exited/unhealthy) ---" + docker logs --tail 30 "$c" 2>&1 || true + echo "" + done + + if [[ -n "$failed_svc" ]]; then + err "--- Logs for $failed_svc (last 40) ---" + compose_cmd logs "$failed_svc" --tail 40 2>&1 || true + fi + + err "=================================" +} + +trap 'dump_diagnostics' ERR + +detect_lan_ip() { + case "$OS" in + Darwin) + for iface in en0 en1 en2 en3; do + local ip + ip=$(ipconfig getifaddr "$iface" 2>/dev/null || true) + if [[ -n "$ip" ]]; then + echo "$ip" + return + fi + done + ;; + Linux) + ip route get 1.1.1.1 2>/dev/null | sed -n 's/.*src \([^ ]*\).*/\1/p' + return + ;; + esac + echo "" +} + +wait_for_url() { + local url="$1" label="$2" retries="${3:-30}" 
interval="${4:-2}" + for i in $(seq 1 "$retries"); do + if curl -sf "$url" > /dev/null 2>&1; then + return 0 + fi + echo -ne "\r Waiting for $label... ($i/$retries)" + sleep "$interval" + done + echo "" + err "$label not responding at $url after $retries attempts" + return 1 +} + +env_has_key() { + local file="$1" key="$2" + grep -q "^${key}=" "$file" 2>/dev/null +} + +env_get() { + local file="$1" key="$2" + grep "^${key}=" "$file" 2>/dev/null | head -1 | cut -d= -f2- +} + +env_set() { + local file="$1" key="$2" value="$3" + if env_has_key "$file" "$key"; then + if [[ "$OS" == "Darwin" ]]; then + sed -i '' "s|^${key}=.*|${key}=${value}|" "$file" + else + sed -i "s|^${key}=.*|${key}=${value}|" "$file" + fi + else + echo "${key}=${value}" >> "$file" + fi +} + +compose_cmd() { + local profiles="" + for p in "${COMPOSE_PROFILES[@]}"; do + profiles="$profiles --profile $p" + done + docker compose -f "$COMPOSE_FILE" $profiles "$@" +} + +# Compose command with only garage profile (for garage-only operations before full stack start) +compose_garage_cmd() { + docker compose -f "$COMPOSE_FILE" --profile garage "$@" +} + +# --- Parse arguments --- +MODEL_MODE="" # gpu or cpu (required, mutually exclusive) +OLLAMA_MODE="" # ollama-gpu or ollama-cpu (optional) +USE_GARAGE=false +USE_CADDY=false +CUSTOM_DOMAIN="" # optional domain for Let's Encrypt HTTPS +BUILD_IMAGES=false # build backend/frontend from source + +SKIP_NEXT=false +ARGS=("$@") +for i in "${!ARGS[@]}"; do + if [[ "$SKIP_NEXT" == "true" ]]; then + SKIP_NEXT=false + continue + fi + arg="${ARGS[$i]}" + case "$arg" in + --gpu) + [[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; } + MODEL_MODE="gpu" ;; + --cpu) + [[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; } + MODEL_MODE="cpu" ;; + --ollama-gpu) + [[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. 
Pick one."; exit 1; } + OLLAMA_MODE="ollama-gpu" ;; + --ollama-cpu) + [[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. Pick one."; exit 1; } + OLLAMA_MODE="ollama-cpu" ;; + --llm-model) + next_i=$((i + 1)) + if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then + err "--llm-model requires a model name (e.g. --llm-model mistral)" + exit 1 + fi + OLLAMA_MODEL="${ARGS[$next_i]}" + SKIP_NEXT=true ;; + --garage) USE_GARAGE=true ;; + --caddy) USE_CADDY=true ;; + --build) BUILD_IMAGES=true ;; + --domain) + next_i=$((i + 1)) + if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then + err "--domain requires a domain name (e.g. --domain reflector.example.com)" + exit 1 + fi + CUSTOM_DOMAIN="${ARGS[$next_i]}" + USE_CADDY=true # --domain implies --caddy + SKIP_NEXT=true ;; + *) + err "Unknown argument: $arg" + err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]" + exit 1 + ;; + esac +done + +if [[ -z "$MODEL_MODE" ]]; then + err "No model mode specified. You must choose --gpu or --cpu." 
+ err "" + err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--build]" + err "" + err "Specialized models (required):" + err " --gpu NVIDIA GPU for transcription/diarization/translation" + err " --cpu CPU-only (slower but works without GPU)" + err "" + err "Local LLM (optional):" + err " --ollama-gpu Local Ollama with GPU (for summarization/topics)" + err " --ollama-cpu Local Ollama on CPU (for summarization/topics)" + err " --llm-model MODEL Ollama model to download (default: qwen2.5:14b)" + err " (omit --ollama-* for external OpenAI-compatible LLM)" + err "" + err "Other options:" + err " --garage Local S3-compatible storage (Garage)" + err " --caddy Caddy reverse proxy with self-signed cert" + err " --domain DOMAIN Use a real domain with Let's Encrypt HTTPS (implies --caddy)" + err " --build Build backend/frontend images from source instead of pulling" + exit 1 +fi + +# Build profiles list — one profile per feature +COMPOSE_PROFILES=("$MODEL_MODE") +[[ -n "$OLLAMA_MODE" ]] && COMPOSE_PROFILES+=("$OLLAMA_MODE") +[[ "$USE_GARAGE" == "true" ]] && COMPOSE_PROFILES+=("garage") +[[ "$USE_CADDY" == "true" ]] && COMPOSE_PROFILES+=("caddy") + +# Derived flags +NEEDS_NVIDIA=false +[[ "$MODEL_MODE" == "gpu" ]] && NEEDS_NVIDIA=true +[[ "$OLLAMA_MODE" == "ollama-gpu" ]] && NEEDS_NVIDIA=true + +USES_OLLAMA=false +OLLAMA_SVC="" +[[ "$OLLAMA_MODE" == "ollama-gpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama" +[[ "$OLLAMA_MODE" == "ollama-cpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama-cpu" + +# Human-readable mode string for display +MODE_DISPLAY="$MODEL_MODE" +[[ -n "$OLLAMA_MODE" ]] && MODE_DISPLAY="$MODEL_MODE + $OLLAMA_MODE" + +# ========================================================= +# Step 0: Prerequisites +# ========================================================= +step_prerequisites() { + info "Step 0: Checking prerequisites" + + # Docker + if ! 
docker compose version 2>/dev/null | grep -qi compose; then + err "Docker Compose V2 not found." + err "Install Docker with Compose V2: https://docs.docker.com/engine/install/" + exit 1 + fi + if ! docker info &>/dev/null; then + err "Docker daemon not running." + exit 1 + fi + ok "Docker + Compose V2 ready" + + # NVIDIA GPU check + if [[ "$NEEDS_NVIDIA" == "true" ]]; then + if ! command -v nvidia-smi &>/dev/null || ! nvidia-smi &>/dev/null; then + err "NVIDIA GPU required (model=$MODEL_MODE, ollama=$OLLAMA_MODE) but nvidia-smi failed." + err "Install NVIDIA drivers and nvidia-container-toolkit." + exit 1 + fi + ok "NVIDIA GPU detected" + fi + + # Compose file + if [[ ! -f "$COMPOSE_FILE" ]]; then + err "docker-compose.selfhosted.yml not found at $COMPOSE_FILE" + err "Run this script from the repo root: ./scripts/setup-selfhosted.sh" + exit 1 + fi + + ok "Prerequisites OK (models=$MODEL_MODE, ollama=$OLLAMA_MODE, garage=$USE_GARAGE, caddy=$USE_CADDY)" +} + +# ========================================================= +# Step 1: Generate secrets +# ========================================================= +step_secrets() { + info "Step 1: Generating secrets" + + # These are used in later steps — generate once, reuse + if [[ -f "$SERVER_ENV" ]] && env_has_key "$SERVER_ENV" "SECRET_KEY"; then + SECRET_KEY=$(env_get "$SERVER_ENV" "SECRET_KEY") + if [[ "$SECRET_KEY" != "changeme"* ]]; then + ok "SECRET_KEY already set" + else + SECRET_KEY=$(openssl rand -hex 32) + fi + else + SECRET_KEY=$(openssl rand -hex 32) + fi + + if [[ -f "$WWW_ENV" ]] && env_has_key "$WWW_ENV" "NEXTAUTH_SECRET"; then + NEXTAUTH_SECRET=$(env_get "$WWW_ENV" "NEXTAUTH_SECRET") + if [[ "$NEXTAUTH_SECRET" != "changeme"* ]]; then + ok "NEXTAUTH_SECRET already set" + else + NEXTAUTH_SECRET=$(openssl rand -hex 32) + fi + else + NEXTAUTH_SECRET=$(openssl rand -hex 32) + fi + + ok "Secrets ready" +} + +# ========================================================= +# Step 2: Generate server/.env +# 
========================================================= +step_server_env() { + info "Step 2: Generating server/.env" + + if [[ -f "$SERVER_ENV" ]]; then + ok "server/.env already exists — ensuring required vars" + else + cp "$ROOT_DIR/server/.env.selfhosted.example" "$SERVER_ENV" + ok "Created server/.env from template" + fi + + # Core infrastructure + env_set "$SERVER_ENV" "DATABASE_URL" "postgresql+asyncpg://reflector:reflector@postgres:5432/reflector" + env_set "$SERVER_ENV" "REDIS_HOST" "redis" + env_set "$SERVER_ENV" "CELERY_BROKER_URL" "redis://redis:6379/1" + env_set "$SERVER_ENV" "CELERY_RESULT_BACKEND" "redis://redis:6379/1" + env_set "$SERVER_ENV" "SECRET_KEY" "$SECRET_KEY" + env_set "$SERVER_ENV" "AUTH_BACKEND" "none" + env_set "$SERVER_ENV" "PUBLIC_MODE" "true" + + # Public-facing URLs + local server_base_url + if [[ -n "$CUSTOM_DOMAIN" ]]; then + server_base_url="https://$CUSTOM_DOMAIN" + elif [[ "$USE_CADDY" == "true" ]]; then + if [[ -n "$PRIMARY_IP" ]]; then + server_base_url="https://$PRIMARY_IP" + else + server_base_url="https://localhost" + fi + else + if [[ -n "$PRIMARY_IP" ]]; then + server_base_url="http://$PRIMARY_IP" + else + server_base_url="http://localhost:1250" + fi + fi + env_set "$SERVER_ENV" "BASE_URL" "$server_base_url" + env_set "$SERVER_ENV" "CORS_ORIGIN" "$server_base_url" + + # WebRTC: advertise host IP in ICE candidates so browsers can reach the server + if [[ -n "$PRIMARY_IP" ]]; then + env_set "$SERVER_ENV" "WEBRTC_HOST" "$PRIMARY_IP" + fi + + # Specialized models (always via gpu/cpu container aliased as "transcription") + env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal" + env_set "$SERVER_ENV" "TRANSCRIPT_URL" "http://transcription:8000" + env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "selfhosted" + env_set "$SERVER_ENV" "DIARIZATION_ENABLED" "true" + env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal" + env_set "$SERVER_ENV" "DIARIZATION_URL" "http://transcription:8000" + env_set "$SERVER_ENV" "TRANSLATION_BACKEND" 
"modal" + env_set "$SERVER_ENV" "TRANSLATE_URL" "http://transcription:8000" + + # HuggingFace token for gated models (pyannote diarization) + # Written to root .env so docker compose picks it up for gpu/cpu containers + local root_env="$ROOT_DIR/.env" + local current_hf_token="${HF_TOKEN:-}" + if [[ -f "$root_env" ]] && env_has_key "$root_env" "HF_TOKEN"; then + current_hf_token=$(env_get "$root_env" "HF_TOKEN") + fi + if [[ -z "$current_hf_token" ]]; then + echo "" + warn "HF_TOKEN not set. Diarization will use a public model fallback." + warn "For best results, get a token at https://huggingface.co/settings/tokens" + warn "and accept pyannote licenses at https://huggingface.co/pyannote/speaker-diarization-3.1" + read -rp " HuggingFace token (or press Enter to skip): " current_hf_token + fi + if [[ -n "$current_hf_token" ]]; then + touch "$root_env" + env_set "$root_env" "HF_TOKEN" "$current_hf_token" + export HF_TOKEN="$current_hf_token" + ok "HF_TOKEN configured" + else + touch "$root_env" + env_set "$root_env" "HF_TOKEN" "" + ok "HF_TOKEN skipped (using public model fallback)" + fi + + # LLM configuration + if [[ "$USES_OLLAMA" == "true" ]]; then + local llm_host="$OLLAMA_SVC" + env_set "$SERVER_ENV" "LLM_URL" "http://${llm_host}:11434/v1" + env_set "$SERVER_ENV" "LLM_MODEL" "$OLLAMA_MODEL" + env_set "$SERVER_ENV" "LLM_API_KEY" "not-needed" + ok "LLM configured for local Ollama ($llm_host, model=$OLLAMA_MODEL)" + else + # Check if user already configured LLM + local current_llm_url="" + if env_has_key "$SERVER_ENV" "LLM_URL"; then + current_llm_url=$(env_get "$SERVER_ENV" "LLM_URL") + fi + if [[ -z "$current_llm_url" ]] || [[ "$current_llm_url" == "http://host.docker.internal"* ]]; then + warn "LLM not configured. Summarization and topic detection will NOT work." 
+ warn "Edit server/.env and set LLM_URL, LLM_API_KEY, LLM_MODEL" + warn "Example: LLM_URL=https://api.openai.com/v1 LLM_MODEL=gpt-4o-mini" + else + ok "LLM already configured: $current_llm_url" + fi + fi + + ok "server/.env ready" +} + +# ========================================================= +# Step 3: Generate www/.env +# ========================================================= +step_www_env() { + info "Step 3: Generating www/.env" + + if [[ -f "$WWW_ENV" ]]; then + ok "www/.env already exists — ensuring required vars" + else + cp "$ROOT_DIR/www/.env.selfhosted.example" "$WWW_ENV" + ok "Created www/.env from template" + fi + + # Public-facing URL for frontend + local base_url + if [[ -n "$CUSTOM_DOMAIN" ]]; then + base_url="https://$CUSTOM_DOMAIN" + elif [[ "$USE_CADDY" == "true" ]]; then + if [[ -n "$PRIMARY_IP" ]]; then + base_url="https://$PRIMARY_IP" + else + base_url="https://localhost" + fi + else + # No Caddy — user's proxy handles SSL. Use http for now, they'll override. + if [[ -n "$PRIMARY_IP" ]]; then + base_url="http://$PRIMARY_IP" + else + base_url="http://localhost" + fi + fi + + env_set "$WWW_ENV" "SITE_URL" "$base_url" + env_set "$WWW_ENV" "NEXTAUTH_URL" "$base_url" + env_set "$WWW_ENV" "NEXTAUTH_SECRET" "$NEXTAUTH_SECRET" + env_set "$WWW_ENV" "API_URL" "$base_url" + env_set "$WWW_ENV" "WEBSOCKET_URL" "auto" + env_set "$WWW_ENV" "SERVER_API_URL" "http://server:1250" + env_set "$WWW_ENV" "KV_URL" "redis://redis:6379" + env_set "$WWW_ENV" "FEATURE_REQUIRE_LOGIN" "false" + + ok "www/.env ready (URL=$base_url)" +} + +# ========================================================= +# Step 4: Storage setup +# ========================================================= +step_storage() { + info "Step 4: Storage setup" + + if [[ "$USE_GARAGE" == "true" ]]; then + step_garage + else + step_external_s3 + fi +} + +step_garage() { + info "Configuring Garage (local S3)" + + # Generate garage.toml from template + local garage_toml="$ROOT_DIR/scripts/garage.toml" 
+ local garage_runtime="$ROOT_DIR/data/garage.toml" + mkdir -p "$ROOT_DIR/data" + + if [[ -d "$garage_runtime" ]]; then + rm -rf "$garage_runtime" + fi + if [[ ! -f "$garage_runtime" ]]; then + local rpc_secret + rpc_secret=$(openssl rand -hex 32) + sed "s|__GARAGE_RPC_SECRET__|${rpc_secret}|" "$garage_toml" > "$garage_runtime" + ok "Generated data/garage.toml" + else + ok "data/garage.toml already exists" + fi + + # Start garage container only + compose_garage_cmd up -d garage + + # Wait for admin API (port 3903 exposed to host for health checks) + local garage_ready=false + for i in $(seq 1 30); do + if curl -sf http://localhost:3903/metrics > /dev/null 2>&1; then + garage_ready=true + break + fi + echo -ne "\r Waiting for Garage admin API... ($i/30)" + sleep 2 + done + echo "" + if [[ "$garage_ready" != "true" ]]; then + err "Garage not responding. Check: docker compose logs garage" + exit 1 + fi + + # Layout + local node_id + node_id=$(compose_garage_cmd exec -T garage /garage node id -q 2>/dev/null | tr -d '[:space:]') + local layout_status + layout_status=$(compose_garage_cmd exec -T garage /garage layout show 2>&1 || true) + if echo "$layout_status" | grep -q "No nodes"; then + compose_garage_cmd exec -T garage /garage layout assign "$node_id" -c 1G -z dc1 + compose_garage_cmd exec -T garage /garage layout apply --version 1 + fi + + # Bucket + if ! 
compose_garage_cmd exec -T garage /garage bucket info reflector-media &>/dev/null; then + compose_garage_cmd exec -T garage /garage bucket create reflector-media + fi + + # Key + local created_key=false + if compose_garage_cmd exec -T garage /garage key info reflector &>/dev/null; then + ok "Key 'reflector' already exists" + else + KEY_OUTPUT=$(compose_garage_cmd exec -T garage /garage key create reflector) + created_key=true + fi + + # Permissions + compose_garage_cmd exec -T garage /garage bucket allow reflector-media --read --write --key reflector + + # Write S3 credentials to server/.env + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_BACKEND" "aws" + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL" "http://garage:3900" + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_BUCKET_NAME" "reflector-media" + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_REGION" "garage" + if [[ "$created_key" == "true" ]]; then + local key_id key_secret + key_id=$(echo "$KEY_OUTPUT" | grep -i "key id" | awk '{print $NF}') + key_secret=$(echo "$KEY_OUTPUT" | grep -i "secret key" | awk '{print $NF}') + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID" "$key_id" + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY" "$key_secret" + fi + + ok "Garage storage ready" +} + +step_external_s3() { + info "Checking external S3 configuration" + + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_BACKEND" "aws" + + local s3_vars=("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID" "TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY" "TRANSCRIPT_STORAGE_AWS_BUCKET_NAME" "TRANSCRIPT_STORAGE_AWS_REGION") + local missing=() + + for var in "${s3_vars[@]}"; do + if ! env_has_key "$SERVER_ENV" "$var" || [[ -z "$(env_get "$SERVER_ENV" "$var")" ]]; then + missing+=("$var") + fi + done + + if [[ ${#missing[@]} -gt 0 ]]; then + warn "S3 storage is REQUIRED. 
The following vars are missing in server/.env:" + for var in "${missing[@]}"; do + warn " $var" + done + echo "" + info "Enter S3 credentials (or press Ctrl+C to abort and edit server/.env manually):" + echo "" + + for var in "${missing[@]}"; do + local prompt_label + case "$var" in + *ACCESS_KEY_ID) prompt_label="Access Key ID" ;; + *SECRET_ACCESS_KEY) prompt_label="Secret Access Key" ;; + *BUCKET_NAME) prompt_label="Bucket Name" ;; + *REGION) prompt_label="Region (e.g. us-east-1)" ;; + esac + local value="" + while [[ -z "$value" ]]; do + read -rp " $prompt_label: " value + done + env_set "$SERVER_ENV" "$var" "$value" + done + + # Optional: endpoint URL for non-AWS S3 + echo "" + read -rp " S3 Endpoint URL (leave empty for AWS, or enter for MinIO/etc.): " endpoint_url + if [[ -n "$endpoint_url" ]]; then + env_set "$SERVER_ENV" "TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL" "$endpoint_url" + fi + fi + + ok "S3 storage configured" +} + +# ========================================================= +# Step 5: Caddyfile +# ========================================================= +step_caddyfile() { + if [[ "$USE_CADDY" != "true" ]]; then + return + fi + + info "Step 5: Caddyfile setup" + + local caddyfile="$ROOT_DIR/Caddyfile" + if [[ -d "$caddyfile" ]]; then + rm -rf "$caddyfile" + fi + + if [[ -n "$CUSTOM_DOMAIN" ]]; then + # Real domain: Caddy auto-provisions Let's Encrypt certificate + cat > "$caddyfile" << CADDYEOF +# Generated by setup-selfhosted.sh — Let's Encrypt HTTPS for $CUSTOM_DOMAIN +$CUSTOM_DOMAIN { + handle /v1/* { + reverse_proxy server:1250 + } + handle /health { + reverse_proxy server:1250 + } + handle { + reverse_proxy web:3000 + } +} +CADDYEOF + ok "Created Caddyfile for $CUSTOM_DOMAIN (Let's Encrypt auto-HTTPS)" + elif [[ -n "$PRIMARY_IP" ]]; then + # No domain, IP only: catch-all :443 with self-signed cert + # (IP connections don't send SNI, so we can't match by address) + cat > "$caddyfile" << CADDYEOF +# Generated by setup-selfhosted.sh — self-signed 
cert for IP access +:443 { + tls internal + handle /v1/* { + reverse_proxy server:1250 + } + handle /health { + reverse_proxy server:1250 + } + handle { + reverse_proxy web:3000 + } +} +CADDYEOF + ok "Created Caddyfile for $PRIMARY_IP (catch-all :443 with self-signed cert)" + elif [[ ! -f "$caddyfile" ]]; then + cp "$ROOT_DIR/Caddyfile.selfhosted.example" "$caddyfile" + ok "Created Caddyfile from template" + else + ok "Caddyfile already exists" + fi +} + +# ========================================================= +# Step 6: Start services +# ========================================================= +step_services() { + info "Step 6: Starting Docker services" + + # Build GPU/CPU image from source (always needed — no prebuilt image) + local build_svc="$MODEL_MODE" + info "Building $build_svc image (first build downloads ML models, may take a while)..." + compose_cmd build "$build_svc" + ok "$build_svc image built" + + # Build or pull backend and frontend images + if [[ "$BUILD_IMAGES" == "true" ]]; then + info "Building backend image from source (server, worker, beat)..." + compose_cmd build server worker beat + ok "Backend image built" + info "Building frontend image from source..." + compose_cmd build web + ok "Frontend image built" + else + info "Pulling latest backend and frontend images..." 
+ compose_cmd pull server web || warn "Pull failed — using cached images" + fi + + # Start all services + compose_cmd up -d + ok "Containers started" + + # Quick sanity check + sleep 3 + local exited + exited=$(compose_cmd ps -a --format '{{.Name}} {{.Status}}' 2>/dev/null \ + | grep -i 'exit' || true) + if [[ -n "$exited" ]]; then + warn "Some containers exited immediately:" + echo "$exited" | while read -r line; do warn " $line"; done + dump_diagnostics + fi +} + +# ========================================================= +# Step 7: Health checks +# ========================================================= +step_health() { + info "Step 7: Health checks" + + # Specialized model service (gpu or cpu) + local model_svc="$MODEL_MODE" + + info "Waiting for $model_svc service (first start downloads ~1GB of models)..." + local model_ok=false + for i in $(seq 1 120); do + if curl -sf http://localhost:8000/docs > /dev/null 2>&1; then + model_ok=true + break + fi + echo -ne "\r Waiting for $model_svc service... ($i/120)" + sleep 5 + done + echo "" + if [[ "$model_ok" == "true" ]]; then + ok "$model_svc service healthy (transcription + diarization)" + else + warn "$model_svc service not ready yet — it will keep loading in the background" + warn "Check with: docker compose -f docker-compose.selfhosted.yml logs $model_svc" + fi + + # Ollama (if applicable) + if [[ "$USES_OLLAMA" == "true" ]]; then + info "Waiting for Ollama service..." + local ollama_ok=false + for i in $(seq 1 60); do + if curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; then + ollama_ok=true + break + fi + echo -ne "\r Waiting for Ollama... 
($i/60)" + sleep 3 + done + echo "" + if [[ "$ollama_ok" == "true" ]]; then + ok "Ollama service healthy" + + # Pull model if not present + if compose_cmd exec -T "$OLLAMA_SVC" ollama list 2>/dev/null | awk '{print $1}' | grep -qxF "$OLLAMA_MODEL"; then + ok "Model $OLLAMA_MODEL already pulled" + else + info "Pulling model $OLLAMA_MODEL (this may take a while)..." + compose_cmd exec -T "$OLLAMA_SVC" ollama pull "$OLLAMA_MODEL" + ok "Model $OLLAMA_MODEL pulled" + fi + else + warn "Ollama not ready yet. Check: docker compose logs $OLLAMA_SVC" + fi + fi + + # Server API + info "Waiting for Server API (first run includes database migrations)..." + local server_ok=false + for i in $(seq 1 90); do + local svc_status + svc_status=$(compose_cmd ps server --format '{{.Status}}' 2>/dev/null || true) + if [[ -z "$svc_status" ]] || echo "$svc_status" | grep -qi 'exit'; then + echo "" + err "Server container exited unexpectedly" + dump_diagnostics server + exit 1 + fi + if curl -sf http://localhost:1250/health > /dev/null 2>&1; then + server_ok=true + break + fi + echo -ne "\r Waiting for Server API... ($i/90)" + sleep 5 + done + echo "" + if [[ "$server_ok" == "true" ]]; then + ok "Server API healthy" + else + err "Server API not ready after ~7 minutes" + dump_diagnostics server + exit 1 + fi + + # Frontend + info "Waiting for Frontend..." + local web_ok=false + for i in $(seq 1 30); do + if curl -sf http://localhost:3000 > /dev/null 2>&1; then + web_ok=true + break + fi + echo -ne "\r Waiting for Frontend... ($i/30)" + sleep 3 + done + echo "" + if [[ "$web_ok" == "true" ]]; then + ok "Frontend healthy" + else + warn "Frontend not responding. Check: docker compose logs web" + fi + + # Caddy + if [[ "$USE_CADDY" == "true" ]]; then + sleep 2 + if curl -sfk "https://localhost" > /dev/null 2>&1; then + ok "Caddy proxy healthy" + else + warn "Caddy proxy not responding. 
Check: docker compose logs caddy" + fi + fi + + # LLM warning for non-Ollama modes + if [[ "$USES_OLLAMA" == "false" ]]; then + local llm_url="" + if env_has_key "$SERVER_ENV" "LLM_URL"; then + llm_url=$(env_get "$SERVER_ENV" "LLM_URL") + fi + if [[ -z "$llm_url" ]]; then + echo "" + warn "LLM is not configured. Transcription will work, but:" + warn " - Summaries will NOT be generated" + warn " - Topics will NOT be detected" + warn " - Titles will NOT be auto-generated" + warn "Configure in server/.env: LLM_URL, LLM_API_KEY, LLM_MODEL" + fi + fi +} + +# ========================================================= +# Main +# ========================================================= +main() { + echo "" + echo "==========================================" + echo " Reflector — Self-Hosted Production Setup" + echo "==========================================" + echo "" + echo " Models: $MODEL_MODE" + echo " LLM: ${OLLAMA_MODE:-external}" + echo " Garage: $USE_GARAGE" + echo " Caddy: $USE_CADDY" + [[ -n "$CUSTOM_DOMAIN" ]] && echo " Domain: $CUSTOM_DOMAIN" + [[ "$BUILD_IMAGES" == "true" ]] && echo " Build: from source" + echo "" + + # Detect primary IP + PRIMARY_IP="" + if [[ "$OS" == "Linux" ]]; then + PRIMARY_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true) + if [[ "$PRIMARY_IP" == "127."* ]] || [[ -z "$PRIMARY_IP" ]]; then + PRIMARY_IP=$(ip -4 route get 1 2>/dev/null | sed -n 's/.*src \([0-9.]*\).*/\1/p' || true) + fi + fi + + # Touch env files so compose doesn't complain about missing env_file + mkdir -p "$ROOT_DIR/data" + touch "$SERVER_ENV" "$WWW_ENV" + + # Ensure garage.toml exists if garage profile is active (compose needs it for volume mount) + if [[ "$USE_GARAGE" == "true" ]]; then + local garage_runtime="$ROOT_DIR/data/garage.toml" + if [[ ! 
-f "$garage_runtime" ]]; then + local rpc_secret + rpc_secret=$(openssl rand -hex 32) + sed "s|__GARAGE_RPC_SECRET__|${rpc_secret}|" "$ROOT_DIR/scripts/garage.toml" > "$garage_runtime" + fi + fi + + step_prerequisites + echo "" + step_secrets + echo "" + step_server_env + echo "" + step_www_env + echo "" + step_storage + echo "" + step_caddyfile + echo "" + step_services + echo "" + step_health + + echo "" + echo "==========================================" + echo -e " ${GREEN}Reflector is running!${NC}" + echo "==========================================" + echo "" + if [[ "$USE_CADDY" == "true" ]]; then + if [[ -n "$CUSTOM_DOMAIN" ]]; then + echo " App: https://$CUSTOM_DOMAIN" + echo " API: https://$CUSTOM_DOMAIN/v1/" + elif [[ -n "$PRIMARY_IP" ]]; then + echo " App: https://$PRIMARY_IP (accept self-signed cert in browser)" + echo " API: https://$PRIMARY_IP/v1/" + echo " Local: https://localhost" + else + echo " App: https://localhost (accept self-signed cert in browser)" + echo " API: https://localhost/v1/" + fi + else + echo " No Caddy — point your reverse proxy at:" + echo " Frontend: web:3000 (or localhost:3000 from host)" + echo " API: server:1250 (or localhost:1250 from host)" + fi + echo "" + echo " Models: $MODEL_MODE (transcription/diarization/translation)" + [[ "$USE_GARAGE" == "true" ]] && echo " Storage: Garage (local S3)" + [[ "$USE_GARAGE" != "true" ]] && echo " Storage: External S3" + [[ "$USES_OLLAMA" == "true" ]] && echo " LLM: Ollama ($OLLAMA_MODEL) for summarization/topics" + [[ "$USES_OLLAMA" != "true" ]] && echo " LLM: External (configure in server/.env)" + echo "" + echo " To stop: docker compose -f docker-compose.selfhosted.yml down" + echo " To re-run: ./scripts/setup-selfhosted.sh $*" + echo "" +} + +main "$@" diff --git a/server/.env.selfhosted.example b/server/.env.selfhosted.example new file mode 100644 index 00000000..dd9ef7d4 --- /dev/null +++ b/server/.env.selfhosted.example @@ -0,0 +1,112 @@ +# 
=======================================================
+# Reflector Self-Hosted Production — Backend Configuration
+# Generated by: ./scripts/setup-selfhosted.sh
+# Reference: server/reflector/settings.py
+# =======================================================
+
+# =======================================================
+# Database & Infrastructure
+# Pre-filled for Docker internal networking (docker-compose.selfhosted.yml)
+# =======================================================
+DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
+REDIS_HOST=redis
+REDIS_PORT=6379
+CELERY_BROKER_URL=redis://redis:6379/1
+CELERY_RESULT_BACKEND=redis://redis:6379/1
+
+# Secret key — auto-generated by setup script
+# Generate manually with: openssl rand -hex 32
+SECRET_KEY=changeme-generate-a-secure-random-string
+
+# =======================================================
+# Authentication
+# Disabled by default. Enable Authentik for multi-user access.
+# See docsv2/selfhosted-production.md for setup instructions.
+# =======================================================
+AUTH_BACKEND=none
+# AUTH_BACKEND=jwt
+# AUTH_JWT_AUDIENCE=
+
+# =======================================================
+# Specialized Models (Transcription, Diarization, Translation)
+# These run in the gpu/cpu container — NOT an LLM.
+# The "modal" backend means "HTTP API client" — it talks to
+# the self-hosted container, not Modal.com cloud.
+# =======================================================
+TRANSCRIPT_BACKEND=modal
+TRANSCRIPT_URL=http://transcription:8000
+TRANSCRIPT_MODAL_API_KEY=selfhosted
+
+DIARIZATION_ENABLED=true
+DIARIZATION_BACKEND=modal
+DIARIZATION_URL=http://transcription:8000
+
+TRANSLATION_BACKEND=modal
+TRANSLATE_URL=http://transcription:8000
+
+# Hugging Face token — optional, for gated models (e.g. pyannote).
+# Falls back to public S3 model bundle if not set.
+# HF_TOKEN=hf_xxxxx + +# ======================================================= +# LLM for Summarization & Topic Detection +# Only summaries and topics use an LLM. Everything else +# (transcription, diarization, translation) uses specialized models above. +# +# Supports any OpenAI-compatible endpoint. +# Auto-configured by setup script if using --ollama-gpu or --ollama-cpu. +# For --gpu or --cpu modes, you MUST configure an external LLM. +# ======================================================= + +# --- Option A: External OpenAI-compatible API --- +# LLM_URL=https://api.openai.com/v1 +# LLM_API_KEY=sk-your-api-key +# LLM_MODEL=gpt-4o-mini + +# --- Option B: Local Ollama (auto-set by --ollama-gpu/--ollama-cpu) --- +# LLM_URL=http://ollama:11434/v1 +# LLM_API_KEY=not-needed +# LLM_MODEL=llama3.1 + +LLM_CONTEXT_WINDOW=16000 + +# ======================================================= +# S3 Storage (REQUIRED) +# Where to store audio files and transcripts. +# +# Option A: Use --garage flag (auto-configured by setup script) +# Option B: Any S3-compatible endpoint (AWS, MinIO, etc.) +# Set TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL for non-AWS endpoints. +# ======================================================= +TRANSCRIPT_STORAGE_BACKEND=aws +TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID= +TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY= +TRANSCRIPT_STORAGE_AWS_BUCKET_NAME=reflector-media +TRANSCRIPT_STORAGE_AWS_REGION=us-east-1 + +# For non-AWS S3-compatible endpoints (Garage, MinIO, etc.): +# TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=http://garage:3900 + +# ======================================================= +# Daily.co Live Rooms (Optional) +# Enable real-time meeting rooms with Daily.co integration. 
+# Requires a Daily.co account: https://www.daily.co/ +# ======================================================= +# DEFAULT_VIDEO_PLATFORM=daily +# DAILY_API_KEY=your-daily-api-key +# DAILY_SUBDOMAIN=your-subdomain +# DAILY_WEBHOOK_SECRET=your-daily-webhook-secret +# DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco +# DAILYCO_STORAGE_AWS_REGION=us-east-1 +# DAILYCO_STORAGE_AWS_ROLE_ARN=arn:aws:iam::role/DailyCoAccess + +# ======================================================= +# Feature Flags +# ======================================================= +PUBLIC_MODE=true +# FEATURE_ROOMS=true + +# ======================================================= +# Sentry (Optional) +# ======================================================= +# SENTRY_DSN= diff --git a/server/reflector/asynctask.py b/server/reflector/asynctask.py index 36a5fd1d..2f5e95bd 100644 --- a/server/reflector/asynctask.py +++ b/server/reflector/asynctask.py @@ -4,8 +4,9 @@ from uuid import uuid4 from celery import current_task -from reflector.db import get_database +from reflector.db import _database_context, get_database from reflector.llm import llm_session_id +from reflector.ws_manager import reset_ws_manager def asynctask(f): @@ -20,6 +21,14 @@ def asynctask(f): return await f(*args, **kwargs) finally: await database.disconnect() + _database_context.set(None) + + if current_task: + # Reset cached connections before each Celery task. + # Each asyncio.run() creates a new event loop, making connections + # from previous tasks stale ("Future attached to a different loop"). 
+ _database_context.set(None) + reset_ws_manager() coro = run_with_db() if current_task: diff --git a/www/.env.selfhosted.example b/www/.env.selfhosted.example new file mode 100644 index 00000000..fecf3072 --- /dev/null +++ b/www/.env.selfhosted.example @@ -0,0 +1,49 @@ +# ======================================================= +# Reflector Self-Hosted Production — Frontend Configuration +# Generated by: ./scripts/setup-selfhosted.sh +# ======================================================= + +# Site URL — set to your domain or server IP +# The setup script auto-detects this on Linux. +SITE_URL=https://localhost +NEXTAUTH_URL=https://localhost +NEXTAUTH_SECRET=changeme-generate-a-secure-random-string + +# API URLs +# Public-facing (what the browser uses): +API_URL=https://localhost +WEBSOCKET_URL=auto + +# Internal Docker network (server-side rendering): +SERVER_API_URL=http://server:1250 +KV_URL=redis://redis:6379 + +# Authentication +# Set to true when Authentik is configured +FEATURE_REQUIRE_LOGIN=false + +# Nullify auth vars when not using Authentik +AUTHENTIK_ISSUER= +AUTHENTIK_REFRESH_TOKEN_URL= + +# ======================================================= +# Authentik OAuth/OIDC (Optional) +# Uncomment and configure when enabling authentication. +# See docsv2/selfhosted-production.md for setup instructions. 
+# ======================================================= +# FEATURE_REQUIRE_LOGIN=true +# AUTHENTIK_ISSUER=https://authentik.example.com/application/o/reflector +# AUTHENTIK_REFRESH_TOKEN_URL=https://authentik.example.com/application/o/token/ +# AUTHENTIK_CLIENT_ID=your-client-id +# AUTHENTIK_CLIENT_SECRET=your-client-secret + +# ======================================================= +# Feature Flags +# ======================================================= +# FEATURE_ROOMS=true +# FEATURE_BROWSE=true + +# ======================================================= +# Sentry (Optional) +# ======================================================= +# SENTRY_DSN= diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 00000000..fb57ccd1 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,4 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + +