test: full integration tests (#916)

* test: full integration tests

* fix: add env vars as secrets in CI
This commit is contained in:
Juan Diego García
2026-03-18 15:29:21 -05:00
committed by GitHub
parent a9200d35bf
commit 9a2f973a2e
16 changed files with 1098 additions and 3 deletions

139
.github/workflows/integration_tests.yml vendored Normal file
View File

@@ -0,0 +1,139 @@
name: Integration Tests

# Manually-triggered end-to-end run: builds the full stack from
# docker-compose.integration.yml, provisions Garage and Hatchet, then runs
# pytest inside the test-runner container.
on:
  workflow_dispatch:
    inputs:
      llm_model:
        description: "LLM model name (overrides LLM_MODEL secret)"
        required: false
        default: ""
        type: string

jobs:
  integration:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4

      - name: Start infrastructure services
        working-directory: server/tests
        env:
          LLM_URL: ${{ secrets.LLM_URL }}
          LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          docker compose -f docker-compose.integration.yml up -d --build postgres redis garage hatchet mock-daily

      - name: Set up Garage bucket and keys
        working-directory: server/tests
        run: |
          GARAGE="docker compose -f docker-compose.integration.yml exec -T garage /garage"
          # Hardcoded test credentials — containers are ephemeral, destroyed after the run.
          GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
          GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
          echo "Waiting for Garage to be healthy..."
          garage_ready=""
          for i in $(seq 1 60); do
            if $GARAGE stats &>/dev/null; then garage_ready=1; break; fi
            sleep 2
          done
          if [ -z "$garage_ready" ]; then
            echo "ERROR: Garage did not become healthy in time"
            exit 1
          fi
          echo "Setting up Garage..."
          NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
          LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
          if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
            $GARAGE layout assign "$NODE_ID" -c 1G -z dc1
            $GARAGE layout apply --version 1
          fi
          $GARAGE bucket info reflector-media &>/dev/null || $GARAGE bucket create reflector-media
          if ! $GARAGE key info reflector-test &>/dev/null; then
            $GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
            $GARAGE key rename "$GARAGE_KEY_ID" reflector-test
          fi
          $GARAGE bucket allow reflector-media --read --write --key reflector-test
          # Persist the credentials to GITHUB_ENV: shell variables do not
          # survive across steps, and the "Start backend services" step reads
          # these values via the env context. Without this they expand empty.
          echo "GARAGE_KEY_ID=${GARAGE_KEY_ID}" >> "$GITHUB_ENV"
          echo "GARAGE_KEY_SECRET=${GARAGE_KEY_SECRET}" >> "$GITHUB_ENV"

      - name: Wait for Hatchet and generate API token
        working-directory: server/tests
        run: |
          echo "Waiting for Hatchet to be healthy..."
          hatchet_ready=""
          for i in $(seq 1 90); do
            if docker compose -f docker-compose.integration.yml exec -T hatchet curl -sf http://localhost:8888/api/live &>/dev/null; then
              echo "Hatchet is ready."
              hatchet_ready=1
              break
            fi
            sleep 2
          done
          if [ -z "$hatchet_ready" ]; then
            echo "ERROR: Hatchet did not become healthy in time"
            exit 1
          fi
          echo "Generating Hatchet API token..."
          HATCHET_OUTPUT=$(docker compose -f docker-compose.integration.yml exec -T hatchet \
            /hatchet-admin token create --config /config --name integration-test 2>&1)
          # The admin CLI prints the JWT among other output; extract by its "eyJ" prefix.
          HATCHET_TOKEN=$(echo "$HATCHET_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
          if [ -z "$HATCHET_TOKEN" ]; then
            echo "ERROR: Failed to extract Hatchet JWT token"
            exit 1
          fi
          echo "HATCHET_CLIENT_TOKEN=${HATCHET_TOKEN}" >> "$GITHUB_ENV"

      - name: Start backend services
        working-directory: server/tests
        env:
          LLM_URL: ${{ secrets.LLM_URL }}
          LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          # Export garage and hatchet credentials for backend services
          export GARAGE_KEY_ID="${{ env.GARAGE_KEY_ID }}"
          export GARAGE_KEY_SECRET="${{ env.GARAGE_KEY_SECRET }}"
          export HATCHET_CLIENT_TOKEN="${{ env.HATCHET_CLIENT_TOKEN }}"
          docker compose -f docker-compose.integration.yml up -d \
            server worker hatchet-worker-cpu hatchet-worker-llm test-runner

      - name: Wait for server health check
        working-directory: server/tests
        run: |
          echo "Waiting for server to be healthy..."
          server_ready=""
          for i in $(seq 1 60); do
            if docker compose -f docker-compose.integration.yml exec -T test-runner \
              curl -sf http://server:1250/health &>/dev/null; then
              echo "Server is ready."
              server_ready=1
              break
            fi
            sleep 3
          done
          if [ -z "$server_ready" ]; then
            echo "ERROR: server did not become healthy in time"
            exit 1
          fi

      - name: Run DB migrations
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml exec -T server \
            uv run alembic upgrade head

      - name: Run integration tests
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml exec -T test-runner \
            uv run pytest tests/integration/ -v -x

      - name: Collect logs on failure
        if: failure()
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml logs --tail=500 > integration-logs.txt 2>&1

      - name: Upload logs artifact
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: integration-logs
          path: server/tests/integration-logs.txt
          retention-days: 7

      - name: Teardown
        if: always()
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml down -v --remove-orphans

1
.gitignore vendored
View File

@@ -25,3 +25,4 @@ www/.env.production
opencode.json
vibedocs/
server/tests/integration/logs/

View File

@@ -160,6 +160,21 @@ All endpoints prefixed `/v1/`:
- **Frontend**: No current test suite - opportunities for Jest/React Testing Library
- **Coverage**: Backend maintains test coverage reports in `htmlcov/`
### Integration Tests (DO NOT run unless explicitly asked)
There are end-to-end integration tests in `server/tests/integration/` that spin up the full stack (PostgreSQL, Redis, Hatchet, Garage, mock-daily, server, workers) via Docker Compose and exercise real processing pipelines. These tests are:
- `test_file_pipeline.py` — File upload → FilePipeline
- `test_live_pipeline.py` — WebRTC stream → LivePostPipeline
- `test_multitrack_pipeline.py` — Multitrack → DailyMultitrackPipeline
**Important:**
- These tests are **excluded** from normal `uv run pytest` runs via `--ignore=tests/integration` in pyproject.toml.
- Do **NOT** run them as part of verification, code review, or general testing unless the user explicitly asks.
- They require Docker, external LLM credentials, and HuggingFace token — they cannot run in a regular test environment.
- To run locally: `./scripts/run-integration-tests.sh` (requires env vars: `LLM_URL`, `LLM_API_KEY`, `HF_TOKEN`).
- In CI: triggered manually via the "Integration Tests" GitHub Actions workflow (`workflow_dispatch`).
## GPU Processing
Modal.com integration for scalable ML processing:

156
scripts/run-integration-tests.sh Executable file
View File

@@ -0,0 +1,156 @@
#!/usr/bin/env bash
#
# Run integration tests locally.
#
# Spins up the full stack via Docker Compose, runs the three integration tests,
# and tears everything down afterward.
#
# Required environment variables:
#   LLM_URL     — OpenAI-compatible LLM endpoint (e.g. https://api.openai.com/v1)
#   LLM_API_KEY — API key for the LLM endpoint
#   HF_TOKEN    — HuggingFace token for pyannote gated models
#
# Optional:
#   LLM_MODEL   — Model name (default: qwen2.5:14b)
#
# Usage:
#   export LLM_URL="https://api.openai.com/v1"
#   export LLM_API_KEY="sk-..."
#   export HF_TOKEN="hf_..."
#   ./scripts/run-integration-tests.sh
#
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
COMPOSE_DIR="$REPO_ROOT/server/tests"
COMPOSE_FILE="$COMPOSE_DIR/docker-compose.integration.yml"
# Expanded unquoted below — assumes the repo path contains no spaces.
COMPOSE="docker compose -f $COMPOSE_FILE"

# ── Validate required env vars ──────────────────────────────────────────────
for var in LLM_URL LLM_API_KEY HF_TOKEN; do
  if [[ -z "${!var:-}" ]]; then
    echo "ERROR: $var is not set. See script header for required env vars."
    exit 1
  fi
done
export LLM_MODEL="${LLM_MODEL:-qwen2.5:14b}"

# ── Helpers ─────────────────────────────────────────────────────────────────
info() { echo -e "\n\033[1;34m▸ $*\033[0m"; }
ok()   { echo -e "\033[1;32m  ✓ $*\033[0m"; }
fail() { echo -e "\033[1;31m  ✗ $*\033[0m"; }

# wait_for DESC CMD [MAX]: poll CMD until it succeeds.
# NOTE(review): sleeps 2s per attempt, so the effective timeout is ~2×MAX
# seconds, not "${max}s" as the message says — confirm intended units.
wait_for() {
  local desc="$1" cmd="$2" max="${3:-60}"
  info "Waiting for $desc (up to ${max}s)..."
  for i in $(seq 1 "$max"); do
    if eval "$cmd" &>/dev/null; then
      ok "$desc is ready"
      return 0
    fi
    sleep 2
  done
  fail "$desc did not become ready within ${max}s"
  return 1    # under `set -e` this aborts the script (trap still runs)
}

cleanup() {
  info "Tearing down..."
  $COMPOSE down -v --remove-orphans 2>/dev/null || true
}
# Always tear down on exit
trap cleanup EXIT

# ── Step 1: Build and start infrastructure ──────────────────────────────────
info "Building and starting infrastructure services..."
$COMPOSE up -d --build postgres redis garage hatchet mock-daily

# ── Step 2: Set up Garage (S3 bucket + keys) ───────────────────────────────
wait_for "Garage" "$COMPOSE exec -T garage /garage stats" 60
info "Setting up Garage bucket and keys..."
GARAGE="$COMPOSE exec -T garage /garage"
# Hardcoded test credentials — ephemeral containers, destroyed after tests
export GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
export GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
# Layout
NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
  $GARAGE layout assign "$NODE_ID" -c 1G -z dc1
  $GARAGE layout apply --version 1
fi
# Bucket
$GARAGE bucket info reflector-media >/dev/null 2>&1 || $GARAGE bucket create reflector-media
# Import key with known credentials
if ! $GARAGE key info reflector-test >/dev/null 2>&1; then
  $GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
  $GARAGE key rename "$GARAGE_KEY_ID" reflector-test
fi
# Permissions
$GARAGE bucket allow reflector-media --read --write --key reflector-test
ok "Garage ready with hardcoded test credentials"

# ── Step 3: Generate Hatchet API token ──────────────────────────────────────
wait_for "Hatchet" "$COMPOSE exec -T hatchet curl -sf http://localhost:8888/api/live" 90
info "Generating Hatchet API token..."
HATCHET_TOKEN_OUTPUT=$($COMPOSE exec -T hatchet /hatchet-admin token create --config /config --name local-test 2>&1)
# Extract the JWT by its "eyJ" prefix from mixed CLI output.
export HATCHET_CLIENT_TOKEN=$(echo "$HATCHET_TOKEN_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
if [[ -z "$HATCHET_CLIENT_TOKEN" ]]; then
  fail "Failed to extract Hatchet token (JWT not found in output)"
  echo "  Output was: $HATCHET_TOKEN_OUTPUT"
  exit 1
fi
ok "Hatchet token generated"

# ── Step 4: Start backend services ──────────────────────────────────────────
# HATCHET_CLIENT_TOKEN (exported above) is consumed by the compose file.
info "Starting backend services..."
$COMPOSE up -d server worker hatchet-worker-cpu hatchet-worker-llm test-runner

# ── Step 5: Wait for server + run migrations ────────────────────────────────
wait_for "Server" "$COMPOSE exec -T test-runner curl -sf http://server:1250/health" 60
info "Running database migrations..."
$COMPOSE exec -T server uv run alembic upgrade head
ok "Migrations applied"

# ── Step 6: Run integration tests ───────────────────────────────────────────
info "Running integration tests..."
echo ""
LOGS_DIR="$COMPOSE_DIR/integration/logs"
mkdir -p "$LOGS_DIR"
RUN_TIMESTAMP=$(date +%Y%m%d-%H%M%S)
TEST_LOG="$LOGS_DIR/$RUN_TIMESTAMP.txt"
# tee captures pytest output; the if-guard keeps `set -e` from aborting on
# test failure so we can still collect logs below.
if $COMPOSE exec -T test-runner uv run pytest tests/integration/ -v -x 2>&1 | tee "$TEST_LOG.pytest"; then
  echo ""
  ok "All integration tests passed!"
  EXIT_CODE=0
else
  echo ""
  fail "Integration tests failed!"
  EXIT_CODE=1
fi

# Always collect service logs + test output into a single file
info "Collecting logs..."
$COMPOSE logs --tail=500 > "$TEST_LOG" 2>&1
echo -e "\n\n=== PYTEST OUTPUT ===\n" >> "$TEST_LOG"
cat "$TEST_LOG.pytest" >> "$TEST_LOG" 2>/dev/null
rm -f "$TEST_LOG.pytest"
echo "  Logs saved to: server/tests/integration/logs/$RUN_TIMESTAMP.txt"

# cleanup runs via trap
exit $EXIT_CODE

View File

@@ -119,7 +119,7 @@ AUTH_BACKEND = "jwt"
HATCHET_CLIENT_TOKEN = "test-dummy-token"
[tool.pytest.ini_options]
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v --ignore=tests/integration"
testpaths = ["tests"]
asyncio_mode = "auto"
markers = [

View File

@@ -307,7 +307,9 @@ async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
ctx.log(
f"get_recording: calling Daily.co API for recording_id={input.recording_id}..."
)
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
async with DailyApiClient(
api_key=settings.DAILY_API_KEY, base_url=settings.DAILY_API_URL
) as client:
recording = await client.get_recording(input.recording_id)
ctx.log(f"get_recording: Daily.co API returned successfully")
@@ -374,7 +376,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
settings.DAILY_API_KEY, "DAILY_API_KEY is required"
)
async with DailyApiClient(api_key=daily_api_key) as client:
async with DailyApiClient(
api_key=daily_api_key, base_url=settings.DAILY_API_URL
) as client:
participants = await client.get_meeting_participants(mtg_session_id)
id_to_name = {}

View File

@@ -180,6 +180,7 @@ class Settings(BaseSettings):
)
# Daily.co integration
DAILY_API_URL: str = "https://api.daily.co/v1"
DAILY_API_KEY: str | None = None
DAILY_WEBHOOK_SECRET: str | None = None
DAILY_SUBDOMAIN: str | None = None

View File

@@ -0,0 +1,218 @@
# Integration test stack — full pipeline end-to-end.
#
# Usage:
#   docker compose -f server/tests/docker-compose.integration.yml up -d --build
#
# Requires .env.integration in the repo root (generated by CI workflow).

# Shared environment for all backend containers (server, workers, test-runner),
# merged into each service via the `<<: *backend-env` merge key.
x-backend-env: &backend-env
  DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
  REDIS_HOST: redis
  CELERY_BROKER_URL: redis://redis:6379/1
  CELERY_RESULT_BACKEND: redis://redis:6379/1
  HATCHET_CLIENT_TOKEN: ${HATCHET_CLIENT_TOKEN:-}
  HATCHET_CLIENT_SERVER_URL: http://hatchet:8888
  HATCHET_CLIENT_HOST_PORT: hatchet:7077
  HATCHET_CLIENT_TLS_STRATEGY: none
  # ML backends — CPU-only, no external services
  TRANSCRIPT_BACKEND: whisper
  WHISPER_CHUNK_MODEL: tiny
  WHISPER_FILE_MODEL: tiny
  DIARIZATION_BACKEND: pyannote
  TRANSLATION_BACKEND: passthrough
  # Storage — local Garage S3
  TRANSCRIPT_STORAGE_BACKEND: aws
  TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL: http://garage:3900
  TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: reflector-media
  TRANSCRIPT_STORAGE_AWS_REGION: garage
  # Daily mock
  DAILY_API_URL: http://mock-daily:8080/v1
  DAILY_API_KEY: fake-daily-key
  # Auth
  PUBLIC_MODE: "true"
  AUTH_BACKEND: none
  # LLM (injected from CI)
  LLM_URL: ${LLM_URL:-}
  LLM_API_KEY: ${LLM_API_KEY:-}
  LLM_MODEL: ${LLM_MODEL:-gpt-4o-mini}
  # HuggingFace (for pyannote gated models)
  HF_TOKEN: ${HF_TOKEN:-}
  # Garage S3 credentials — hardcoded test keys, containers are ephemeral
  TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: GK0123456789abcdef01234567 # gitleaks:allow
  TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
  # NOTE: DAILYCO_STORAGE_AWS_* intentionally NOT set — forces fallback to
  # get_transcripts_storage() which has ENDPOINT_URL pointing at Garage.
  # Setting them would bypass the endpoint and generate presigned URLs for AWS.

services:
  postgres:
    image: postgres:17-alpine
    # Workers + server + Hatchet share one instance; raise the connection cap.
    command: ["postgres", "-c", "max_connections=200"]
    environment:
      POSTGRES_USER: reflector
      POSTGRES_PASSWORD: reflector
      POSTGRES_DB: reflector
    volumes:
      # Creates the separate `hatchet` database on first boot.
      - ../../server/docker/init-hatchet-db.sql:/docker-entrypoint-initdb.d/init-hatchet-db.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U reflector"]
      interval: 5s
      timeout: 3s
      retries: 10

  redis:
    image: redis:7.2-alpine
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5

  hatchet:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: "postgresql://reflector:reflector@postgres:5432/hatchet?sslmode=disable&connect_timeout=30"
      SERVER_AUTH_COOKIE_INSECURE: "t"
      SERVER_AUTH_COOKIE_DOMAIN: "localhost"
      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
      SERVER_GRPC_INSECURE: "t"
      SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
      SERVER_GRPC_PORT: "7077"
      SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
      SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
      interval: 10s
      timeout: 5s
      retries: 15
      start_period: 30s

  garage:
    image: dxflrs/garage:v1.1.0
    volumes:
      - ../../data/garage.toml:/etc/garage.toml:ro
    healthcheck:
      test: ["CMD", "/garage", "stats"]
      interval: 5s
      timeout: 3s
      retries: 10
      start_period: 5s

  mock-daily:
    build:
      context: .
      dockerfile: integration/Dockerfile.mock-daily
    healthcheck:
      # Any recording id returns a canned response, so /recordings/test works.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/recordings/test')"]
      interval: 5s
      timeout: 3s
      retries: 5

  server:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: server
      WEBRTC_HOST: server
      WEBRTC_PORT_RANGE: "52000-52100"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      hatchet:
        condition: service_healthy
      garage:
        condition: service_healthy
      mock-daily:
        condition: service_healthy
    volumes:
      - server_data:/app/data

  worker:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: worker
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - server_data:/app/data

  hatchet-worker-cpu:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: hatchet-worker-cpu
    depends_on:
      hatchet:
        condition: service_healthy
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - server_data:/app/data

  hatchet-worker-llm:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: hatchet-worker-llm
    depends_on:
      hatchet:
        condition: service_healthy
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - server_data:/app/data

  test-runner:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      # conftest prefers DATABASE_URL_ASYNC; both DSNs below are the same
      # asyncpg URL today. NOTE(review): the original comment said "sync
      # driver" — confirm whether a psycopg2-style DSN was intended here.
      DATABASE_URL_ASYNC: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
      SERVER_URL: http://server:1250
      GARAGE_ENDPOINT: http://garage:3900
    depends_on:
      server:
        condition: service_started
      worker:
        condition: service_started
      hatchet-worker-cpu:
        condition: service_started
      hatchet-worker-llm:
        condition: service_started
    volumes:
      - server_data:/app/data
      # Mount test files into the container
      - ./records:/app/tests/records:ro
      - ./integration:/app/tests/integration:ro
    # Keep the container alive; tests are run via `docker compose exec`.
    entrypoint: ["sleep", "infinity"]

volumes:
  server_data:

networks:
  default:
    attachable: true

View File

@@ -0,0 +1,9 @@
# Minimal image serving the mock Daily.co API (see mock_daily_server.py).
FROM python:3.12-slim
# Only runtime deps for the mock: FastAPI + uvicorn with standard extras.
RUN pip install --no-cache-dir fastapi uvicorn[standard]
WORKDIR /app
# Build context is server/tests, so the source lives under integration/.
COPY integration/mock_daily_server.py /app/mock_daily_server.py
EXPOSE 8080
CMD ["uvicorn", "mock_daily_server:app", "--host", "0.0.0.0", "--port", "8080"]

View File

View File

@@ -0,0 +1,116 @@
"""
Integration test fixtures — no mocks, real services.
All services (PostgreSQL, Redis, Hatchet, Garage, server, workers) are
expected to be running via docker-compose.integration.yml.
"""
import asyncio
import os
import time
from pathlib import Path

import boto3
import httpx
import pytest
import pytest_asyncio
from sqlalchemy.ext.asyncio import create_async_engine
# Service endpoints come from the compose environment; defaults match the
# service names on the docker-compose.integration.yml network.
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
GARAGE_ENDPOINT = os.environ.get("GARAGE_ENDPOINT", "http://garage:3900")
# Prefer DATABASE_URL_ASYNC, fall back to DATABASE_URL, then a hardcoded DSN.
DATABASE_URL = os.environ.get(
    "DATABASE_URL_ASYNC",
    os.environ.get(
        "DATABASE_URL",
        "postgresql+asyncpg://reflector:reflector@postgres:5432/reflector",
    ),
)
# S3 credentials injected by the compose file (hardcoded test keys).
GARAGE_KEY_ID = os.environ.get("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID", "")
GARAGE_KEY_SECRET = os.environ.get("TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY", "")
BUCKET_NAME = "reflector-media"
@pytest_asyncio.fixture
async def api_client():
    """Yield an httpx client whose base_url targets the running server's /v1 API."""
    client = httpx.AsyncClient(
        base_url=f"{SERVER_URL}/v1",
        timeout=httpx.Timeout(30.0),
    )
    try:
        yield client
    finally:
        # Equivalent to the async-with exit: release pooled connections.
        await client.aclose()
@pytest.fixture(scope="session")
def s3_client():
    """Session-scoped boto3 S3 client pointed at the local Garage endpoint."""
    client_kwargs = {
        "endpoint_url": GARAGE_ENDPOINT,
        "aws_access_key_id": GARAGE_KEY_ID,
        "aws_secret_access_key": GARAGE_KEY_SECRET,
        "region_name": "garage",
    }
    return boto3.client("s3", **client_kwargs)
@pytest_asyncio.fixture
async def db_engine():
    """SQLAlchemy async engine for direct DB operations.

    Disposed after each test so pooled connections are released.
    """
    engine = create_async_engine(DATABASE_URL)
    yield engine
    await engine.dispose()
@pytest.fixture(scope="session")
def test_records_dir():
    """Path to the test audio files directory (server/tests/records)."""
    return Path(__file__).parent.parent / "records"
@pytest.fixture(scope="session")
def bucket_name():
    """S3 bucket name used for integration tests (provisioned in Garage)."""
    return BUCKET_NAME
async def _poll_transcript_status(
client: httpx.AsyncClient,
transcript_id: str,
target: str | tuple[str, ...],
error: str = "error",
max_wait: int = 300,
interval: int = 3,
) -> dict:
"""
Poll GET /transcripts/{id} until status matches target or error.
target can be a single status string or a tuple of acceptable statuses.
Returns the transcript dict on success, raises on timeout or error status.
"""
targets = (target,) if isinstance(target, str) else target
elapsed = 0
status = None
while elapsed < max_wait:
resp = await client.get(f"/transcripts/{transcript_id}")
resp.raise_for_status()
data = resp.json()
status = data.get("status")
if status in targets:
return data
if status == error:
raise AssertionError(
f"Transcript {transcript_id} reached error status: {data}"
)
await asyncio.sleep(interval)
elapsed += interval
raise TimeoutError(
f"Transcript {transcript_id} did not reach status '{target}' "
f"within {max_wait}s (last status: {status})"
)
@pytest_asyncio.fixture
def poll_transcript_status():
    """Expose the _poll_transcript_status async helper to tests.

    NOTE(review): the fixture function itself is synchronous despite the
    pytest_asyncio.fixture decorator — confirm a plain pytest.fixture
    wasn't intended.
    """
    return _poll_transcript_status

View File

@@ -0,0 +1,62 @@
#!/bin/sh
#
# Initialize Garage bucket and keys for integration tests.
# Run inside the Garage container after it's healthy.
#
# Outputs KEY_ID and KEY_SECRET to stdout (last two lines).
#
# Note: uses /bin/sh (not bash) since the Garage container is minimal.
#
set -eu

echo "Waiting for Garage to be ready..."
# NOTE(review): if Garage never responds, the loop exits after ~30s and the
# `node id` call below fails under `set -eu` — confirm that's acceptable.
i=0
while [ "$i" -lt 30 ]; do
  if /garage stats >/dev/null 2>&1; then
    break
  fi
  sleep 1
  i=$((i + 1))
done

# Layout setup — only needed on a fresh node.
NODE_ID=$(/garage node id -q | tr -d '[:space:]')
LAYOUT_STATUS=$(/garage layout show 2>&1 || true)
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
  /garage layout assign "$NODE_ID" -c 1G -z dc1
  /garage layout apply --version 1
  echo "Layout applied."
else
  echo "Layout already configured."
fi

# Bucket
if ! /garage bucket info reflector-media >/dev/null 2>&1; then
  /garage bucket create reflector-media
  echo "Bucket 'reflector-media' created."
else
  echo "Bucket 'reflector-media' already exists."
fi

# Key — reuse existing key's output if present so ID/secret extraction works
# on reruns.
if /garage key info reflector-test >/dev/null 2>&1; then
  echo "Key 'reflector-test' already exists."
  KEY_OUTPUT=$(/garage key info reflector-test 2>&1)
else
  KEY_OUTPUT=$(/garage key create reflector-test 2>&1)
  echo "Key 'reflector-test' created."
fi

# Permissions
/garage bucket allow reflector-media --read --write --key reflector-test

# Extract key ID and secret from output using POSIX-compatible parsing
# garage key output format:
#   Key name: reflector-test
#   Key ID: GK...
#   Secret key: ...
KEY_ID=$(echo "$KEY_OUTPUT" | grep "Key ID" | sed 's/.*Key ID: *//')
KEY_SECRET=$(echo "$KEY_OUTPUT" | grep "Secret key" | sed 's/.*Secret key: *//')
echo "GARAGE_KEY_ID=${KEY_ID}"
echo "GARAGE_KEY_SECRET=${KEY_SECRET}"

View File

@@ -0,0 +1,75 @@
"""
Minimal FastAPI mock for Daily.co API.
Serves canned responses for:
- GET /v1/recordings/{recording_id}
- GET /v1/meetings/{meeting_id}/participants
"""
from fastapi import FastAPI
# Single FastAPI app serving canned Daily.co-style responses.
app = FastAPI(title="Mock Daily API")

# Participant UUIDs must be 36-char hex UUIDs to match Daily's filename format
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"

# Daily-format track keys: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
TRACK_KEYS = [
    f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
    f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
]
@app.get("/v1/recordings/{recording_id}")
async def get_recording(recording_id: str):
    """Return a canned raw-tracks recording payload for any recording id.

    s3/key fields are None so that consumers fall back to the per-track
    s3Key values; mtgSessionId links to the participants endpoint below.
    """
    return {
        "id": recording_id,
        "room_name": "integration-test-room",
        "start_ts": 1700000000,
        "type": "raw-tracks",
        "status": "finished",
        "max_participants": 2,
        "duration": 5,
        "share_token": None,
        "s3": {
            "bucket_name": "reflector-media",
            "bucket_region": "garage",
            "key": None,
            "endpoint": None,
        },
        "s3key": None,
        "tracks": [
            {"type": "audio", "s3Key": key, "size": 100000} for key in TRACK_KEYS
        ],
        "mtgSessionId": "mock-mtg-session-id",
    }
@app.get("/v1/meetings/{meeting_id}/participants")
async def get_meeting_participants(meeting_id: str):
    """Return two canned participants whose IDs match the track keys above."""
    return {
        "data": [
            {
                "user_id": "user-a",
                "participant_id": PARTICIPANT_A_ID,
                "user_name": "Speaker A",
                "join_time": 1700000000,
                "duration": 300,
            },
            {
                "user_id": "user-b",
                "participant_id": PARTICIPANT_B_ID,
                "user_name": "Speaker B",
                "join_time": 1700000010,
                "duration": 290,
            },
        ]
    }


# Allow running directly (outside the Dockerfile's uvicorn CMD) for debugging.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8080)

View File

@@ -0,0 +1,61 @@
"""
Integration test: File upload → FilePipeline → full processing.
Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
pyannote diarization → LLM summarization/topics → status "ended".
"""
import pytest
@pytest.mark.asyncio
async def test_file_pipeline_end_to_end(
    api_client, test_records_dir, poll_transcript_status
):
    """Upload a WAV file and verify the full pipeline completes.

    Creates a transcript, uploads the audio as a single chunk, then polls
    until status reaches "ended" and checks title/summaries/topics/duration.
    """
    # 1. Create transcript
    resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-file-test", "source_kind": "file"},
    )
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    transcript = resp.json()
    transcript_id = transcript["id"]
    # 2. Upload audio file (single chunk)
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    with open(audio_path, "rb") as f:
        resp = await api_client.post(
            f"/transcripts/{transcript_id}/record/upload",
            params={"chunk_number": 0, "total_chunks": 1},
            files={"chunk": ("test_short.wav", f, "audio/wav")},
        )
    assert resp.status_code == 200, f"Upload failed: {resp.text}"
    # 3. Poll until pipeline completes
    data = await poll_transcript_status(
        api_client, transcript_id, target="ended", max_wait=300
    )
    # 4. Assertions
    assert data["status"] == "ended"
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    assert (
        data.get("long_summary") and len(data["long_summary"]) > 0
    ), "Long summary should be non-empty"
    assert (
        data.get("short_summary") and len(data["short_summary"]) > 0
    ), "Short summary should be non-empty"
    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"
    assert data.get("duration", 0) > 0, "Duration should be positive"

View File

@@ -0,0 +1,109 @@
"""
Integration test: WebRTC stream → LivePostProcessingPipeline → full processing.
Exercises: WebRTC SDP exchange → live audio streaming → connection close →
Hatchet LivePostPipeline → whisper transcription → LLM summarization/topics → status "ended".
"""
import asyncio
import json
import os
import httpx
import pytest
from aiortc import RTCPeerConnection, RTCSessionDescription
from aiortc.contrib.media import MediaPlayer
# Direct server URL for the raw WebRTC offer (bypasses the api_client fixture).
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")


@pytest.mark.asyncio
async def test_live_pipeline_end_to_end(
    api_client, test_records_dir, poll_transcript_status
):
    """Stream audio via WebRTC and verify the full post-processing pipeline completes."""
    # 1. Create transcript
    resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-live-test"},
    )
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    transcript = resp.json()
    transcript_id = transcript["id"]
    # 2. Set up WebRTC peer connection with audio from test file
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    pc = RTCPeerConnection()
    player = MediaPlayer(audio_path.as_posix())
    # Add audio track
    audio_track = player.audio
    pc.addTrack(audio_track)
    # Create data channel (server expects this for STOP command)
    channel = pc.createDataChannel("data-channel")
    # 3. Generate SDP offer
    offer = await pc.createOffer()
    await pc.setLocalDescription(offer)
    sdp_payload = {
        "sdp": pc.localDescription.sdp,
        "type": pc.localDescription.type,
    }
    # 4. Send offer to server and get answer
    webrtc_url = f"{SERVER_URL}/v1/transcripts/{transcript_id}/record/webrtc"
    async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
        resp = await client.post(webrtc_url, json=sdp_payload)
    assert resp.status_code == 200, f"WebRTC offer failed: {resp.text}"
    answer_data = resp.json()
    answer = RTCSessionDescription(sdp=answer_data["sdp"], type=answer_data["type"])
    await pc.setRemoteDescription(answer)
    # 5. Wait for audio playback to finish.
    # NOTE(review): falls through silently after 60s even if the track never
    # ends; the STOP below is then sent mid-stream — confirm intended.
    max_stream_wait = 60
    elapsed = 0
    while elapsed < max_stream_wait:
        if audio_track.readyState == "ended":
            break
        await asyncio.sleep(0.5)
        elapsed += 0.5
    # 6. Send STOP command and close connection
    try:
        channel.send(json.dumps({"cmd": "STOP"}))
        await asyncio.sleep(1)
    except Exception:
        pass  # Channel may not be open if track ended quickly
    await pc.close()
    # 7. Poll until post-processing pipeline completes
    data = await poll_transcript_status(
        api_client, transcript_id, target="ended", max_wait=300
    )
    # 8. Assertions
    assert data["status"] == "ended"
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    assert (
        data.get("long_summary") and len(data["long_summary"]) > 0
    ), "Long summary should be non-empty"
    assert (
        data.get("short_summary") and len(data["short_summary"]) > 0
    ), "Short summary should be non-empty"
    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"
    assert data.get("duration", 0) > 0, "Duration should be positive"

View File

@@ -0,0 +1,129 @@
"""
Integration test: Multitrack → DailyMultitrackPipeline → full processing.
Exercises: S3 upload → DB recording setup → process endpoint →
Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
diarization → mixdown → LLM summarization/topics → status "ended".
"""
import json
from datetime import datetime, timezone
import pytest
from sqlalchemy import text
# Must match Daily's filename format: {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
# These UUIDs must match mock_daily_server.py participant IDs
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
TRACK_KEYS = [
    f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
    f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
]


@pytest.mark.asyncio
async def test_multitrack_pipeline_end_to_end(
    api_client,
    s3_client,
    db_engine,
    test_records_dir,
    bucket_name,
    poll_transcript_status,
):
    """Set up multitrack recording in S3/DB and verify the full pipeline completes.

    Uses direct DB inserts (no public API exists for seeding a Recording row),
    then triggers the process endpoint and asserts on summaries, topics, and
    participants.
    """
    # 1. Upload test audio as two separate tracks to Garage S3.
    # Both tracks reuse the same WAV file — sufficient for the pipeline,
    # though diarization sees identical audio on both tracks.
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    for track_key in TRACK_KEYS:
        s3_client.upload_file(
            str(audio_path),
            bucket_name,
            track_key,
        )
    # 2. Create transcript via API
    resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-multitrack-test"},
    )
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    transcript = resp.json()
    transcript_id = transcript["id"]
    # 3. Insert Recording row and link to transcript via direct DB access
    recording_id = f"rec-integration-{transcript_id[:8]}"
    now = datetime.now(timezone.utc)
    async with db_engine.begin() as conn:
        # Insert recording with track_keys
        await conn.execute(
            text("""
                INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys)
                VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json))
            """),
            {
                "id": recording_id,
                "bucket_name": bucket_name,
                "object_key": TRACK_KEYS[0],
                "recorded_at": now,
                "status": "completed",
                "track_keys": json.dumps(TRACK_KEYS),
            },
        )
        # Link recording to transcript and set status to uploaded
        await conn.execute(
            text("""
                UPDATE transcript
                SET recording_id = :recording_id, status = 'uploaded'
                WHERE id = :transcript_id
            """),
            {
                "recording_id": recording_id,
                "transcript_id": transcript_id,
            },
        )
    # 4. Trigger processing via process endpoint
    resp = await api_client.post(f"/transcripts/{transcript_id}/process")
    assert resp.status_code == 200, f"Process trigger failed: {resp.text}"
    # 5. Poll until pipeline completes
    # The pipeline will call mock-daily for get_recording and get_participants
    # Accept "error" too — non-critical steps like action_items may fail due to
    # LLM parsing flakiness while core results (transcript, summaries) still exist.
    data = await poll_transcript_status(
        api_client, transcript_id, target=("ended", "error"), max_wait=300
    )
    # 6. Assertions — verify core pipeline results regardless of final status
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    assert (
        data.get("long_summary") and len(data["long_summary"]) > 0
    ), "Long summary should be non-empty"
    assert (
        data.get("short_summary") and len(data["short_summary"]) > 0
    ), "Short summary should be non-empty"
    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"
    # Participants are served from a separate endpoint
    participants_resp = await api_client.get(
        f"/transcripts/{transcript_id}/participants"
    )
    assert (
        participants_resp.status_code == 200
    ), f"Failed to get participants: {participants_resp.text}"
    participants = participants_resp.json()
    assert (
        len(participants) >= 2
    ), f"Expected at least 2 speakers for multitrack, got {len(participants)}"