test: full integration tests (#916)

* test: full integration tests * fix: add env vars as secrets in CI
2026-05-06 03:15:17 +00:00 · 2026-03-18 15:29:21 -05:00
parent a9200d35bf
commit 9a2f973a2e
16 changed files with 1098 additions and 3 deletions
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -0,0 +1,139 @@
 name: Integration Tests
 on:
  workflow_dispatch:
    inputs:
      llm_model:
        description: "LLM model name (overrides LLM_MODEL secret)"
        required: false
        default: ""
        type: string
 jobs:
  integration:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
      - name: Start infrastructure services
        working-directory: server/tests
        env:
          LLM_URL: ${{ secrets.LLM_URL }}
          LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          docker compose -f docker-compose.integration.yml up -d --build postgres redis garage hatchet mock-daily
      - name: Set up Garage bucket and keys
        working-directory: server/tests
        run: |
          GARAGE="docker compose -f docker-compose.integration.yml exec -T garage /garage"
          GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
          GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
          echo "Waiting for Garage to be healthy..."
          for i in $(seq 1 60); do
            if $GARAGE stats &>/dev/null; then break; fi
            sleep 2
          done
          echo "Setting up Garage..."
          NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
          LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
          if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
            $GARAGE layout assign "$NODE_ID" -c 1G -z dc1
            $GARAGE layout apply --version 1
          fi
          $GARAGE bucket info reflector-media &>/dev/null || $GARAGE bucket create reflector-media
          if ! $GARAGE key info reflector-test &>/dev/null; then
            $GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
            $GARAGE key rename "$GARAGE_KEY_ID" reflector-test
          fi
          $GARAGE bucket allow reflector-media --read --write --key reflector-test
      - name: Wait for Hatchet and generate API token
        working-directory: server/tests
        run: |
          echo "Waiting for Hatchet to be healthy..."
          for i in $(seq 1 90); do
            if docker compose -f docker-compose.integration.yml exec -T hatchet curl -sf http://localhost:8888/api/live &>/dev/null; then
              echo "Hatchet is ready."
              break
            fi
            sleep 2
          done
          echo "Generating Hatchet API token..."
          HATCHET_OUTPUT=$(docker compose -f docker-compose.integration.yml exec -T hatchet \
            /hatchet-admin token create --config /config --name integration-test 2>&1)
          HATCHET_TOKEN=$(echo "$HATCHET_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
          if [ -z "$HATCHET_TOKEN" ]; then
            echo "ERROR: Failed to extract Hatchet JWT token"
            exit 1
          fi
          echo "HATCHET_CLIENT_TOKEN=${HATCHET_TOKEN}" >> $GITHUB_ENV
      - name: Start backend services
        working-directory: server/tests
        env:
          LLM_URL: ${{ secrets.LLM_URL }}
          LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          # Export garage and hatchet credentials for backend services
          export GARAGE_KEY_ID="${{ env.GARAGE_KEY_ID }}"
          export GARAGE_KEY_SECRET="${{ env.GARAGE_KEY_SECRET }}"
          export HATCHET_CLIENT_TOKEN="${{ env.HATCHET_CLIENT_TOKEN }}"
          docker compose -f docker-compose.integration.yml up -d \
            server worker hatchet-worker-cpu hatchet-worker-llm test-runner
      - name: Wait for server health check
        working-directory: server/tests
        run: |
          echo "Waiting for server to be healthy..."
          for i in $(seq 1 60); do
            if docker compose -f docker-compose.integration.yml exec -T test-runner \
              curl -sf http://server:1250/health &>/dev/null; then
              echo "Server is ready."
              break
            fi
            sleep 3
          done
      - name: Run DB migrations
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml exec -T server \
            uv run alembic upgrade head
      - name: Run integration tests
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml exec -T test-runner \
            uv run pytest tests/integration/ -v -x
      - name: Collect logs on failure
        if: failure()
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml logs --tail=500 > integration-logs.txt 2>&1
      - name: Upload logs artifact
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: integration-logs
          path: server/tests/integration-logs.txt
          retention-days: 7
      - name: Teardown
        if: always()
        working-directory: server/tests
        run: |
          docker compose -f docker-compose.integration.yml down -v --remove-orphans
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,4 @@ www/.env.production
 opencode.json
 vibedocs/
 server/tests/integration/logs/
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -160,6 +160,21 @@ All endpoints prefixed `/v1/`:
 - **Frontend**: No current test suite - opportunities for Jest/React Testing Library
 - **Coverage**: Backend maintains test coverage reports in `htmlcov/`
 ### Integration Tests (DO NOT run unless explicitly asked)
 There are end-to-end integration tests in `server/tests/integration/` that spin up the full stack (PostgreSQL, Redis, Hatchet, Garage, mock-daily, server, workers) via Docker Compose and exercise real processing pipelines. These tests are:
 - `test_file_pipeline.py` — File upload → FilePipeline
 - `test_live_pipeline.py` — WebRTC stream → LivePostPipeline
 - `test_multitrack_pipeline.py` — Multitrack → DailyMultitrackPipeline
 **Important:**
 - These tests are **excluded** from normal `uv run pytest` runs via `--ignore=tests/integration` in pyproject.toml.
 - Do **NOT** run them as part of verification, code review, or general testing unless the user explicitly asks.
 - They require Docker, external LLM credentials, and HuggingFace token — they cannot run in a regular test environment.
 - To run locally: `./scripts/run-integration-tests.sh` (requires env vars: `LLM_URL`, `LLM_API_KEY`, `HF_TOKEN`).
 - In CI: triggered manually via the "Integration Tests" GitHub Actions workflow (`workflow_dispatch`).
 ## GPU Processing
 Modal.com integration for scalable ML processing:
--- a/scripts/run-integration-tests.sh
+++ b/scripts/run-integration-tests.sh
@@ -0,0 +1,156 @@
 #!/usr/bin/env bash
 #
 # Run integration tests locally.
 #
 # Spins up the full stack via Docker Compose, runs the three integration tests,
 # and tears everything down afterward.
 #
 # Required environment variables:
 #   LLM_URL      — OpenAI-compatible LLM endpoint (e.g. https://api.openai.com/v1)
 #   LLM_API_KEY  — API key for the LLM endpoint
 #   HF_TOKEN     — HuggingFace token for pyannote gated models
 #
 # Optional:
 #   LLM_MODEL    — Model name (default: qwen2.5:14b)
 #
 # Usage:
 #   export LLM_URL="https://api.openai.com/v1"
 #   export LLM_API_KEY="sk-..."
 #   export HF_TOKEN="hf_..."
 #   ./scripts/run-integration-tests.sh
 #
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
 COMPOSE_DIR="$REPO_ROOT/server/tests"
 COMPOSE_FILE="$COMPOSE_DIR/docker-compose.integration.yml"
 COMPOSE="docker compose -f $COMPOSE_FILE"
 # ── Validate required env vars ──────────────────────────────────────────────
 for var in LLM_URL LLM_API_KEY HF_TOKEN; do
    if [[ -z "${!var:-}" ]]; then
        echo "ERROR: $var is not set. See script header for required env vars."
        exit 1
    fi
 done
 export LLM_MODEL="${LLM_MODEL:-qwen2.5:14b}"
 # ── Helpers ─────────────────────────────────────────────────────────────────
 info()  { echo -e "\n\033[1;34m▸ $*\033[0m"; }
 ok()    { echo -e "\033[1;32m  ✓ $*\033[0m"; }
 fail()  { echo -e "\033[1;31m  ✗ $*\033[0m"; }
 wait_for() {
    local desc="$1" cmd="$2" max="${3:-60}"
    info "Waiting for $desc (up to ${max}s)..."
    for i in $(seq 1 "$max"); do
        if eval "$cmd" &>/dev/null; then
            ok "$desc is ready"
            return 0
        fi
        sleep 2
    done
    fail "$desc did not become ready within ${max}s"
    return 1
 }
 cleanup() {
    info "Tearing down..."
    $COMPOSE down -v --remove-orphans 2>/dev/null || true
 }
 # Always tear down on exit
 trap cleanup EXIT
 # ── Step 1: Build and start infrastructure ──────────────────────────────────
 info "Building and starting infrastructure services..."
 $COMPOSE up -d --build postgres redis garage hatchet mock-daily
 # ── Step 2: Set up Garage (S3 bucket + keys) ───────────────────────────────
 wait_for "Garage" "$COMPOSE exec -T garage /garage stats" 60
 info "Setting up Garage bucket and keys..."
 GARAGE="$COMPOSE exec -T garage /garage"
 # Hardcoded test credentials — ephemeral containers, destroyed after tests
 export GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
 export GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
 # Layout
 NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
 LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
 if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
    $GARAGE layout assign "$NODE_ID" -c 1G -z dc1
    $GARAGE layout apply --version 1
 fi
 # Bucket
 $GARAGE bucket info reflector-media >/dev/null 2>&1 || $GARAGE bucket create reflector-media
 # Import key with known credentials
 if ! $GARAGE key info reflector-test >/dev/null 2>&1; then
    $GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
    $GARAGE key rename "$GARAGE_KEY_ID" reflector-test
 fi
 # Permissions
 $GARAGE bucket allow reflector-media --read --write --key reflector-test
 ok "Garage ready with hardcoded test credentials"
 # ── Step 3: Generate Hatchet API token ──────────────────────────────────────
 wait_for "Hatchet" "$COMPOSE exec -T hatchet curl -sf http://localhost:8888/api/live" 90
 info "Generating Hatchet API token..."
 HATCHET_TOKEN_OUTPUT=$($COMPOSE exec -T hatchet /hatchet-admin token create --config /config --name local-test 2>&1)
 export HATCHET_CLIENT_TOKEN=$(echo "$HATCHET_TOKEN_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
 if [[ -z "$HATCHET_CLIENT_TOKEN" ]]; then
    fail "Failed to extract Hatchet token (JWT not found in output)"
    echo "  Output was: $HATCHET_TOKEN_OUTPUT"
    exit 1
 fi
 ok "Hatchet token generated"
 # ── Step 4: Start backend services ──────────────────────────────────────────
 info "Starting backend services..."
 $COMPOSE up -d server worker hatchet-worker-cpu hatchet-worker-llm test-runner
 # ── Step 5: Wait for server + run migrations ────────────────────────────────
 wait_for "Server" "$COMPOSE exec -T test-runner curl -sf http://server:1250/health" 60
 info "Running database migrations..."
 $COMPOSE exec -T server uv run alembic upgrade head
 ok "Migrations applied"
 # ── Step 6: Run integration tests ───────────────────────────────────────────
 info "Running integration tests..."
 echo ""
 LOGS_DIR="$COMPOSE_DIR/integration/logs"
 mkdir -p "$LOGS_DIR"
 RUN_TIMESTAMP=$(date +%Y%m%d-%H%M%S)
 TEST_LOG="$LOGS_DIR/$RUN_TIMESTAMP.txt"
 if $COMPOSE exec -T test-runner uv run pytest tests/integration/ -v -x 2>&1 | tee "$TEST_LOG.pytest"; then
    echo ""
    ok "All integration tests passed!"
    EXIT_CODE=0
 else
    echo ""
    fail "Integration tests failed!"
    EXIT_CODE=1
 fi
 # Always collect service logs + test output into a single file
 info "Collecting logs..."
 $COMPOSE logs --tail=500 > "$TEST_LOG" 2>&1
 echo -e "\n\n=== PYTEST OUTPUT ===\n" >> "$TEST_LOG"
 cat "$TEST_LOG.pytest" >> "$TEST_LOG" 2>/dev/null
 rm -f "$TEST_LOG.pytest"
 echo "  Logs saved to: server/tests/integration/logs/$RUN_TIMESTAMP.txt"
 # cleanup runs via trap
 exit $EXIT_CODE
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -119,7 +119,7 @@ AUTH_BACKEND = "jwt"
 HATCHET_CLIENT_TOKEN = "test-dummy-token"
 [tool.pytest.ini_options]
-addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
+addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v --ignore=tests/integration"
 testpaths = ["tests"]
 asyncio_mode = "auto"
 markers = [
--- a/server/reflector/hatchet/workflows/daily_multitrack_pipeline.py
+++ b/server/reflector/hatchet/workflows/daily_multitrack_pipeline.py
@@ -307,7 +307,9 @@ async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
    ctx.log(
        f"get_recording: calling Daily.co API for recording_id={input.recording_id}..."
    )
-    async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
+    async with DailyApiClient(
        api_key=settings.DAILY_API_KEY, base_url=settings.DAILY_API_URL
    ) as client:
        recording = await client.get_recording(input.recording_id)
    ctx.log(f"get_recording: Daily.co API returned successfully")
@@ -374,7 +376,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
            settings.DAILY_API_KEY, "DAILY_API_KEY is required"
        )
-        async with DailyApiClient(api_key=daily_api_key) as client:
+        async with DailyApiClient(
            api_key=daily_api_key, base_url=settings.DAILY_API_URL
        ) as client:
            participants = await client.get_meeting_participants(mtg_session_id)
        id_to_name = {}
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -180,6 +180,7 @@ class Settings(BaseSettings):
    )
    # Daily.co integration
    DAILY_API_URL: str = "https://api.daily.co/v1"
    DAILY_API_KEY: str | None = None
    DAILY_WEBHOOK_SECRET: str | None = None
    DAILY_SUBDOMAIN: str | None = None
--- a/server/tests/docker-compose.integration.yml
+++ b/server/tests/docker-compose.integration.yml
@@ -0,0 +1,218 @@
 # Integration test stack — full pipeline end-to-end.
 #
 # Usage:
 #   docker compose -f server/tests/docker-compose.integration.yml up -d --build
 #
 # Requires .env.integration in the repo root (generated by CI workflow).
 x-backend-env: &backend-env
  DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
  REDIS_HOST: redis
  CELERY_BROKER_URL: redis://redis:6379/1
  CELERY_RESULT_BACKEND: redis://redis:6379/1
  HATCHET_CLIENT_TOKEN: ${HATCHET_CLIENT_TOKEN:-}
  HATCHET_CLIENT_SERVER_URL: http://hatchet:8888
  HATCHET_CLIENT_HOST_PORT: hatchet:7077
  HATCHET_CLIENT_TLS_STRATEGY: none
  # ML backends — CPU-only, no external services
  TRANSCRIPT_BACKEND: whisper
  WHISPER_CHUNK_MODEL: tiny
  WHISPER_FILE_MODEL: tiny
  DIARIZATION_BACKEND: pyannote
  TRANSLATION_BACKEND: passthrough
  # Storage — local Garage S3
  TRANSCRIPT_STORAGE_BACKEND: aws
  TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL: http://garage:3900
  TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: reflector-media
  TRANSCRIPT_STORAGE_AWS_REGION: garage
  # Daily mock
  DAILY_API_URL: http://mock-daily:8080/v1
  DAILY_API_KEY: fake-daily-key
  # Auth
  PUBLIC_MODE: "true"
  AUTH_BACKEND: none
  # LLM (injected from CI)
  LLM_URL: ${LLM_URL:-}
  LLM_API_KEY: ${LLM_API_KEY:-}
  LLM_MODEL: ${LLM_MODEL:-gpt-4o-mini}
  # HuggingFace (for pyannote gated models)
  HF_TOKEN: ${HF_TOKEN:-}
  # Garage S3 credentials — hardcoded test keys, containers are ephemeral
  TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: GK0123456789abcdef01234567 # gitleaks:allow
  TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
  # NOTE: DAILYCO_STORAGE_AWS_* intentionally NOT set — forces fallback to
  # get_transcripts_storage() which has ENDPOINT_URL pointing at Garage.
  # Setting them would bypass the endpoint and generate presigned URLs for AWS.
 services:
  postgres:
    image: postgres:17-alpine
    command: ["postgres", "-c", "max_connections=200"]
    environment:
      POSTGRES_USER: reflector
      POSTGRES_PASSWORD: reflector
      POSTGRES_DB: reflector
    volumes:
      - ../../server/docker/init-hatchet-db.sql:/docker-entrypoint-initdb.d/init-hatchet-db.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U reflector"]
      interval: 5s
      timeout: 3s
      retries: 10
  redis:
    image: redis:7.2-alpine
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5
  hatchet:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: "postgresql://reflector:reflector@postgres:5432/hatchet?sslmode=disable&connect_timeout=30"
      SERVER_AUTH_COOKIE_INSECURE: "t"
      SERVER_AUTH_COOKIE_DOMAIN: "localhost"
      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
      SERVER_GRPC_INSECURE: "t"
      SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
      SERVER_GRPC_PORT: "7077"
      SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
      SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
      interval: 10s
      timeout: 5s
      retries: 15
      start_period: 30s
  garage:
    image: dxflrs/garage:v1.1.0
    volumes:
      - ../../data/garage.toml:/etc/garage.toml:ro
    healthcheck:
      test: ["CMD", "/garage", "stats"]
      interval: 5s
      timeout: 3s
      retries: 10
      start_period: 5s
  mock-daily:
    build:
      context: .
      dockerfile: integration/Dockerfile.mock-daily
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/recordings/test')"]
      interval: 5s
      timeout: 3s
      retries: 5
  server:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: server
      WEBRTC_HOST: server
      WEBRTC_PORT_RANGE: "52000-52100"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      hatchet:
        condition: service_healthy
      garage:
        condition: service_healthy
      mock-daily:
        condition: service_healthy
    volumes:
      - server_data:/app/data
  worker:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: worker
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - server_data:/app/data
  hatchet-worker-cpu:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: hatchet-worker-cpu
    depends_on:
      hatchet:
        condition: service_healthy
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - server_data:/app/data
  hatchet-worker-llm:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      ENTRYPOINT: hatchet-worker-llm
    depends_on:
      hatchet:
        condition: service_healthy
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - server_data:/app/data
  test-runner:
    build:
      context: ../../server
      dockerfile: Dockerfile
    environment:
      <<: *backend-env
      # Override DATABASE_URL for sync driver (used by direct DB access in tests)
      DATABASE_URL_ASYNC: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
      DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
      SERVER_URL: http://server:1250
      GARAGE_ENDPOINT: http://garage:3900
    depends_on:
      server:
        condition: service_started
      worker:
        condition: service_started
      hatchet-worker-cpu:
        condition: service_started
      hatchet-worker-llm:
        condition: service_started
    volumes:
      - server_data:/app/data
      # Mount test files into the container
      - ./records:/app/tests/records:ro
      - ./integration:/app/tests/integration:ro
    entrypoint: ["sleep", "infinity"]
 volumes:
  server_data:
 networks:
  default:
    attachable: true
--- a/server/tests/integration/Dockerfile.mock-daily
+++ b/server/tests/integration/Dockerfile.mock-daily
@@ -0,0 +1,9 @@
 FROM python:3.12-slim
 RUN pip install --no-cache-dir fastapi uvicorn[standard]
 WORKDIR /app
 COPY integration/mock_daily_server.py /app/mock_daily_server.py
 EXPOSE 8080
 CMD ["uvicorn", "mock_daily_server:app", "--host", "0.0.0.0", "--port", "8080"]
--- a/server/tests/integration/init.py
+++ b/server/tests/integration/init.py
--- a/server/tests/integration/conftest.py
+++ b/server/tests/integration/conftest.py
@@ -0,0 +1,116 @@
 """
 Integration test fixtures — no mocks, real services.
 All services (PostgreSQL, Redis, Hatchet, Garage, server, workers) are
 expected to be running via docker-compose.integration.yml.
 """
 import asyncio
 import os
 from pathlib import Path
 import boto3
 import httpx
 import pytest
 import pytest_asyncio
 from sqlalchemy.ext.asyncio import create_async_engine
 SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
 GARAGE_ENDPOINT = os.environ.get("GARAGE_ENDPOINT", "http://garage:3900")
 DATABASE_URL = os.environ.get(
    "DATABASE_URL_ASYNC",
    os.environ.get(
        "DATABASE_URL",
        "postgresql+asyncpg://reflector:reflector@postgres:5432/reflector",
    ),
 )
 GARAGE_KEY_ID = os.environ.get("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID", "")
 GARAGE_KEY_SECRET = os.environ.get("TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY", "")
 BUCKET_NAME = "reflector-media"
@pytest_asyncio.fixture
 async def api_client():
    """HTTP client pointed at the running server."""
    async with httpx.AsyncClient(
        base_url=f"{SERVER_URL}/v1",
        timeout=httpx.Timeout(30.0),
    ) as client:
        yield client
@pytest.fixture(scope="session")
 def s3_client():
    """Boto3 S3 client pointed at Garage."""
    return boto3.client(
        "s3",
        endpoint_url=GARAGE_ENDPOINT,
        aws_access_key_id=GARAGE_KEY_ID,
        aws_secret_access_key=GARAGE_KEY_SECRET,
        region_name="garage",
    )
@pytest_asyncio.fixture
 async def db_engine():
    """SQLAlchemy async engine for direct DB operations."""
    engine = create_async_engine(DATABASE_URL)
    yield engine
    await engine.dispose()
@pytest.fixture(scope="session")
 def test_records_dir():
    """Path to the test audio files directory."""
    return Path(__file__).parent.parent / "records"
@pytest.fixture(scope="session")
 def bucket_name():
    """S3 bucket name used for integration tests."""
    return BUCKET_NAME
 async def _poll_transcript_status(
    client: httpx.AsyncClient,
    transcript_id: str,
    target: str | tuple[str, ...],
    error: str = "error",
    max_wait: int = 300,
    interval: int = 3,
 ) -> dict:
    """
    Poll GET /transcripts/{id} until status matches target or error.
    target can be a single status string or a tuple of acceptable statuses.
    Returns the transcript dict on success, raises on timeout or error status.
    """
    targets = (target,) if isinstance(target, str) else target
    elapsed = 0
    status = None
    while elapsed < max_wait:
        resp = await client.get(f"/transcripts/{transcript_id}")
        resp.raise_for_status()
        data = resp.json()
        status = data.get("status")
        if status in targets:
            return data
        if status == error:
            raise AssertionError(
                f"Transcript {transcript_id} reached error status: {data}"
            )
        await asyncio.sleep(interval)
        elapsed += interval
    raise TimeoutError(
        f"Transcript {transcript_id} did not reach status '{target}' "
        f"within {max_wait}s (last status: {status})"
    )
@pytest_asyncio.fixture
 def poll_transcript_status():
    """Returns the poll_transcript_status async helper function."""
    return _poll_transcript_status
--- a/server/tests/integration/garage_setup.sh
+++ b/server/tests/integration/garage_setup.sh
@@ -0,0 +1,62 @@
 #!/bin/sh
 #
 # Initialize Garage bucket and keys for integration tests.
 # Run inside the Garage container after it's healthy.
 #
 # Outputs KEY_ID and KEY_SECRET to stdout (last two lines).
 #
 # Note: uses /bin/sh (not bash) since the Garage container is minimal.
 #
 set -eu
 echo "Waiting for Garage to be ready..."
 i=0
 while [ "$i" -lt 30 ]; do
    if /garage stats >/dev/null 2>&1; then
        break
    fi
    sleep 1
    i=$((i + 1))
 done
 # Layout setup
 NODE_ID=$(/garage node id -q | tr -d '[:space:]')
 LAYOUT_STATUS=$(/garage layout show 2>&1 || true)
 if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
    /garage layout assign "$NODE_ID" -c 1G -z dc1
    /garage layout apply --version 1
    echo "Layout applied."
 else
    echo "Layout already configured."
 fi
 # Bucket
 if ! /garage bucket info reflector-media >/dev/null 2>&1; then
    /garage bucket create reflector-media
    echo "Bucket 'reflector-media' created."
 else
    echo "Bucket 'reflector-media' already exists."
 fi
 # Key
 if /garage key info reflector-test >/dev/null 2>&1; then
    echo "Key 'reflector-test' already exists."
    KEY_OUTPUT=$(/garage key info reflector-test 2>&1)
 else
    KEY_OUTPUT=$(/garage key create reflector-test 2>&1)
    echo "Key 'reflector-test' created."
 fi
 # Permissions
 /garage bucket allow reflector-media --read --write --key reflector-test
 # Extract key ID and secret from output using POSIX-compatible parsing
 # garage key output format:
 #   Key name: reflector-test
 #   Key ID: GK...
 #   Secret key: ...
 KEY_ID=$(echo "$KEY_OUTPUT" | grep "Key ID" | sed 's/.*Key ID: *//')
 KEY_SECRET=$(echo "$KEY_OUTPUT" | grep "Secret key" | sed 's/.*Secret key: *//')
 echo "GARAGE_KEY_ID=${KEY_ID}"
 echo "GARAGE_KEY_SECRET=${KEY_SECRET}"
--- a/server/tests/integration/mock_daily_server.py
+++ b/server/tests/integration/mock_daily_server.py
@@ -0,0 +1,75 @@
 """
 Minimal FastAPI mock for Daily.co API.
 Serves canned responses for:
 - GET /v1/recordings/{recording_id}
 - GET /v1/meetings/{meeting_id}/participants
 """
 from fastapi import FastAPI
 app = FastAPI(title="Mock Daily API")
 # Participant UUIDs must be 36-char hex UUIDs to match Daily's filename format
 PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
 PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
 # Daily-format track keys: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
 TRACK_KEYS = [
    f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
    f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
 ]
@app.get("/v1/recordings/{recording_id}")
 async def get_recording(recording_id: str):
    return {
        "id": recording_id,
        "room_name": "integration-test-room",
        "start_ts": 1700000000,
        "type": "raw-tracks",
        "status": "finished",
        "max_participants": 2,
        "duration": 5,
        "share_token": None,
        "s3": {
            "bucket_name": "reflector-media",
            "bucket_region": "garage",
            "key": None,
            "endpoint": None,
        },
        "s3key": None,
        "tracks": [
            {"type": "audio", "s3Key": key, "size": 100000} for key in TRACK_KEYS
        ],
        "mtgSessionId": "mock-mtg-session-id",
    }
@app.get("/v1/meetings/{meeting_id}/participants")
 async def get_meeting_participants(meeting_id: str):
    return {
        "data": [
            {
                "user_id": "user-a",
                "participant_id": PARTICIPANT_A_ID,
                "user_name": "Speaker A",
                "join_time": 1700000000,
                "duration": 300,
            },
            {
                "user_id": "user-b",
                "participant_id": PARTICIPANT_B_ID,
                "user_name": "Speaker B",
                "join_time": 1700000010,
                "duration": 290,
            },
        ]
    }
 if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)
--- a/server/tests/integration/test_file_pipeline.py
+++ b/server/tests/integration/test_file_pipeline.py
@@ -0,0 +1,61 @@
 """
 Integration test: File upload → FilePipeline → full processing.
 Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
 pyannote diarization → LLM summarization/topics → status "ended".
 """
 import pytest
@pytest.mark.asyncio
 async def test_file_pipeline_end_to_end(
    api_client, test_records_dir, poll_transcript_status
 ):
    """Upload a WAV file and verify the full pipeline completes."""
    # 1. Create transcript
    resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-file-test", "source_kind": "file"},
    )
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    transcript = resp.json()
    transcript_id = transcript["id"]
    # 2. Upload audio file (single chunk)
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    with open(audio_path, "rb") as f:
        resp = await api_client.post(
            f"/transcripts/{transcript_id}/record/upload",
            params={"chunk_number": 0, "total_chunks": 1},
            files={"chunk": ("test_short.wav", f, "audio/wav")},
        )
    assert resp.status_code == 200, f"Upload failed: {resp.text}"
    # 3. Poll until pipeline completes
    data = await poll_transcript_status(
        api_client, transcript_id, target="ended", max_wait=300
    )
    # 4. Assertions
    assert data["status"] == "ended"
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    assert (
        data.get("long_summary") and len(data["long_summary"]) > 0
    ), "Long summary should be non-empty"
    assert (
        data.get("short_summary") and len(data["short_summary"]) > 0
    ), "Short summary should be non-empty"
    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"
    assert data.get("duration", 0) > 0, "Duration should be positive"
--- a/server/tests/integration/test_live_pipeline.py
+++ b/server/tests/integration/test_live_pipeline.py
@@ -0,0 +1,109 @@
 """
 Integration test: WebRTC stream → LivePostProcessingPipeline → full processing.
 Exercises: WebRTC SDP exchange → live audio streaming → connection close →
 Hatchet LivePostPipeline → whisper transcription → LLM summarization/topics → status "ended".
 """
 import asyncio
 import json
 import os
 import httpx
 import pytest
 from aiortc import RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaPlayer
 SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
@pytest.mark.asyncio
 async def test_live_pipeline_end_to_end(
    api_client, test_records_dir, poll_transcript_status
 ):
    """Stream audio via WebRTC and verify the full post-processing pipeline completes."""
    # 1. Create transcript
    resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-live-test"},
    )
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    transcript = resp.json()
    transcript_id = transcript["id"]
    # 2. Set up WebRTC peer connection with audio from test file
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    pc = RTCPeerConnection()
    player = MediaPlayer(audio_path.as_posix())
    # Add audio track
    audio_track = player.audio
    pc.addTrack(audio_track)
    # Create data channel (server expects this for STOP command)
    channel = pc.createDataChannel("data-channel")
    # 3. Generate SDP offer
    offer = await pc.createOffer()
    await pc.setLocalDescription(offer)
    sdp_payload = {
        "sdp": pc.localDescription.sdp,
        "type": pc.localDescription.type,
    }
    # 4. Send offer to server and get answer
    webrtc_url = f"{SERVER_URL}/v1/transcripts/{transcript_id}/record/webrtc"
    async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
        resp = await client.post(webrtc_url, json=sdp_payload)
    assert resp.status_code == 200, f"WebRTC offer failed: {resp.text}"
    answer_data = resp.json()
    answer = RTCSessionDescription(sdp=answer_data["sdp"], type=answer_data["type"])
    await pc.setRemoteDescription(answer)
    # 5. Wait for audio playback to finish
    max_stream_wait = 60
    elapsed = 0
    while elapsed < max_stream_wait:
        if audio_track.readyState == "ended":
            break
        await asyncio.sleep(0.5)
        elapsed += 0.5
    # 6. Send STOP command and close connection
    try:
        channel.send(json.dumps({"cmd": "STOP"}))
        await asyncio.sleep(1)
    except Exception:
        pass  # Channel may not be open if track ended quickly
    await pc.close()
    # 7. Poll until post-processing pipeline completes
    data = await poll_transcript_status(
        api_client, transcript_id, target="ended", max_wait=300
    )
    # 8. Assertions
    assert data["status"] == "ended"
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    assert (
        data.get("long_summary") and len(data["long_summary"]) > 0
    ), "Long summary should be non-empty"
    assert (
        data.get("short_summary") and len(data["short_summary"]) > 0
    ), "Short summary should be non-empty"
    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"
    assert data.get("duration", 0) > 0, "Duration should be positive"
--- a/server/tests/integration/test_multitrack_pipeline.py
+++ b/server/tests/integration/test_multitrack_pipeline.py
@@ -0,0 +1,129 @@
 """
 Integration test: Multitrack → DailyMultitrackPipeline → full processing.
 Exercises: S3 upload → DB recording setup → process endpoint →
 Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
 diarization → mixdown → LLM summarization/topics → status "ended".
 """
 import json
 from datetime import datetime, timezone
 import pytest
 from sqlalchemy import text
 # Must match Daily's filename format: {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
 # These UUIDs must match mock_daily_server.py participant IDs
 PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
 PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
 TRACK_KEYS = [
    f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
    f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
 ]
@pytest.mark.asyncio
 async def test_multitrack_pipeline_end_to_end(
    api_client,
    s3_client,
    db_engine,
    test_records_dir,
    bucket_name,
    poll_transcript_status,
 ):
    """Set up multitrack recording in S3/DB and verify the full pipeline completes."""
    # 1. Upload test audio as two separate tracks to Garage S3
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    for track_key in TRACK_KEYS:
        s3_client.upload_file(
            str(audio_path),
            bucket_name,
            track_key,
        )
    # 2. Create transcript via API
    resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-multitrack-test"},
    )
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    transcript = resp.json()
    transcript_id = transcript["id"]
    # 3. Insert Recording row and link to transcript via direct DB access
    recording_id = f"rec-integration-{transcript_id[:8]}"
    now = datetime.now(timezone.utc)
    async with db_engine.begin() as conn:
        # Insert recording with track_keys
        await conn.execute(
            text("""
                INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys)
                VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json))
            """),
            {
                "id": recording_id,
                "bucket_name": bucket_name,
                "object_key": TRACK_KEYS[0],
                "recorded_at": now,
                "status": "completed",
                "track_keys": json.dumps(TRACK_KEYS),
            },
        )
        # Link recording to transcript and set status to uploaded
        await conn.execute(
            text("""
                UPDATE transcript
                SET recording_id = :recording_id, status = 'uploaded'
                WHERE id = :transcript_id
            """),
            {
                "recording_id": recording_id,
                "transcript_id": transcript_id,
            },
        )
    # 4. Trigger processing via process endpoint
    resp = await api_client.post(f"/transcripts/{transcript_id}/process")
    assert resp.status_code == 200, f"Process trigger failed: {resp.text}"
    # 5. Poll until pipeline completes
    # The pipeline will call mock-daily for get_recording and get_participants
    # Accept "error" too — non-critical steps like action_items may fail due to
    # LLM parsing flakiness while core results (transcript, summaries) still exist.
    data = await poll_transcript_status(
        api_client, transcript_id, target=("ended", "error"), max_wait=300
    )
    # 6. Assertions — verify core pipeline results regardless of final status
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    assert (
        data.get("long_summary") and len(data["long_summary"]) > 0
    ), "Long summary should be non-empty"
    assert (
        data.get("short_summary") and len(data["short_summary"]) > 0
    ), "Short summary should be non-empty"
    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"
    # Participants are served from a separate endpoint
    participants_resp = await api_client.get(
        f"/transcripts/{transcript_id}/participants"
    )
    assert (
        participants_resp.status_code == 200
    ), f"Failed to get participants: {participants_resp.text}"
    participants = participants_resp.json()
    assert (
        len(participants) >= 2
    ), f"Expected at least 2 speakers for multitrack, got {len(participants)}"