mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-03-21 22:56:47 +00:00
test: full integration tests (#916)
* test: full integration tests * fix: add env vars as secrets in CI
This commit is contained in:
committed by
GitHub
parent
a9200d35bf
commit
9a2f973a2e
139
.github/workflows/integration_tests.yml
vendored
Normal file
139
.github/workflows/integration_tests.yml
vendored
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
name: Integration Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
llm_model:
|
||||||
|
description: "LLM model name (overrides LLM_MODEL secret)"
|
||||||
|
required: false
|
||||||
|
default: ""
|
||||||
|
type: string
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
integration:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 60
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Start infrastructure services
|
||||||
|
working-directory: server/tests
|
||||||
|
env:
|
||||||
|
LLM_URL: ${{ secrets.LLM_URL }}
|
||||||
|
LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
|
||||||
|
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||||
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
|
run: |
|
||||||
|
docker compose -f docker-compose.integration.yml up -d --build postgres redis garage hatchet mock-daily
|
||||||
|
|
||||||
|
- name: Set up Garage bucket and keys
|
||||||
|
working-directory: server/tests
|
||||||
|
run: |
|
||||||
|
GARAGE="docker compose -f docker-compose.integration.yml exec -T garage /garage"
|
||||||
|
GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
|
||||||
|
GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
|
||||||
|
|
||||||
|
echo "Waiting for Garage to be healthy..."
|
||||||
|
for i in $(seq 1 60); do
|
||||||
|
if $GARAGE stats &>/dev/null; then break; fi
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "Setting up Garage..."
|
||||||
|
NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
|
||||||
|
LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
|
||||||
|
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
|
||||||
|
$GARAGE layout assign "$NODE_ID" -c 1G -z dc1
|
||||||
|
$GARAGE layout apply --version 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
$GARAGE bucket info reflector-media &>/dev/null || $GARAGE bucket create reflector-media
|
||||||
|
if ! $GARAGE key info reflector-test &>/dev/null; then
|
||||||
|
$GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
|
||||||
|
$GARAGE key rename "$GARAGE_KEY_ID" reflector-test
|
||||||
|
fi
|
||||||
|
$GARAGE bucket allow reflector-media --read --write --key reflector-test
|
||||||
|
|
||||||
|
- name: Wait for Hatchet and generate API token
|
||||||
|
working-directory: server/tests
|
||||||
|
run: |
|
||||||
|
echo "Waiting for Hatchet to be healthy..."
|
||||||
|
for i in $(seq 1 90); do
|
||||||
|
if docker compose -f docker-compose.integration.yml exec -T hatchet curl -sf http://localhost:8888/api/live &>/dev/null; then
|
||||||
|
echo "Hatchet is ready."
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "Generating Hatchet API token..."
|
||||||
|
HATCHET_OUTPUT=$(docker compose -f docker-compose.integration.yml exec -T hatchet \
|
||||||
|
/hatchet-admin token create --config /config --name integration-test 2>&1)
|
||||||
|
HATCHET_TOKEN=$(echo "$HATCHET_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
|
||||||
|
if [ -z "$HATCHET_TOKEN" ]; then
|
||||||
|
echo "ERROR: Failed to extract Hatchet JWT token"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "HATCHET_CLIENT_TOKEN=${HATCHET_TOKEN}" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Start backend services
|
||||||
|
working-directory: server/tests
|
||||||
|
env:
|
||||||
|
LLM_URL: ${{ secrets.LLM_URL }}
|
||||||
|
LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
|
||||||
|
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||||
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Export garage and hatchet credentials for backend services
|
||||||
|
export GARAGE_KEY_ID="${{ env.GARAGE_KEY_ID }}"
|
||||||
|
export GARAGE_KEY_SECRET="${{ env.GARAGE_KEY_SECRET }}"
|
||||||
|
export HATCHET_CLIENT_TOKEN="${{ env.HATCHET_CLIENT_TOKEN }}"
|
||||||
|
|
||||||
|
docker compose -f docker-compose.integration.yml up -d \
|
||||||
|
server worker hatchet-worker-cpu hatchet-worker-llm test-runner
|
||||||
|
|
||||||
|
- name: Wait for server health check
|
||||||
|
working-directory: server/tests
|
||||||
|
run: |
|
||||||
|
echo "Waiting for server to be healthy..."
|
||||||
|
for i in $(seq 1 60); do
|
||||||
|
if docker compose -f docker-compose.integration.yml exec -T test-runner \
|
||||||
|
curl -sf http://server:1250/health &>/dev/null; then
|
||||||
|
echo "Server is ready."
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 3
|
||||||
|
done
|
||||||
|
|
||||||
|
- name: Run DB migrations
|
||||||
|
working-directory: server/tests
|
||||||
|
run: |
|
||||||
|
docker compose -f docker-compose.integration.yml exec -T server \
|
||||||
|
uv run alembic upgrade head
|
||||||
|
|
||||||
|
- name: Run integration tests
|
||||||
|
working-directory: server/tests
|
||||||
|
run: |
|
||||||
|
docker compose -f docker-compose.integration.yml exec -T test-runner \
|
||||||
|
uv run pytest tests/integration/ -v -x
|
||||||
|
|
||||||
|
- name: Collect logs on failure
|
||||||
|
if: failure()
|
||||||
|
working-directory: server/tests
|
||||||
|
run: |
|
||||||
|
docker compose -f docker-compose.integration.yml logs --tail=500 > integration-logs.txt 2>&1
|
||||||
|
|
||||||
|
- name: Upload logs artifact
|
||||||
|
if: failure()
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: integration-logs
|
||||||
|
path: server/tests/integration-logs.txt
|
||||||
|
retention-days: 7
|
||||||
|
|
||||||
|
- name: Teardown
|
||||||
|
if: always()
|
||||||
|
working-directory: server/tests
|
||||||
|
run: |
|
||||||
|
docker compose -f docker-compose.integration.yml down -v --remove-orphans
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -25,3 +25,4 @@ www/.env.production
|
|||||||
opencode.json
|
opencode.json
|
||||||
|
|
||||||
vibedocs/
|
vibedocs/
|
||||||
|
server/tests/integration/logs/
|
||||||
|
|||||||
15
CLAUDE.md
15
CLAUDE.md
@@ -160,6 +160,21 @@ All endpoints prefixed `/v1/`:
|
|||||||
- **Frontend**: No current test suite - opportunities for Jest/React Testing Library
|
- **Frontend**: No current test suite - opportunities for Jest/React Testing Library
|
||||||
- **Coverage**: Backend maintains test coverage reports in `htmlcov/`
|
- **Coverage**: Backend maintains test coverage reports in `htmlcov/`
|
||||||
|
|
||||||
|
### Integration Tests (DO NOT run unless explicitly asked)
|
||||||
|
|
||||||
|
There are end-to-end integration tests in `server/tests/integration/` that spin up the full stack (PostgreSQL, Redis, Hatchet, Garage, mock-daily, server, workers) via Docker Compose and exercise real processing pipelines. These tests are:
|
||||||
|
|
||||||
|
- `test_file_pipeline.py` — File upload → FilePipeline
|
||||||
|
- `test_live_pipeline.py` — WebRTC stream → LivePostPipeline
|
||||||
|
- `test_multitrack_pipeline.py` — Multitrack → DailyMultitrackPipeline
|
||||||
|
|
||||||
|
**Important:**
|
||||||
|
- These tests are **excluded** from normal `uv run pytest` runs via `--ignore=tests/integration` in pyproject.toml.
|
||||||
|
- Do **NOT** run them as part of verification, code review, or general testing unless the user explicitly asks.
|
||||||
|
- They require Docker, external LLM credentials, and HuggingFace token — they cannot run in a regular test environment.
|
||||||
|
- To run locally: `./scripts/run-integration-tests.sh` (requires env vars: `LLM_URL`, `LLM_API_KEY`, `HF_TOKEN`).
|
||||||
|
- In CI: triggered manually via the "Integration Tests" GitHub Actions workflow (`workflow_dispatch`).
|
||||||
|
|
||||||
## GPU Processing
|
## GPU Processing
|
||||||
|
|
||||||
Modal.com integration for scalable ML processing:
|
Modal.com integration for scalable ML processing:
|
||||||
|
|||||||
156
scripts/run-integration-tests.sh
Executable file
156
scripts/run-integration-tests.sh
Executable file
@@ -0,0 +1,156 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# Run integration tests locally.
|
||||||
|
#
|
||||||
|
# Spins up the full stack via Docker Compose, runs the three integration tests,
|
||||||
|
# and tears everything down afterward.
|
||||||
|
#
|
||||||
|
# Required environment variables:
|
||||||
|
# LLM_URL — OpenAI-compatible LLM endpoint (e.g. https://api.openai.com/v1)
|
||||||
|
# LLM_API_KEY — API key for the LLM endpoint
|
||||||
|
# HF_TOKEN — HuggingFace token for pyannote gated models
|
||||||
|
#
|
||||||
|
# Optional:
|
||||||
|
# LLM_MODEL — Model name (default: qwen2.5:14b)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# export LLM_URL="https://api.openai.com/v1"
|
||||||
|
# export LLM_API_KEY="sk-..."
|
||||||
|
# export HF_TOKEN="hf_..."
|
||||||
|
# ./scripts/run-integration-tests.sh
|
||||||
|
#
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
|
COMPOSE_DIR="$REPO_ROOT/server/tests"
|
||||||
|
COMPOSE_FILE="$COMPOSE_DIR/docker-compose.integration.yml"
|
||||||
|
COMPOSE="docker compose -f $COMPOSE_FILE"
|
||||||
|
|
||||||
|
# ── Validate required env vars ──────────────────────────────────────────────
|
||||||
|
for var in LLM_URL LLM_API_KEY HF_TOKEN; do
|
||||||
|
if [[ -z "${!var:-}" ]]; then
|
||||||
|
echo "ERROR: $var is not set. See script header for required env vars."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
export LLM_MODEL="${LLM_MODEL:-qwen2.5:14b}"
|
||||||
|
|
||||||
|
# ── Helpers ─────────────────────────────────────────────────────────────────
|
||||||
|
info() { echo -e "\n\033[1;34m▸ $*\033[0m"; }
|
||||||
|
ok() { echo -e "\033[1;32m ✓ $*\033[0m"; }
|
||||||
|
fail() { echo -e "\033[1;31m ✗ $*\033[0m"; }
|
||||||
|
|
||||||
|
wait_for() {
|
||||||
|
local desc="$1" cmd="$2" max="${3:-60}"
|
||||||
|
info "Waiting for $desc (up to ${max}s)..."
|
||||||
|
for i in $(seq 1 "$max"); do
|
||||||
|
if eval "$cmd" &>/dev/null; then
|
||||||
|
ok "$desc is ready"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
fail "$desc did not become ready within ${max}s"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
info "Tearing down..."
|
||||||
|
$COMPOSE down -v --remove-orphans 2>/dev/null || true
|
||||||
|
}
|
||||||
|
|
||||||
|
# Always tear down on exit
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# ── Step 1: Build and start infrastructure ──────────────────────────────────
|
||||||
|
info "Building and starting infrastructure services..."
|
||||||
|
$COMPOSE up -d --build postgres redis garage hatchet mock-daily
|
||||||
|
|
||||||
|
# ── Step 2: Set up Garage (S3 bucket + keys) ───────────────────────────────
|
||||||
|
wait_for "Garage" "$COMPOSE exec -T garage /garage stats" 60
|
||||||
|
|
||||||
|
info "Setting up Garage bucket and keys..."
|
||||||
|
GARAGE="$COMPOSE exec -T garage /garage"
|
||||||
|
|
||||||
|
# Hardcoded test credentials — ephemeral containers, destroyed after tests
|
||||||
|
export GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
|
||||||
|
export GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
|
||||||
|
|
||||||
|
# Layout
|
||||||
|
NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
|
||||||
|
LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
|
||||||
|
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
|
||||||
|
$GARAGE layout assign "$NODE_ID" -c 1G -z dc1
|
||||||
|
$GARAGE layout apply --version 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Bucket
|
||||||
|
$GARAGE bucket info reflector-media >/dev/null 2>&1 || $GARAGE bucket create reflector-media
|
||||||
|
|
||||||
|
# Import key with known credentials
|
||||||
|
if ! $GARAGE key info reflector-test >/dev/null 2>&1; then
|
||||||
|
$GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
|
||||||
|
$GARAGE key rename "$GARAGE_KEY_ID" reflector-test
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Permissions
|
||||||
|
$GARAGE bucket allow reflector-media --read --write --key reflector-test
|
||||||
|
|
||||||
|
ok "Garage ready with hardcoded test credentials"
|
||||||
|
|
||||||
|
# ── Step 3: Generate Hatchet API token ──────────────────────────────────────
|
||||||
|
wait_for "Hatchet" "$COMPOSE exec -T hatchet curl -sf http://localhost:8888/api/live" 90
|
||||||
|
|
||||||
|
info "Generating Hatchet API token..."
|
||||||
|
HATCHET_TOKEN_OUTPUT=$($COMPOSE exec -T hatchet /hatchet-admin token create --config /config --name local-test 2>&1)
|
||||||
|
export HATCHET_CLIENT_TOKEN=$(echo "$HATCHET_TOKEN_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
|
||||||
|
|
||||||
|
if [[ -z "$HATCHET_CLIENT_TOKEN" ]]; then
|
||||||
|
fail "Failed to extract Hatchet token (JWT not found in output)"
|
||||||
|
echo " Output was: $HATCHET_TOKEN_OUTPUT"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
ok "Hatchet token generated"
|
||||||
|
|
||||||
|
# ── Step 4: Start backend services ──────────────────────────────────────────
|
||||||
|
info "Starting backend services..."
|
||||||
|
$COMPOSE up -d server worker hatchet-worker-cpu hatchet-worker-llm test-runner
|
||||||
|
|
||||||
|
# ── Step 5: Wait for server + run migrations ────────────────────────────────
|
||||||
|
wait_for "Server" "$COMPOSE exec -T test-runner curl -sf http://server:1250/health" 60
|
||||||
|
|
||||||
|
info "Running database migrations..."
|
||||||
|
$COMPOSE exec -T server uv run alembic upgrade head
|
||||||
|
ok "Migrations applied"
|
||||||
|
|
||||||
|
# ── Step 6: Run integration tests ───────────────────────────────────────────
|
||||||
|
info "Running integration tests..."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
LOGS_DIR="$COMPOSE_DIR/integration/logs"
|
||||||
|
mkdir -p "$LOGS_DIR"
|
||||||
|
RUN_TIMESTAMP=$(date +%Y%m%d-%H%M%S)
|
||||||
|
TEST_LOG="$LOGS_DIR/$RUN_TIMESTAMP.txt"
|
||||||
|
|
||||||
|
if $COMPOSE exec -T test-runner uv run pytest tests/integration/ -v -x 2>&1 | tee "$TEST_LOG.pytest"; then
|
||||||
|
echo ""
|
||||||
|
ok "All integration tests passed!"
|
||||||
|
EXIT_CODE=0
|
||||||
|
else
|
||||||
|
echo ""
|
||||||
|
fail "Integration tests failed!"
|
||||||
|
EXIT_CODE=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Always collect service logs + test output into a single file
|
||||||
|
info "Collecting logs..."
|
||||||
|
$COMPOSE logs --tail=500 > "$TEST_LOG" 2>&1
|
||||||
|
echo -e "\n\n=== PYTEST OUTPUT ===\n" >> "$TEST_LOG"
|
||||||
|
cat "$TEST_LOG.pytest" >> "$TEST_LOG" 2>/dev/null
|
||||||
|
rm -f "$TEST_LOG.pytest"
|
||||||
|
echo " Logs saved to: server/tests/integration/logs/$RUN_TIMESTAMP.txt"
|
||||||
|
|
||||||
|
# cleanup runs via trap
|
||||||
|
exit $EXIT_CODE
|
||||||
@@ -119,7 +119,7 @@ AUTH_BACKEND = "jwt"
|
|||||||
HATCHET_CLIENT_TOKEN = "test-dummy-token"
|
HATCHET_CLIENT_TOKEN = "test-dummy-token"
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
|
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v --ignore=tests/integration"
|
||||||
testpaths = ["tests"]
|
testpaths = ["tests"]
|
||||||
asyncio_mode = "auto"
|
asyncio_mode = "auto"
|
||||||
markers = [
|
markers = [
|
||||||
|
|||||||
@@ -307,7 +307,9 @@ async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
|
|||||||
ctx.log(
|
ctx.log(
|
||||||
f"get_recording: calling Daily.co API for recording_id={input.recording_id}..."
|
f"get_recording: calling Daily.co API for recording_id={input.recording_id}..."
|
||||||
)
|
)
|
||||||
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
|
async with DailyApiClient(
|
||||||
|
api_key=settings.DAILY_API_KEY, base_url=settings.DAILY_API_URL
|
||||||
|
) as client:
|
||||||
recording = await client.get_recording(input.recording_id)
|
recording = await client.get_recording(input.recording_id)
|
||||||
ctx.log(f"get_recording: Daily.co API returned successfully")
|
ctx.log(f"get_recording: Daily.co API returned successfully")
|
||||||
|
|
||||||
@@ -374,7 +376,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
|||||||
settings.DAILY_API_KEY, "DAILY_API_KEY is required"
|
settings.DAILY_API_KEY, "DAILY_API_KEY is required"
|
||||||
)
|
)
|
||||||
|
|
||||||
async with DailyApiClient(api_key=daily_api_key) as client:
|
async with DailyApiClient(
|
||||||
|
api_key=daily_api_key, base_url=settings.DAILY_API_URL
|
||||||
|
) as client:
|
||||||
participants = await client.get_meeting_participants(mtg_session_id)
|
participants = await client.get_meeting_participants(mtg_session_id)
|
||||||
|
|
||||||
id_to_name = {}
|
id_to_name = {}
|
||||||
|
|||||||
@@ -180,6 +180,7 @@ class Settings(BaseSettings):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Daily.co integration
|
# Daily.co integration
|
||||||
|
DAILY_API_URL: str = "https://api.daily.co/v1"
|
||||||
DAILY_API_KEY: str | None = None
|
DAILY_API_KEY: str | None = None
|
||||||
DAILY_WEBHOOK_SECRET: str | None = None
|
DAILY_WEBHOOK_SECRET: str | None = None
|
||||||
DAILY_SUBDOMAIN: str | None = None
|
DAILY_SUBDOMAIN: str | None = None
|
||||||
|
|||||||
218
server/tests/docker-compose.integration.yml
Normal file
218
server/tests/docker-compose.integration.yml
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
# Integration test stack — full pipeline end-to-end.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# docker compose -f server/tests/docker-compose.integration.yml up -d --build
|
||||||
|
#
|
||||||
|
# Requires .env.integration in the repo root (generated by CI workflow).
|
||||||
|
|
||||||
|
x-backend-env: &backend-env
|
||||||
|
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||||
|
REDIS_HOST: redis
|
||||||
|
CELERY_BROKER_URL: redis://redis:6379/1
|
||||||
|
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
||||||
|
HATCHET_CLIENT_TOKEN: ${HATCHET_CLIENT_TOKEN:-}
|
||||||
|
HATCHET_CLIENT_SERVER_URL: http://hatchet:8888
|
||||||
|
HATCHET_CLIENT_HOST_PORT: hatchet:7077
|
||||||
|
HATCHET_CLIENT_TLS_STRATEGY: none
|
||||||
|
# ML backends — CPU-only, no external services
|
||||||
|
TRANSCRIPT_BACKEND: whisper
|
||||||
|
WHISPER_CHUNK_MODEL: tiny
|
||||||
|
WHISPER_FILE_MODEL: tiny
|
||||||
|
DIARIZATION_BACKEND: pyannote
|
||||||
|
TRANSLATION_BACKEND: passthrough
|
||||||
|
# Storage — local Garage S3
|
||||||
|
TRANSCRIPT_STORAGE_BACKEND: aws
|
||||||
|
TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL: http://garage:3900
|
||||||
|
TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: reflector-media
|
||||||
|
TRANSCRIPT_STORAGE_AWS_REGION: garage
|
||||||
|
# Daily mock
|
||||||
|
DAILY_API_URL: http://mock-daily:8080/v1
|
||||||
|
DAILY_API_KEY: fake-daily-key
|
||||||
|
# Auth
|
||||||
|
PUBLIC_MODE: "true"
|
||||||
|
AUTH_BACKEND: none
|
||||||
|
# LLM (injected from CI)
|
||||||
|
LLM_URL: ${LLM_URL:-}
|
||||||
|
LLM_API_KEY: ${LLM_API_KEY:-}
|
||||||
|
LLM_MODEL: ${LLM_MODEL:-gpt-4o-mini}
|
||||||
|
# HuggingFace (for pyannote gated models)
|
||||||
|
HF_TOKEN: ${HF_TOKEN:-}
|
||||||
|
# Garage S3 credentials — hardcoded test keys, containers are ephemeral
|
||||||
|
TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: GK0123456789abcdef01234567 # gitleaks:allow
|
||||||
|
TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
|
||||||
|
# NOTE: DAILYCO_STORAGE_AWS_* intentionally NOT set — forces fallback to
|
||||||
|
# get_transcripts_storage() which has ENDPOINT_URL pointing at Garage.
|
||||||
|
# Setting them would bypass the endpoint and generate presigned URLs for AWS.
|
||||||
|
|
||||||
|
services:
|
||||||
|
postgres:
|
||||||
|
image: postgres:17-alpine
|
||||||
|
command: ["postgres", "-c", "max_connections=200"]
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: reflector
|
||||||
|
POSTGRES_PASSWORD: reflector
|
||||||
|
POSTGRES_DB: reflector
|
||||||
|
volumes:
|
||||||
|
- ../../server/docker/init-hatchet-db.sql:/docker-entrypoint-initdb.d/init-hatchet-db.sql:ro
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U reflector"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
|
redis:
|
||||||
|
image: redis:7.2-alpine
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
hatchet:
|
||||||
|
image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: "postgresql://reflector:reflector@postgres:5432/hatchet?sslmode=disable&connect_timeout=30"
|
||||||
|
SERVER_AUTH_COOKIE_INSECURE: "t"
|
||||||
|
SERVER_AUTH_COOKIE_DOMAIN: "localhost"
|
||||||
|
SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
|
||||||
|
SERVER_GRPC_INSECURE: "t"
|
||||||
|
SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
|
||||||
|
SERVER_GRPC_PORT: "7077"
|
||||||
|
SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
|
||||||
|
SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 15
|
||||||
|
start_period: 30s
|
||||||
|
|
||||||
|
garage:
|
||||||
|
image: dxflrs/garage:v1.1.0
|
||||||
|
volumes:
|
||||||
|
- ../../data/garage.toml:/etc/garage.toml:ro
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "/garage", "stats"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 10
|
||||||
|
start_period: 5s
|
||||||
|
|
||||||
|
mock-daily:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: integration/Dockerfile.mock-daily
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/recordings/test')"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
server:
|
||||||
|
build:
|
||||||
|
context: ../../server
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *backend-env
|
||||||
|
ENTRYPOINT: server
|
||||||
|
WEBRTC_HOST: server
|
||||||
|
WEBRTC_PORT_RANGE: "52000-52100"
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
hatchet:
|
||||||
|
condition: service_healthy
|
||||||
|
garage:
|
||||||
|
condition: service_healthy
|
||||||
|
mock-daily:
|
||||||
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- server_data:/app/data
|
||||||
|
|
||||||
|
worker:
|
||||||
|
build:
|
||||||
|
context: ../../server
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *backend-env
|
||||||
|
ENTRYPOINT: worker
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- server_data:/app/data
|
||||||
|
|
||||||
|
hatchet-worker-cpu:
|
||||||
|
build:
|
||||||
|
context: ../../server
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *backend-env
|
||||||
|
ENTRYPOINT: hatchet-worker-cpu
|
||||||
|
depends_on:
|
||||||
|
hatchet:
|
||||||
|
condition: service_healthy
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- server_data:/app/data
|
||||||
|
|
||||||
|
hatchet-worker-llm:
|
||||||
|
build:
|
||||||
|
context: ../../server
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *backend-env
|
||||||
|
ENTRYPOINT: hatchet-worker-llm
|
||||||
|
depends_on:
|
||||||
|
hatchet:
|
||||||
|
condition: service_healthy
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- server_data:/app/data
|
||||||
|
|
||||||
|
test-runner:
|
||||||
|
build:
|
||||||
|
context: ../../server
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *backend-env
|
||||||
|
# Override DATABASE_URL for sync driver (used by direct DB access in tests)
|
||||||
|
DATABASE_URL_ASYNC: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||||
|
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||||
|
SERVER_URL: http://server:1250
|
||||||
|
GARAGE_ENDPOINT: http://garage:3900
|
||||||
|
depends_on:
|
||||||
|
server:
|
||||||
|
condition: service_started
|
||||||
|
worker:
|
||||||
|
condition: service_started
|
||||||
|
hatchet-worker-cpu:
|
||||||
|
condition: service_started
|
||||||
|
hatchet-worker-llm:
|
||||||
|
condition: service_started
|
||||||
|
volumes:
|
||||||
|
- server_data:/app/data
|
||||||
|
# Mount test files into the container
|
||||||
|
- ./records:/app/tests/records:ro
|
||||||
|
- ./integration:/app/tests/integration:ro
|
||||||
|
entrypoint: ["sleep", "infinity"]
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
server_data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
default:
|
||||||
|
attachable: true
|
||||||
9
server/tests/integration/Dockerfile.mock-daily
Normal file
9
server/tests/integration/Dockerfile.mock-daily
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir fastapi uvicorn[standard]
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY integration/mock_daily_server.py /app/mock_daily_server.py
|
||||||
|
|
||||||
|
EXPOSE 8080
|
||||||
|
CMD ["uvicorn", "mock_daily_server:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||||
0
server/tests/integration/__init__.py
Normal file
0
server/tests/integration/__init__.py
Normal file
116
server/tests/integration/conftest.py
Normal file
116
server/tests/integration/conftest.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
"""
|
||||||
|
Integration test fixtures — no mocks, real services.
|
||||||
|
|
||||||
|
All services (PostgreSQL, Redis, Hatchet, Garage, server, workers) are
|
||||||
|
expected to be running via docker-compose.integration.yml.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import boto3
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
import pytest_asyncio
|
||||||
|
from sqlalchemy.ext.asyncio import create_async_engine
|
||||||
|
|
||||||
|
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
|
||||||
|
GARAGE_ENDPOINT = os.environ.get("GARAGE_ENDPOINT", "http://garage:3900")
|
||||||
|
DATABASE_URL = os.environ.get(
|
||||||
|
"DATABASE_URL_ASYNC",
|
||||||
|
os.environ.get(
|
||||||
|
"DATABASE_URL",
|
||||||
|
"postgresql+asyncpg://reflector:reflector@postgres:5432/reflector",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
GARAGE_KEY_ID = os.environ.get("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID", "")
|
||||||
|
GARAGE_KEY_SECRET = os.environ.get("TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY", "")
|
||||||
|
BUCKET_NAME = "reflector-media"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
|
||||||
|
async def api_client():
|
||||||
|
"""HTTP client pointed at the running server."""
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
base_url=f"{SERVER_URL}/v1",
|
||||||
|
timeout=httpx.Timeout(30.0),
|
||||||
|
) as client:
|
||||||
|
yield client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def s3_client():
|
||||||
|
"""Boto3 S3 client pointed at Garage."""
|
||||||
|
return boto3.client(
|
||||||
|
"s3",
|
||||||
|
endpoint_url=GARAGE_ENDPOINT,
|
||||||
|
aws_access_key_id=GARAGE_KEY_ID,
|
||||||
|
aws_secret_access_key=GARAGE_KEY_SECRET,
|
||||||
|
region_name="garage",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
|
||||||
|
async def db_engine():
|
||||||
|
"""SQLAlchemy async engine for direct DB operations."""
|
||||||
|
engine = create_async_engine(DATABASE_URL)
|
||||||
|
yield engine
|
||||||
|
await engine.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def test_records_dir():
|
||||||
|
"""Path to the test audio files directory."""
|
||||||
|
return Path(__file__).parent.parent / "records"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def bucket_name():
|
||||||
|
"""S3 bucket name used for integration tests."""
|
||||||
|
return BUCKET_NAME
|
||||||
|
|
||||||
|
|
||||||
|
async def _poll_transcript_status(
|
||||||
|
client: httpx.AsyncClient,
|
||||||
|
transcript_id: str,
|
||||||
|
target: str | tuple[str, ...],
|
||||||
|
error: str = "error",
|
||||||
|
max_wait: int = 300,
|
||||||
|
interval: int = 3,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Poll GET /transcripts/{id} until status matches target or error.
|
||||||
|
|
||||||
|
target can be a single status string or a tuple of acceptable statuses.
|
||||||
|
Returns the transcript dict on success, raises on timeout or error status.
|
||||||
|
"""
|
||||||
|
targets = (target,) if isinstance(target, str) else target
|
||||||
|
elapsed = 0
|
||||||
|
status = None
|
||||||
|
while elapsed < max_wait:
|
||||||
|
resp = await client.get(f"/transcripts/{transcript_id}")
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
status = data.get("status")
|
||||||
|
|
||||||
|
if status in targets:
|
||||||
|
return data
|
||||||
|
if status == error:
|
||||||
|
raise AssertionError(
|
||||||
|
f"Transcript {transcript_id} reached error status: {data}"
|
||||||
|
)
|
||||||
|
|
||||||
|
await asyncio.sleep(interval)
|
||||||
|
elapsed += interval
|
||||||
|
|
||||||
|
raise TimeoutError(
|
||||||
|
f"Transcript {transcript_id} did not reach status '{target}' "
|
||||||
|
f"within {max_wait}s (last status: {status})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
|
||||||
|
def poll_transcript_status():
|
||||||
|
"""Returns the poll_transcript_status async helper function."""
|
||||||
|
return _poll_transcript_status
|
||||||
62
server/tests/integration/garage_setup.sh
Executable file
62
server/tests/integration/garage_setup.sh
Executable file
@@ -0,0 +1,62 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# Initialize Garage bucket and keys for integration tests.
|
||||||
|
# Run inside the Garage container after it's healthy.
|
||||||
|
#
|
||||||
|
# Outputs KEY_ID and KEY_SECRET to stdout (last two lines).
|
||||||
|
#
|
||||||
|
# Note: uses /bin/sh (not bash) since the Garage container is minimal.
|
||||||
|
#
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
echo "Waiting for Garage to be ready..."
|
||||||
|
i=0
|
||||||
|
while [ "$i" -lt 30 ]; do
|
||||||
|
if /garage stats >/dev/null 2>&1; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
i=$((i + 1))
|
||||||
|
done
|
||||||
|
|
||||||
|
# Layout setup
|
||||||
|
NODE_ID=$(/garage node id -q | tr -d '[:space:]')
|
||||||
|
LAYOUT_STATUS=$(/garage layout show 2>&1 || true)
|
||||||
|
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
|
||||||
|
/garage layout assign "$NODE_ID" -c 1G -z dc1
|
||||||
|
/garage layout apply --version 1
|
||||||
|
echo "Layout applied."
|
||||||
|
else
|
||||||
|
echo "Layout already configured."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Bucket
|
||||||
|
if ! /garage bucket info reflector-media >/dev/null 2>&1; then
|
||||||
|
/garage bucket create reflector-media
|
||||||
|
echo "Bucket 'reflector-media' created."
|
||||||
|
else
|
||||||
|
echo "Bucket 'reflector-media' already exists."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Key
|
||||||
|
if /garage key info reflector-test >/dev/null 2>&1; then
|
||||||
|
echo "Key 'reflector-test' already exists."
|
||||||
|
KEY_OUTPUT=$(/garage key info reflector-test 2>&1)
|
||||||
|
else
|
||||||
|
KEY_OUTPUT=$(/garage key create reflector-test 2>&1)
|
||||||
|
echo "Key 'reflector-test' created."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Permissions
|
||||||
|
/garage bucket allow reflector-media --read --write --key reflector-test
|
||||||
|
|
||||||
|
# Extract key ID and secret from output using POSIX-compatible parsing
|
||||||
|
# garage key output format:
|
||||||
|
# Key name: reflector-test
|
||||||
|
# Key ID: GK...
|
||||||
|
# Secret key: ...
|
||||||
|
KEY_ID=$(echo "$KEY_OUTPUT" | grep "Key ID" | sed 's/.*Key ID: *//')
|
||||||
|
KEY_SECRET=$(echo "$KEY_OUTPUT" | grep "Secret key" | sed 's/.*Secret key: *//')
|
||||||
|
|
||||||
|
echo "GARAGE_KEY_ID=${KEY_ID}"
|
||||||
|
echo "GARAGE_KEY_SECRET=${KEY_SECRET}"
|
||||||
75
server/tests/integration/mock_daily_server.py
Normal file
75
server/tests/integration/mock_daily_server.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
"""
|
||||||
|
Minimal FastAPI mock for Daily.co API.
|
||||||
|
|
||||||
|
Serves canned responses for:
|
||||||
|
- GET /v1/recordings/{recording_id}
|
||||||
|
- GET /v1/meetings/{meeting_id}/participants
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
app = FastAPI(title="Mock Daily API")
|
||||||
|
|
||||||
|
|
||||||
|
# Participant UUIDs must be 36-char hex UUIDs to match Daily's filename format
|
||||||
|
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
|
||||||
|
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
|
||||||
|
|
||||||
|
# Daily-format track keys: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
|
||||||
|
TRACK_KEYS = [
|
||||||
|
f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
|
||||||
|
f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v1/recordings/{recording_id}")
|
||||||
|
async def get_recording(recording_id: str):
|
||||||
|
return {
|
||||||
|
"id": recording_id,
|
||||||
|
"room_name": "integration-test-room",
|
||||||
|
"start_ts": 1700000000,
|
||||||
|
"type": "raw-tracks",
|
||||||
|
"status": "finished",
|
||||||
|
"max_participants": 2,
|
||||||
|
"duration": 5,
|
||||||
|
"share_token": None,
|
||||||
|
"s3": {
|
||||||
|
"bucket_name": "reflector-media",
|
||||||
|
"bucket_region": "garage",
|
||||||
|
"key": None,
|
||||||
|
"endpoint": None,
|
||||||
|
},
|
||||||
|
"s3key": None,
|
||||||
|
"tracks": [
|
||||||
|
{"type": "audio", "s3Key": key, "size": 100000} for key in TRACK_KEYS
|
||||||
|
],
|
||||||
|
"mtgSessionId": "mock-mtg-session-id",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v1/meetings/{meeting_id}/participants")
|
||||||
|
async def get_meeting_participants(meeting_id: str):
|
||||||
|
return {
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"user_id": "user-a",
|
||||||
|
"participant_id": PARTICIPANT_A_ID,
|
||||||
|
"user_name": "Speaker A",
|
||||||
|
"join_time": 1700000000,
|
||||||
|
"duration": 300,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_id": "user-b",
|
||||||
|
"participant_id": PARTICIPANT_B_ID,
|
||||||
|
"user_name": "Speaker B",
|
||||||
|
"join_time": 1700000010,
|
||||||
|
"duration": 290,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=8080)
|
||||||
61
server/tests/integration/test_file_pipeline.py
Normal file
61
server/tests/integration/test_file_pipeline.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
"""
|
||||||
|
Integration test: File upload → FilePipeline → full processing.
|
||||||
|
|
||||||
|
Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
|
||||||
|
pyannote diarization → LLM summarization/topics → status "ended".
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_file_pipeline_end_to_end(
|
||||||
|
api_client, test_records_dir, poll_transcript_status
|
||||||
|
):
|
||||||
|
"""Upload a WAV file and verify the full pipeline completes."""
|
||||||
|
# 1. Create transcript
|
||||||
|
resp = await api_client.post(
|
||||||
|
"/transcripts",
|
||||||
|
json={"name": "integration-file-test", "source_kind": "file"},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||||
|
transcript = resp.json()
|
||||||
|
transcript_id = transcript["id"]
|
||||||
|
|
||||||
|
# 2. Upload audio file (single chunk)
|
||||||
|
audio_path = test_records_dir / "test_short.wav"
|
||||||
|
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
||||||
|
|
||||||
|
with open(audio_path, "rb") as f:
|
||||||
|
resp = await api_client.post(
|
||||||
|
f"/transcripts/{transcript_id}/record/upload",
|
||||||
|
params={"chunk_number": 0, "total_chunks": 1},
|
||||||
|
files={"chunk": ("test_short.wav", f, "audio/wav")},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 200, f"Upload failed: {resp.text}"
|
||||||
|
|
||||||
|
# 3. Poll until pipeline completes
|
||||||
|
data = await poll_transcript_status(
|
||||||
|
api_client, transcript_id, target="ended", max_wait=300
|
||||||
|
)
|
||||||
|
|
||||||
|
# 4. Assertions
|
||||||
|
assert data["status"] == "ended"
|
||||||
|
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
||||||
|
assert (
|
||||||
|
data.get("long_summary") and len(data["long_summary"]) > 0
|
||||||
|
), "Long summary should be non-empty"
|
||||||
|
assert (
|
||||||
|
data.get("short_summary") and len(data["short_summary"]) > 0
|
||||||
|
), "Short summary should be non-empty"
|
||||||
|
|
||||||
|
# Topics are served from a separate endpoint
|
||||||
|
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
||||||
|
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
||||||
|
topics = topics_resp.json()
|
||||||
|
assert len(topics) >= 1, "Should have at least 1 topic"
|
||||||
|
for topic in topics:
|
||||||
|
assert topic.get("title"), "Each topic should have a title"
|
||||||
|
assert topic.get("summary"), "Each topic should have a summary"
|
||||||
|
|
||||||
|
assert data.get("duration", 0) > 0, "Duration should be positive"
|
||||||
109
server/tests/integration/test_live_pipeline.py
Normal file
109
server/tests/integration/test_live_pipeline.py
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
"""
|
||||||
|
Integration test: WebRTC stream → LivePostProcessingPipeline → full processing.
|
||||||
|
|
||||||
|
Exercises: WebRTC SDP exchange → live audio streaming → connection close →
|
||||||
|
Hatchet LivePostPipeline → whisper transcription → LLM summarization/topics → status "ended".
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
from aiortc import RTCPeerConnection, RTCSessionDescription
|
||||||
|
from aiortc.contrib.media import MediaPlayer
|
||||||
|
|
||||||
|
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_live_pipeline_end_to_end(
|
||||||
|
api_client, test_records_dir, poll_transcript_status
|
||||||
|
):
|
||||||
|
"""Stream audio via WebRTC and verify the full post-processing pipeline completes."""
|
||||||
|
# 1. Create transcript
|
||||||
|
resp = await api_client.post(
|
||||||
|
"/transcripts",
|
||||||
|
json={"name": "integration-live-test"},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||||
|
transcript = resp.json()
|
||||||
|
transcript_id = transcript["id"]
|
||||||
|
|
||||||
|
# 2. Set up WebRTC peer connection with audio from test file
|
||||||
|
audio_path = test_records_dir / "test_short.wav"
|
||||||
|
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
||||||
|
|
||||||
|
pc = RTCPeerConnection()
|
||||||
|
player = MediaPlayer(audio_path.as_posix())
|
||||||
|
|
||||||
|
# Add audio track
|
||||||
|
audio_track = player.audio
|
||||||
|
pc.addTrack(audio_track)
|
||||||
|
|
||||||
|
# Create data channel (server expects this for STOP command)
|
||||||
|
channel = pc.createDataChannel("data-channel")
|
||||||
|
|
||||||
|
# 3. Generate SDP offer
|
||||||
|
offer = await pc.createOffer()
|
||||||
|
await pc.setLocalDescription(offer)
|
||||||
|
|
||||||
|
sdp_payload = {
|
||||||
|
"sdp": pc.localDescription.sdp,
|
||||||
|
"type": pc.localDescription.type,
|
||||||
|
}
|
||||||
|
|
||||||
|
# 4. Send offer to server and get answer
|
||||||
|
webrtc_url = f"{SERVER_URL}/v1/transcripts/{transcript_id}/record/webrtc"
|
||||||
|
async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
|
||||||
|
resp = await client.post(webrtc_url, json=sdp_payload)
|
||||||
|
assert resp.status_code == 200, f"WebRTC offer failed: {resp.text}"
|
||||||
|
|
||||||
|
answer_data = resp.json()
|
||||||
|
answer = RTCSessionDescription(sdp=answer_data["sdp"], type=answer_data["type"])
|
||||||
|
await pc.setRemoteDescription(answer)
|
||||||
|
|
||||||
|
# 5. Wait for audio playback to finish
|
||||||
|
max_stream_wait = 60
|
||||||
|
elapsed = 0
|
||||||
|
while elapsed < max_stream_wait:
|
||||||
|
if audio_track.readyState == "ended":
|
||||||
|
break
|
||||||
|
await asyncio.sleep(0.5)
|
||||||
|
elapsed += 0.5
|
||||||
|
|
||||||
|
# 6. Send STOP command and close connection
|
||||||
|
try:
|
||||||
|
channel.send(json.dumps({"cmd": "STOP"}))
|
||||||
|
await asyncio.sleep(1)
|
||||||
|
except Exception:
|
||||||
|
pass # Channel may not be open if track ended quickly
|
||||||
|
|
||||||
|
await pc.close()
|
||||||
|
|
||||||
|
# 7. Poll until post-processing pipeline completes
|
||||||
|
data = await poll_transcript_status(
|
||||||
|
api_client, transcript_id, target="ended", max_wait=300
|
||||||
|
)
|
||||||
|
|
||||||
|
# 8. Assertions
|
||||||
|
assert data["status"] == "ended"
|
||||||
|
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
||||||
|
assert (
|
||||||
|
data.get("long_summary") and len(data["long_summary"]) > 0
|
||||||
|
), "Long summary should be non-empty"
|
||||||
|
assert (
|
||||||
|
data.get("short_summary") and len(data["short_summary"]) > 0
|
||||||
|
), "Short summary should be non-empty"
|
||||||
|
|
||||||
|
# Topics are served from a separate endpoint
|
||||||
|
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
||||||
|
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
||||||
|
topics = topics_resp.json()
|
||||||
|
assert len(topics) >= 1, "Should have at least 1 topic"
|
||||||
|
for topic in topics:
|
||||||
|
assert topic.get("title"), "Each topic should have a title"
|
||||||
|
assert topic.get("summary"), "Each topic should have a summary"
|
||||||
|
|
||||||
|
assert data.get("duration", 0) > 0, "Duration should be positive"
|
||||||
129
server/tests/integration/test_multitrack_pipeline.py
Normal file
129
server/tests/integration/test_multitrack_pipeline.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
"""
|
||||||
|
Integration test: Multitrack → DailyMultitrackPipeline → full processing.
|
||||||
|
|
||||||
|
Exercises: S3 upload → DB recording setup → process endpoint →
|
||||||
|
Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
|
||||||
|
diarization → mixdown → LLM summarization/topics → status "ended".
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
# Must match Daily's filename format: {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
|
||||||
|
# These UUIDs must match mock_daily_server.py participant IDs
|
||||||
|
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
|
||||||
|
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
|
||||||
|
TRACK_KEYS = [
|
||||||
|
f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
|
||||||
|
f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_multitrack_pipeline_end_to_end(
|
||||||
|
api_client,
|
||||||
|
s3_client,
|
||||||
|
db_engine,
|
||||||
|
test_records_dir,
|
||||||
|
bucket_name,
|
||||||
|
poll_transcript_status,
|
||||||
|
):
|
||||||
|
"""Set up multitrack recording in S3/DB and verify the full pipeline completes."""
|
||||||
|
# 1. Upload test audio as two separate tracks to Garage S3
|
||||||
|
audio_path = test_records_dir / "test_short.wav"
|
||||||
|
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
||||||
|
|
||||||
|
for track_key in TRACK_KEYS:
|
||||||
|
s3_client.upload_file(
|
||||||
|
str(audio_path),
|
||||||
|
bucket_name,
|
||||||
|
track_key,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2. Create transcript via API
|
||||||
|
resp = await api_client.post(
|
||||||
|
"/transcripts",
|
||||||
|
json={"name": "integration-multitrack-test"},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||||
|
transcript = resp.json()
|
||||||
|
transcript_id = transcript["id"]
|
||||||
|
|
||||||
|
# 3. Insert Recording row and link to transcript via direct DB access
|
||||||
|
recording_id = f"rec-integration-{transcript_id[:8]}"
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
async with db_engine.begin() as conn:
|
||||||
|
# Insert recording with track_keys
|
||||||
|
await conn.execute(
|
||||||
|
text("""
|
||||||
|
INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys)
|
||||||
|
VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json))
|
||||||
|
"""),
|
||||||
|
{
|
||||||
|
"id": recording_id,
|
||||||
|
"bucket_name": bucket_name,
|
||||||
|
"object_key": TRACK_KEYS[0],
|
||||||
|
"recorded_at": now,
|
||||||
|
"status": "completed",
|
||||||
|
"track_keys": json.dumps(TRACK_KEYS),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Link recording to transcript and set status to uploaded
|
||||||
|
await conn.execute(
|
||||||
|
text("""
|
||||||
|
UPDATE transcript
|
||||||
|
SET recording_id = :recording_id, status = 'uploaded'
|
||||||
|
WHERE id = :transcript_id
|
||||||
|
"""),
|
||||||
|
{
|
||||||
|
"recording_id": recording_id,
|
||||||
|
"transcript_id": transcript_id,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# 4. Trigger processing via process endpoint
|
||||||
|
resp = await api_client.post(f"/transcripts/{transcript_id}/process")
|
||||||
|
assert resp.status_code == 200, f"Process trigger failed: {resp.text}"
|
||||||
|
|
||||||
|
# 5. Poll until pipeline completes
|
||||||
|
# The pipeline will call mock-daily for get_recording and get_participants
|
||||||
|
# Accept "error" too — non-critical steps like action_items may fail due to
|
||||||
|
# LLM parsing flakiness while core results (transcript, summaries) still exist.
|
||||||
|
data = await poll_transcript_status(
|
||||||
|
api_client, transcript_id, target=("ended", "error"), max_wait=300
|
||||||
|
)
|
||||||
|
|
||||||
|
# 6. Assertions — verify core pipeline results regardless of final status
|
||||||
|
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
||||||
|
assert (
|
||||||
|
data.get("long_summary") and len(data["long_summary"]) > 0
|
||||||
|
), "Long summary should be non-empty"
|
||||||
|
assert (
|
||||||
|
data.get("short_summary") and len(data["short_summary"]) > 0
|
||||||
|
), "Short summary should be non-empty"
|
||||||
|
|
||||||
|
# Topics are served from a separate endpoint
|
||||||
|
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
||||||
|
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
||||||
|
topics = topics_resp.json()
|
||||||
|
assert len(topics) >= 1, "Should have at least 1 topic"
|
||||||
|
for topic in topics:
|
||||||
|
assert topic.get("title"), "Each topic should have a title"
|
||||||
|
assert topic.get("summary"), "Each topic should have a summary"
|
||||||
|
|
||||||
|
# Participants are served from a separate endpoint
|
||||||
|
participants_resp = await api_client.get(
|
||||||
|
f"/transcripts/{transcript_id}/participants"
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
participants_resp.status_code == 200
|
||||||
|
), f"Failed to get participants: {participants_resp.text}"
|
||||||
|
participants = participants_resp.json()
|
||||||
|
assert (
|
||||||
|
len(participants) >= 2
|
||||||
|
), f"Expected at least 2 speakers for multitrack, got {len(participants)}"
|
||||||
Reference in New Issue
Block a user