test: full integration tests (#916)

* test: full integration tests

* fix: add env vars as secrets in CI
This commit is contained in:
Juan Diego García
2026-03-18 15:29:21 -05:00
committed by GitHub
parent a9200d35bf
commit 9a2f973a2e
16 changed files with 1098 additions and 3 deletions

View File

@@ -119,7 +119,7 @@ AUTH_BACKEND = "jwt"
HATCHET_CLIENT_TOKEN = "test-dummy-token"
[tool.pytest.ini_options]
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v --ignore=tests/integration"
testpaths = ["tests"]
asyncio_mode = "auto"
markers = [

View File

@@ -307,7 +307,9 @@ async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
ctx.log(
f"get_recording: calling Daily.co API for recording_id={input.recording_id}..."
)
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
async with DailyApiClient(
api_key=settings.DAILY_API_KEY, base_url=settings.DAILY_API_URL
) as client:
recording = await client.get_recording(input.recording_id)
ctx.log(f"get_recording: Daily.co API returned successfully")
@@ -374,7 +376,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
settings.DAILY_API_KEY, "DAILY_API_KEY is required"
)
async with DailyApiClient(api_key=daily_api_key) as client:
async with DailyApiClient(
api_key=daily_api_key, base_url=settings.DAILY_API_URL
) as client:
participants = await client.get_meeting_participants(mtg_session_id)
id_to_name = {}

View File

@@ -180,6 +180,7 @@ class Settings(BaseSettings):
)
# Daily.co integration
DAILY_API_URL: str = "https://api.daily.co/v1"
DAILY_API_KEY: str | None = None
DAILY_WEBHOOK_SECRET: str | None = None
DAILY_SUBDOMAIN: str | None = None

View File

@@ -0,0 +1,218 @@
# Integration test stack — full pipeline end-to-end.
#
# Usage:
# docker compose -f server/tests/docker-compose.integration.yml up -d --build
#
# Requires .env.integration in the repo root (generated by CI workflow).
x-backend-env: &backend-env
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
REDIS_HOST: redis
CELERY_BROKER_URL: redis://redis:6379/1
CELERY_RESULT_BACKEND: redis://redis:6379/1
HATCHET_CLIENT_TOKEN: ${HATCHET_CLIENT_TOKEN:-}
HATCHET_CLIENT_SERVER_URL: http://hatchet:8888
HATCHET_CLIENT_HOST_PORT: hatchet:7077
HATCHET_CLIENT_TLS_STRATEGY: none
# ML backends — CPU-only, no external services
TRANSCRIPT_BACKEND: whisper
WHISPER_CHUNK_MODEL: tiny
WHISPER_FILE_MODEL: tiny
DIARIZATION_BACKEND: pyannote
TRANSLATION_BACKEND: passthrough
# Storage — local Garage S3
TRANSCRIPT_STORAGE_BACKEND: aws
TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL: http://garage:3900
TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: reflector-media
TRANSCRIPT_STORAGE_AWS_REGION: garage
# Daily mock
DAILY_API_URL: http://mock-daily:8080/v1
DAILY_API_KEY: fake-daily-key
# Auth
PUBLIC_MODE: "true"
AUTH_BACKEND: none
# LLM (injected from CI)
LLM_URL: ${LLM_URL:-}
LLM_API_KEY: ${LLM_API_KEY:-}
LLM_MODEL: ${LLM_MODEL:-gpt-4o-mini}
# HuggingFace (for pyannote gated models)
HF_TOKEN: ${HF_TOKEN:-}
# Garage S3 credentials — hardcoded test keys, containers are ephemeral
TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: GK0123456789abcdef01234567 # gitleaks:allow
TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
# NOTE: DAILYCO_STORAGE_AWS_* intentionally NOT set — forces fallback to
# get_transcripts_storage() which has ENDPOINT_URL pointing at Garage.
# Setting them would bypass the endpoint and generate presigned URLs for AWS.
services:
postgres:
image: postgres:17-alpine
command: ["postgres", "-c", "max_connections=200"]
environment:
POSTGRES_USER: reflector
POSTGRES_PASSWORD: reflector
POSTGRES_DB: reflector
volumes:
- ../../server/docker/init-hatchet-db.sql:/docker-entrypoint-initdb.d/init-hatchet-db.sql:ro
healthcheck:
test: ["CMD-SHELL", "pg_isready -U reflector"]
interval: 5s
timeout: 3s
retries: 10
redis:
image: redis:7.2-alpine
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 5s
timeout: 3s
retries: 5
hatchet:
image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
depends_on:
postgres:
condition: service_healthy
environment:
DATABASE_URL: "postgresql://reflector:reflector@postgres:5432/hatchet?sslmode=disable&connect_timeout=30"
SERVER_AUTH_COOKIE_INSECURE: "t"
SERVER_AUTH_COOKIE_DOMAIN: "localhost"
SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
SERVER_GRPC_INSECURE: "t"
SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
SERVER_GRPC_PORT: "7077"
SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
interval: 10s
timeout: 5s
retries: 15
start_period: 30s
garage:
image: dxflrs/garage:v1.1.0
volumes:
- ../../data/garage.toml:/etc/garage.toml:ro
healthcheck:
test: ["CMD", "/garage", "stats"]
interval: 5s
timeout: 3s
retries: 10
start_period: 5s
mock-daily:
build:
context: .
dockerfile: integration/Dockerfile.mock-daily
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/recordings/test')"]
interval: 5s
timeout: 3s
retries: 5
server:
build:
context: ../../server
dockerfile: Dockerfile
environment:
<<: *backend-env
ENTRYPOINT: server
WEBRTC_HOST: server
WEBRTC_PORT_RANGE: "52000-52100"
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
hatchet:
condition: service_healthy
garage:
condition: service_healthy
mock-daily:
condition: service_healthy
volumes:
- server_data:/app/data
worker:
build:
context: ../../server
dockerfile: Dockerfile
environment:
<<: *backend-env
ENTRYPOINT: worker
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
volumes:
- server_data:/app/data
hatchet-worker-cpu:
build:
context: ../../server
dockerfile: Dockerfile
environment:
<<: *backend-env
ENTRYPOINT: hatchet-worker-cpu
depends_on:
hatchet:
condition: service_healthy
postgres:
condition: service_healthy
redis:
condition: service_healthy
volumes:
- server_data:/app/data
hatchet-worker-llm:
build:
context: ../../server
dockerfile: Dockerfile
environment:
<<: *backend-env
ENTRYPOINT: hatchet-worker-llm
depends_on:
hatchet:
condition: service_healthy
postgres:
condition: service_healthy
redis:
condition: service_healthy
volumes:
- server_data:/app/data
test-runner:
build:
context: ../../server
dockerfile: Dockerfile
environment:
<<: *backend-env
# Override DATABASE_URL for sync driver (used by direct DB access in tests)
DATABASE_URL_ASYNC: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
SERVER_URL: http://server:1250
GARAGE_ENDPOINT: http://garage:3900
depends_on:
server:
condition: service_started
worker:
condition: service_started
hatchet-worker-cpu:
condition: service_started
hatchet-worker-llm:
condition: service_started
volumes:
- server_data:/app/data
# Mount test files into the container
- ./records:/app/tests/records:ro
- ./integration:/app/tests/integration:ro
entrypoint: ["sleep", "infinity"]
volumes:
server_data:
networks:
default:
attachable: true

View File

@@ -0,0 +1,9 @@
FROM python:3.12-slim
RUN pip install --no-cache-dir fastapi uvicorn[standard]
WORKDIR /app
COPY integration/mock_daily_server.py /app/mock_daily_server.py
EXPOSE 8080
CMD ["uvicorn", "mock_daily_server:app", "--host", "0.0.0.0", "--port", "8080"]

View File

View File

@@ -0,0 +1,116 @@
"""
Integration test fixtures — no mocks, real services.
All services (PostgreSQL, Redis, Hatchet, Garage, server, workers) are
expected to be running via docker-compose.integration.yml.
"""
import asyncio
import os
from pathlib import Path
import boto3
import httpx
import pytest
import pytest_asyncio
from sqlalchemy.ext.asyncio import create_async_engine
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
GARAGE_ENDPOINT = os.environ.get("GARAGE_ENDPOINT", "http://garage:3900")
DATABASE_URL = os.environ.get(
"DATABASE_URL_ASYNC",
os.environ.get(
"DATABASE_URL",
"postgresql+asyncpg://reflector:reflector@postgres:5432/reflector",
),
)
GARAGE_KEY_ID = os.environ.get("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID", "")
GARAGE_KEY_SECRET = os.environ.get("TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY", "")
BUCKET_NAME = "reflector-media"
@pytest_asyncio.fixture
async def api_client():
"""HTTP client pointed at the running server."""
async with httpx.AsyncClient(
base_url=f"{SERVER_URL}/v1",
timeout=httpx.Timeout(30.0),
) as client:
yield client
@pytest.fixture(scope="session")
def s3_client():
"""Boto3 S3 client pointed at Garage."""
return boto3.client(
"s3",
endpoint_url=GARAGE_ENDPOINT,
aws_access_key_id=GARAGE_KEY_ID,
aws_secret_access_key=GARAGE_KEY_SECRET,
region_name="garage",
)
@pytest_asyncio.fixture
async def db_engine():
"""SQLAlchemy async engine for direct DB operations."""
engine = create_async_engine(DATABASE_URL)
yield engine
await engine.dispose()
@pytest.fixture(scope="session")
def test_records_dir():
"""Path to the test audio files directory."""
return Path(__file__).parent.parent / "records"
@pytest.fixture(scope="session")
def bucket_name():
"""S3 bucket name used for integration tests."""
return BUCKET_NAME
async def _poll_transcript_status(
client: httpx.AsyncClient,
transcript_id: str,
target: str | tuple[str, ...],
error: str = "error",
max_wait: int = 300,
interval: int = 3,
) -> dict:
"""
Poll GET /transcripts/{id} until status matches target or error.
target can be a single status string or a tuple of acceptable statuses.
Returns the transcript dict on success, raises on timeout or error status.
"""
targets = (target,) if isinstance(target, str) else target
elapsed = 0
status = None
while elapsed < max_wait:
resp = await client.get(f"/transcripts/{transcript_id}")
resp.raise_for_status()
data = resp.json()
status = data.get("status")
if status in targets:
return data
if status == error:
raise AssertionError(
f"Transcript {transcript_id} reached error status: {data}"
)
await asyncio.sleep(interval)
elapsed += interval
raise TimeoutError(
f"Transcript {transcript_id} did not reach status '{target}' "
f"within {max_wait}s (last status: {status})"
)
@pytest_asyncio.fixture
def poll_transcript_status():
"""Returns the poll_transcript_status async helper function."""
return _poll_transcript_status

View File

@@ -0,0 +1,62 @@
#!/bin/sh
#
# Initialize Garage bucket and keys for integration tests.
# Run inside the Garage container after it's healthy.
#
# Outputs KEY_ID and KEY_SECRET to stdout (last two lines).
#
# Note: uses /bin/sh (not bash) since the Garage container is minimal.
#
set -eu
echo "Waiting for Garage to be ready..."
i=0
while [ "$i" -lt 30 ]; do
if /garage stats >/dev/null 2>&1; then
break
fi
sleep 1
i=$((i + 1))
done
# Layout setup
NODE_ID=$(/garage node id -q | tr -d '[:space:]')
LAYOUT_STATUS=$(/garage layout show 2>&1 || true)
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
/garage layout assign "$NODE_ID" -c 1G -z dc1
/garage layout apply --version 1
echo "Layout applied."
else
echo "Layout already configured."
fi
# Bucket
if ! /garage bucket info reflector-media >/dev/null 2>&1; then
/garage bucket create reflector-media
echo "Bucket 'reflector-media' created."
else
echo "Bucket 'reflector-media' already exists."
fi
# Key
if /garage key info reflector-test >/dev/null 2>&1; then
echo "Key 'reflector-test' already exists."
KEY_OUTPUT=$(/garage key info reflector-test 2>&1)
else
KEY_OUTPUT=$(/garage key create reflector-test 2>&1)
echo "Key 'reflector-test' created."
fi
# Permissions
/garage bucket allow reflector-media --read --write --key reflector-test
# Extract key ID and secret from output using POSIX-compatible parsing
# garage key output format:
# Key name: reflector-test
# Key ID: GK...
# Secret key: ...
KEY_ID=$(echo "$KEY_OUTPUT" | grep "Key ID" | sed 's/.*Key ID: *//')
KEY_SECRET=$(echo "$KEY_OUTPUT" | grep "Secret key" | sed 's/.*Secret key: *//')
echo "GARAGE_KEY_ID=${KEY_ID}"
echo "GARAGE_KEY_SECRET=${KEY_SECRET}"

View File

@@ -0,0 +1,75 @@
"""
Minimal FastAPI mock for Daily.co API.
Serves canned responses for:
- GET /v1/recordings/{recording_id}
- GET /v1/meetings/{meeting_id}/participants
"""
from fastapi import FastAPI
app = FastAPI(title="Mock Daily API")
# Participant UUIDs must be 36-char hex UUIDs to match Daily's filename format
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
# Daily-format track keys: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
TRACK_KEYS = [
f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
]
@app.get("/v1/recordings/{recording_id}")
async def get_recording(recording_id: str):
return {
"id": recording_id,
"room_name": "integration-test-room",
"start_ts": 1700000000,
"type": "raw-tracks",
"status": "finished",
"max_participants": 2,
"duration": 5,
"share_token": None,
"s3": {
"bucket_name": "reflector-media",
"bucket_region": "garage",
"key": None,
"endpoint": None,
},
"s3key": None,
"tracks": [
{"type": "audio", "s3Key": key, "size": 100000} for key in TRACK_KEYS
],
"mtgSessionId": "mock-mtg-session-id",
}
@app.get("/v1/meetings/{meeting_id}/participants")
async def get_meeting_participants(meeting_id: str):
return {
"data": [
{
"user_id": "user-a",
"participant_id": PARTICIPANT_A_ID,
"user_name": "Speaker A",
"join_time": 1700000000,
"duration": 300,
},
{
"user_id": "user-b",
"participant_id": PARTICIPANT_B_ID,
"user_name": "Speaker B",
"join_time": 1700000010,
"duration": 290,
},
]
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8080)

View File

@@ -0,0 +1,61 @@
"""
Integration test: File upload → FilePipeline → full processing.
Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
pyannote diarization → LLM summarization/topics → status "ended".
"""
import pytest
@pytest.mark.asyncio
async def test_file_pipeline_end_to_end(
api_client, test_records_dir, poll_transcript_status
):
"""Upload a WAV file and verify the full pipeline completes."""
# 1. Create transcript
resp = await api_client.post(
"/transcripts",
json={"name": "integration-file-test", "source_kind": "file"},
)
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
transcript = resp.json()
transcript_id = transcript["id"]
# 2. Upload audio file (single chunk)
audio_path = test_records_dir / "test_short.wav"
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
with open(audio_path, "rb") as f:
resp = await api_client.post(
f"/transcripts/{transcript_id}/record/upload",
params={"chunk_number": 0, "total_chunks": 1},
files={"chunk": ("test_short.wav", f, "audio/wav")},
)
assert resp.status_code == 200, f"Upload failed: {resp.text}"
# 3. Poll until pipeline completes
data = await poll_transcript_status(
api_client, transcript_id, target="ended", max_wait=300
)
# 4. Assertions
assert data["status"] == "ended"
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
assert (
data.get("long_summary") and len(data["long_summary"]) > 0
), "Long summary should be non-empty"
assert (
data.get("short_summary") and len(data["short_summary"]) > 0
), "Short summary should be non-empty"
# Topics are served from a separate endpoint
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
topics = topics_resp.json()
assert len(topics) >= 1, "Should have at least 1 topic"
for topic in topics:
assert topic.get("title"), "Each topic should have a title"
assert topic.get("summary"), "Each topic should have a summary"
assert data.get("duration", 0) > 0, "Duration should be positive"

View File

@@ -0,0 +1,109 @@
"""
Integration test: WebRTC stream → LivePostProcessingPipeline → full processing.
Exercises: WebRTC SDP exchange → live audio streaming → connection close →
Hatchet LivePostPipeline → whisper transcription → LLM summarization/topics → status "ended".
"""
import asyncio
import json
import os
import httpx
import pytest
from aiortc import RTCPeerConnection, RTCSessionDescription
from aiortc.contrib.media import MediaPlayer
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
@pytest.mark.asyncio
async def test_live_pipeline_end_to_end(
api_client, test_records_dir, poll_transcript_status
):
"""Stream audio via WebRTC and verify the full post-processing pipeline completes."""
# 1. Create transcript
resp = await api_client.post(
"/transcripts",
json={"name": "integration-live-test"},
)
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
transcript = resp.json()
transcript_id = transcript["id"]
# 2. Set up WebRTC peer connection with audio from test file
audio_path = test_records_dir / "test_short.wav"
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
pc = RTCPeerConnection()
player = MediaPlayer(audio_path.as_posix())
# Add audio track
audio_track = player.audio
pc.addTrack(audio_track)
# Create data channel (server expects this for STOP command)
channel = pc.createDataChannel("data-channel")
# 3. Generate SDP offer
offer = await pc.createOffer()
await pc.setLocalDescription(offer)
sdp_payload = {
"sdp": pc.localDescription.sdp,
"type": pc.localDescription.type,
}
# 4. Send offer to server and get answer
webrtc_url = f"{SERVER_URL}/v1/transcripts/{transcript_id}/record/webrtc"
async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
resp = await client.post(webrtc_url, json=sdp_payload)
assert resp.status_code == 200, f"WebRTC offer failed: {resp.text}"
answer_data = resp.json()
answer = RTCSessionDescription(sdp=answer_data["sdp"], type=answer_data["type"])
await pc.setRemoteDescription(answer)
# 5. Wait for audio playback to finish
max_stream_wait = 60
elapsed = 0
while elapsed < max_stream_wait:
if audio_track.readyState == "ended":
break
await asyncio.sleep(0.5)
elapsed += 0.5
# 6. Send STOP command and close connection
try:
channel.send(json.dumps({"cmd": "STOP"}))
await asyncio.sleep(1)
except Exception:
pass # Channel may not be open if track ended quickly
await pc.close()
# 7. Poll until post-processing pipeline completes
data = await poll_transcript_status(
api_client, transcript_id, target="ended", max_wait=300
)
# 8. Assertions
assert data["status"] == "ended"
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
assert (
data.get("long_summary") and len(data["long_summary"]) > 0
), "Long summary should be non-empty"
assert (
data.get("short_summary") and len(data["short_summary"]) > 0
), "Short summary should be non-empty"
# Topics are served from a separate endpoint
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
topics = topics_resp.json()
assert len(topics) >= 1, "Should have at least 1 topic"
for topic in topics:
assert topic.get("title"), "Each topic should have a title"
assert topic.get("summary"), "Each topic should have a summary"
assert data.get("duration", 0) > 0, "Duration should be positive"

View File

@@ -0,0 +1,129 @@
"""
Integration test: Multitrack → DailyMultitrackPipeline → full processing.
Exercises: S3 upload → DB recording setup → process endpoint →
Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
diarization → mixdown → LLM summarization/topics → status "ended".
"""
import json
from datetime import datetime, timezone
import pytest
from sqlalchemy import text
# Must match Daily's filename format: {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
# These UUIDs must match mock_daily_server.py participant IDs
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
TRACK_KEYS = [
f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
]
@pytest.mark.asyncio
async def test_multitrack_pipeline_end_to_end(
api_client,
s3_client,
db_engine,
test_records_dir,
bucket_name,
poll_transcript_status,
):
"""Set up multitrack recording in S3/DB and verify the full pipeline completes."""
# 1. Upload test audio as two separate tracks to Garage S3
audio_path = test_records_dir / "test_short.wav"
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
for track_key in TRACK_KEYS:
s3_client.upload_file(
str(audio_path),
bucket_name,
track_key,
)
# 2. Create transcript via API
resp = await api_client.post(
"/transcripts",
json={"name": "integration-multitrack-test"},
)
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
transcript = resp.json()
transcript_id = transcript["id"]
# 3. Insert Recording row and link to transcript via direct DB access
recording_id = f"rec-integration-{transcript_id[:8]}"
now = datetime.now(timezone.utc)
async with db_engine.begin() as conn:
# Insert recording with track_keys
await conn.execute(
text("""
INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys)
VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json))
"""),
{
"id": recording_id,
"bucket_name": bucket_name,
"object_key": TRACK_KEYS[0],
"recorded_at": now,
"status": "completed",
"track_keys": json.dumps(TRACK_KEYS),
},
)
# Link recording to transcript and set status to uploaded
await conn.execute(
text("""
UPDATE transcript
SET recording_id = :recording_id, status = 'uploaded'
WHERE id = :transcript_id
"""),
{
"recording_id": recording_id,
"transcript_id": transcript_id,
},
)
# 4. Trigger processing via process endpoint
resp = await api_client.post(f"/transcripts/{transcript_id}/process")
assert resp.status_code == 200, f"Process trigger failed: {resp.text}"
# 5. Poll until pipeline completes
# The pipeline will call mock-daily for get_recording and get_participants
# Accept "error" too — non-critical steps like action_items may fail due to
# LLM parsing flakiness while core results (transcript, summaries) still exist.
data = await poll_transcript_status(
api_client, transcript_id, target=("ended", "error"), max_wait=300
)
# 6. Assertions — verify core pipeline results regardless of final status
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
assert (
data.get("long_summary") and len(data["long_summary"]) > 0
), "Long summary should be non-empty"
assert (
data.get("short_summary") and len(data["short_summary"]) > 0
), "Short summary should be non-empty"
# Topics are served from a separate endpoint
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
topics = topics_resp.json()
assert len(topics) >= 1, "Should have at least 1 topic"
for topic in topics:
assert topic.get("title"), "Each topic should have a title"
assert topic.get("summary"), "Each topic should have a summary"
# Participants are served from a separate endpoint
participants_resp = await api_client.get(
f"/transcripts/{transcript_id}/participants"
)
assert (
participants_resp.status_code == 200
), f"Failed to get participants: {participants_resp.text}"
participants = participants_resp.json()
assert (
len(participants) >= 2
), f"Expected at least 2 speakers for multitrack, got {len(participants)}"