mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-03-21 22:56:47 +00:00
test: full integration tests (#916)
* test: full integration tests * fix: add env vars as secrets in CI
This commit is contained in:
committed by
GitHub
parent
a9200d35bf
commit
9a2f973a2e
@@ -119,7 +119,7 @@ AUTH_BACKEND = "jwt"
|
||||
HATCHET_CLIENT_TOKEN = "test-dummy-token"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
|
||||
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v --ignore=tests/integration"
|
||||
testpaths = ["tests"]
|
||||
asyncio_mode = "auto"
|
||||
markers = [
|
||||
|
||||
@@ -307,7 +307,9 @@ async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
|
||||
ctx.log(
|
||||
f"get_recording: calling Daily.co API for recording_id={input.recording_id}..."
|
||||
)
|
||||
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
|
||||
async with DailyApiClient(
|
||||
api_key=settings.DAILY_API_KEY, base_url=settings.DAILY_API_URL
|
||||
) as client:
|
||||
recording = await client.get_recording(input.recording_id)
|
||||
ctx.log(f"get_recording: Daily.co API returned successfully")
|
||||
|
||||
@@ -374,7 +376,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
||||
settings.DAILY_API_KEY, "DAILY_API_KEY is required"
|
||||
)
|
||||
|
||||
async with DailyApiClient(api_key=daily_api_key) as client:
|
||||
async with DailyApiClient(
|
||||
api_key=daily_api_key, base_url=settings.DAILY_API_URL
|
||||
) as client:
|
||||
participants = await client.get_meeting_participants(mtg_session_id)
|
||||
|
||||
id_to_name = {}
|
||||
|
||||
@@ -180,6 +180,7 @@ class Settings(BaseSettings):
|
||||
)
|
||||
|
||||
# Daily.co integration
|
||||
DAILY_API_URL: str = "https://api.daily.co/v1"
|
||||
DAILY_API_KEY: str | None = None
|
||||
DAILY_WEBHOOK_SECRET: str | None = None
|
||||
DAILY_SUBDOMAIN: str | None = None
|
||||
|
||||
218
server/tests/docker-compose.integration.yml
Normal file
218
server/tests/docker-compose.integration.yml
Normal file
@@ -0,0 +1,218 @@
|
||||
# Integration test stack — full pipeline end-to-end.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f server/tests/docker-compose.integration.yml up -d --build
|
||||
#
|
||||
# Requires .env.integration in the repo root (generated by CI workflow).
|
||||
|
||||
x-backend-env: &backend-env
|
||||
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||
REDIS_HOST: redis
|
||||
CELERY_BROKER_URL: redis://redis:6379/1
|
||||
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
||||
HATCHET_CLIENT_TOKEN: ${HATCHET_CLIENT_TOKEN:-}
|
||||
HATCHET_CLIENT_SERVER_URL: http://hatchet:8888
|
||||
HATCHET_CLIENT_HOST_PORT: hatchet:7077
|
||||
HATCHET_CLIENT_TLS_STRATEGY: none
|
||||
# ML backends — CPU-only, no external services
|
||||
TRANSCRIPT_BACKEND: whisper
|
||||
WHISPER_CHUNK_MODEL: tiny
|
||||
WHISPER_FILE_MODEL: tiny
|
||||
DIARIZATION_BACKEND: pyannote
|
||||
TRANSLATION_BACKEND: passthrough
|
||||
# Storage — local Garage S3
|
||||
TRANSCRIPT_STORAGE_BACKEND: aws
|
||||
TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL: http://garage:3900
|
||||
TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: reflector-media
|
||||
TRANSCRIPT_STORAGE_AWS_REGION: garage
|
||||
# Daily mock
|
||||
DAILY_API_URL: http://mock-daily:8080/v1
|
||||
DAILY_API_KEY: fake-daily-key
|
||||
# Auth
|
||||
PUBLIC_MODE: "true"
|
||||
AUTH_BACKEND: none
|
||||
# LLM (injected from CI)
|
||||
LLM_URL: ${LLM_URL:-}
|
||||
LLM_API_KEY: ${LLM_API_KEY:-}
|
||||
LLM_MODEL: ${LLM_MODEL:-gpt-4o-mini}
|
||||
# HuggingFace (for pyannote gated models)
|
||||
HF_TOKEN: ${HF_TOKEN:-}
|
||||
# Garage S3 credentials — hardcoded test keys, containers are ephemeral
|
||||
TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: GK0123456789abcdef01234567 # gitleaks:allow
|
||||
TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
|
||||
# NOTE: DAILYCO_STORAGE_AWS_* intentionally NOT set — forces fallback to
|
||||
# get_transcripts_storage() which has ENDPOINT_URL pointing at Garage.
|
||||
# Setting them would bypass the endpoint and generate presigned URLs for AWS.
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:17-alpine
|
||||
command: ["postgres", "-c", "max_connections=200"]
|
||||
environment:
|
||||
POSTGRES_USER: reflector
|
||||
POSTGRES_PASSWORD: reflector
|
||||
POSTGRES_DB: reflector
|
||||
volumes:
|
||||
- ../../server/docker/init-hatchet-db.sql:/docker-entrypoint-initdb.d/init-hatchet-db.sql:ro
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U reflector"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 10
|
||||
|
||||
redis:
|
||||
image: redis:7.2-alpine
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
hatchet:
|
||||
image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: "postgresql://reflector:reflector@postgres:5432/hatchet?sslmode=disable&connect_timeout=30"
|
||||
SERVER_AUTH_COOKIE_INSECURE: "t"
|
||||
SERVER_AUTH_COOKIE_DOMAIN: "localhost"
|
||||
SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
|
||||
SERVER_GRPC_INSECURE: "t"
|
||||
SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
|
||||
SERVER_GRPC_PORT: "7077"
|
||||
SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
|
||||
SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 15
|
||||
start_period: 30s
|
||||
|
||||
garage:
|
||||
image: dxflrs/garage:v1.1.0
|
||||
volumes:
|
||||
- ../../data/garage.toml:/etc/garage.toml:ro
|
||||
healthcheck:
|
||||
test: ["CMD", "/garage", "stats"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 10
|
||||
start_period: 5s
|
||||
|
||||
mock-daily:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: integration/Dockerfile.mock-daily
|
||||
healthcheck:
|
||||
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/recordings/test')"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
server:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: server
|
||||
WEBRTC_HOST: server
|
||||
WEBRTC_PORT_RANGE: "52000-52100"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
garage:
|
||||
condition: service_healthy
|
||||
mock-daily:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
worker:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: worker
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
hatchet-worker-cpu:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: hatchet-worker-cpu
|
||||
depends_on:
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
hatchet-worker-llm:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: hatchet-worker-llm
|
||||
depends_on:
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
test-runner:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
# Database URLs for direct DB access in tests (both use the asyncpg driver; conftest.py prefers DATABASE_URL_ASYNC)
|
||||
DATABASE_URL_ASYNC: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||
SERVER_URL: http://server:1250
|
||||
GARAGE_ENDPOINT: http://garage:3900
|
||||
depends_on:
|
||||
server:
|
||||
condition: service_started
|
||||
worker:
|
||||
condition: service_started
|
||||
hatchet-worker-cpu:
|
||||
condition: service_started
|
||||
hatchet-worker-llm:
|
||||
condition: service_started
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
# Mount test files into the container
|
||||
- ./records:/app/tests/records:ro
|
||||
- ./integration:/app/tests/integration:ro
|
||||
entrypoint: ["sleep", "infinity"]
|
||||
|
||||
volumes:
|
||||
server_data:
|
||||
|
||||
networks:
|
||||
default:
|
||||
attachable: true
|
||||
9
server/tests/integration/Dockerfile.mock-daily
Normal file
9
server/tests/integration/Dockerfile.mock-daily
Normal file
@@ -0,0 +1,9 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
RUN pip install --no-cache-dir fastapi uvicorn[standard]
|
||||
|
||||
WORKDIR /app
|
||||
COPY integration/mock_daily_server.py /app/mock_daily_server.py
|
||||
|
||||
EXPOSE 8080
|
||||
CMD ["uvicorn", "mock_daily_server:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
0
server/tests/integration/__init__.py
Normal file
0
server/tests/integration/__init__.py
Normal file
116
server/tests/integration/conftest.py
Normal file
116
server/tests/integration/conftest.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
Integration test fixtures — real services; only the Daily.co API is mocked.
|
||||
|
||||
All services (PostgreSQL, Redis, Hatchet, Garage, server, workers) are
|
||||
expected to be running via docker-compose.integration.yml.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
|
||||
# Connection settings are read from the environment; each literal below is
# the fallback used when the variable is unset.
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
GARAGE_ENDPOINT = os.environ.get("GARAGE_ENDPOINT", "http://garage:3900")

# DATABASE_URL_ASYNC takes precedence over DATABASE_URL; the literal is the
# last resort when neither variable is set.
_FALLBACK_DATABASE_URL = (
    "postgresql+asyncpg://reflector:reflector@postgres:5432/reflector"
)
DATABASE_URL = os.environ.get(
    "DATABASE_URL_ASYNC",
    os.environ.get("DATABASE_URL", _FALLBACK_DATABASE_URL),
)

# S3 credentials default to empty strings when not provided.
GARAGE_KEY_ID = os.environ.get("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID", "")
GARAGE_KEY_SECRET = os.environ.get("TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY", "")
BUCKET_NAME = "reflector-media"
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def api_client():
    """Yield an ``httpx.AsyncClient`` rooted at ``{SERVER_URL}/v1``.

    The client uses a 30-second timeout and is closed when the fixture is
    torn down.
    """
    api_root = f"{SERVER_URL}/v1"
    request_timeout = httpx.Timeout(30.0)
    async with httpx.AsyncClient(base_url=api_root, timeout=request_timeout) as http:
        yield http
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def s3_client():
    """Return a session-scoped boto3 S3 client configured for the Garage endpoint."""
    connection = {
        "endpoint_url": GARAGE_ENDPOINT,
        "aws_access_key_id": GARAGE_KEY_ID,
        "aws_secret_access_key": GARAGE_KEY_SECRET,
        "region_name": "garage",
    }
    return boto3.client("s3", **connection)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def db_engine():
    """Yield a SQLAlchemy async engine bound to DATABASE_URL; disposed on teardown."""
    async_engine = create_async_engine(DATABASE_URL)
    yield async_engine
    # Release the engine's pooled connections once the test is done with it.
    await async_engine.dispose()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def test_records_dir():
    """Return the ``records`` directory that sits next to this package's parent."""
    tests_root = Path(__file__).parent.parent
    return tests_root / "records"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def bucket_name():
    """Expose the module-level BUCKET_NAME constant to tests as a fixture."""
    name = BUCKET_NAME
    return name
|
||||
|
||||
|
||||
async def _poll_transcript_status(
|
||||
client: httpx.AsyncClient,
|
||||
transcript_id: str,
|
||||
target: str | tuple[str, ...],
|
||||
error: str = "error",
|
||||
max_wait: int = 300,
|
||||
interval: int = 3,
|
||||
) -> dict:
|
||||
"""
|
||||
Poll GET /transcripts/{id} until status matches target or error.
|
||||
|
||||
target can be a single status string or a tuple of acceptable statuses.
|
||||
Returns the transcript dict on success, raises on timeout or error status.
|
||||
"""
|
||||
targets = (target,) if isinstance(target, str) else target
|
||||
elapsed = 0
|
||||
status = None
|
||||
while elapsed < max_wait:
|
||||
resp = await client.get(f"/transcripts/{transcript_id}")
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
status = data.get("status")
|
||||
|
||||
if status in targets:
|
||||
return data
|
||||
if status == error:
|
||||
raise AssertionError(
|
||||
f"Transcript {transcript_id} reached error status: {data}"
|
||||
)
|
||||
|
||||
await asyncio.sleep(interval)
|
||||
elapsed += interval
|
||||
|
||||
raise TimeoutError(
|
||||
f"Transcript {transcript_id} did not reach status '{target}' "
|
||||
f"within {max_wait}s (last status: {status})"
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def poll_transcript_status():
    """Return the ``_poll_transcript_status`` coroutine function for tests to await.

    This fixture is synchronous (it only hands back a function object), so it
    is registered with plain ``pytest.fixture`` rather than the asyncio variant.
    """
    return _poll_transcript_status
|
||||
62
server/tests/integration/garage_setup.sh
Executable file
62
server/tests/integration/garage_setup.sh
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/bin/sh
#
# Initialize Garage bucket and keys for integration tests.
# Run inside the Garage container after it's healthy.
#
# Prints GARAGE_KEY_ID=<id> and GARAGE_KEY_SECRET=<secret> as the last two
# lines of stdout.
#
# Note: uses /bin/sh (not bash) since the Garage container is minimal.
#
set -eu

echo "Waiting for Garage to be ready..."
ready=0
i=0
while [ "$i" -lt 30 ]; do
  if /garage stats >/dev/null 2>&1; then
    ready=1
    break
  fi
  sleep 1
  i=$((i + 1))
done

# Fail with a clear message instead of falling through to a confusing error
# from the first real command below.
if [ "$ready" -ne 1 ]; then
  echo "Garage did not become ready after 30 attempts." >&2
  exit 1
fi

# Layout setup
NODE_ID=$(/garage node id -q | tr -d '[:space:]')
LAYOUT_STATUS=$(/garage layout show 2>&1 || true)
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
  /garage layout assign "$NODE_ID" -c 1G -z dc1
  /garage layout apply --version 1
  echo "Layout applied."
else
  echo "Layout already configured."
fi

# Bucket
if ! /garage bucket info reflector-media >/dev/null 2>&1; then
  /garage bucket create reflector-media
  echo "Bucket 'reflector-media' created."
else
  echo "Bucket 'reflector-media' already exists."
fi

# Key
if /garage key info reflector-test >/dev/null 2>&1; then
  echo "Key 'reflector-test' already exists."
  # NOTE(review): recent Garage releases redact the secret in `key info`
  # unless --show-secret is passed — confirm against the pinned image version.
  KEY_OUTPUT=$(/garage key info reflector-test --show-secret 2>&1)
else
  KEY_OUTPUT=$(/garage key create reflector-test 2>&1)
  echo "Key 'reflector-test' created."
fi

# Permissions
/garage bucket allow reflector-media --read --write --key reflector-test

# Extract key ID and secret from output using POSIX-compatible parsing
# garage key output format:
#   Key name: reflector-test
#   Key ID: GK...
#   Secret key: ...
KEY_ID=$(echo "$KEY_OUTPUT" | grep "Key ID" | sed 's/.*Key ID: *//')
KEY_SECRET=$(echo "$KEY_OUTPUT" | grep "Secret key" | sed 's/.*Secret key: *//')

# Refuse to emit empty credentials: callers consume the two lines below.
if [ -z "$KEY_ID" ] || [ -z "$KEY_SECRET" ]; then
  echo "Could not parse key ID/secret from garage output." >&2
  exit 1
fi

echo "GARAGE_KEY_ID=${KEY_ID}"
echo "GARAGE_KEY_SECRET=${KEY_SECRET}"
|
||||
75
server/tests/integration/mock_daily_server.py
Normal file
75
server/tests/integration/mock_daily_server.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""
|
||||
Minimal FastAPI mock for Daily.co API.
|
||||
|
||||
Serves canned responses for:
|
||||
- GET /v1/recordings/{recording_id}
|
||||
- GET /v1/meetings/{meeting_id}/participants
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
app = FastAPI(title="Mock Daily API")
|
||||
|
||||
|
||||
# Participant IDs are UUID-shaped (36 characters including hyphens) so they
# match Daily's filename format.
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"

# One Daily-format track key per participant:
# {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
TRACK_KEYS = [
    f"1700000000000-{participant_id}-cam-audio-1700000001000"
    for participant_id in (PARTICIPANT_A_ID, PARTICIPANT_B_ID)
]
|
||||
|
||||
|
||||
@app.get("/v1/recordings/{recording_id}")
async def get_recording(recording_id: str):
    """Return a canned, finished raw-tracks recording that echoes ``recording_id``.

    The ``tracks`` list contains one audio entry per key in TRACK_KEYS.
    """
    storage = {
        "bucket_name": "reflector-media",
        "bucket_region": "garage",
        "key": None,
        "endpoint": None,
    }
    audio_tracks = [
        {"type": "audio", "s3Key": track_key, "size": 100000}
        for track_key in TRACK_KEYS
    ]
    # Key order is kept as-is so the serialized response is unchanged.
    return {
        "id": recording_id,
        "room_name": "integration-test-room",
        "start_ts": 1700000000,
        "type": "raw-tracks",
        "status": "finished",
        "max_participants": 2,
        "duration": 5,
        "share_token": None,
        "s3": storage,
        "s3key": None,
        "tracks": audio_tracks,
        "mtgSessionId": "mock-mtg-session-id",
    }
|
||||
|
||||
|
||||
@app.get("/v1/meetings/{meeting_id}/participants")
async def get_meeting_participants(meeting_id: str):
    """Return the same two canned participants for any ``meeting_id``."""
    # (user_id, participant_id, user_name, join_time, duration)
    roster = (
        ("user-a", PARTICIPANT_A_ID, "Speaker A", 1700000000, 300),
        ("user-b", PARTICIPANT_B_ID, "Speaker B", 1700000010, 290),
    )
    return {
        "data": [
            {
                "user_id": user_id,
                "participant_id": participant_id,
                "user_name": user_name,
                "join_time": join_time,
                "duration": duration,
            }
            for user_id, participant_id, user_name, join_time, duration in roster
        ]
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=8080)
|
||||
61
server/tests/integration/test_file_pipeline.py
Normal file
61
server/tests/integration/test_file_pipeline.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
Integration test: File upload → FilePipeline → full processing.
|
||||
|
||||
Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
|
||||
pyannote diarization → LLM summarization/topics → status "ended".
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_file_pipeline_end_to_end(
    api_client, test_records_dir, poll_transcript_status
):
    """Upload a WAV file and verify the transcript reaches "ended" with results."""
    # 1. Create transcript
    create_resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-file-test", "source_kind": "file"},
    )
    assert (
        create_resp.status_code == 200
    ), f"Failed to create transcript: {create_resp.text}"
    transcript_id = create_resp.json()["id"]

    # 2. Upload audio file (single chunk)
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"

    upload_url = f"/transcripts/{transcript_id}/record/upload"
    with open(audio_path, "rb") as audio_file:
        upload_resp = await api_client.post(
            upload_url,
            params={"chunk_number": 0, "total_chunks": 1},
            files={"chunk": ("test_short.wav", audio_file, "audio/wav")},
        )
    assert upload_resp.status_code == 200, f"Upload failed: {upload_resp.text}"

    # 3. Poll until pipeline completes
    data = await poll_transcript_status(
        api_client, transcript_id, target="ended", max_wait=300
    )

    # 4. Assertions
    assert data["status"] == "ended"
    text_fields = (
        ("title", "Title"),
        ("long_summary", "Long summary"),
        ("short_summary", "Short summary"),
    )
    for field, label in text_fields:
        assert (
            data.get(field) and len(data[field]) > 0
        ), f"{label} should be non-empty"

    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"

    assert data.get("duration", 0) > 0, "Duration should be positive"
|
||||
109
server/tests/integration/test_live_pipeline.py
Normal file
109
server/tests/integration/test_live_pipeline.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Integration test: WebRTC stream → LivePostProcessingPipeline → full processing.
|
||||
|
||||
Exercises: WebRTC SDP exchange → live audio streaming → connection close →
|
||||
Hatchet LivePostPipeline → whisper transcription → LLM summarization/topics → status "ended".
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from aiortc import RTCPeerConnection, RTCSessionDescription
|
||||
from aiortc.contrib.media import MediaPlayer
|
||||
|
||||
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_live_pipeline_end_to_end(
    api_client, test_records_dir, poll_transcript_status
):
    """Stream audio via WebRTC and verify the full post-processing pipeline completes.

    The peer connection is closed in a ``finally`` block so a failed assertion
    during the SDP exchange or streaming phase does not leak it.
    """
    # 1. Create transcript
    resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-live-test"},
    )
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    transcript = resp.json()
    transcript_id = transcript["id"]

    # 2. Set up WebRTC peer connection with audio from test file
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"

    pc = RTCPeerConnection()
    try:
        player = MediaPlayer(audio_path.as_posix())

        # Add audio track
        audio_track = player.audio
        pc.addTrack(audio_track)

        # Create data channel (server expects this for STOP command)
        channel = pc.createDataChannel("data-channel")

        # 3. Generate SDP offer
        offer = await pc.createOffer()
        await pc.setLocalDescription(offer)

        sdp_payload = {
            "sdp": pc.localDescription.sdp,
            "type": pc.localDescription.type,
        }

        # 4. Send offer to server and get answer
        webrtc_url = f"{SERVER_URL}/v1/transcripts/{transcript_id}/record/webrtc"
        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
            resp = await client.post(webrtc_url, json=sdp_payload)
        assert resp.status_code == 200, f"WebRTC offer failed: {resp.text}"

        answer_data = resp.json()
        answer = RTCSessionDescription(
            sdp=answer_data["sdp"], type=answer_data["type"]
        )
        await pc.setRemoteDescription(answer)

        # 5. Wait for audio playback to finish (best effort, capped at 60s)
        max_stream_wait = 60
        elapsed = 0
        while elapsed < max_stream_wait:
            if audio_track.readyState == "ended":
                break
            await asyncio.sleep(0.5)
            elapsed += 0.5

        # 6. Send STOP command. The channel may not be open if the track ended
        # quickly; check its state instead of swallowing every exception so
        # unexpected failures still surface.
        if channel.readyState == "open":
            channel.send(json.dumps({"cmd": "STOP"}))
            await asyncio.sleep(1)
    finally:
        await pc.close()

    # 7. Poll until post-processing pipeline completes
    data = await poll_transcript_status(
        api_client, transcript_id, target="ended", max_wait=300
    )

    # 8. Assertions
    assert data["status"] == "ended"
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    assert (
        data.get("long_summary") and len(data["long_summary"]) > 0
    ), "Long summary should be non-empty"
    assert (
        data.get("short_summary") and len(data["short_summary"]) > 0
    ), "Short summary should be non-empty"

    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"

    assert data.get("duration", 0) > 0, "Duration should be positive"
|
||||
129
server/tests/integration/test_multitrack_pipeline.py
Normal file
129
server/tests/integration/test_multitrack_pipeline.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Integration test: Multitrack → DailyMultitrackPipeline → full processing.
|
||||
|
||||
Exercises: S3 upload → DB recording setup → process endpoint →
|
||||
Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
|
||||
diarization → mixdown → LLM summarization/topics → status "ended".
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import text
|
||||
|
||||
# Track keys follow Daily's filename format:
# {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
# These UUIDs must match mock_daily_server.py participant IDs
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
TRACK_KEYS = [
    f"1700000000000-{participant_id}-cam-audio-1700000001000"
    for participant_id in (PARTICIPANT_A_ID, PARTICIPANT_B_ID)
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_multitrack_pipeline_end_to_end(
    api_client,
    s3_client,
    db_engine,
    test_records_dir,
    bucket_name,
    poll_transcript_status,
):
    """Seed a multitrack recording in S3 and the DB, then verify pipeline results.

    Both "ended" and "error" are accepted as final statuses; the assertions
    check the produced title, summaries, topics and participants instead.
    """
    # 1. Upload test audio as two separate tracks to Garage S3
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"

    local_file = str(audio_path)
    for track_key in TRACK_KEYS:
        s3_client.upload_file(local_file, bucket_name, track_key)

    # 2. Create transcript via API
    create_resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-multitrack-test"},
    )
    assert (
        create_resp.status_code == 200
    ), f"Failed to create transcript: {create_resp.text}"
    transcript_id = create_resp.json()["id"]

    # 3. Insert Recording row and link to transcript via direct DB access
    recording_id = f"rec-integration-{transcript_id[:8]}"
    now = datetime.now(timezone.utc)

    insert_recording = text("""
        INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys)
        VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json))
    """)
    recording_row = {
        "id": recording_id,
        "bucket_name": bucket_name,
        "object_key": TRACK_KEYS[0],
        "recorded_at": now,
        "status": "completed",
        "track_keys": json.dumps(TRACK_KEYS),
    }

    link_transcript = text("""
        UPDATE transcript
        SET recording_id = :recording_id, status = 'uploaded'
        WHERE id = :transcript_id
    """)
    link_params = {
        "recording_id": recording_id,
        "transcript_id": transcript_id,
    }

    # Both statements run in one transaction.
    async with db_engine.begin() as conn:
        # Insert recording with track_keys
        await conn.execute(insert_recording, recording_row)
        # Link recording to transcript and set status to uploaded
        await conn.execute(link_transcript, link_params)

    # 4. Trigger processing via process endpoint
    process_resp = await api_client.post(f"/transcripts/{transcript_id}/process")
    assert (
        process_resp.status_code == 200
    ), f"Process trigger failed: {process_resp.text}"

    # 5. Poll until pipeline completes
    # The pipeline will call mock-daily for get_recording and get_participants
    # Accept "error" too — non-critical steps like action_items may fail due to
    # LLM parsing flakiness while core results (transcript, summaries) still exist.
    data = await poll_transcript_status(
        api_client, transcript_id, target=("ended", "error"), max_wait=300
    )

    # 6. Assertions — verify core pipeline results regardless of final status
    text_fields = (
        ("title", "Title"),
        ("long_summary", "Long summary"),
        ("short_summary", "Short summary"),
    )
    for field, label in text_fields:
        assert (
            data.get(field) and len(data[field]) > 0
        ), f"{label} should be non-empty"

    # Topics are served from a separate endpoint
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for topic in topics:
        assert topic.get("title"), "Each topic should have a title"
        assert topic.get("summary"), "Each topic should have a summary"

    # Participants are served from a separate endpoint
    participants_resp = await api_client.get(
        f"/transcripts/{transcript_id}/participants"
    )
    assert (
        participants_resp.status_code == 200
    ), f"Failed to get participants: {participants_resp.text}"
    participants = participants_resp.json()
    assert (
        len(participants) >= 2
    ), f"Expected at least 2 speakers for multitrack, got {len(participants)}"
|
||||
Reference in New Issue
Block a user