mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-03-21 22:56:47 +00:00
test: full integration tests (#916)
* test: full integration tests * fix: add env vars as secrets in CI
This commit is contained in:
committed by
GitHub
parent
a9200d35bf
commit
9a2f973a2e
129
server/tests/integration/test_multitrack_pipeline.py
Normal file
129
server/tests/integration/test_multitrack_pipeline.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Integration test: Multitrack → DailyMultitrackPipeline → full processing.
|
||||
|
||||
Exercises: S3 upload → DB recording setup → process endpoint →
|
||||
Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
|
||||
diarization → mixdown → LLM summarization/topics → status "ended".
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import text
|
||||
|
||||
# Must match Daily's filename format: {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
|
||||
# These UUIDs must match mock_daily_server.py participant IDs
|
||||
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
|
||||
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
|
||||
TRACK_KEYS = [
|
||||
f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
|
||||
f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_multitrack_pipeline_end_to_end(
|
||||
api_client,
|
||||
s3_client,
|
||||
db_engine,
|
||||
test_records_dir,
|
||||
bucket_name,
|
||||
poll_transcript_status,
|
||||
):
|
||||
"""Set up multitrack recording in S3/DB and verify the full pipeline completes."""
|
||||
# 1. Upload test audio as two separate tracks to Garage S3
|
||||
audio_path = test_records_dir / "test_short.wav"
|
||||
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
||||
|
||||
for track_key in TRACK_KEYS:
|
||||
s3_client.upload_file(
|
||||
str(audio_path),
|
||||
bucket_name,
|
||||
track_key,
|
||||
)
|
||||
|
||||
# 2. Create transcript via API
|
||||
resp = await api_client.post(
|
||||
"/transcripts",
|
||||
json={"name": "integration-multitrack-test"},
|
||||
)
|
||||
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||
transcript = resp.json()
|
||||
transcript_id = transcript["id"]
|
||||
|
||||
# 3. Insert Recording row and link to transcript via direct DB access
|
||||
recording_id = f"rec-integration-{transcript_id[:8]}"
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
async with db_engine.begin() as conn:
|
||||
# Insert recording with track_keys
|
||||
await conn.execute(
|
||||
text("""
|
||||
INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys)
|
||||
VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json))
|
||||
"""),
|
||||
{
|
||||
"id": recording_id,
|
||||
"bucket_name": bucket_name,
|
||||
"object_key": TRACK_KEYS[0],
|
||||
"recorded_at": now,
|
||||
"status": "completed",
|
||||
"track_keys": json.dumps(TRACK_KEYS),
|
||||
},
|
||||
)
|
||||
|
||||
# Link recording to transcript and set status to uploaded
|
||||
await conn.execute(
|
||||
text("""
|
||||
UPDATE transcript
|
||||
SET recording_id = :recording_id, status = 'uploaded'
|
||||
WHERE id = :transcript_id
|
||||
"""),
|
||||
{
|
||||
"recording_id": recording_id,
|
||||
"transcript_id": transcript_id,
|
||||
},
|
||||
)
|
||||
|
||||
# 4. Trigger processing via process endpoint
|
||||
resp = await api_client.post(f"/transcripts/{transcript_id}/process")
|
||||
assert resp.status_code == 200, f"Process trigger failed: {resp.text}"
|
||||
|
||||
# 5. Poll until pipeline completes
|
||||
# The pipeline will call mock-daily for get_recording and get_participants
|
||||
# Accept "error" too — non-critical steps like action_items may fail due to
|
||||
# LLM parsing flakiness while core results (transcript, summaries) still exist.
|
||||
data = await poll_transcript_status(
|
||||
api_client, transcript_id, target=("ended", "error"), max_wait=300
|
||||
)
|
||||
|
||||
# 6. Assertions — verify core pipeline results regardless of final status
|
||||
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
||||
assert (
|
||||
data.get("long_summary") and len(data["long_summary"]) > 0
|
||||
), "Long summary should be non-empty"
|
||||
assert (
|
||||
data.get("short_summary") and len(data["short_summary"]) > 0
|
||||
), "Short summary should be non-empty"
|
||||
|
||||
# Topics are served from a separate endpoint
|
||||
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
||||
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
||||
topics = topics_resp.json()
|
||||
assert len(topics) >= 1, "Should have at least 1 topic"
|
||||
for topic in topics:
|
||||
assert topic.get("title"), "Each topic should have a title"
|
||||
assert topic.get("summary"), "Each topic should have a summary"
|
||||
|
||||
# Participants are served from a separate endpoint
|
||||
participants_resp = await api_client.get(
|
||||
f"/transcripts/{transcript_id}/participants"
|
||||
)
|
||||
assert (
|
||||
participants_resp.status_code == 200
|
||||
), f"Failed to get participants: {participants_resp.text}"
|
||||
participants = participants_resp.json()
|
||||
assert (
|
||||
len(participants) >= 2
|
||||
), f"Expected at least 2 speakers for multitrack, got {len(participants)}"
|
||||
Reference in New Issue
Block a user