test: full integration tests (#916)

* test: full integration tests

* fix: add env vars as secrets in CI
This commit is contained in:
Juan Diego García
2026-03-18 15:29:21 -05:00
committed by GitHub
parent a9200d35bf
commit 9a2f973a2e
16 changed files with 1098 additions and 3 deletions

View File

@@ -0,0 +1,129 @@
"""
Integration test: Multitrack → DailyMultitrackPipeline → full processing.
Exercises: S3 upload → DB recording setup → process endpoint →
Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
diarization → mixdown → LLM summarization/topics → status "ended".
"""
import json
from datetime import datetime, timezone
import pytest
from sqlalchemy import text
# Must match Daily's filename format: {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
# These UUIDs must match mock_daily_server.py participant IDs
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
# One S3 object key per participant audio track, in Daily's naming scheme.
TRACK_KEYS = [
    f"1700000000000-{participant_id}-cam-audio-1700000001000"
    for participant_id in (PARTICIPANT_A_ID, PARTICIPANT_B_ID)
]
@pytest.mark.asyncio
async def test_multitrack_pipeline_end_to_end(
    api_client,
    s3_client,
    db_engine,
    test_records_dir,
    bucket_name,
    poll_transcript_status,
):
    """End-to-end multitrack flow: seed S3 and the DB, trigger processing via the
    API, then verify the pipeline produced a title, summaries, topics, and at
    least two participants."""
    # Step 1: upload the same fixture audio once per participant track key.
    audio_path = test_records_dir / "test_short.wav"
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    audio_file = str(audio_path)
    for key in TRACK_KEYS:
        s3_client.upload_file(audio_file, bucket_name, key)

    # Step 2: create the transcript through the public API.
    create_resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-multitrack-test"},
    )
    assert (
        create_resp.status_code == 200
    ), f"Failed to create transcript: {create_resp.text}"
    transcript_id = create_resp.json()["id"]

    # Step 3: seed a Recording row and link it to the transcript directly in
    # the database (no API exists for this fixture setup).
    recording_id = f"rec-integration-{transcript_id[:8]}"
    now = datetime.now(timezone.utc)
    recording_row = {
        "id": recording_id,
        "bucket_name": bucket_name,
        "object_key": TRACK_KEYS[0],
        "recorded_at": now,
        "status": "completed",
        "track_keys": json.dumps(TRACK_KEYS),
    }
    link_params = {
        "recording_id": recording_id,
        "transcript_id": transcript_id,
    }
    async with db_engine.begin() as conn:
        # Insert the recording with its multitrack key list.
        await conn.execute(
            text("""
                INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys)
                VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json))
            """),
            recording_row,
        )
        # Attach the recording and mark the transcript ready for processing.
        await conn.execute(
            text("""
                UPDATE transcript
                SET recording_id = :recording_id, status = 'uploaded'
                WHERE id = :transcript_id
            """),
            link_params,
        )

    # Step 4: kick off processing.
    process_resp = await api_client.post(f"/transcripts/{transcript_id}/process")
    assert process_resp.status_code == 200, f"Process trigger failed: {process_resp.text}"

    # Step 5: wait for a terminal status. "error" is tolerated because
    # non-critical steps (e.g. action_items) can fail on LLM flakiness while
    # the core artifacts still get written.
    data = await poll_transcript_status(
        api_client, transcript_id, target=("ended", "error"), max_wait=300
    )

    # Step 6: core results must be present regardless of final status.
    assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
    for field, label in (("long_summary", "Long"), ("short_summary", "Short")):
        value = data.get(field)
        assert value and len(value) > 0, f"{label} summary should be non-empty"

    # Topics live behind their own endpoint.
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topics = topics_resp.json()
    assert len(topics) >= 1, "Should have at least 1 topic"
    for entry in topics:
        assert entry.get("title"), "Each topic should have a title"
        assert entry.get("summary"), "Each topic should have a summary"

    # Participants live behind their own endpoint as well; multitrack input
    # should yield one speaker per uploaded track.
    participants_resp = await api_client.get(
        f"/transcripts/{transcript_id}/participants"
    )
    assert (
        participants_resp.status_code == 200
    ), f"Failed to get participants: {participants_resp.text}"
    participants = participants_resp.json()
    assert (
        len(participants) >= 2
    ), f"Expected at least 2 speakers for multitrack, got {len(participants)}"