Files
reflector/server/tests/integration/test_file_pipeline.py
Juan Diego García 9a2f973a2e test: full integration tests (#916)
* test: full integration tests

* fix: add env vars as secrets in CI
2026-03-18 15:29:21 -05:00

62 lines
2.3 KiB
Python

"""
Integration test: File upload → FilePipeline → full processing.
Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
pyannote diarization → LLM summarization/topics → status "ended".
"""
import pytest
@pytest.mark.asyncio
async def test_file_pipeline_end_to_end(
    api_client, test_records_dir, poll_transcript_status
):
    """Push one WAV file through the API and check the processed transcript.

    Flow: create a transcript with ``source_kind`` ``"file"``, upload
    ``test_short.wav`` as a single chunk, poll until the transcript reaches
    status ``"ended"``, then check that the title, both summaries, the topics
    and the duration are all populated.
    """
    # Shared by the on-disk lookup and the multipart filename below.
    audio_name = "test_short.wav"

    # --- Create the transcript that will receive the upload ---
    create_resp = await api_client.post(
        "/transcripts",
        json={"name": "integration-file-test", "source_kind": "file"},
    )
    assert (
        create_resp.status_code == 200
    ), f"Failed to create transcript: {create_resp.text}"
    transcript_id = create_resp.json()["id"]

    # --- Upload the audio as chunk 0 of 1 ---
    audio_path = test_records_dir / audio_name
    assert audio_path.exists(), f"Test audio file not found: {audio_path}"
    # The file handle must stay open for the duration of the request.
    with open(audio_path, "rb") as audio_file:
        upload_resp = await api_client.post(
            f"/transcripts/{transcript_id}/record/upload",
            params={"chunk_number": 0, "total_chunks": 1},
            files={"chunk": (audio_name, audio_file, "audio/wav")},
        )
    assert upload_resp.status_code == 200, f"Upload failed: {upload_resp.text}"

    # --- Wait for processing to finish ---
    # NOTE(review): max_wait is presumably in seconds; confirm against the fixture.
    final_state = await poll_transcript_status(
        api_client, transcript_id, target="ended", max_wait=300
    )

    # --- Verify the transcript payload ---
    assert final_state["status"] == "ended"
    required_text_fields = (
        ("title", "Title should be non-empty"),
        ("long_summary", "Long summary should be non-empty"),
        ("short_summary", "Short summary should be non-empty"),
    )
    for field_name, failure_message in required_text_fields:
        field_value = final_state.get(field_name)
        assert field_value and len(field_value) > 0, failure_message

    # --- Verify topics, which are fetched from their own endpoint ---
    topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
    assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
    topic_list = topics_resp.json()
    assert len(topic_list) >= 1, "Should have at least 1 topic"
    for entry in topic_list:
        assert entry.get("title"), "Each topic should have a title"
        assert entry.get("summary"), "Each topic should have a summary"

    assert final_state.get("duration", 0) > 0, "Duration should be positive"