mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-03-22 07:06:47 +00:00
62 lines
2.3 KiB
Python
62 lines
2.3 KiB
Python
"""
|
|
Integration test: File upload → FilePipeline → full processing.
|
|
|
|
Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
|
|
pyannote diarization → LLM summarization/topics → status "ended".
|
|
"""
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_file_pipeline_end_to_end(
|
|
api_client, test_records_dir, poll_transcript_status
|
|
):
|
|
"""Upload a WAV file and verify the full pipeline completes."""
|
|
# 1. Create transcript
|
|
resp = await api_client.post(
|
|
"/transcripts",
|
|
json={"name": "integration-file-test", "source_kind": "file"},
|
|
)
|
|
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
|
transcript = resp.json()
|
|
transcript_id = transcript["id"]
|
|
|
|
# 2. Upload audio file (single chunk)
|
|
audio_path = test_records_dir / "test_short.wav"
|
|
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
|
|
|
with open(audio_path, "rb") as f:
|
|
resp = await api_client.post(
|
|
f"/transcripts/{transcript_id}/record/upload",
|
|
params={"chunk_number": 0, "total_chunks": 1},
|
|
files={"chunk": ("test_short.wav", f, "audio/wav")},
|
|
)
|
|
assert resp.status_code == 200, f"Upload failed: {resp.text}"
|
|
|
|
# 3. Poll until pipeline completes
|
|
data = await poll_transcript_status(
|
|
api_client, transcript_id, target="ended", max_wait=300
|
|
)
|
|
|
|
# 4. Assertions
|
|
assert data["status"] == "ended"
|
|
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
|
assert (
|
|
data.get("long_summary") and len(data["long_summary"]) > 0
|
|
), "Long summary should be non-empty"
|
|
assert (
|
|
data.get("short_summary") and len(data["short_summary"]) > 0
|
|
), "Short summary should be non-empty"
|
|
|
|
# Topics are served from a separate endpoint
|
|
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
|
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
|
topics = topics_resp.json()
|
|
assert len(topics) >= 1, "Should have at least 1 topic"
|
|
for topic in topics:
|
|
assert topic.get("title"), "Each topic should have a title"
|
|
assert topic.get("summary"), "Each topic should have a summary"
|
|
|
|
assert data.get("duration", 0) > 0, "Duration should be positive"
|