"""
Tests for Modal-based processors using pytest-recording for HTTP recording/playback.

Note: these tests require a full Modal configuration to be able to record
VCR cassettes.

Configuration required for the first recording:
- TRANSCRIPT_BACKEND=modal
- TRANSCRIPT_URL=https://xxxxx--reflector-transcriber-parakeet-web.modal.run
- TRANSCRIPT_MODAL_API_KEY=xxxxx
- DIARIZATION_BACKEND=modal
- DIARIZATION_URL=https://xxxxx--reflector-diarizer-web.modal.run
- DIARIZATION_MODAL_API_KEY=xxxxx
"""

from unittest.mock import patch

import pytest

from reflector.processors.file_diarization import FileDiarizationInput
from reflector.processors.file_diarization_modal import FileDiarizationModalProcessor
from reflector.processors.file_transcript import FileTranscriptInput
from reflector.processors.file_transcript_modal import FileTranscriptModalProcessor
from reflector.processors.transcript_diarization_assembler import (
    TranscriptDiarizationAssemblerInput,
    TranscriptDiarizationAssemblerProcessor,
)
from reflector.processors.types import DiarizationSegment, Transcript, Word

# Public test audio file hosted on S3 specifically for reflector pytests
TEST_AUDIO_URL = (
    "https://reflector-github-pytest.s3.us-east-1.amazonaws.com/test_mathieu_hello.mp3"
)


@pytest.mark.asyncio
async def test_file_transcript_modal_processor_missing_url():
    """Constructing the transcript processor with TRANSCRIPT_URL unset must raise."""
    with patch("reflector.processors.file_transcript_modal.settings") as mock_settings:
        mock_settings.TRANSCRIPT_URL = None
        with pytest.raises(Exception, match="TRANSCRIPT_URL required"):
            FileTranscriptModalProcessor(modal_api_key="test-api-key")


@pytest.mark.asyncio
async def test_file_diarization_modal_processor_missing_url():
    """Constructing the diarization processor with DIARIZATION_URL unset must raise."""
    with patch("reflector.processors.file_diarization_modal.settings") as mock_settings:
        mock_settings.DIARIZATION_URL = None
        with pytest.raises(Exception, match="DIARIZATION_URL required"):
            FileDiarizationModalProcessor(modal_api_key="test-api-key")


@pytest.mark.vcr()
@pytest.mark.asyncio
async def test_file_diarization_modal_processor(vcr):
    """Test FileDiarizationModalProcessor using public audio URL and Modal API"""
    # Imported locally, as in the other Modal tests of this module.
    from reflector.settings import settings

    processor = FileDiarizationModalProcessor(
        modal_api_key=settings.DIARIZATION_MODAL_API_KEY
    )
    test_input = FileDiarizationInput(audio_url=TEST_AUDIO_URL)
    result = await processor._diarize(test_input)

    # Verify the result structure
    assert result is not None
    assert hasattr(result, "diarization")
    assert isinstance(result.diarization, list)

    # Check structure of each diarization segment
    for segment in result.diarization:
        assert "start" in segment
        assert "end" in segment
        assert "speaker" in segment
        assert isinstance(segment["start"], (int, float))
        assert isinstance(segment["end"], (int, float))
        assert isinstance(segment["speaker"], int)
        # Basic sanity check - start should be before end
        assert segment["start"] < segment["end"]


@pytest.mark.vcr()
@pytest.mark.asyncio
async def test_file_transcript_modal_processor():
    """Test FileTranscriptModalProcessor using public audio URL and Modal API"""
    from reflector.settings import settings

    processor = FileTranscriptModalProcessor(
        modal_api_key=settings.TRANSCRIPT_MODAL_API_KEY
    )
    test_input = FileTranscriptInput(
        audio_url=TEST_AUDIO_URL,
        language="en",
    )

    # This will record the HTTP interaction on first run, replay on subsequent runs
    result = await processor._transcript(test_input)

    # Verify the result structure
    assert result is not None
    assert hasattr(result, "words")
    assert isinstance(result.words, list)

    # Check structure of each word if present
    for word in result.words:
        assert hasattr(word, "text")
        assert hasattr(word, "start")
        assert hasattr(word, "end")
        assert isinstance(word.start, (int, float))
        assert isinstance(word.end, (int, float))
        assert isinstance(word.text, str)
        # Basic sanity check - start should be before or equal to end
        assert word.start <= word.end


@pytest.mark.asyncio
async def test_transcript_diarization_assembler_processor():
    """Test TranscriptDiarizationAssemblerProcessor without VCR (no HTTP requests)"""
    # Create test transcript with words; every word starts out as speaker 0
    words = [
        Word(text="Hello", start=0.0, end=1.0, speaker=0),
        Word(text=" ", start=1.0, end=1.1, speaker=0),
        Word(text="world", start=1.1, end=2.0, speaker=0),
        Word(text=".", start=2.0, end=2.1, speaker=0),
        Word(text=" ", start=2.1, end=2.2, speaker=0),
        Word(text="How", start=2.2, end=2.8, speaker=0),
        Word(text=" ", start=2.8, end=2.9, speaker=0),
        Word(text="are", start=2.9, end=3.2, speaker=0),
        Word(text=" ", start=3.2, end=3.3, speaker=0),
        Word(text="you", start=3.3, end=3.8, speaker=0),
        Word(text="?", start=3.8, end=3.9, speaker=0),
    ]
    transcript = Transcript(words=words)

    # Create test diarization segments
    diarization = [
        DiarizationSegment(start=0.0, end=2.1, speaker=0),
        DiarizationSegment(start=2.1, end=3.9, speaker=1),
    ]

    # Create processor and test input
    processor = TranscriptDiarizationAssemblerProcessor()
    test_input = TranscriptDiarizationAssemblerInput(
        transcript=transcript, diarization=diarization
    )

    # Track emitted results
    emitted_results = []

    async def capture_result(result):
        emitted_results.append(result)

    processor.on(capture_result)

    # Process the input
    await processor.push(test_input)

    # Verify result was emitted
    assert len(emitted_results) == 1
    result = emitted_results[0]

    # Verify result structure
    assert isinstance(result, Transcript)
    assert len(result.words) == len(words)

    # Verify speaker assignments were applied:
    # - words 0-3 (Hello, space, world, .) lie in segment 0.0-2.1 -> speaker 0
    # - words 4-10 (space, How, space, are, space, you, ?) lie in
    #   segment 2.1-3.9 -> speaker 1
    expected_speakers = [0] * 4 + [1] * 7
    # The length assertion above guarantees zip() does not silently truncate.
    for i, (word, expected) in enumerate(zip(result.words, expected_speakers)):
        assert (
            word.speaker == expected
        ), f"Word {i} '{word.text}' should be speaker {expected}, got {word.speaker}"


@pytest.mark.asyncio
async def test_transcript_diarization_assembler_no_diarization():
    """Test TranscriptDiarizationAssemblerProcessor with no diarization data"""
    # Create test transcript
    words = [Word(text="Hello", start=0.0, end=1.0, speaker=0)]
    transcript = Transcript(words=words)

    # Create processor and test input with empty diarization
    processor = TranscriptDiarizationAssemblerProcessor()
    test_input = TranscriptDiarizationAssemblerInput(
        transcript=transcript, diarization=[]
    )

    # Track emitted results
    emitted_results = []

    async def capture_result(result):
        emitted_results.append(result)

    processor.on(capture_result)

    # Process the input
    await processor.push(test_input)

    # Verify original transcript was returned unchanged
    assert len(emitted_results) == 1
    result = emitted_results[0]
    assert result is transcript  # Should be the same object
    assert result.words[0].speaker == 0  # Original speaker unchanged


@pytest.mark.vcr()
@pytest.mark.asyncio
async def test_full_modal_pipeline_integration(vcr):
    """Integration test: Transcription -> Diarization -> Assembly

    This test demonstrates the full pipeline:
    1. Run transcription via Modal
    2. Run diarization via Modal
    3. Assemble transcript with diarization
    """
    from reflector.settings import settings

    # Step 1: Transcription
    transcript_processor = FileTranscriptModalProcessor(
        modal_api_key=settings.TRANSCRIPT_MODAL_API_KEY
    )
    transcript_input = FileTranscriptInput(audio_url=TEST_AUDIO_URL, language="en")
    transcript = await transcript_processor._transcript(transcript_input)

    # Step 2: Diarization
    diarization_processor = FileDiarizationModalProcessor(
        modal_api_key=settings.DIARIZATION_MODAL_API_KEY
    )
    diarization_input = FileDiarizationInput(audio_url=TEST_AUDIO_URL)
    diarization_result = await diarization_processor._diarize(diarization_input)

    # Step 3: Assembly
    assembler = TranscriptDiarizationAssemblerProcessor()
    assembly_input = TranscriptDiarizationAssemblerInput(
        transcript=transcript, diarization=diarization_result.diarization
    )

    # Track assembled result
    assembled_results = []

    async def capture_result(result):
        assembled_results.append(result)

    assembler.on(capture_result)
    await assembler.push(assembly_input)

    # Verify the full pipeline worked
    assert len(assembled_results) == 1
    final_transcript = assembled_results[0]

    # Verify the final transcript has the original words with updated speaker info
    assert isinstance(final_transcript, Transcript)
    assert len(final_transcript.words) == len(transcript.words)
    assert len(final_transcript.words) > 0

    # Verify some words have been assigned speakers from diarization
    speakers_found = {word.speaker for word in final_transcript.words}
    assert len(speakers_found) > 0  # At least some speaker assignments