""" Tests for GPU Modal transcription endpoints. These tests are marked with the "gpu-modal" group and will not run by default. Run them with: pytest -m gpu-modal tests/test_gpu_modal_transcript_parakeet.py Required environment variables: - TRANSCRIPT_URL: URL to the Modal.com endpoint (required) - TRANSCRIPT_MODAL_API_KEY: API key for authentication (optional) - TRANSCRIPT_MODEL: Model name to use (optional, defaults to nvidia/parakeet-tdt-0.6b-v2) Example with pytest (override default addopts to run ONLY gpu_modal tests): TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-parakeet-web-dev.modal.run \ TRANSCRIPT_MODAL_API_KEY=your-api-key \ uv run -m pytest -m gpu_modal --no-cov tests/test_gpu_modal_transcript.py # Or with completely clean options: uv run -m pytest -m gpu_modal -o addopts="" tests/ Running Modal locally for testing: modal serve gpu/modal_deployments/reflector_transcriber_parakeet.py # This will give you a local URL like https://xxxxx--reflector-transcriber-parakeet-web-dev.modal.run to test against """ import os import tempfile from pathlib import Path import httpx import pytest # Test audio file URL for testing TEST_AUDIO_URL = ( "https://reflector-github-pytest.s3.us-east-1.amazonaws.com/test_mathieu_hello.mp3" ) def get_modal_transcript_url(): """Get and validate the Modal transcript URL from environment.""" url = os.environ.get("TRANSCRIPT_URL") if not url: pytest.skip( "TRANSCRIPT_URL environment variable is required for GPU Modal tests" ) return url def get_auth_headers(): """Get authentication headers if API key is available.""" api_key = os.environ.get("TRANSCRIPT_MODAL_API_KEY") if api_key: return {"Authorization": f"Bearer {api_key}"} return {} def get_model_name(): """Get the model name from environment or use default.""" return os.environ.get("TRANSCRIPT_MODEL", "nvidia/parakeet-tdt-0.6b-v2") @pytest.mark.gpu_modal class TestGPUModalTranscript: """Test suite for GPU Modal transcription endpoints.""" def test_transcriptions_from_url(self): """Test the /v1/audio/transcriptions-from-url endpoint.""" url = get_modal_transcript_url() headers = get_auth_headers() with httpx.Client(timeout=60.0) as client: response = client.post( f"{url}/v1/audio/transcriptions-from-url", json={ "audio_file_url": TEST_AUDIO_URL, "model": get_model_name(), "language": "en", "timestamp_offset": 0.0, }, headers=headers, ) assert response.status_code == 200, f"Request failed: {response.text}" result = response.json() # Verify response structure assert "text" in result assert "words" in result assert isinstance(result["text"], str) assert isinstance(result["words"], list) # Verify content is meaningful assert len(result["text"]) > 0, "Transcript text should not be empty" assert len(result["words"]) > 0, "Words list must not be empty" # Verify word structure for word in result["words"]: assert "word" in word assert "start" in word assert "end" in word assert isinstance(word["start"], (int, float)) assert isinstance(word["end"], (int, float)) assert word["start"] <= word["end"] def test_transcriptions_single_file(self): """Test the /v1/audio/transcriptions endpoint with a single file.""" url = get_modal_transcript_url() headers = get_auth_headers() # Download test audio file to upload with httpx.Client(timeout=60.0) as client: audio_response = client.get(TEST_AUDIO_URL) audio_response.raise_for_status() with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file: tmp_file.write(audio_response.content) tmp_file_path = tmp_file.name try: # Upload the file for transcription with open(tmp_file_path, "rb") as f: files = {"file": ("test_audio.mp3", f, "audio/mpeg")} data = { "model": get_model_name(), "language": "en", "batch": "false", } response = client.post( f"{url}/v1/audio/transcriptions", files=files, data=data, headers=headers, ) assert response.status_code == 200, f"Request failed: {response.text}" result = response.json() # Verify response structure for single file assert "text" in result assert "words" in result assert "filename" in result assert isinstance(result["text"], str) assert isinstance(result["words"], list) # Verify content assert len(result["text"]) > 0, "Transcript text should not be empty" finally: Path(tmp_file_path).unlink(missing_ok=True) def test_transcriptions_multiple_files(self): """Test the /v1/audio/transcriptions endpoint with multiple files (non-batch mode).""" url = get_modal_transcript_url() headers = get_auth_headers() # Create multiple test files (we'll use the same audio content for simplicity) with httpx.Client(timeout=60.0) as client: audio_response = client.get(TEST_AUDIO_URL) audio_response.raise_for_status() audio_content = audio_response.content temp_files = [] try: # Create 3 temporary files for i in range(3): tmp_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) tmp_file.write(audio_content) tmp_file.close() temp_files.append(tmp_file.name) # Upload multiple files for transcription (non-batch) files = [ ("files", (f"test_audio_{i}.mp3", open(f, "rb"), "audio/mpeg")) for i, f in enumerate(temp_files) ] data = { "model": get_model_name(), "language": "en", "batch": "false", } response = client.post( f"{url}/v1/audio/transcriptions", files=files, data=data, headers=headers, ) # Close file handles for _, file_tuple in files: file_tuple[1].close() assert response.status_code == 200, f"Request failed: {response.text}" result = response.json() # Verify response structure for multiple files (non-batch) assert "results" in result assert isinstance(result["results"], list) assert len(result["results"]) == 3 for idx, file_result in enumerate(result["results"]): assert "text" in file_result assert "words" in file_result assert "filename" in file_result assert isinstance(file_result["text"], str) assert isinstance(file_result["words"], list) assert len(file_result["text"]) > 0 finally: for f in temp_files: Path(f).unlink(missing_ok=True) def test_transcriptions_multiple_files_batch(self): """Test the /v1/audio/transcriptions endpoint with multiple files in batch mode.""" url = get_modal_transcript_url() headers = get_auth_headers() # Create multiple test files with httpx.Client(timeout=60.0) as client: audio_response = client.get(TEST_AUDIO_URL) audio_response.raise_for_status() audio_content = audio_response.content temp_files = [] try: # Create 3 temporary files for i in range(3): tmp_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) tmp_file.write(audio_content) tmp_file.close() temp_files.append(tmp_file.name) # Upload multiple files for batch transcription files = [ ("files", (f"test_audio_{i}.mp3", open(f, "rb"), "audio/mpeg")) for i, f in enumerate(temp_files) ] data = { "model": get_model_name(), "language": "en", "batch": "true", } response = client.post( f"{url}/v1/audio/transcriptions", files=files, data=data, headers=headers, ) # Close file handles for _, file_tuple in files: file_tuple[1].close() assert response.status_code == 200, f"Request failed: {response.text}" result = response.json() # Verify response structure for batch mode assert "results" in result assert isinstance(result["results"], list) assert len(result["results"]) == 3 for idx, batch_result in enumerate(result["results"]): assert "text" in batch_result assert "words" in batch_result assert "filename" in batch_result assert isinstance(batch_result["text"], str) assert isinstance(batch_result["words"], list) assert len(batch_result["text"]) > 0 finally: for f in temp_files: Path(f).unlink(missing_ok=True) def test_transcriptions_error_handling(self): """Test error handling for invalid requests.""" url = get_modal_transcript_url() headers = get_auth_headers() with httpx.Client(timeout=60.0) as client: # Test with unsupported language response = client.post( f"{url}/v1/audio/transcriptions-from-url", json={ "audio_file_url": TEST_AUDIO_URL, "model": get_model_name(), "language": "fr", # Parakeet only supports English "timestamp_offset": 0.0, }, headers=headers, ) assert response.status_code == 400 assert "only supports English" in response.text def test_transcriptions_with_timestamp_offset(self): """Test transcription with timestamp offset parameter.""" url = get_modal_transcript_url() headers = get_auth_headers() with httpx.Client(timeout=60.0) as client: # Test with timestamp offset response = client.post( f"{url}/v1/audio/transcriptions-from-url", json={ "audio_file_url": TEST_AUDIO_URL, "model": get_model_name(), "language": "en", "timestamp_offset": 10.0, # Add 10 second offset }, headers=headers, ) assert response.status_code == 200, f"Request failed: {response.text}" result = response.json() # Verify response structure assert "text" in result assert "words" in result assert len(result["words"]) > 0, "Words list must not be empty" # Verify that timestamps have been offset for word in result["words"]: # All timestamps should be >= 10.0 due to offset assert ( word["start"] >= 10.0 ), f"Word start time {word['start']} should be >= 10.0" assert ( word["end"] >= 10.0 ), f"Word end time {word['end']} should be >= 10.0"