"""
|
|
Implementation using the GPU service from modal.com
|
|
|
|
API will be a POST request to TRANSCRIPT_URL:
|
|
|
|
```form
|
|
"timestamp": 123.456
|
|
"source_language": "en"
|
|
"target_language": "en"
|
|
"file": <audio file>
|
|
```
|
|
|
|
"""

from openai import AsyncOpenAI

from reflector.processors.audio_transcript import AudioTranscriptProcessor
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
from reflector.processors.types import AudioFile, Transcript, Word
from reflector.settings import settings


class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
    def __init__(self, modal_api_key: str | None = None, **kwargs):
        super().__init__()
        if not settings.TRANSCRIPT_URL:
            raise Exception(
                "TRANSCRIPT_URL required to use AudioTranscriptModalProcessor"
            )
        # The Modal deployment exposes an OpenAI-compatible endpoint under /v1
        self.transcript_url = settings.TRANSCRIPT_URL + "/v1"
        self.timeout = settings.TRANSCRIPT_TIMEOUT
        self.modal_api_key = modal_api_key

    async def _transcript(self, data: AudioFile):
        async with AsyncOpenAI(
            base_url=self.transcript_url,
            api_key=self.modal_api_key,
            timeout=self.timeout,
        ) as client:
            self.logger.debug(f"Try to transcribe audio {data.name}")

            # Close the file handle once the upload is done
            with open(data.path, "rb") as audio_file:
                transcription = await client.audio.transcriptions.create(
                    file=audio_file,
                    model="whisper-1",
                    response_format="verbose_json",
                    language=self.get_pref("audio:source_language", "en"),
                    timestamp_granularities=["word"],
                )
            self.logger.debug(f"Transcription: {transcription}")

            # Convert the word-level results into the internal Transcript type
            transcript = Transcript(
                words=[
                    Word(
                        text=word.word,
                        start=word.start,
                        end=word.end,
                    )
                    for word in transcription.words
                ],
            )
            # Shift word timings by the start time of this audio chunk
            transcript.add_offset(data.timestamp)

            return transcript


AudioTranscriptAutoProcessor.register("modal", AudioTranscriptModalProcessor)
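
# Usage sketch (illustrative, not part of the original module). The processor
# is normally selected through AudioTranscriptAutoProcessor via the "modal"
# backend registered above; direct instantiation as shown here assumes
# settings.TRANSCRIPT_URL and settings.TRANSCRIPT_TIMEOUT are configured, and
# it calls the internal _transcript hook only for demonstration.
#
#     processor = AudioTranscriptModalProcessor(modal_api_key="...")
#     transcript = await processor._transcript(audio_file)  # audio_file: AudioFile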