diff --git a/server/gpu/modal/reflector_transcriber.py b/server/gpu/modal/reflector_transcriber.py index 335342b2..55df052b 100644 --- a/server/gpu/modal/reflector_transcriber.py +++ b/server/gpu/modal/reflector_transcriber.py @@ -99,13 +99,13 @@ class Whisper: ) multilingual_transcript = {} - transcript_en = "" + transcript_source_lang = "" words = [] if segments: segments = list(segments) for segment in segments: - transcript_en += segment.text + transcript_source_lang += segment.text for word in segment.words: words.append( { @@ -115,12 +115,12 @@ class Whisper: } ) - multilingual_transcript["en"] = transcript_en + multilingual_transcript[source_language] = transcript_source_lang - if target_language != "en": + if target_language != source_language: self.translation_tokenizer.src_lang = source_language forced_bos_token_id = self.translation_tokenizer.get_lang_id(target_language) - encoded_transcript = self.translation_tokenizer(transcript_en, return_tensors="pt").to(self.device) + encoded_transcript = self.translation_tokenizer(transcript_source_lang, return_tensors="pt").to(self.device) generated_tokens = self.translation_model.generate( **encoded_transcript, forced_bos_token_id=forced_bos_token_id diff --git a/server/reflector/processors/audio_transcript_modal.py b/server/reflector/processors/audio_transcript_modal.py index f017240e..80b6e582 100644 --- a/server/reflector/processors/audio_transcript_modal.py +++ b/server/reflector/processors/audio_transcript_modal.py @@ -29,10 +29,7 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor): self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe" self.warmup_url = settings.TRANSCRIPT_URL + "/warmup" self.timeout = settings.TRANSCRIPT_TIMEOUT - self.headers = { - "Authorization": f"Bearer {modal_api_key}", - # "Content-Type": "multipart/form-data" - } + self.headers = {"Authorization": f"Bearer {modal_api_key}"} async def _warmup(self): try: @@ -90,7 +87,7 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor): if target_language in result["text"]: text = result["text"][target_language] else: - text = result["text"]["en"] + text = result["text"][source_language] transcript = Transcript( text=text, words=[