Translation enhancements (#247)

2026-02-05 02:16:46 +00:00 · 2023-09-26 19:49:54 +05:30
parent 4dbec9b154
commit 6a43297309
11 changed files with 303 additions and 126 deletions
--- a/server/reflector/processors/__init__.py
+++ b/server/reflector/processors/__init__.py
@@ -13,6 +13,7 @@ from .transcript_final_short_summary import (  # noqa: F401
 from .transcript_final_title import TranscriptFinalTitleProcessor  # noqa: F401
 from .transcript_liner import TranscriptLinerProcessor  # noqa: F401
 from .transcript_topic_detector import TranscriptTopicDetectorProcessor  # noqa: F401
+from .transcript_translator import TranscriptTranslatorProcessor  # noqa: F401
 from .types import (  # noqa: F401
    AudioFile,
    FinalLongSummary,
--- a/server/reflector/processors/audio_transcript_modal.py
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -18,7 +18,7 @@ import httpx

 from reflector.processors.audio_transcript import AudioTranscriptProcessor
 from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
-from reflector.processors.types import AudioFile, Transcript, TranslationLanguages, Word
+from reflector.processors.types import AudioFile, Transcript, Word
 from reflector.settings import settings
 from reflector.utils.retry import retry

@@ -53,21 +53,8 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
            files = {
                "file": (data.name, data.fd),
            }
-
-            # FIXME this should be a processor after, as each user may want
-            # different languages
            source_language = self.get_pref("audio:source_language", "en")
-            target_language = self.get_pref("audio:target_language", "en")
-            languages = TranslationLanguages()
-
-            # Only way to set the target should be the UI element like dropdown.
-            # Hence, this assert should never fail.
-            assert languages.is_supported(target_language)
-            json_payload = {
-                "source_language": source_language,
-                "target_language": target_language,
-            }
-
+            json_payload = {"source_language": source_language}
            response = await retry(client.post)(
                self.transcript_url,
                files=files,
@@ -81,16 +68,10 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
            )
            response.raise_for_status()
            result = response.json()
-
-            # Sanity check for translation status in the result
-            translation = None
-            if source_language != target_language and target_language in result["text"]:
-                translation = result["text"][target_language]
            text = result["text"][source_language]
            text = self.filter_profanity(text)
            transcript = Transcript(
                text=text,
-                translation=translation,
                words=[
                    Word(
                        text=word["text"],
--- a/server/reflector/processors/transcript_liner.py
+++ b/server/reflector/processors/transcript_liner.py
@@ -16,29 +16,35 @@ class TranscriptLinerProcessor(Processor):
        self.transcript = Transcript(words=[])
        self.max_text = max_text

+    def is_sentence_terminated(self, sentence) -> bool:
+        """
+        Tell whether `sentence` contains ".", "?" or "!" at any position
+        """
+        terminators = (".", "?", "!")
+        return any(mark in sentence for mark in terminators)
+
    async def _push(self, data: Transcript):
        # merge both transcript
        self.transcript.merge(data)

        # check if a line is complete
-        if "." not in self.transcript.text:
+        if not self.is_sentence_terminated(self.transcript.text):
            # if the transcription text is still not too long, wait for more
            if len(self.transcript.text) < self.max_text:
                return

        # cut to the next .
-        partial = Transcript(translation=self.transcript.translation, words=[])
+        partial = Transcript(words=[])
        for word in self.transcript.words[:]:
            partial.text += word.text
            partial.words.append(word)
-            if "." not in word.text:
+            if not self.is_sentence_terminated(word.text):
                continue

            # emit line
            await self.emit(partial)
-
            # create new transcript
-            partial = Transcript(translation=self.transcript.translation, words=[])
+            partial = Transcript(words=[])

        self.transcript = partial

--- a/server/reflector/processors/transcript_translator.py
+++ b/server/reflector/processors/transcript_translator.py
@@ -0,0 +1,113 @@
+from time import monotonic
+
+import httpx
+
+from reflector.processors.base import Processor
+from reflector.processors.types import Transcript, TranslationLanguages
+from reflector.settings import settings
+from reflector.utils.retry import retry
+
+
+class TranscriptTranslatorProcessor(Processor):
+    """
+    Translate the transcript into the target language
+
+    Every pushed Transcript is flushed right away: its `translation` field is
+    filled in (or set to None when source and target languages are the same)
+    and the transcript is emitted downstream.
+    """
+
+    INPUT_TYPE = Transcript
+    OUTPUT_TYPE = Transcript
+    TASK = "translate"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.transcript_url = settings.TRANSCRIPT_URL
+        self.timeout = settings.TRANSCRIPT_TIMEOUT
+        self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
+        # Defined up front so that _flush() does not raise AttributeError when
+        # it runs before the first _push()
+        self.transcript = None
+
+    async def _warmup(self):
+        """POST to the warmup endpoint; a failure is logged, not raised"""
+        try:
+            async with httpx.AsyncClient() as client:
+                start = monotonic()
+                self.logger.debug("Translate modal: warming up...")
+                response = await client.post(
+                    settings.TRANSCRIPT_URL + "/warmup",
+                    headers=self.headers,
+                    timeout=self.timeout,
+                )
+                response.raise_for_status()
+                duration = monotonic() - start
+                self.logger.debug(f"Translate modal: warmup took {duration:.2f}s")
+        except Exception:
+            self.logger.exception("Translate modal: warmup failed")
+
+    async def _push(self, data: Transcript):
+        """Keep the incoming transcript and flush it immediately"""
+        self.transcript = data
+        await self.flush()
+
+    async def get_translation(self, text: str) -> "str | None":
+        """
+        Translate `text` from the source to the target language preference
+
+        Returns the translated text, or None when the service response has
+        no entry for the target language. Raises AssertionError if the target
+        language is unsupported or equal to the source language; request
+        errors (including error status codes) propagate to the caller.
+        """
+        self.logger.debug(f"Try to translate {text=}")
+        # FIXME this should be a processor after, as each user may want
+        # different languages
+        source_language = self.get_pref("audio:source_language", "en")
+        target_language = self.get_pref("audio:target_language", "en")
+
+        # Only way to set the target should be the UI element like dropdown.
+        # Hence, this assert should never fail.
+        assert TranslationLanguages().is_supported(target_language)
+        assert target_language != source_language
+
+        # Sent as query-string parameters (`params=`), not as a JSON body
+        json_payload = {
+            "text": text,
+            "source_language": source_language,
+            "target_language": target_language,
+        }
+        translation = None
+        async with httpx.AsyncClient() as client:
+            response = await retry(client.post)(
+                settings.TRANSCRIPT_URL + "/translate",
+                headers=self.headers,
+                params=json_payload,
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            result = response.json()["text"]
+
+            # Sanity check for translation status in the result
+            if source_language != target_language and target_language in result:
+                translation = result[target_language]
+            self.logger.debug(f"Translation response: {text=}, {translation=}")
+        return translation
+
+    async def _flush(self):
+        """Fill in the translation of the pending transcript and emit it"""
+        if not self.transcript:
+            return
+        source_language = self.get_pref("audio:source_language", "en")
+        target_language = self.get_pref("audio:target_language", "en")
+        # Same language on both sides (the default when no preference is
+        # set): nothing to translate, and get_translation() would trip its
+        # assert, so emit the transcript with no translation instead.
+        translation = None
+        if source_language != target_language:
+            translation = await self.get_translation(text=self.transcript.text)
+        self.transcript.translation = translation
+        # NOTE(review): self.transcript is not cleared here, so another
+        # flush() would translate and emit it again — confirm this is intended
+        await self.emit(self.transcript)