Translation enhancements (#247)

This commit is contained in:
projects-g
2023-09-26 19:49:54 +05:30
committed by GitHub
parent 4dbec9b154
commit 6a43297309
11 changed files with 303 additions and 126 deletions

View File

@@ -13,6 +13,7 @@ from .transcript_final_short_summary import ( # noqa: F401
from .transcript_final_title import TranscriptFinalTitleProcessor # noqa: F401
from .transcript_liner import TranscriptLinerProcessor # noqa: F401
from .transcript_topic_detector import TranscriptTopicDetectorProcessor # noqa: F401
from .transcript_translator import TranscriptTranslatorProcessor # noqa: F401
from .types import ( # noqa: F401
AudioFile,
FinalLongSummary,

View File

@@ -18,7 +18,7 @@ import httpx
from reflector.processors.audio_transcript import AudioTranscriptProcessor
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
from reflector.processors.types import AudioFile, Transcript, TranslationLanguages, Word
from reflector.processors.types import AudioFile, Transcript, Word
from reflector.settings import settings
from reflector.utils.retry import retry
@@ -53,21 +53,8 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
files = {
"file": (data.name, data.fd),
}
# FIXME this should be a processor after, as each user may want
# different languages
source_language = self.get_pref("audio:source_language", "en")
target_language = self.get_pref("audio:target_language", "en")
languages = TranslationLanguages()
# Only way to set the target should be the UI element like dropdown.
# Hence, this assert should never fail.
assert languages.is_supported(target_language)
json_payload = {
"source_language": source_language,
"target_language": target_language,
}
json_payload = {"source_language": source_language}
response = await retry(client.post)(
self.transcript_url,
files=files,
@@ -81,16 +68,10 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
)
response.raise_for_status()
result = response.json()
# Sanity check for translation status in the result
translation = None
if source_language != target_language and target_language in result["text"]:
translation = result["text"][target_language]
text = result["text"][source_language]
text = self.filter_profanity(text)
transcript = Transcript(
text=text,
translation=translation,
words=[
Word(
text=word["text"],

View File

@@ -16,29 +16,35 @@ class TranscriptLinerProcessor(Processor):
self.transcript = Transcript(words=[])
self.max_text = max_text
def is_sentence_terminated(self, sentence) -> bool:
    """
    Tell whether `sentence` contains a sentence-ending punctuation mark.

    The marks looked for are ".", "?" and "!"; one occurrence anywhere in
    `sentence` (not only at its end) is enough to return True.
    """
    return any(mark in sentence for mark in (".", "?", "!"))
async def _push(self, data: Transcript):
# merge both transcript
self.transcript.merge(data)
# check if a line is complete
if "." not in self.transcript.text:
if not self.is_sentence_terminated(self.transcript.text):
# if the transcription text is still not too long, wait for more
if len(self.transcript.text) < self.max_text:
return
# cut to the next .
partial = Transcript(translation=self.transcript.translation, words=[])
partial = Transcript(words=[])
for word in self.transcript.words[:]:
partial.text += word.text
partial.words.append(word)
if "." not in word.text:
if not self.is_sentence_terminated(word.text):
continue
# emit line
await self.emit(partial)
# create new transcript
partial = Transcript(translation=self.transcript.translation, words=[])
partial = Transcript(words=[])
self.transcript = partial

View File

@@ -0,0 +1,88 @@
from time import monotonic
import httpx
from reflector.processors.base import Processor
from reflector.processors.types import Transcript, TranslationLanguages
from reflector.settings import settings
from reflector.utils.retry import retry
class TranscriptTranslatorProcessor(Processor):
    """
    Translate the transcript into the target language.

    Every pushed transcript is flushed immediately: its text is sent to the
    "/translate" endpoint of the transcript service, the answer is stored in
    the transcript's `translation` attribute, and the transcript is emitted.
    When the source and target languages are the same, no request is made and
    `translation` is set to None.
    """

    INPUT_TYPE = Transcript
    OUTPUT_TYPE = Transcript
    TASK = "translate"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.transcript_url = settings.TRANSCRIPT_URL
        self.timeout = settings.TRANSCRIPT_TIMEOUT
        self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
        # Last transcript received by _push(). Defined here so that _flush()
        # can be called before the first push without an AttributeError.
        self.transcript = None

    async def _warmup(self):
        """
        Call the service "/warmup" endpoint and log how long it took.

        Best effort: any failure is logged and swallowed, never raised.
        """
        try:
            async with httpx.AsyncClient() as client:
                start = monotonic()
                self.logger.debug("Translate modal: warming up...")
                response = await client.post(
                    settings.TRANSCRIPT_URL + "/warmup",
                    headers=self.headers,
                    timeout=self.timeout,
                )
                response.raise_for_status()
                duration = monotonic() - start
                self.logger.debug(f"Translate modal: warmup took {duration:.2f}s")
        except Exception:
            self.logger.exception("Translate modal: warmup failed")

    async def _push(self, data: Transcript):
        """Remember `data` as the current transcript and flush right away."""
        self.transcript = data
        await self.flush()

    async def get_translation(self, text: str) -> "str | None":
        """
        Translate `text` from the source language to the target language.

        Both languages come from the "audio:source_language" and
        "audio:target_language" preferences, each defaulting to "en".

        Returns the translated text, or None when the two languages are
        identical or when the response holds no entry for the target language.

        Raises AssertionError if the target language is not supported, and
        whatever the HTTP call raises (e.g. on a non-2xx status).
        """
        self.logger.debug(f"Try to translate {text=}")

        # FIXME the languages are read from the processor preferences, but
        # each user may want different languages
        source_language = self.get_pref("audio:source_language", "en")
        target_language = self.get_pref("audio:target_language", "en")

        # Nothing to translate. This is the default configuration ("en" to
        # "en"), so it must not be treated as an error.
        if source_language == target_language:
            return None

        languages = TranslationLanguages()
        # Only way to set the target should be the UI element like dropdown.
        # Hence, this assert should never fail.
        assert languages.is_supported(target_language)

        json_payload = {
            "text": text,
            "source_language": source_language,
            "target_language": target_language,
        }

        async with httpx.AsyncClient() as client:
            # Despite the variable name, the payload is passed through
            # `params`, not as a JSON body.
            response = await retry(client.post)(
                settings.TRANSCRIPT_URL + "/translate",
                headers=self.headers,
                params=json_payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            # NOTE(review): "text" is presumably a mapping of language code
            # to text - confirm against the service.
            result = response.json()["text"]

        # Sanity check for translation status in the result
        translation = None
        if target_language in result:
            translation = result[target_language]
        self.logger.debug(f"Translation response: {text=}, {translation=}")
        return translation

    async def _flush(self):
        """Translate the current transcript, if any, and emit it."""
        if not self.transcript:
            return
        translation = await self.get_translation(text=self.transcript.text)
        self.transcript.translation = translation
        await self.emit(self.transcript)