mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-22 05:09:05 +00:00
translation update
This commit is contained in:
@@ -11,13 +11,15 @@ API will be a POST request to TRANSCRIPT_URL:
|
||||
|
||||
"""
|
||||
|
||||
from time import monotonic
|
||||
|
||||
import httpx
|
||||
|
||||
from reflector.processors.audio_transcript import AudioTranscriptProcessor
|
||||
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
|
||||
from reflector.processors.types import AudioFile, Transcript, Word
|
||||
from reflector.processors.types import AudioFile, Transcript, TranslationLanguages, Word
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.retry import retry
|
||||
from time import monotonic
|
||||
import httpx
|
||||
|
||||
|
||||
class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
||||
@@ -28,6 +30,7 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
||||
self.timeout = settings.TRANSCRIPT_TIMEOUT
|
||||
self.headers = {
|
||||
"Authorization": f"Bearer {modal_api_key}",
|
||||
# "Content-Type": "multipart/form-data"
|
||||
}
|
||||
|
||||
async def _warmup(self):
|
||||
@@ -52,11 +55,28 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
||||
files = {
|
||||
"file": (data.name, data.fd),
|
||||
}
|
||||
# TODO: Get the source / target language from the UI preferences dynamically
|
||||
# like context, session objects
|
||||
source_language = "en"
|
||||
target_language = "fr"
|
||||
languages = TranslationLanguages()
|
||||
|
||||
# Only way to set the target should be the UI element like dropdown.
|
||||
# Hence, this assert should never fail.
|
||||
assert languages.is_supported(target_language)
|
||||
data = {
|
||||
"source_language": source_language,
|
||||
"target_language": target_language,
|
||||
}
|
||||
|
||||
print("TRYING TO TRANSCRIBE")
|
||||
|
||||
response = await retry(client.post)(
|
||||
self.transcript_url,
|
||||
files=files,
|
||||
timeout=self.timeout,
|
||||
headers=self.headers,
|
||||
# data=data
|
||||
)
|
||||
|
||||
self.logger.debug(
|
||||
@@ -64,8 +84,14 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
# Sanity check for translation status in result
|
||||
if "target_language" in result["text"]:
|
||||
text = result["text"]["target_language"]
|
||||
else:
|
||||
text = result["text"]["en"]
|
||||
transcript = Transcript(
|
||||
text=result["text"],
|
||||
text=text,
|
||||
words=[
|
||||
Word(
|
||||
text=word["text"],
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from pydantic import BaseModel, PrivateAttr
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import io
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseModel, PrivateAttr
|
||||
|
||||
|
||||
class AudioFile(BaseModel):
|
||||
@@ -104,3 +105,117 @@ class TitleSummary(BaseModel):
|
||||
class FinalSummary(BaseModel):
|
||||
summary: str
|
||||
duration: float
|
||||
|
||||
|
||||
class TranslationLanguages(BaseModel):
    """Catalogue of the languages that can be used for translation.

    The model holds a single mapping from a human-readable language name
    to its short language identifier, plus helpers to list the identifiers
    and to check whether a given identifier is part of the catalogue.
    """

    # Human-readable language name -> short language identifier.
    # NOTE(review): the identifiers look like the codes expected by the
    # translation backend (mostly ISO 639-1) -- confirm against the backend.
    language_to_id_mapping: dict = {
        "Afrikaans": "af",
        "Albanian": "sq",
        "Amharic": "am",
        "Arabic": "ar",
        "Armenian": "hy",
        "Asturian": "ast",
        "Azerbaijani": "az",
        "Bashkir": "ba",
        "Belarusian": "be",
        "Bengali": "bn",
        "Bosnian": "bs",
        "Breton": "br",
        "Bulgarian": "bg",
        "Burmese": "my",
        "Catalan; Valencian": "ca",
        "Cebuano": "ceb",
        "Central Khmer": "km",
        "Chinese": "zh",
        "Croatian": "hr",
        "Czech": "cs",
        "Danish": "da",
        "Dutch; Flemish": "nl",
        "English": "en",
        "Estonian": "et",
        "Finnish": "fi",
        "French": "fr",
        "Fulah": "ff",
        "Gaelic; Scottish Gaelic": "gd",
        "Galician": "gl",
        "Ganda": "lg",
        "Georgian": "ka",
        "German": "de",
        # Fixed spelling: this key was previously "Greeek".
        "Greek": "el",
        "Gujarati": "gu",
        "Haitian; Haitian Creole": "ht",
        "Hausa": "ha",
        "Hebrew": "he",
        "Hindi": "hi",
        "Hungarian": "hu",
        "Icelandic": "is",
        "Igbo": "ig",
        "Iloko": "ilo",
        "Indonesian": "id",
        "Irish": "ga",
        "Italian": "it",
        "Japanese": "ja",
        "Javanese": "jv",
        "Kannada": "kn",
        "Kazakh": "kk",
        "Korean": "ko",
        "Lao": "lo",
        "Latvian": "lv",
        "Lingala": "ln",
        "Lithuanian": "lt",
        "Luxembourgish; Letzeburgesch": "lb",
        "Macedonian": "mk",
        "Malagasy": "mg",
        "Malay": "ms",
        "Malayalam": "ml",
        "Marathi": "mr",
        "Mongolian": "mn",
        "Nepali": "ne",
        "Northern Sotho": "ns",
        "Norwegian": "no",
        "Occitan": "oc",
        "Oriya": "or",
        "Panjabi; Punjabi": "pa",
        "Persian": "fa",
        "Polish": "pl",
        "Portuguese": "pt",
        "Pushto; Pashto": "ps",
        "Romanian; Moldavian; Moldovan": "ro",
        "Russian": "ru",
        "Serbian": "sr",
        "Sindhi": "sd",
        "Sinhala; Sinhalese": "si",
        "Slovak": "sk",
        "Slovenian": "sl",
        "Somali": "so",
        "Spanish": "es",
        "Sundanese": "su",
        "Swahili": "sw",
        "Swati": "ss",
        "Swedish": "sv",
        "Tagalog": "tl",
        "Tamil": "ta",
        "Thai": "th",
        "Tswana": "tn",
        "Turkish": "tr",
        "Ukrainian": "uk",
        "Urdu": "ur",
        "Uzbek": "uz",
        "Vietnamese": "vi",
        "Welsh": "cy",
        "Western Frisian": "fy",
        "Wolof": "wo",
        "Xhosa": "xh",
        "Yiddish": "yi",
        "Yoruba": "yo",
        "Zulu": "zu",
    }

    @property
    def supported_languages(self):
        """Return a view of every language identifier in the catalogue."""
        return self.language_to_id_mapping.values()

    def is_supported(self, lang_id: str) -> bool:
        """Tell whether ``lang_id`` is one of the catalogued identifiers.

        Args:
            lang_id: Short language identifier to look up, e.g. ``"fr"``.

        Returns:
            ``True`` when the identifier is a value of
            ``language_to_id_mapping``, ``False`` otherwise. The comparison
            is exact (case-sensitive).
        """
        return lang_id in self.supported_languages
|
||||
|
||||
Reference in New Issue
Block a user