Merge pull request #288 from Monadical-SAS/feat/lang-dropdown

Language codes and translation changes
This commit is contained in:
Sara
2023-10-15 15:42:54 +02:00
committed by GitHub
17 changed files with 621 additions and 281 deletions

View File

@@ -5,6 +5,7 @@ Reflector GPU backend - LLM
"""
import json
import os
import threading
from typing import Optional
import modal
@@ -67,7 +68,7 @@ llm_image = (
gpu="A100",
timeout=60 * 5,
container_idle_timeout=60 * 5,
concurrency_limit=2,
allow_concurrent_inputs=15,
image=llm_image,
)
class LLM:
@@ -108,6 +109,8 @@ class LLM:
self.gen_cfg = gen_cfg
self.GenerationConfig = GenerationConfig
self.lock = threading.Lock()
def __exit__(self, *args):
print("Exit llm")
@@ -123,30 +126,31 @@ class LLM:
gen_cfg = self.gen_cfg
# If a gen_schema is given, conform to gen_schema
if gen_schema:
import jsonformer
with self.lock:
if gen_schema:
import jsonformer
print(f"Schema {gen_schema=}")
jsonformer_llm = jsonformer.Jsonformer(
model=self.model,
tokenizer=self.tokenizer,
json_schema=json.loads(gen_schema),
prompt=prompt,
max_string_token_length=gen_cfg.max_new_tokens
)
response = jsonformer_llm()
else:
# If no gen_schema, perform prompt only generation
print(f"Schema {gen_schema=}")
jsonformer_llm = jsonformer.Jsonformer(
model=self.model,
tokenizer=self.tokenizer,
json_schema=json.loads(gen_schema),
prompt=prompt,
max_string_token_length=gen_cfg.max_new_tokens
)
response = jsonformer_llm()
else:
# If no gen_schema, perform prompt only generation
# tokenize prompt
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
self.model.device
)
output = self.model.generate(input_ids, generation_config=gen_cfg)
# tokenize prompt
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
self.model.device
)
output = self.model.generate(input_ids, generation_config=gen_cfg)
# decode output
response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
response = response[len(prompt):]
# decode output
response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
response = response[len(prompt):]
print(f"Generated {response=}")
return {"text": response}
@@ -158,6 +162,7 @@ class LLM:
@stub.function(
container_idle_timeout=60 * 10,
timeout=60 * 5,
allow_concurrent_inputs=45,
secrets=[
Secret.from_name("reflector-gpu"),
],
@@ -187,7 +192,7 @@ def web():
gen_cfg: Optional[dict] = None
@app.post("/llm", dependencies=[Depends(apikey_auth)])
async def llm(
def llm(
req: LLMRequest,
):
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None

View File

@@ -5,6 +5,7 @@ Reflector GPU backend - LLM
"""
import json
import os
import threading
from typing import Optional
import modal
@@ -67,7 +68,7 @@ llm_image = (
gpu="A10G",
timeout=60 * 5,
container_idle_timeout=60 * 5,
concurrency_limit=2,
allow_concurrent_inputs=10,
image=llm_image,
)
class LLM:
@@ -111,6 +112,7 @@ class LLM:
self.tokenizer = tokenizer
self.gen_cfg = gen_cfg
self.GenerationConfig = GenerationConfig
self.lock = threading.Lock()
def __exit__(self, *args):
print("Exit llm")
@@ -129,33 +131,34 @@ class LLM:
gen_cfg = self.gen_cfg
# If a gen_schema is given, conform to gen_schema
if gen_schema:
import jsonformer
with self.lock:
if gen_schema:
import jsonformer
print(f"Schema {gen_schema=}")
jsonformer_llm = jsonformer.Jsonformer(
model=self.model,
tokenizer=self.tokenizer,
json_schema=json.loads(gen_schema),
prompt=prompt,
max_string_token_length=gen_cfg.max_new_tokens
)
response = jsonformer_llm()
else:
# If no gen_schema, perform prompt only generation
print(f"Schema {gen_schema=}")
jsonformer_llm = jsonformer.Jsonformer(
model=self.model,
tokenizer=self.tokenizer,
json_schema=json.loads(gen_schema),
prompt=prompt,
max_string_token_length=gen_cfg.max_new_tokens
)
response = jsonformer_llm()
else:
# If no gen_schema, perform prompt only generation
# tokenize prompt
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
self.model.device
)
output = self.model.generate(input_ids, generation_config=gen_cfg)
# tokenize prompt
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
self.model.device
)
output = self.model.generate(input_ids, generation_config=gen_cfg)
# decode output
response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
response = response[len(prompt):]
response = {
"long_summary": response
}
# decode output
response = self.tokenizer.decode(output[0].cpu(), skip_special_tokens=True)
response = response[len(prompt):]
response = {
"long_summary": response
}
print(f"Generated {response=}")
return {"text": response}
@@ -167,6 +170,7 @@ class LLM:
@stub.function(
container_idle_timeout=60 * 10,
timeout=60 * 5,
allow_concurrent_inputs=30,
secrets=[
Secret.from_name("reflector-gpu"),
],
@@ -196,7 +200,7 @@ def web():
gen_cfg: Optional[dict] = None
@app.post("/llm", dependencies=[Depends(apikey_auth)])
async def llm(
def llm(
req: LLMRequest,
):
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None

View File

@@ -5,6 +5,7 @@ Reflector GPU backend - transcriber
import os
import tempfile
import threading
from modal import Image, Secret, Stub, asgi_app, method
from pydantic import BaseModel
@@ -78,6 +79,7 @@ transcriber_image = (
gpu="A10G",
timeout=60 * 5,
container_idle_timeout=60 * 5,
allow_concurrent_inputs=6,
image=transcriber_image,
)
class Transcriber:
@@ -85,6 +87,7 @@ class Transcriber:
import faster_whisper
import torch
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
self.model = faster_whisper.WhisperModel(
@@ -106,14 +109,15 @@ class Transcriber:
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
fp.write(audio_data)
segments, _ = self.model.transcribe(
fp.name,
language=source_language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
with self.lock:
segments, _ = self.model.transcribe(
fp.name,
language=source_language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
multilingual_transcript = {}
transcript_source_lang = ""
@@ -147,6 +151,7 @@ class Transcriber:
@stub.function(
container_idle_timeout=60,
timeout=60,
allow_concurrent_inputs=40,
secrets=[
Secret.from_name("reflector-gpu"),
],
@@ -176,12 +181,12 @@ def web():
result: dict
@app.post("/transcribe", dependencies=[Depends(apikey_auth)])
async def transcribe(
def transcribe(
file: UploadFile,
source_language: Annotated[str, Body(...)] = "eng",
source_language: Annotated[str, Body(...)] = "en",
timestamp: Annotated[float, Body()] = 0.0
) -> TranscriptResponse:
audio_data = await file.read()
audio_data = file.file.read()
audio_suffix = file.filename.split(".")[-1]
assert audio_suffix in supported_audio_file_types

View File

@@ -4,7 +4,7 @@ Reflector GPU backend - transcriber
"""
import os
import tempfile
import threading
from modal import Image, Secret, Stub, asgi_app, method
from pydantic import BaseModel
@@ -26,8 +26,11 @@ stub = Stub(name="reflector-translator")
def install_seamless_communication():
import os
import subprocess
initial_dir = os.getcwd()
subprocess.run(["ssh-keyscan", "-t", "rsa", "github.com", ">>", "~/.ssh/known_hosts"])
subprocess.run(
["ssh-keyscan", "-t", "rsa", "github.com", ">>", "~/.ssh/known_hosts"]
)
subprocess.run(["rm", "-rf", "seamless_communication"])
subprocess.run(["git", "clone", SEAMLESS_GITEPO, "." + "/seamless_communication"])
os.chdir("seamless_communication")
@@ -54,13 +57,13 @@ def configure_seamless_m4t():
ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'r') as file:
with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'r') as file:
with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'r') as file:
with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'r') as file:
with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
@@ -69,27 +72,33 @@ def configure_seamless_m4t():
model_name = f"multitask_unity_{SEAMLESSM4T_MODEL_SIZE}.pt"
model_path = os.path.join(os.getcwd(), model_dir, latest_model_version, model_name)
vocoder_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-vocoder/snapshots"
vocoder_dir = (
f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-vocoder/snapshots"
)
available_vocoder_versions = os.listdir(vocoder_dir)
latest_vocoder_version = sorted(available_vocoder_versions)[-1]
vocoder_name = "vocoder_36langs.pt"
vocoder_path = os.path.join(os.getcwd(), vocoder_dir, latest_vocoder_version, vocoder_name)
vocoder_path = os.path.join(
os.getcwd(), vocoder_dir, latest_vocoder_version, vocoder_name
)
tokenizer_name = "tokenizer.model"
tokenizer_path = os.path.join(os.getcwd(), model_dir, latest_model_version, tokenizer_name)
tokenizer_path = os.path.join(
os.getcwd(), model_dir, latest_model_version, tokenizer_name
)
model_yaml_data['checkpoint'] = f"file:/{model_path}"
vocoder_yaml_data['checkpoint'] = f"file:/{vocoder_path}"
unity_100_yaml_data['tokenizer'] = f"file:/{tokenizer_path}"
unity_200_yaml_data['tokenizer'] = f"file:/{tokenizer_path}"
model_yaml_data["checkpoint"] = f"file:/{model_path}"
vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'w') as file:
with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
yaml.dump(model_yaml_data, file)
with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'w') as file:
with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
yaml.dump(vocoder_yaml_data, file)
with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'w') as file:
with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
yaml.dump(unity_100_yaml_data, file)
with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'w') as file:
with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
yaml.dump(unity_200_yaml_data, file)
@@ -109,7 +118,7 @@ transcriber_image = (
"torchaudio",
"fairseq2",
"pyyaml",
"hf-transfer~=0.1"
"hf-transfer~=0.1",
)
.run_function(install_seamless_communication)
.run_function(download_seamlessm4t_model)
@@ -129,6 +138,7 @@ transcriber_image = (
gpu="A10G",
timeout=60 * 5,
container_idle_timeout=60 * 5,
allow_concurrent_inputs=4,
image=transcriber_image,
)
class Translator:
@@ -136,13 +146,14 @@ class Translator:
import torch
from seamless_communication.models.inference.translator import Translator
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
self.translator = Translator(
SEAMLESSM4T_MODEL_CARD_NAME,
SEAMLESSM4T_VOCODER_CARD_NAME,
torch.device(self.device),
dtype=torch.float32
dtype=torch.float32,
)
@method()
@@ -156,31 +167,212 @@ class Translator:
"""
# TODO: Enhance with complete list of lang codes
seamless_lang_code = {
"en": "eng",
"fr": "fra"
# Afrikaans
'af': 'afr',
# Amharic
'am': 'amh',
# Modern Standard Arabic
'ar': 'arb',
# Moroccan Arabic
'ary': 'ary',
# Egyptian Arabic
'arz': 'arz',
# Assamese
'as': 'asm',
# North Azerbaijani
'az': 'azj',
# Belarusian
'be': 'bel',
# Bengali
'bn': 'ben',
# Bosnian
'bs': 'bos',
# Bulgarian
'bg': 'bul',
# Catalan
'ca': 'cat',
# Cebuano
'ceb': 'ceb',
# Czech
'cs': 'ces',
# Central Kurdish
'ku': 'ckb',
# Mandarin Chinese
'cmn': 'cmn_Hant',
# Welsh
'cy': 'cym',
# Danish
'da': 'dan',
# German
'de': 'deu',
# Greek
'el': 'ell',
# English
'en': 'eng',
# Estonian
'et': 'est',
# Basque
'eu': 'eus',
# Finnish
'fi': 'fin',
# French
'fr': 'fra',
# Irish
'ga': 'gle',
# West Central Oromo
'gaz': 'gaz',
# Galician
'gl': 'glg',
# Gujarati
'gu': 'guj',
# Hebrew
'he': 'heb',
# Hindi
'hi': 'hin',
# Croatian
'hr': 'hrv',
# Hungarian
'hu': 'hun',
# Armenian
'hy': 'hye',
# Igbo
'ig': 'ibo',
# Indonesian
'id': 'ind',
# Icelandic
'is': 'isl',
# Italian
'it': 'ita',
# Javanese
'jv': 'jav',
# Japanese
'ja': 'jpn',
# Kannada
'kn': 'kan',
# Georgian
'ka': 'kat',
# Kazakh
'kk': 'kaz',
# Halh Mongolian
'khk': 'khk',
# Khmer
'km': 'khm',
# Kyrgyz
'ky': 'kir',
# Korean
'ko': 'kor',
# Lao
'lo': 'lao',
# Lithuanian
'lt': 'lit',
# Ganda
'lg': 'lug',
# Luo
'luo': 'luo',
# Standard Latvian
'lv': 'lvs',
# Maithili
'mai': 'mai',
# Malayalam
'ml': 'mal',
# Marathi
'mr': 'mar',
# Macedonian
'mk': 'mkd',
# Maltese
'mt': 'mlt',
# Meitei
'mni': 'mni',
# Burmese
'my': 'mya',
# Dutch
'nl': 'nld',
# Norwegian Nynorsk
'nn': 'nno',
# Norwegian Bokmål
'nb': 'nob',
# Nepali
'ne': 'npi',
# Nyanja
'ny': 'nya',
# Odia
'or': 'ory',
# Punjabi
'pa': 'pan',
# Southern Pashto
'pbt': 'pbt',
# Western Persian
'pes': 'pes',
# Polish
'pl': 'pol',
# Portuguese
'pt': 'por',
# Romanian
'ro': 'ron',
# Russian
'ru': 'rus',
# Slovak
'sk': 'slk',
# Slovenian
'sl': 'slv',
# Shona
'sn': 'sna',
# Sindhi
'sd': 'snd',
# Somali
'so': 'som',
# Spanish
'es': 'spa',
# Serbian
'sr': 'srp',
# Swedish
'sv': 'swe',
# Swahili
'sw': 'swh',
# Tamil
'ta': 'tam',
# Telugu
'te': 'tel',
# Tajik
'tg': 'tgk',
# Tagalog
'tl': 'tgl',
# Thai
'th': 'tha',
# Turkish
'tr': 'tur',
# Ukrainian
'uk': 'ukr',
# Urdu
'ur': 'urd',
# Northern Uzbek
'uz': 'uzn',
# Vietnamese
'vi': 'vie',
# Yoruba
'yo': 'yor',
# Cantonese
'yue': 'yue',
# Standard Malay
'ms': 'zsm',
# Zulu
'zu': 'zul'
}
return seamless_lang_code.get(lang_code, "eng")
@method()
def translate_text(
self,
text: str,
source_language: str,
target_language: str
):
translated_text, _, _ = self.translator.predict(
text,
"t2tt",
src_lang=self.get_seamless_lang_code(source_language),
tgt_lang=self.get_seamless_lang_code(target_language),
ngram_filtering=True
)
return {
"text": {
source_language: text,
target_language: str(translated_text)
}
}
def translate_text(self, text: str, source_language: str, target_language: str):
with self.lock:
translated_text, _, _ = self.translator.predict(
text,
"t2tt",
src_lang=self.get_seamless_lang_code(source_language),
tgt_lang=self.get_seamless_lang_code(target_language),
ngram_filtering=True,
)
return {"text": {source_language: text, target_language: str(translated_text)}}
# -------------------------------------------------------------------
# Web API
# -------------------------------------------------------------------
@@ -189,6 +381,7 @@ class Translator:
@stub.function(
container_idle_timeout=60,
timeout=60,
allow_concurrent_inputs=40,
secrets=[
Secret.from_name("reflector-gpu"),
],
@@ -219,8 +412,8 @@ def web():
@app.post("/translate", dependencies=[Depends(apikey_auth)])
async def translate(
text: str,
source_language: Annotated[str, Body(...)] = "eng",
target_language: Annotated[str, Body(...)] = "fra",
source_language: Annotated[str, Body(...)] = "en",
target_language: Annotated[str, Body(...)] = "fr",
) -> TranslateResponse:
func = translatorstub.translate_text.spawn(
text=text,
@@ -230,8 +423,4 @@ def web():
result = func.get()
return result
@app.post("/warmup", dependencies=[Depends(apikey_auth)])
async def warmup():
return translatorstub.warmup.spawn().get()
return app

View File

@@ -8,7 +8,7 @@ API will be a POST request to TRANSCRIPT_URL:
"audio_url": "https://...",
"audio_ext": "wav",
"timestamp": 123.456,
"language": "eng"
"language": "en"
}
```

View File

@@ -5,8 +5,8 @@ API will be a POST request to TRANSCRIPT_URL:
```form
"timestamp": 123.456
"source_language": "eng"
"target_language": "eng"
"source_language": "en"
"target_language": "en"
"file": <audio file>
```
@@ -33,7 +33,7 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
files = {
"file": (data.name, data.fd),
}
source_language = self.get_pref("audio:source_language", "eng")
source_language = self.get_pref("audio:source_language", "en")
json_payload = {"source_language": source_language}
response = await retry(client.post)(
self.transcript_url,

View File

@@ -14,7 +14,7 @@ class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):
async def _transcript(self, data: AudioFile):
segments, _ = self.model.transcribe(
data.path.as_posix(),
language="eng",
language="en",
beam_size=5,
# condition_on_previous_text=True,
word_timestamps=True,

View File

@@ -79,7 +79,7 @@ class TranscriptFinalLongSummaryProcessor(Processor):
sentence = str(sentence).strip()
if sentence.startswith("- "):
sentence.replace("- ", "* ")
else:
elif not sentence.startswith("*"):
sentence = "* " + sentence
sentence += " \n"
summary_sentences.append(sentence)

View File

@@ -28,8 +28,8 @@ class TranscriptTranslatorProcessor(Processor):
# FIXME this should be a processor after, as each user may want
# different languages
source_language = self.get_pref("audio:source_language", "eng")
target_language = self.get_pref("audio:target_language", "eng")
source_language = self.get_pref("audio:source_language", "en")
target_language = self.get_pref("audio:target_language", "en")
if source_language == target_language:
return

View File

@@ -120,56 +120,196 @@ class FinalTitle(BaseModel):
# https://github.com/facebookresearch/seamless_communication/tree/main/scripts/m4t/predict#supported-languages
class TranslationLanguages(BaseModel):
language_to_id_mapping: dict = {
"afr": "Afrikaans",
"azj": "North Azerbaijani",
"bos": "Bosnian",
"cat": "Catalan",
"ceb": "Cebuano",
"ces": "Czech",
"cym": "Welsh",
"dan": "Danish",
"deu": "German",
"eng": "English",
"est": "Estonian",
"eus": "Basque",
"fin": "Finnish",
"fra": "French",
"gaz": "West Central Oromo",
"gle": "Irish",
"glg": "Galician",
"hrv": "Croatian",
"hun": "Hungarian",
"ibo": "Igbo",
"ind": "Indonesian",
"isl": "Icelandic",
"ita": "Italian",
"jav": "Javanese",
"lit": "Lithuanian",
"lug": "Ganda",
"luo": "Luo",
"lvs": "Standard Latvian",
"mlt": "Maltese",
"nld": "Dutch",
"nno": "Norwegian Nynorsk",
"nob": "Norwegian Bokmål",
"nya": "Nyanja",
"pol": "Polish",
"por": "Portuguese",
"ron": "Romanian",
"slk": "Slovak",
"slv": "Slovenian",
"sna": "Shona",
"som": "Somali",
"spa": "Spanish",
"swe": "Swedish",
"swh": "Swahili",
"tgl": "Tagalog",
"tur": "Turkish",
"uzn": "Northern Uzbek",
"vie": "Vietnamese",
"yor": "Yoruba",
"zsm": "Standard Malay",
"zul": "Zulu",
# Afrikaans
"af": "afr",
# Amharic
"am": "amh",
# Modern Standard Arabic
"ar": "arb",
# Moroccan Arabic
"ary": "ary",
# Egyptian Arabic
"arz": "arz",
# Assamese
"as": "asm",
# North Azerbaijani
"az": "azj",
# Belarusian
"be": "bel",
# Bengali
"bn": "ben",
# Bosnian
"bs": "bos",
# Bulgarian
"bg": "bul",
# Catalan
"ca": "cat",
# Cebuano
"ceb": "ceb",
# Czech
"cs": "ces",
# Central Kurdish
"ku": "ckb",
# Mandarin Chinese
"cmn": "cmn_Hant",
# Welsh
"cy": "cym",
# Danish
"da": "dan",
# German
"de": "deu",
# Greek
"el": "ell",
# English
"en": "eng",
# Estonian
"et": "est",
# Basque
"eu": "eus",
# Finnish
"fi": "fin",
# French
"fr": "fra",
# Irish
"ga": "gle",
# West Central Oromo
"gaz": "gaz",
# Galician
"gl": "glg",
# Gujarati
"gu": "guj",
# Hebrew
"he": "heb",
# Hindi
"hi": "hin",
# Croatian
"hr": "hrv",
# Hungarian
"hu": "hun",
# Armenian
"hy": "hye",
# Igbo
"ig": "ibo",
# Indonesian
"id": "ind",
# Icelandic
"is": "isl",
# Italian
"it": "ita",
# Javanese
"jv": "jav",
# Japanese
"ja": "jpn",
# Kannada
"kn": "kan",
# Georgian
"ka": "kat",
# Kazakh
"kk": "kaz",
# Halh Mongolian
"khk": "khk",
# Khmer
"km": "khm",
# Kyrgyz
"ky": "kir",
# Korean
"ko": "kor",
# Lao
"lo": "lao",
# Lithuanian
"lt": "lit",
# Ganda
"lg": "lug",
# Luo
"luo": "luo",
# Standard Latvian
"lv": "lvs",
# Maithili
"mai": "mai",
# Malayalam
"ml": "mal",
# Marathi
"mr": "mar",
# Macedonian
"mk": "mkd",
# Maltese
"mt": "mlt",
# Meitei
"mni": "mni",
# Burmese
"my": "mya",
# Dutch
"nl": "nld",
# Norwegian Nynorsk
"nn": "nno",
# Norwegian Bokmål
"nb": "nob",
# Nepali
"ne": "npi",
# Nyanja
"ny": "nya",
# Odia
"or": "ory",
# Punjabi
"pa": "pan",
# Southern Pashto
"pbt": "pbt",
# Western Persian
"pes": "pes",
# Polish
"pl": "pol",
# Portuguese
"pt": "por",
# Romanian
"ro": "ron",
# Russian
"ru": "rus",
# Slovak
"sk": "slk",
# Slovenian
"sl": "slv",
# Shona
"sn": "sna",
# Sindhi
"sd": "snd",
# Somali
"so": "som",
# Spanish
"es": "spa",
# Serbian
"sr": "srp",
# Swedish
"sv": "swe",
# Swahili
"sw": "swh",
# Tamil
"ta": "tam",
# Telugu
"te": "tel",
# Tajik
"tg": "tgk",
# Tagalog
"tl": "tgl",
# Thai
"th": "tha",
# Turkish
"tr": "tur",
# Ukrainian
"uk": "ukr",
# Urdu
"ur": "urd",
# Northern Uzbek
"uz": "uzn",
# Vietnamese
"vi": "vie",
# Yoruba
"yo": "yor",
# Cantonese
"yue": "yue",
# Standard Malay
"ms": "zsm",
# Zulu
"zu": "zul",
}
@property
@@ -177,6 +317,4 @@ class TranslationLanguages(BaseModel):
return self.language_to_id_mapping.keys()
def is_supported(self, lang_id: str) -> bool:
if lang_id in self.supported_languages:
return True
return False
return lang_id in self.supported_languages

View File

@@ -22,8 +22,8 @@ async def process_audio_file(
filename,
event_callback,
only_transcript=False,
source_language="eng",
target_language="eng",
source_language="en",
target_language="en",
):
# build pipeline for audio processing
processors = [
@@ -72,8 +72,8 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("source", help="Source file (mp3, wav, mp4...)")
parser.add_argument("--only-transcript", "-t", action="store_true")
parser.add_argument("--source-language", default="eng")
parser.add_argument("--target-language", default="eng")
parser.add_argument("--source-language", default="en")
parser.add_argument("--target-language", default="en")
parser.add_argument("--output", "-o", help="Output file (output.jsonl)")
args = parser.parse_args()

View File

@@ -90,8 +90,8 @@ async def rtc_offer_base(
event_callback=None,
event_callback_args=None,
audio_filename: Path | None = None,
source_language: str = "eng",
target_language: str = "eng",
source_language: str = "en",
target_language: str = "en",
):
# build an rtc session
offer = RTCSessionDescription(sdp=params.sdp, type=params.type)

View File

@@ -87,8 +87,8 @@ class Transcript(BaseModel):
long_summary: str | None = None
topics: list[TranscriptTopic] = []
events: list[TranscriptEvent] = []
source_language: str = "eng"
target_language: str = "eng"
source_language: str = "en"
target_language: str = "en"
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
ev = TranscriptEvent(event=event, data=data.model_dump())
@@ -170,8 +170,8 @@ class TranscriptController:
async def add(
self,
name: str,
source_language: str = "eng",
target_language: str = "eng",
source_language: str = "en",
target_language: str = "en",
user_id: str | None = None,
):
transcript = Transcript(
@@ -231,8 +231,8 @@ class GetTranscript(BaseModel):
class CreateTranscript(BaseModel):
name: str
source_language: str = Field("eng")
target_language: str = Field("eng")
source_language: str = Field("en")
target_language: str = Field("en")
class UpdateTranscript(BaseModel):

View File

@@ -46,7 +46,7 @@ async def dummy_transcript():
class TestAudioTranscriptProcessor(AudioTranscriptProcessor):
async def _transcript(self, data: AudioFile):
source_language = self.get_pref("audio:source_language", "eng")
source_language = self.get_pref("audio:source_language", "en")
print("transcripting", source_language)
print("pipeline", self.pipeline)
print("prefs", self.pipeline.prefs)

View File

@@ -10,15 +10,15 @@ async def test_transcript_create_default_translation():
response = await ac.post("/transcripts", json={"name": "test en"})
assert response.status_code == 200
assert response.json()["name"] == "test en"
assert response.json()["source_language"] == "eng"
assert response.json()["target_language"] == "eng"
assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "en"
tid = response.json()["id"]
response = await ac.get(f"/transcripts/{tid}")
assert response.status_code == 200
assert response.json()["name"] == "test en"
assert response.json()["source_language"] == "eng"
assert response.json()["target_language"] == "eng"
assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "en"
@pytest.mark.asyncio
@@ -31,15 +31,15 @@ async def test_transcript_create_en_fr_translation():
)
assert response.status_code == 200
assert response.json()["name"] == "test en/fr"
assert response.json()["source_language"] == "eng"
assert response.json()["target_language"] == "fra"
assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "fr"
tid = response.json()["id"]
response = await ac.get(f"/transcripts/{tid}")
assert response.status_code == 200
assert response.json()["name"] == "test en/fr"
assert response.json()["source_language"] == "eng"
assert response.json()["target_language"] == "fra"
assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "fr"
@pytest.mark.asyncio
@@ -52,12 +52,12 @@ async def test_transcript_create_fr_en_translation():
)
assert response.status_code == 200
assert response.json()["name"] == "test fr/en"
assert response.json()["source_language"] == "fra"
assert response.json()["target_language"] == "eng"
assert response.json()["source_language"] == "fr"
assert response.json()["target_language"] == "en"
tid = response.json()["id"]
response = await ac.get(f"/transcripts/{tid}")
assert response.status_code == 200
assert response.json()["name"] == "test fr/en"
assert response.json()["source_language"] == "fra"
assert response.json()["target_language"] == "eng"
assert response.json()["source_language"] == "fr"
assert response.json()["target_language"] == "en"

View File

@@ -8,14 +8,18 @@ type LanguageOption = {
};
const supportedLanguages: LanguageOption[] = [
{ value: "afr", name: "Afrikaans", script: "Latn" },
{
value: "amh",
value: "af",
name: "Afrikaans",
script: "Latn",
},
{
value: "am",
name: "Amharic",
script: "Ethi",
},
{
value: "arb",
value: "ar",
name: "Modern Standard Arabic",
script: "Arab",
},
@@ -30,37 +34,37 @@ const supportedLanguages: LanguageOption[] = [
script: "Arab",
},
{
value: "asm",
value: "as",
name: "Assamese",
script: "Beng",
},
{
value: "azj",
value: "az",
name: "North Azerbaijani",
script: "Latn",
},
{
value: "bel",
value: "be",
name: "Belarusian",
script: "Cyrl",
},
{
value: "ben",
value: "bn",
name: "Bengali",
script: "Beng",
},
{
value: "bos",
value: "bs",
name: "Bosnian",
script: "Latn",
},
{
value: "bul",
value: "bg",
name: "Bulgarian",
script: "Cyrl",
},
{
value: "cat",
value: "ca",
name: "Catalan",
script: "Latn",
},
@@ -70,12 +74,12 @@ const supportedLanguages: LanguageOption[] = [
script: "Latn",
},
{
value: "ces",
value: "cs",
name: "Czech",
script: "Latn",
},
{
value: "ckb",
value: "ku",
name: "Central Kurdish",
script: "Arab",
},
@@ -85,52 +89,47 @@ const supportedLanguages: LanguageOption[] = [
script: "Hans",
},
{
value: "cmn_Ha",
name: "Mandarin Chinese",
script: "Hant",
},
{
value: "cym",
value: "cy",
name: "Welsh",
script: "Latn",
},
{
value: "dan",
value: "da",
name: "Danish",
script: "Latn",
},
{
value: "deu",
value: "de",
name: "German",
script: "Latn",
},
{
value: "ell",
value: "el",
name: "Greek",
script: "Grek",
},
{
value: "eng",
value: "en",
name: "English",
script: "Latn",
},
{
value: "est",
value: "et",
name: "Estonian",
script: "Latn",
},
{
value: "eus",
value: "eu",
name: "Basque",
script: "Latn",
},
{
value: "fin",
value: "fi",
name: "Finnish",
script: "Latn",
},
{
value: "fra",
value: "fr",
name: "French",
script: "Latn",
},
@@ -140,87 +139,87 @@ const supportedLanguages: LanguageOption[] = [
script: "Latn",
},
{
value: "gle",
value: "ga",
name: "Irish",
script: "Latn",
},
{
value: "glg",
value: "gl",
name: "Galician",
script: "Latn",
},
{
value: "guj",
value: "gu",
name: "Gujarati",
script: "Gujr",
},
{
value: "heb",
value: "he",
name: "Hebrew",
script: "Hebr",
},
{
value: "hin",
value: "hi",
name: "Hindi",
script: "Deva",
},
{
value: "hrv",
value: "hr",
name: "Croatian",
script: "Latn",
},
{
value: "hun",
value: "hu",
name: "Hungarian",
script: "Latn",
},
{
value: "hye",
value: "hy",
name: "Armenian",
script: "Armn",
},
{
value: "ibo",
value: "ig",
name: "Igbo",
script: "Latn",
},
{
value: "ind",
value: "id",
name: "Indonesian",
script: "Latn",
},
{
value: "isl",
value: "is",
name: "Icelandic",
script: "Latn",
},
{
value: "ita",
value: "it",
name: "Italian",
script: "Latn",
},
{
value: "jav",
value: "jv",
name: "Javanese",
script: "Latn",
},
{
value: "jpn",
value: "ja",
name: "Japanese",
script: "Jpan",
},
{
value: "kan",
value: "kn",
name: "Kannada",
script: "Knda",
},
{
value: "kat",
value: "ka",
name: "Georgian",
script: "Geor",
},
{
value: "kaz",
value: "kk",
name: "Kazakh",
script: "Cyrl",
},
@@ -230,32 +229,32 @@ const supportedLanguages: LanguageOption[] = [
script: "Cyrl",
},
{
value: "khm",
value: "km",
name: "Khmer",
script: "Khmr",
},
{
value: "kir",
value: "ky",
name: "Kyrgyz",
script: "Cyrl",
},
{
value: "kor",
value: "ko",
name: "Korean",
script: "Kore",
},
{
value: "lao",
value: "lo",
name: "Lao",
script: "Laoo",
},
{
value: "lit",
value: "lt",
name: "Lithuanian",
script: "Latn",
},
{
value: "lug",
value: "lg",
name: "Ganda",
script: "Latn",
},
@@ -265,7 +264,7 @@ const supportedLanguages: LanguageOption[] = [
script: "Latn",
},
{
value: "lvs",
value: "lv",
name: "Standard Latvian",
script: "Latn",
},
@@ -275,22 +274,22 @@ const supportedLanguages: LanguageOption[] = [
script: "Deva",
},
{
value: "mal",
value: "ml",
name: "Malayalam",
script: "Mlym",
},
{
value: "mar",
value: "mr",
name: "Marathi",
script: "Deva",
},
{
value: "mkd",
value: "mk",
name: "Macedonian",
script: "Cyrl",
},
{
value: "mlt",
value: "mt",
name: "Maltese",
script: "Latn",
},
@@ -300,42 +299,42 @@ const supportedLanguages: LanguageOption[] = [
script: "Beng",
},
{
value: "mya",
value: "my",
name: "Burmese",
script: "Mymr",
},
{
value: "nld",
value: "nl",
name: "Dutch",
script: "Latn",
},
{
value: "nno",
value: "nn",
name: "Norwegian Nynorsk",
script: "Latn",
},
{
value: "nob",
value: "nb",
name: "Norwegian Bokmål",
script: "Latn",
},
{
value: "npi",
value: "ne",
name: "Nepali",
script: "Deva",
},
{
value: "nya",
value: "ny",
name: "Nyanja",
script: "Latn",
},
{
value: "ory",
value: "or",
name: "Odia",
script: "Orya",
},
{
value: "pan",
value: "pa",
name: "Punjabi",
script: "Guru",
},
@@ -350,122 +349,122 @@ const supportedLanguages: LanguageOption[] = [
script: "Arab",
},
{
value: "pol",
value: "pl",
name: "Polish",
script: "Latn",
},
{
value: "por",
value: "pt",
name: "Portuguese",
script: "Latn",
},
{
value: "ron",
value: "ro",
name: "Romanian",
script: "Latn",
},
{
value: "rus",
value: "ru",
name: "Russian",
script: "Cyrl",
},
{
value: "slk",
value: "sk",
name: "Slovak",
script: "Latn",
},
{
value: "slv",
value: "sl",
name: "Slovenian",
script: "Latn",
},
{
value: "sna",
value: "sn",
name: "Shona",
script: "Latn",
},
{
value: "snd",
value: "sd",
name: "Sindhi",
script: "Arab",
},
{
value: "som",
value: "so",
name: "Somali",
script: "Latn",
},
{
value: "spa",
value: "es",
name: "Spanish",
script: "Latn",
},
{
value: "srp",
value: "sr",
name: "Serbian",
script: "Cyrl",
},
{
value: "swe",
value: "sv",
name: "Swedish",
script: "Latn",
},
{
value: "swh",
value: "sw",
name: "Swahili",
script: "Latn",
},
{
value: "tam",
value: "ta",
name: "Tamil",
script: "Taml",
},
{
value: "tel",
value: "te",
name: "Telugu",
script: "Telu",
},
{
value: "tgk",
value: "tg",
name: "Tajik",
script: "Cyrl",
},
{
value: "tgl",
value: "tl",
name: "Tagalog",
script: "Latn",
},
{
value: "tha",
value: "th",
name: "Thai",
script: "Thai",
},
{
value: "tur",
value: "tr",
name: "Turkish",
script: "Latn",
},
{
value: "ukr",
value: "uk",
name: "Ukrainian",
script: "Cyrl",
},
{
value: "urd",
value: "ur",
name: "Urdu",
script: "Arab",
},
{
value: "uzn",
value: "uz",
name: "Northern Uzbek",
script: "Latn",
},
{
value: "vie",
value: "vi",
name: "Vietnamese",
script: "Latn",
},
{
value: "yor",
value: "yo",
name: "Yoruba",
script: "Latn",
},
@@ -475,12 +474,12 @@ const supportedLanguages: LanguageOption[] = [
script: "Hant",
},
{
value: "zsm",
value: "ms",
name: "Standard Malay",
script: "Latn",
},
{
value: "zul",
value: "zu",
name: "Zulu",
script: "Latn",
},

View File

@@ -25,7 +25,7 @@ const useCreateTranscript = (): CreateTranscript => {
const requestParameters: V1TranscriptsCreateRequest = {
createTranscript: {
name: params.name || "Weekly All-Hands", // Default
targetLanguage: params.targetLanguage || "eng", // Default
targetLanguage: params.targetLanguage || "en", // Default
},
};