Merge pull request #288 from Monadical-SAS/feat/lang-dropdown

Language codes and translation changes
This commit is contained in:
Sara
2023-10-15 15:42:54 +02:00
committed by GitHub
17 changed files with 621 additions and 281 deletions

View File

@@ -5,6 +5,7 @@ Reflector GPU backend - LLM
""" """
import json import json
import os import os
import threading
from typing import Optional from typing import Optional
import modal import modal
@@ -67,7 +68,7 @@ llm_image = (
gpu="A100", gpu="A100",
timeout=60 * 5, timeout=60 * 5,
container_idle_timeout=60 * 5, container_idle_timeout=60 * 5,
concurrency_limit=2, allow_concurrent_inputs=15,
image=llm_image, image=llm_image,
) )
class LLM: class LLM:
@@ -108,6 +109,8 @@ class LLM:
self.gen_cfg = gen_cfg self.gen_cfg = gen_cfg
self.GenerationConfig = GenerationConfig self.GenerationConfig = GenerationConfig
self.lock = threading.Lock()
def __exit__(self, *args): def __exit__(self, *args):
print("Exit llm") print("Exit llm")
@@ -123,6 +126,7 @@ class LLM:
gen_cfg = self.gen_cfg gen_cfg = self.gen_cfg
# If a gen_schema is given, conform to gen_schema # If a gen_schema is given, conform to gen_schema
with self.lock:
if gen_schema: if gen_schema:
import jsonformer import jsonformer
@@ -158,6 +162,7 @@ class LLM:
@stub.function( @stub.function(
container_idle_timeout=60 * 10, container_idle_timeout=60 * 10,
timeout=60 * 5, timeout=60 * 5,
allow_concurrent_inputs=45,
secrets=[ secrets=[
Secret.from_name("reflector-gpu"), Secret.from_name("reflector-gpu"),
], ],
@@ -187,7 +192,7 @@ def web():
gen_cfg: Optional[dict] = None gen_cfg: Optional[dict] = None
@app.post("/llm", dependencies=[Depends(apikey_auth)]) @app.post("/llm", dependencies=[Depends(apikey_auth)])
async def llm( def llm(
req: LLMRequest, req: LLMRequest,
): ):
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None

View File

@@ -5,6 +5,7 @@ Reflector GPU backend - LLM
""" """
import json import json
import os import os
import threading
from typing import Optional from typing import Optional
import modal import modal
@@ -67,7 +68,7 @@ llm_image = (
gpu="A10G", gpu="A10G",
timeout=60 * 5, timeout=60 * 5,
container_idle_timeout=60 * 5, container_idle_timeout=60 * 5,
concurrency_limit=2, allow_concurrent_inputs=10,
image=llm_image, image=llm_image,
) )
class LLM: class LLM:
@@ -111,6 +112,7 @@ class LLM:
self.tokenizer = tokenizer self.tokenizer = tokenizer
self.gen_cfg = gen_cfg self.gen_cfg = gen_cfg
self.GenerationConfig = GenerationConfig self.GenerationConfig = GenerationConfig
self.lock = threading.Lock()
def __exit__(self, *args): def __exit__(self, *args):
print("Exit llm") print("Exit llm")
@@ -129,6 +131,7 @@ class LLM:
gen_cfg = self.gen_cfg gen_cfg = self.gen_cfg
# If a gen_schema is given, conform to gen_schema # If a gen_schema is given, conform to gen_schema
with self.lock:
if gen_schema: if gen_schema:
import jsonformer import jsonformer
@@ -167,6 +170,7 @@ class LLM:
@stub.function( @stub.function(
container_idle_timeout=60 * 10, container_idle_timeout=60 * 10,
timeout=60 * 5, timeout=60 * 5,
allow_concurrent_inputs=30,
secrets=[ secrets=[
Secret.from_name("reflector-gpu"), Secret.from_name("reflector-gpu"),
], ],
@@ -196,7 +200,7 @@ def web():
gen_cfg: Optional[dict] = None gen_cfg: Optional[dict] = None
@app.post("/llm", dependencies=[Depends(apikey_auth)]) @app.post("/llm", dependencies=[Depends(apikey_auth)])
async def llm( def llm(
req: LLMRequest, req: LLMRequest,
): ):
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None

View File

@@ -5,6 +5,7 @@ Reflector GPU backend - transcriber
import os import os
import tempfile import tempfile
import threading
from modal import Image, Secret, Stub, asgi_app, method from modal import Image, Secret, Stub, asgi_app, method
from pydantic import BaseModel from pydantic import BaseModel
@@ -78,6 +79,7 @@ transcriber_image = (
gpu="A10G", gpu="A10G",
timeout=60 * 5, timeout=60 * 5,
container_idle_timeout=60 * 5, container_idle_timeout=60 * 5,
allow_concurrent_inputs=6,
image=transcriber_image, image=transcriber_image,
) )
class Transcriber: class Transcriber:
@@ -85,6 +87,7 @@ class Transcriber:
import faster_whisper import faster_whisper
import torch import torch
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available() self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu" self.device = "cuda" if self.use_gpu else "cpu"
self.model = faster_whisper.WhisperModel( self.model = faster_whisper.WhisperModel(
@@ -106,6 +109,7 @@ class Transcriber:
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp: with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
fp.write(audio_data) fp.write(audio_data)
with self.lock:
segments, _ = self.model.transcribe( segments, _ = self.model.transcribe(
fp.name, fp.name,
language=source_language, language=source_language,
@@ -147,6 +151,7 @@ class Transcriber:
@stub.function( @stub.function(
container_idle_timeout=60, container_idle_timeout=60,
timeout=60, timeout=60,
allow_concurrent_inputs=40,
secrets=[ secrets=[
Secret.from_name("reflector-gpu"), Secret.from_name("reflector-gpu"),
], ],
@@ -176,12 +181,12 @@ def web():
result: dict result: dict
@app.post("/transcribe", dependencies=[Depends(apikey_auth)]) @app.post("/transcribe", dependencies=[Depends(apikey_auth)])
async def transcribe( def transcribe(
file: UploadFile, file: UploadFile,
source_language: Annotated[str, Body(...)] = "eng", source_language: Annotated[str, Body(...)] = "en",
timestamp: Annotated[float, Body()] = 0.0 timestamp: Annotated[float, Body()] = 0.0
) -> TranscriptResponse: ) -> TranscriptResponse:
audio_data = await file.read() audio_data = file.file.read()
audio_suffix = file.filename.split(".")[-1] audio_suffix = file.filename.split(".")[-1]
assert audio_suffix in supported_audio_file_types assert audio_suffix in supported_audio_file_types

View File

@@ -4,7 +4,7 @@ Reflector GPU backend - transcriber
""" """
import os import os
import tempfile import threading
from modal import Image, Secret, Stub, asgi_app, method from modal import Image, Secret, Stub, asgi_app, method
from pydantic import BaseModel from pydantic import BaseModel
@@ -26,8 +26,11 @@ stub = Stub(name="reflector-translator")
def install_seamless_communication(): def install_seamless_communication():
import os import os
import subprocess import subprocess
initial_dir = os.getcwd() initial_dir = os.getcwd()
subprocess.run(["ssh-keyscan", "-t", "rsa", "github.com", ">>", "~/.ssh/known_hosts"]) subprocess.run(
["ssh-keyscan", "-t", "rsa", "github.com", ">>", "~/.ssh/known_hosts"]
)
subprocess.run(["rm", "-rf", "seamless_communication"]) subprocess.run(["rm", "-rf", "seamless_communication"])
subprocess.run(["git", "clone", SEAMLESS_GITEPO, "." + "/seamless_communication"]) subprocess.run(["git", "clone", SEAMLESS_GITEPO, "." + "/seamless_communication"])
os.chdir("seamless_communication") os.chdir("seamless_communication")
@@ -54,13 +57,13 @@ def configure_seamless_m4t():
ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards" ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards"
with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'r') as file: with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "r") as file:
model_yaml_data = yaml.load(file, Loader=yaml.FullLoader) model_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'r') as file: with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "r") as file:
vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader) vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'r') as file: with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "r") as file:
unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader) unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'r') as file: with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "r") as file:
unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader) unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader)
model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots" model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots"
@@ -69,27 +72,33 @@ def configure_seamless_m4t():
model_name = f"multitask_unity_{SEAMLESSM4T_MODEL_SIZE}.pt" model_name = f"multitask_unity_{SEAMLESSM4T_MODEL_SIZE}.pt"
model_path = os.path.join(os.getcwd(), model_dir, latest_model_version, model_name) model_path = os.path.join(os.getcwd(), model_dir, latest_model_version, model_name)
vocoder_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-vocoder/snapshots" vocoder_dir = (
f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-vocoder/snapshots"
)
available_vocoder_versions = os.listdir(vocoder_dir) available_vocoder_versions = os.listdir(vocoder_dir)
latest_vocoder_version = sorted(available_vocoder_versions)[-1] latest_vocoder_version = sorted(available_vocoder_versions)[-1]
vocoder_name = "vocoder_36langs.pt" vocoder_name = "vocoder_36langs.pt"
vocoder_path = os.path.join(os.getcwd(), vocoder_dir, latest_vocoder_version, vocoder_name) vocoder_path = os.path.join(
os.getcwd(), vocoder_dir, latest_vocoder_version, vocoder_name
)
tokenizer_name = "tokenizer.model" tokenizer_name = "tokenizer.model"
tokenizer_path = os.path.join(os.getcwd(), model_dir, latest_model_version, tokenizer_name) tokenizer_path = os.path.join(
os.getcwd(), model_dir, latest_model_version, tokenizer_name
)
model_yaml_data['checkpoint'] = f"file:/{model_path}" model_yaml_data["checkpoint"] = f"file:/{model_path}"
vocoder_yaml_data['checkpoint'] = f"file:/{vocoder_path}" vocoder_yaml_data["checkpoint"] = f"file:/{vocoder_path}"
unity_100_yaml_data['tokenizer'] = f"file:/{tokenizer_path}" unity_100_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
unity_200_yaml_data['tokenizer'] = f"file:/{tokenizer_path}" unity_200_yaml_data["tokenizer"] = f"file:/{tokenizer_path}"
with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'w') as file: with open(f"{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml", "w") as file:
yaml.dump(model_yaml_data, file) yaml.dump(model_yaml_data, file)
with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'w') as file: with open(f"{ASSETS_DIR}/vocoder_36langs.yaml", "w") as file:
yaml.dump(vocoder_yaml_data, file) yaml.dump(vocoder_yaml_data, file)
with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'w') as file: with open(f"{ASSETS_DIR}/unity_nllb-100.yaml", "w") as file:
yaml.dump(unity_100_yaml_data, file) yaml.dump(unity_100_yaml_data, file)
with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'w') as file: with open(f"{ASSETS_DIR}/unity_nllb-200.yaml", "w") as file:
yaml.dump(unity_200_yaml_data, file) yaml.dump(unity_200_yaml_data, file)
@@ -109,7 +118,7 @@ transcriber_image = (
"torchaudio", "torchaudio",
"fairseq2", "fairseq2",
"pyyaml", "pyyaml",
"hf-transfer~=0.1" "hf-transfer~=0.1",
) )
.run_function(install_seamless_communication) .run_function(install_seamless_communication)
.run_function(download_seamlessm4t_model) .run_function(download_seamlessm4t_model)
@@ -129,6 +138,7 @@ transcriber_image = (
gpu="A10G", gpu="A10G",
timeout=60 * 5, timeout=60 * 5,
container_idle_timeout=60 * 5, container_idle_timeout=60 * 5,
allow_concurrent_inputs=4,
image=transcriber_image, image=transcriber_image,
) )
class Translator: class Translator:
@@ -136,13 +146,14 @@ class Translator:
import torch import torch
from seamless_communication.models.inference.translator import Translator from seamless_communication.models.inference.translator import Translator
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available() self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu" self.device = "cuda" if self.use_gpu else "cpu"
self.translator = Translator( self.translator = Translator(
SEAMLESSM4T_MODEL_CARD_NAME, SEAMLESSM4T_MODEL_CARD_NAME,
SEAMLESSM4T_VOCODER_CARD_NAME, SEAMLESSM4T_VOCODER_CARD_NAME,
torch.device(self.device), torch.device(self.device),
dtype=torch.float32 dtype=torch.float32,
) )
@method() @method()
@@ -156,31 +167,212 @@ class Translator:
""" """
# TODO: Enhance with complete list of lang codes # TODO: Enhance with complete list of lang codes
seamless_lang_code = { seamless_lang_code = {
"en": "eng", # Afrikaans
"fr": "fra" 'af': 'afr',
# Amharic
'am': 'amh',
# Modern Standard Arabic
'ar': 'arb',
# Moroccan Arabic
'ary': 'ary',
# Egyptian Arabic
'arz': 'arz',
# Assamese
'as': 'asm',
# North Azerbaijani
'az': 'azj',
# Belarusian
'be': 'bel',
# Bengali
'bn': 'ben',
# Bosnian
'bs': 'bos',
# Bulgarian
'bg': 'bul',
# Catalan
'ca': 'cat',
# Cebuano
'ceb': 'ceb',
# Czech
'cs': 'ces',
# Central Kurdish
'ku': 'ckb',
# Mandarin Chinese
'cmn': 'cmn_Hant',
# Welsh
'cy': 'cym',
# Danish
'da': 'dan',
# German
'de': 'deu',
# Greek
'el': 'ell',
# English
'en': 'eng',
# Estonian
'et': 'est',
# Basque
'eu': 'eus',
# Finnish
'fi': 'fin',
# French
'fr': 'fra',
# Irish
'ga': 'gle',
# West Central Oromo
'gaz': 'gaz',
# Galician
'gl': 'glg',
# Gujarati
'gu': 'guj',
# Hebrew
'he': 'heb',
# Hindi
'hi': 'hin',
# Croatian
'hr': 'hrv',
# Hungarian
'hu': 'hun',
# Armenian
'hy': 'hye',
# Igbo
'ig': 'ibo',
# Indonesian
'id': 'ind',
# Icelandic
'is': 'isl',
# Italian
'it': 'ita',
# Javanese
'jv': 'jav',
# Japanese
'ja': 'jpn',
# Kannada
'kn': 'kan',
# Georgian
'ka': 'kat',
# Kazakh
'kk': 'kaz',
# Halh Mongolian
'khk': 'khk',
# Khmer
'km': 'khm',
# Kyrgyz
'ky': 'kir',
# Korean
'ko': 'kor',
# Lao
'lo': 'lao',
# Lithuanian
'lt': 'lit',
# Ganda
'lg': 'lug',
# Luo
'luo': 'luo',
# Standard Latvian
'lv': 'lvs',
# Maithili
'mai': 'mai',
# Malayalam
'ml': 'mal',
# Marathi
'mr': 'mar',
# Macedonian
'mk': 'mkd',
# Maltese
'mt': 'mlt',
# Meitei
'mni': 'mni',
# Burmese
'my': 'mya',
# Dutch
'nl': 'nld',
# Norwegian Nynorsk
'nn': 'nno',
# Norwegian Bokmål
'nb': 'nob',
# Nepali
'ne': 'npi',
# Nyanja
'ny': 'nya',
# Odia
'or': 'ory',
# Punjabi
'pa': 'pan',
# Southern Pashto
'pbt': 'pbt',
# Western Persian
'pes': 'pes',
# Polish
'pl': 'pol',
# Portuguese
'pt': 'por',
# Romanian
'ro': 'ron',
# Russian
'ru': 'rus',
# Slovak
'sk': 'slk',
# Slovenian
'sl': 'slv',
# Shona
'sn': 'sna',
# Sindhi
'sd': 'snd',
# Somali
'so': 'som',
# Spanish
'es': 'spa',
# Serbian
'sr': 'srp',
# Swedish
'sv': 'swe',
# Swahili
'sw': 'swh',
# Tamil
'ta': 'tam',
# Telugu
'te': 'tel',
# Tajik
'tg': 'tgk',
# Tagalog
'tl': 'tgl',
# Thai
'th': 'tha',
# Turkish
'tr': 'tur',
# Ukrainian
'uk': 'ukr',
# Urdu
'ur': 'urd',
# Northern Uzbek
'uz': 'uzn',
# Vietnamese
'vi': 'vie',
# Yoruba
'yo': 'yor',
# Cantonese
'yue': 'yue',
# Standard Malay
'ms': 'zsm',
# Zulu
'zu': 'zul'
} }
return seamless_lang_code.get(lang_code, "eng") return seamless_lang_code.get(lang_code, "eng")
@method() @method()
def translate_text( def translate_text(self, text: str, source_language: str, target_language: str):
self, with self.lock:
text: str,
source_language: str,
target_language: str
):
translated_text, _, _ = self.translator.predict( translated_text, _, _ = self.translator.predict(
text, text,
"t2tt", "t2tt",
src_lang=self.get_seamless_lang_code(source_language), src_lang=self.get_seamless_lang_code(source_language),
tgt_lang=self.get_seamless_lang_code(target_language), tgt_lang=self.get_seamless_lang_code(target_language),
ngram_filtering=True ngram_filtering=True,
) )
return { return {"text": {source_language: text, target_language: str(translated_text)}}
"text": {
source_language: text,
target_language: str(translated_text)
}
}
# ------------------------------------------------------------------- # -------------------------------------------------------------------
# Web API # Web API
# ------------------------------------------------------------------- # -------------------------------------------------------------------
@@ -189,6 +381,7 @@ class Translator:
@stub.function( @stub.function(
container_idle_timeout=60, container_idle_timeout=60,
timeout=60, timeout=60,
allow_concurrent_inputs=40,
secrets=[ secrets=[
Secret.from_name("reflector-gpu"), Secret.from_name("reflector-gpu"),
], ],
@@ -219,8 +412,8 @@ def web():
@app.post("/translate", dependencies=[Depends(apikey_auth)]) @app.post("/translate", dependencies=[Depends(apikey_auth)])
async def translate( async def translate(
text: str, text: str,
source_language: Annotated[str, Body(...)] = "eng", source_language: Annotated[str, Body(...)] = "en",
target_language: Annotated[str, Body(...)] = "fra", target_language: Annotated[str, Body(...)] = "fr",
) -> TranslateResponse: ) -> TranslateResponse:
func = translatorstub.translate_text.spawn( func = translatorstub.translate_text.spawn(
text=text, text=text,
@@ -230,8 +423,4 @@ def web():
result = func.get() result = func.get()
return result return result
@app.post("/warmup", dependencies=[Depends(apikey_auth)])
async def warmup():
return translatorstub.warmup.spawn().get()
return app return app

View File

@@ -8,7 +8,7 @@ API will be a POST request to TRANSCRIPT_URL:
"audio_url": "https://...", "audio_url": "https://...",
"audio_ext": "wav", "audio_ext": "wav",
"timestamp": 123.456 "timestamp": 123.456
"language": "eng" "language": "en"
} }
``` ```

View File

@@ -5,8 +5,8 @@ API will be a POST request to TRANSCRIPT_URL:
```form ```form
"timestamp": 123.456 "timestamp": 123.456
"source_language": "eng" "source_language": "en"
"target_language": "eng" "target_language": "en"
"file": <audio file> "file": <audio file>
``` ```
@@ -33,7 +33,7 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
files = { files = {
"file": (data.name, data.fd), "file": (data.name, data.fd),
} }
source_language = self.get_pref("audio:source_language", "eng") source_language = self.get_pref("audio:source_language", "en")
json_payload = {"source_language": source_language} json_payload = {"source_language": source_language}
response = await retry(client.post)( response = await retry(client.post)(
self.transcript_url, self.transcript_url,

View File

@@ -14,7 +14,7 @@ class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):
async def _transcript(self, data: AudioFile): async def _transcript(self, data: AudioFile):
segments, _ = self.model.transcribe( segments, _ = self.model.transcribe(
data.path.as_posix(), data.path.as_posix(),
language="eng", language="en",
beam_size=5, beam_size=5,
# condition_on_previous_text=True, # condition_on_previous_text=True,
word_timestamps=True, word_timestamps=True,

View File

@@ -79,7 +79,7 @@ class TranscriptFinalLongSummaryProcessor(Processor):
sentence = str(sentence).strip() sentence = str(sentence).strip()
if sentence.startswith("- "): if sentence.startswith("- "):
sentence.replace("- ", "* ") sentence.replace("- ", "* ")
else: elif not sentence.startswith("*"):
sentence = "* " + sentence sentence = "* " + sentence
sentence += " \n" sentence += " \n"
summary_sentences.append(sentence) summary_sentences.append(sentence)

View File

@@ -28,8 +28,8 @@ class TranscriptTranslatorProcessor(Processor):
# FIXME this should be a processor after, as each user may want # FIXME this should be a processor after, as each user may want
# different languages # different languages
source_language = self.get_pref("audio:source_language", "eng") source_language = self.get_pref("audio:source_language", "en")
target_language = self.get_pref("audio:target_language", "eng") target_language = self.get_pref("audio:target_language", "en")
if source_language == target_language: if source_language == target_language:
return return

View File

@@ -120,56 +120,196 @@ class FinalTitle(BaseModel):
# https://github.com/facebookresearch/seamless_communication/tree/main/scripts/m4t/predict#supported-languages # https://github.com/facebookresearch/seamless_communication/tree/main/scripts/m4t/predict#supported-languages
class TranslationLanguages(BaseModel): class TranslationLanguages(BaseModel):
language_to_id_mapping: dict = { language_to_id_mapping: dict = {
"afr": "Afrikaans", # Afrikaans
"azj": "North Azerbaijani", "af": "afr",
"bos": "Bosnian", # Amharic
"cat": "Catalan", "am": "amh",
"ceb": "Cebuano", # Modern Standard Arabic
"ces": "Czech", "ar": "arb",
"cym": "Welsh", # Moroccan Arabic
"dan": "Danish", "ary": "ary",
"deu": "German", # Egyptian Arabic
"eng": "English", "arz": "arz",
"est": "Estonian", # Assamese
"eus": "Basque", "as": "asm",
"fin": "Finnish", # North Azerbaijani
"fra": "French", "az": "azj",
"gaz": "West Central Oromo", # Belarusian
"gle": "Irish", "be": "bel",
"glg": "Galician", # Bengali
"hrv": "Croatian", "bn": "ben",
"hun": "Hungarian", # Bosnian
"ibo": "Igbo", "bs": "bos",
"ind": "Indonesian", # Bulgarian
"isl": "Icelandic", "bg": "bul",
"ita": "Italian", # Catalan
"jav": "Javanese", "ca": "cat",
"lit": "Lithuanian", # Cebuano
"lug": "Ganda", "ceb": "ceb",
"luo": "Luo", # Czech
"lvs": "Standard Latvian", "cs": "ces",
"mlt": "Maltese", # Central Kurdish
"nld": "Dutch", "ku": "ckb",
"nno": "Norwegian Nynorsk", # Mandarin Chinese
"nob": "Norwegian Bokmål", "cmn": "cmn_Hant",
"nya": "Nyanja", # Welsh
"pol": "Polish", "cy": "cym",
"por": "Portuguese", # Danish
"ron": "Romanian", "da": "dan",
"slk": "Slovak", # German
"slv": "Slovenian", "de": "deu",
"sna": "Shona", # Greek
"som": "Somali", "el": "ell",
"spa": "Spanish", # English
"swe": "Swedish", "en": "eng",
"swh": "Swahili", # Estonian
"tgl": "Tagalog", "et": "est",
"tur": "Turkish", # Basque
"uzn": "Northern Uzbek", "eu": "eus",
"vie": "Vietnamese", # Finnish
"yor": "Yoruba", "fi": "fin",
"zsm": "Standard Malay", # French
"zul": "Zulu", "fr": "fra",
# Irish
"ga": "gle",
# West Central Oromo
"gaz": "gaz",
# Galician
"gl": "glg",
# Gujarati
"gu": "guj",
# Hebrew
"he": "heb",
# Hindi
"hi": "hin",
# Croatian
"hr": "hrv",
# Hungarian
"hu": "hun",
# Armenian
"hy": "hye",
# Igbo
"ig": "ibo",
# Indonesian
"id": "ind",
# Icelandic
"is": "isl",
# Italian
"it": "ita",
# Javanese
"jv": "jav",
# Japanese
"ja": "jpn",
# Kannada
"kn": "kan",
# Georgian
"ka": "kat",
# Kazakh
"kk": "kaz",
# Halh Mongolian
"khk": "khk",
# Khmer
"km": "khm",
# Kyrgyz
"ky": "kir",
# Korean
"ko": "kor",
# Lao
"lo": "lao",
# Lithuanian
"lt": "lit",
# Ganda
"lg": "lug",
# Luo
"luo": "luo",
# Standard Latvian
"lv": "lvs",
# Maithili
"mai": "mai",
# Malayalam
"ml": "mal",
# Marathi
"mr": "mar",
# Macedonian
"mk": "mkd",
# Maltese
"mt": "mlt",
# Meitei
"mni": "mni",
# Burmese
"my": "mya",
# Dutch
"nl": "nld",
# Norwegian Nynorsk
"nn": "nno",
# Norwegian Bokmål
"nb": "nob",
# Nepali
"ne": "npi",
# Nyanja
"ny": "nya",
# Odia
"or": "ory",
# Punjabi
"pa": "pan",
# Southern Pashto
"pbt": "pbt",
# Western Persian
"pes": "pes",
# Polish
"pl": "pol",
# Portuguese
"pt": "por",
# Romanian
"ro": "ron",
# Russian
"ru": "rus",
# Slovak
"sk": "slk",
# Slovenian
"sl": "slv",
# Shona
"sn": "sna",
# Sindhi
"sd": "snd",
# Somali
"so": "som",
# Spanish
"es": "spa",
# Serbian
"sr": "srp",
# Swedish
"sv": "swe",
# Swahili
"sw": "swh",
# Tamil
"ta": "tam",
# Telugu
"te": "tel",
# Tajik
"tg": "tgk",
# Tagalog
"tl": "tgl",
# Thai
"th": "tha",
# Turkish
"tr": "tur",
# Ukrainian
"uk": "ukr",
# Urdu
"ur": "urd",
# Northern Uzbek
"uz": "uzn",
# Vietnamese
"vi": "vie",
# Yoruba
"yo": "yor",
# Cantonese
"yue": "yue",
# Standard Malay
"ms": "zsm",
# Zulu
"zu": "zul",
} }
@property @property
@@ -177,6 +317,4 @@ class TranslationLanguages(BaseModel):
return self.language_to_id_mapping.keys() return self.language_to_id_mapping.keys()
def is_supported(self, lang_id: str) -> bool: def is_supported(self, lang_id: str) -> bool:
if lang_id in self.supported_languages: return lang_id in self.supported_languages
return True
return False

View File

@@ -22,8 +22,8 @@ async def process_audio_file(
filename, filename,
event_callback, event_callback,
only_transcript=False, only_transcript=False,
source_language="eng", source_language="en",
target_language="eng", target_language="en",
): ):
# build pipeline for audio processing # build pipeline for audio processing
processors = [ processors = [
@@ -72,8 +72,8 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("source", help="Source file (mp3, wav, mp4...)") parser.add_argument("source", help="Source file (mp3, wav, mp4...)")
parser.add_argument("--only-transcript", "-t", action="store_true") parser.add_argument("--only-transcript", "-t", action="store_true")
parser.add_argument("--source-language", default="eng") parser.add_argument("--source-language", default="en")
parser.add_argument("--target-language", default="eng") parser.add_argument("--target-language", default="en")
parser.add_argument("--output", "-o", help="Output file (output.jsonl)") parser.add_argument("--output", "-o", help="Output file (output.jsonl)")
args = parser.parse_args() args = parser.parse_args()

View File

@@ -90,8 +90,8 @@ async def rtc_offer_base(
event_callback=None, event_callback=None,
event_callback_args=None, event_callback_args=None,
audio_filename: Path | None = None, audio_filename: Path | None = None,
source_language: str = "eng", source_language: str = "en",
target_language: str = "eng", target_language: str = "en",
): ):
# build an rtc session # build an rtc session
offer = RTCSessionDescription(sdp=params.sdp, type=params.type) offer = RTCSessionDescription(sdp=params.sdp, type=params.type)

View File

@@ -87,8 +87,8 @@ class Transcript(BaseModel):
long_summary: str | None = None long_summary: str | None = None
topics: list[TranscriptTopic] = [] topics: list[TranscriptTopic] = []
events: list[TranscriptEvent] = [] events: list[TranscriptEvent] = []
source_language: str = "eng" source_language: str = "en"
target_language: str = "eng" target_language: str = "en"
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent: def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
ev = TranscriptEvent(event=event, data=data.model_dump()) ev = TranscriptEvent(event=event, data=data.model_dump())
@@ -170,8 +170,8 @@ class TranscriptController:
async def add( async def add(
self, self,
name: str, name: str,
source_language: str = "eng", source_language: str = "en",
target_language: str = "eng", target_language: str = "en",
user_id: str | None = None, user_id: str | None = None,
): ):
transcript = Transcript( transcript = Transcript(
@@ -231,8 +231,8 @@ class GetTranscript(BaseModel):
class CreateTranscript(BaseModel): class CreateTranscript(BaseModel):
name: str name: str
source_language: str = Field("eng") source_language: str = Field("en")
target_language: str = Field("eng") target_language: str = Field("en")
class UpdateTranscript(BaseModel): class UpdateTranscript(BaseModel):

View File

@@ -46,7 +46,7 @@ async def dummy_transcript():
class TestAudioTranscriptProcessor(AudioTranscriptProcessor): class TestAudioTranscriptProcessor(AudioTranscriptProcessor):
async def _transcript(self, data: AudioFile): async def _transcript(self, data: AudioFile):
source_language = self.get_pref("audio:source_language", "eng") source_language = self.get_pref("audio:source_language", "en")
print("transcripting", source_language) print("transcripting", source_language)
print("pipeline", self.pipeline) print("pipeline", self.pipeline)
print("prefs", self.pipeline.prefs) print("prefs", self.pipeline.prefs)

View File

@@ -10,15 +10,15 @@ async def test_transcript_create_default_translation():
response = await ac.post("/transcripts", json={"name": "test en"}) response = await ac.post("/transcripts", json={"name": "test en"})
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["name"] == "test en" assert response.json()["name"] == "test en"
assert response.json()["source_language"] == "eng" assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "eng" assert response.json()["target_language"] == "en"
tid = response.json()["id"] tid = response.json()["id"]
response = await ac.get(f"/transcripts/{tid}") response = await ac.get(f"/transcripts/{tid}")
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["name"] == "test en" assert response.json()["name"] == "test en"
assert response.json()["source_language"] == "eng" assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "eng" assert response.json()["target_language"] == "en"
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -31,15 +31,15 @@ async def test_transcript_create_en_fr_translation():
) )
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["name"] == "test en/fr" assert response.json()["name"] == "test en/fr"
assert response.json()["source_language"] == "eng" assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "fra" assert response.json()["target_language"] == "fr"
tid = response.json()["id"] tid = response.json()["id"]
response = await ac.get(f"/transcripts/{tid}") response = await ac.get(f"/transcripts/{tid}")
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["name"] == "test en/fr" assert response.json()["name"] == "test en/fr"
assert response.json()["source_language"] == "eng" assert response.json()["source_language"] == "en"
assert response.json()["target_language"] == "fra" assert response.json()["target_language"] == "fr"
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -52,12 +52,12 @@ async def test_transcript_create_fr_en_translation():
) )
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["name"] == "test fr/en" assert response.json()["name"] == "test fr/en"
assert response.json()["source_language"] == "fra" assert response.json()["source_language"] == "fr"
assert response.json()["target_language"] == "eng" assert response.json()["target_language"] == "en"
tid = response.json()["id"] tid = response.json()["id"]
response = await ac.get(f"/transcripts/{tid}") response = await ac.get(f"/transcripts/{tid}")
assert response.status_code == 200 assert response.status_code == 200
assert response.json()["name"] == "test fr/en" assert response.json()["name"] == "test fr/en"
assert response.json()["source_language"] == "fra" assert response.json()["source_language"] == "fr"
assert response.json()["target_language"] == "eng" assert response.json()["target_language"] == "en"

View File

@@ -8,14 +8,18 @@ type LanguageOption = {
}; };
const supportedLanguages: LanguageOption[] = [ const supportedLanguages: LanguageOption[] = [
{ value: "afr", name: "Afrikaans", script: "Latn" },
{ {
value: "amh", value: "af",
name: "Afrikaans",
script: "Latn",
},
{
value: "am",
name: "Amharic", name: "Amharic",
script: "Ethi", script: "Ethi",
}, },
{ {
value: "arb", value: "ar",
name: "Modern Standard Arabic", name: "Modern Standard Arabic",
script: "Arab", script: "Arab",
}, },
@@ -30,37 +34,37 @@ const supportedLanguages: LanguageOption[] = [
script: "Arab", script: "Arab",
}, },
{ {
value: "asm", value: "as",
name: "Assamese", name: "Assamese",
script: "Beng", script: "Beng",
}, },
{ {
value: "azj", value: "az",
name: "North Azerbaijani", name: "North Azerbaijani",
script: "Latn", script: "Latn",
}, },
{ {
value: "bel", value: "be",
name: "Belarusian", name: "Belarusian",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "ben", value: "bn",
name: "Bengali", name: "Bengali",
script: "Beng", script: "Beng",
}, },
{ {
value: "bos", value: "bs",
name: "Bosnian", name: "Bosnian",
script: "Latn", script: "Latn",
}, },
{ {
value: "bul", value: "bg",
name: "Bulgarian", name: "Bulgarian",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "cat", value: "ca",
name: "Catalan", name: "Catalan",
script: "Latn", script: "Latn",
}, },
@@ -70,12 +74,12 @@ const supportedLanguages: LanguageOption[] = [
script: "Latn", script: "Latn",
}, },
{ {
value: "ces", value: "cs",
name: "Czech", name: "Czech",
script: "Latn", script: "Latn",
}, },
{ {
value: "ckb", value: "ku",
name: "Central Kurdish", name: "Central Kurdish",
script: "Arab", script: "Arab",
}, },
@@ -85,52 +89,47 @@ const supportedLanguages: LanguageOption[] = [
script: "Hans", script: "Hans",
}, },
{ {
value: "cmn_Ha", value: "cy",
name: "Mandarin Chinese",
script: "Hant",
},
{
value: "cym",
name: "Welsh", name: "Welsh",
script: "Latn", script: "Latn",
}, },
{ {
value: "dan", value: "da",
name: "Danish", name: "Danish",
script: "Latn", script: "Latn",
}, },
{ {
value: "deu", value: "de",
name: "German", name: "German",
script: "Latn", script: "Latn",
}, },
{ {
value: "ell", value: "el",
name: "Greek", name: "Greek",
script: "Grek", script: "Grek",
}, },
{ {
value: "eng", value: "en",
name: "English", name: "English",
script: "Latn", script: "Latn",
}, },
{ {
value: "est", value: "et",
name: "Estonian", name: "Estonian",
script: "Latn", script: "Latn",
}, },
{ {
value: "eus", value: "eu",
name: "Basque", name: "Basque",
script: "Latn", script: "Latn",
}, },
{ {
value: "fin", value: "fi",
name: "Finnish", name: "Finnish",
script: "Latn", script: "Latn",
}, },
{ {
value: "fra", value: "fr",
name: "French", name: "French",
script: "Latn", script: "Latn",
}, },
@@ -140,87 +139,87 @@ const supportedLanguages: LanguageOption[] = [
script: "Latn", script: "Latn",
}, },
{ {
value: "gle", value: "ga",
name: "Irish", name: "Irish",
script: "Latn", script: "Latn",
}, },
{ {
value: "glg", value: "gl",
name: "Galician", name: "Galician",
script: "Latn", script: "Latn",
}, },
{ {
value: "guj", value: "gu",
name: "Gujarati", name: "Gujarati",
script: "Gujr", script: "Gujr",
}, },
{ {
value: "heb", value: "he",
name: "Hebrew", name: "Hebrew",
script: "Hebr", script: "Hebr",
}, },
{ {
value: "hin", value: "hi",
name: "Hindi", name: "Hindi",
script: "Deva", script: "Deva",
}, },
{ {
value: "hrv", value: "hr",
name: "Croatian", name: "Croatian",
script: "Latn", script: "Latn",
}, },
{ {
value: "hun", value: "hu",
name: "Hungarian", name: "Hungarian",
script: "Latn", script: "Latn",
}, },
{ {
value: "hye", value: "hy",
name: "Armenian", name: "Armenian",
script: "Armn", script: "Armn",
}, },
{ {
value: "ibo", value: "ig",
name: "Igbo", name: "Igbo",
script: "Latn", script: "Latn",
}, },
{ {
value: "ind", value: "id",
name: "Indonesian", name: "Indonesian",
script: "Latn", script: "Latn",
}, },
{ {
value: "isl", value: "is",
name: "Icelandic", name: "Icelandic",
script: "Latn", script: "Latn",
}, },
{ {
value: "ita", value: "it",
name: "Italian", name: "Italian",
script: "Latn", script: "Latn",
}, },
{ {
value: "jav", value: "jv",
name: "Javanese", name: "Javanese",
script: "Latn", script: "Latn",
}, },
{ {
value: "jpn", value: "ja",
name: "Japanese", name: "Japanese",
script: "Jpan", script: "Jpan",
}, },
{ {
value: "kan", value: "kn",
name: "Kannada", name: "Kannada",
script: "Knda", script: "Knda",
}, },
{ {
value: "kat", value: "ka",
name: "Georgian", name: "Georgian",
script: "Geor", script: "Geor",
}, },
{ {
value: "kaz", value: "kk",
name: "Kazakh", name: "Kazakh",
script: "Cyrl", script: "Cyrl",
}, },
@@ -230,32 +229,32 @@ const supportedLanguages: LanguageOption[] = [
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "khm", value: "km",
name: "Khmer", name: "Khmer",
script: "Khmr", script: "Khmr",
}, },
{ {
value: "kir", value: "ky",
name: "Kyrgyz", name: "Kyrgyz",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "kor", value: "ko",
name: "Korean", name: "Korean",
script: "Kore", script: "Kore",
}, },
{ {
value: "lao", value: "lo",
name: "Lao", name: "Lao",
script: "Laoo", script: "Laoo",
}, },
{ {
value: "lit", value: "lt",
name: "Lithuanian", name: "Lithuanian",
script: "Latn", script: "Latn",
}, },
{ {
value: "lug", value: "lg",
name: "Ganda", name: "Ganda",
script: "Latn", script: "Latn",
}, },
@@ -265,7 +264,7 @@ const supportedLanguages: LanguageOption[] = [
script: "Latn", script: "Latn",
}, },
{ {
value: "lvs", value: "lv",
name: "Standard Latvian", name: "Standard Latvian",
script: "Latn", script: "Latn",
}, },
@@ -275,22 +274,22 @@ const supportedLanguages: LanguageOption[] = [
script: "Deva", script: "Deva",
}, },
{ {
value: "mal", value: "ml",
name: "Malayalam", name: "Malayalam",
script: "Mlym", script: "Mlym",
}, },
{ {
value: "mar", value: "mr",
name: "Marathi", name: "Marathi",
script: "Deva", script: "Deva",
}, },
{ {
value: "mkd", value: "mk",
name: "Macedonian", name: "Macedonian",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "mlt", value: "mt",
name: "Maltese", name: "Maltese",
script: "Latn", script: "Latn",
}, },
@@ -300,42 +299,42 @@ const supportedLanguages: LanguageOption[] = [
script: "Beng", script: "Beng",
}, },
{ {
value: "mya", value: "my",
name: "Burmese", name: "Burmese",
script: "Mymr", script: "Mymr",
}, },
{ {
value: "nld", value: "nl",
name: "Dutch", name: "Dutch",
script: "Latn", script: "Latn",
}, },
{ {
value: "nno", value: "nn",
name: "Norwegian Nynorsk", name: "Norwegian Nynorsk",
script: "Latn", script: "Latn",
}, },
{ {
value: "nob", value: "nb",
name: "Norwegian Bokmål", name: "Norwegian Bokmål",
script: "Latn", script: "Latn",
}, },
{ {
value: "npi", value: "ne",
name: "Nepali", name: "Nepali",
script: "Deva", script: "Deva",
}, },
{ {
value: "nya", value: "ny",
name: "Nyanja", name: "Nyanja",
script: "Latn", script: "Latn",
}, },
{ {
value: "ory", value: "or",
name: "Odia", name: "Odia",
script: "Orya", script: "Orya",
}, },
{ {
value: "pan", value: "pa",
name: "Punjabi", name: "Punjabi",
script: "Guru", script: "Guru",
}, },
@@ -350,122 +349,122 @@ const supportedLanguages: LanguageOption[] = [
script: "Arab", script: "Arab",
}, },
{ {
value: "pol", value: "pl",
name: "Polish", name: "Polish",
script: "Latn", script: "Latn",
}, },
{ {
value: "por", value: "pt",
name: "Portuguese", name: "Portuguese",
script: "Latn", script: "Latn",
}, },
{ {
value: "ron", value: "ro",
name: "Romanian", name: "Romanian",
script: "Latn", script: "Latn",
}, },
{ {
value: "rus", value: "ru",
name: "Russian", name: "Russian",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "slk", value: "sk",
name: "Slovak", name: "Slovak",
script: "Latn", script: "Latn",
}, },
{ {
value: "slv", value: "sl",
name: "Slovenian", name: "Slovenian",
script: "Latn", script: "Latn",
}, },
{ {
value: "sna", value: "sn",
name: "Shona", name: "Shona",
script: "Latn", script: "Latn",
}, },
{ {
value: "snd", value: "sd",
name: "Sindhi", name: "Sindhi",
script: "Arab", script: "Arab",
}, },
{ {
value: "som", value: "so",
name: "Somali", name: "Somali",
script: "Latn", script: "Latn",
}, },
{ {
value: "spa", value: "es",
name: "Spanish", name: "Spanish",
script: "Latn", script: "Latn",
}, },
{ {
value: "srp", value: "sr",
name: "Serbian", name: "Serbian",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "swe", value: "sv",
name: "Swedish", name: "Swedish",
script: "Latn", script: "Latn",
}, },
{ {
value: "swh", value: "sw",
name: "Swahili", name: "Swahili",
script: "Latn", script: "Latn",
}, },
{ {
value: "tam", value: "ta",
name: "Tamil", name: "Tamil",
script: "Taml", script: "Taml",
}, },
{ {
value: "tel", value: "te",
name: "Telugu", name: "Telugu",
script: "Telu", script: "Telu",
}, },
{ {
value: "tgk", value: "tg",
name: "Tajik", name: "Tajik",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "tgl", value: "tl",
name: "Tagalog", name: "Tagalog",
script: "Latn", script: "Latn",
}, },
{ {
value: "tha", value: "th",
name: "Thai", name: "Thai",
script: "Thai", script: "Thai",
}, },
{ {
value: "tur", value: "tr",
name: "Turkish", name: "Turkish",
script: "Latn", script: "Latn",
}, },
{ {
value: "ukr", value: "uk",
name: "Ukrainian", name: "Ukrainian",
script: "Cyrl", script: "Cyrl",
}, },
{ {
value: "urd", value: "ur",
name: "Urdu", name: "Urdu",
script: "Arab", script: "Arab",
}, },
{ {
value: "uzn", value: "uz",
name: "Northern Uzbek", name: "Northern Uzbek",
script: "Latn", script: "Latn",
}, },
{ {
value: "vie", value: "vi",
name: "Vietnamese", name: "Vietnamese",
script: "Latn", script: "Latn",
}, },
{ {
value: "yor", value: "yo",
name: "Yoruba", name: "Yoruba",
script: "Latn", script: "Latn",
}, },
@@ -475,12 +474,12 @@ const supportedLanguages: LanguageOption[] = [
script: "Hant", script: "Hant",
}, },
{ {
value: "zsm", value: "ms",
name: "Standard Malay", name: "Standard Malay",
script: "Latn", script: "Latn",
}, },
{ {
value: "zul", value: "zu",
name: "Zulu", name: "Zulu",
script: "Latn", script: "Latn",
}, },

View File

@@ -25,7 +25,7 @@ const useCreateTranscript = (): CreateTranscript => {
const requestParameters: V1TranscriptsCreateRequest = { const requestParameters: V1TranscriptsCreateRequest = {
createTranscript: { createTranscript: {
name: params.name || "Weekly All-Hands", // Default name: params.name || "Weekly All-Hands", // Default
targetLanguage: params.targetLanguage || "eng", // Default targetLanguage: params.targetLanguage || "en", // Default
}, },
}; };