From 628c69f81c69ecb69aa110f42707bc804e6adc4b Mon Sep 17 00:00:00 2001 From: projects-g <63178974+projects-g@users.noreply.github.com> Date: Fri, 13 Oct 2023 22:01:21 +0530 Subject: [PATCH] Separate out transcription and translation into own Modal deployments (#268) * abstract transcript/translate into separate GPU apps * update app names * update transformers library version * update env.example file --- server/env.example | 1 + server/gpu/modal/reflector_transcriber.py | 138 +--------- server/gpu/modal/reflector_translator.py | 237 ++++++++++++++++++ .../processors/transcript_translator.py | 6 +- server/reflector/settings.py | 4 + 5 files changed, 246 insertions(+), 140 deletions(-) create mode 100644 server/gpu/modal/reflector_translator.py diff --git a/server/env.example b/server/env.example index 4e9b7311..8c4dcdab 100644 --- a/server/env.example +++ b/server/env.example @@ -48,6 +48,7 @@ ## Using serverless modal.com (require reflector-gpu-modal deployed) #TRANSCRIPT_BACKEND=modal #TRANSCRIPT_URL=https://xxxxx--reflector-transcriber-web.modal.run +#TRANSLATE_URL=https://xxxxx--reflector-translator-web.modal.run #TRANSCRIPT_MODAL_API_KEY=xxxxx ## Using serverless banana.dev (require reflector-gpu-banana deployed) diff --git a/server/gpu/modal/reflector_transcriber.py b/server/gpu/modal/reflector_transcriber.py index 3404cfe4..69558c8e 100644 --- a/server/gpu/modal/reflector_transcriber.py +++ b/server/gpu/modal/reflector_transcriber.py @@ -14,34 +14,12 @@ WHISPER_MODEL: str = "large-v2" WHISPER_COMPUTE_TYPE: str = "float16" WHISPER_NUM_WORKERS: int = 1 -# Seamless M4T -SEAMLESSM4T_MODEL_SIZE: str = "medium" -SEAMLESSM4T_MODEL_CARD_NAME: str = f"seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}" -SEAMLESSM4T_VOCODER_CARD_NAME: str = "vocoder_36langs" - -HF_SEAMLESS_M4TEPO: str = f"facebook/seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}" -HF_SEAMLESS_M4T_VOCODEREPO: str = "facebook/seamless-m4t-vocoder" - -SEAMLESS_GITEPO: str = "https://github.com/facebookresearch/seamless_communication.git" -SEAMLESS_MODEL_DIR: str = "m4t" WHISPER_MODEL_DIR = "/root/transcription_models" stub = Stub(name="reflector-transcriber") -def install_seamless_communication(): - import os - import subprocess - initial_dir = os.getcwd() - subprocess.run(["ssh-keyscan", "-t", "rsa", "github.com", ">>", "~/.ssh/known_hosts"]) - subprocess.run(["rm", "-rf", "seamless_communication"]) - subprocess.run(["git", "clone", SEAMLESS_GITEPO, "." + "/seamless_communication"]) - os.chdir("seamless_communication") - subprocess.run(["pip", "install", "-e", "."]) - os.chdir(initial_dir) - - def download_whisper(): from faster_whisper.utils import download_model @@ -50,18 +28,6 @@ def download_whisper(): print("Whisper model downloaded") -def download_seamlessm4t_model(): - from huggingface_hub import snapshot_download - - print("Downloading Transcriber model & tokenizer") - snapshot_download(HF_SEAMLESS_M4TEPO, cache_dir=SEAMLESS_MODEL_DIR) - print("Transcriber model & tokenizer downloaded") - - print("Downloading vocoder weights") - snapshot_download(HF_SEAMLESS_M4T_VOCODEREPO, cache_dir=SEAMLESS_MODEL_DIR) - print("Vocoder weights downloaded") - - def migrate_cache_llm(): """ XXX The cache for model files in Transformers v4.22.0 has been updated. 
@@ -76,52 +42,6 @@ def migrate_cache_llm(): print("LLM cache moved") -def configure_seamless_m4t(): - import os - - import yaml - - ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards" - - with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'r') as file: - model_yaml_data = yaml.load(file, Loader=yaml.FullLoader) - with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'r') as file: - vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader) - with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'r') as file: - unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader) - with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'r') as file: - unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader) - - model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots" - available_model_versions = os.listdir(model_dir) - latest_model_version = sorted(available_model_versions)[-1] - model_name = f"multitask_unity_{SEAMLESSM4T_MODEL_SIZE}.pt" - model_path = os.path.join(os.getcwd(), model_dir, latest_model_version, model_name) - - vocoder_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-vocoder/snapshots" - available_vocoder_versions = os.listdir(vocoder_dir) - latest_vocoder_version = sorted(available_vocoder_versions)[-1] - vocoder_name = "vocoder_36langs.pt" - vocoder_path = os.path.join(os.getcwd(), vocoder_dir, latest_vocoder_version, vocoder_name) - - tokenizer_name = "tokenizer.model" - tokenizer_path = os.path.join(os.getcwd(), model_dir, latest_model_version, tokenizer_name) - - model_yaml_data['checkpoint'] = f"file:/{model_path}" - vocoder_yaml_data['checkpoint'] = f"file:/{vocoder_path}" - unity_100_yaml_data['tokenizer'] = f"file:/{tokenizer_path}" - unity_200_yaml_data['tokenizer'] = f"file:/{tokenizer_path}" - - with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'w') as file: - yaml.dump(model_yaml_data, file) - with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'w') as file: - yaml.dump(vocoder_yaml_data, file) - with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'w') as file: - yaml.dump(unity_100_yaml_data, file) - with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'w') as file: - yaml.dump(unity_200_yaml_data, file) - - transcriber_image = ( Image.debian_slim(python_version="3.10.8") .apt_install("git") @@ -131,7 +51,7 @@ transcriber_image = ( "faster-whisper", "requests", "torch", - "transformers", + "transformers==4.34.0", "sentencepiece", "protobuf", "huggingface_hub==0.16.4", @@ -141,9 +61,6 @@ transcriber_image = ( "pyyaml", "hf-transfer~=0.1" ) - .run_function(install_seamless_communication) - .run_function(download_seamlessm4t_model) - .run_function(configure_seamless_m4t) .run_function(download_whisper) .run_function(migrate_cache_llm) .env( @@ -167,7 +84,6 @@ class Transcriber: def __enter__(self): import faster_whisper import torch - from seamless_communication.models.inference.translator import Translator self.use_gpu = torch.cuda.is_available() self.device = "cuda" if self.use_gpu else "cpu" @@ -178,12 +94,6 @@ class Transcriber: num_workers=WHISPER_NUM_WORKERS, download_root=WHISPER_MODEL_DIR ) - self.translator = Translator( - SEAMLESSM4T_MODEL_CARD_NAME, - SEAMLESSM4T_VOCODER_CARD_NAME, - torch.device(self.device), - dtype=torch.float32 - ) @method() def transcribe_segment( @@ -229,38 +139,6 @@ class Transcriber: "words": words } - def get_seamless_lang_code(self, lang_code: str): - """ - The codes for SeamlessM4T is different from regular standards. 
-        For ex, French is "fra" and not "fr".
-        """
-        # TODO: Enhance with complete list of lang codes
-        seamless_lang_code = {
-            "en": "eng",
-            "fr": "fra"
-        }
-        return seamless_lang_code.get(lang_code, "eng")
-
-    @method()
-    def translate_text(
-        self,
-        text: str,
-        source_language: str,
-        target_language: str
-    ):
-        translated_text, _, _ = self.translator.predict(
-            text,
-            "t2tt",
-            src_lang=self.get_seamless_lang_code(source_language),
-            tgt_lang=self.get_seamless_lang_code(target_language),
-            ngram_filtering=True
-        )
-        return {
-            "text": {
-                source_language: text,
-                target_language: str(translated_text)
-            }
-        }
 # -------------------------------------------------------------------
 # Web API
 # -------------------------------------------------------------------
@@ -316,18 +194,4 @@ def web():
         result = func.get()
         return result
 
-    @app.post("/translate", dependencies=[Depends(apikey_auth)])
-    async def translate(
-        text: str,
-        source_language: Annotated[str, Body(...)] = "en",
-        target_language: Annotated[str, Body(...)] = "fr",
-    ) -> TranscriptResponse:
-        func = transcriberstub.translate_text.spawn(
-            text=text,
-            source_language=source_language,
-            target_language=target_language,
-        )
-        result = func.get()
-        return result
-
     return app
diff --git a/server/gpu/modal/reflector_translator.py b/server/gpu/modal/reflector_translator.py
new file mode 100644
index 00000000..69ea719a
--- /dev/null
+++ b/server/gpu/modal/reflector_translator.py
@@ -0,0 +1,237 @@
+"""
+Reflector GPU backend - translator
+==================================
+"""
+
+import os
+import tempfile
+
+from modal import Image, Secret, Stub, asgi_app, method
+from pydantic import BaseModel
+
+# Seamless M4T
+SEAMLESSM4T_MODEL_SIZE: str = "medium"
+SEAMLESSM4T_MODEL_CARD_NAME: str = f"seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}"
+SEAMLESSM4T_VOCODER_CARD_NAME: str = "vocoder_36langs"
+
+HF_SEAMLESS_M4TEPO: str = f"facebook/seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}"
+HF_SEAMLESS_M4T_VOCODEREPO: str = "facebook/seamless-m4t-vocoder"
+
+SEAMLESS_GITEPO: str = "https://github.com/facebookresearch/seamless_communication.git"
+SEAMLESS_MODEL_DIR: str = "m4t"
+
+stub = Stub(name="reflector-translator")
+
+
+def install_seamless_communication():
+    import os
+    import subprocess
+    initial_dir = os.getcwd()
+    # run through a shell so the >> redirection actually appends to known_hosts
+    subprocess.run("ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts", shell=True)
+    subprocess.run(["rm", "-rf", "seamless_communication"])
+    subprocess.run(["git", "clone", SEAMLESS_GITEPO, "." + "/seamless_communication"])
+ "/seamless_communication"]) + os.chdir("seamless_communication") + subprocess.run(["pip", "install", "-e", "."]) + os.chdir(initial_dir) + + +def download_seamlessm4t_model(): + from huggingface_hub import snapshot_download + + print("Downloading Transcriber model & tokenizer") + snapshot_download(HF_SEAMLESS_M4TEPO, cache_dir=SEAMLESS_MODEL_DIR) + print("Transcriber model & tokenizer downloaded") + + print("Downloading vocoder weights") + snapshot_download(HF_SEAMLESS_M4T_VOCODEREPO, cache_dir=SEAMLESS_MODEL_DIR) + print("Vocoder weights downloaded") + + +def configure_seamless_m4t(): + import os + + import yaml + + ASSETS_DIR: str = "./seamless_communication/src/seamless_communication/assets/cards" + + with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'r') as file: + model_yaml_data = yaml.load(file, Loader=yaml.FullLoader) + with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'r') as file: + vocoder_yaml_data = yaml.load(file, Loader=yaml.FullLoader) + with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'r') as file: + unity_100_yaml_data = yaml.load(file, Loader=yaml.FullLoader) + with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'r') as file: + unity_200_yaml_data = yaml.load(file, Loader=yaml.FullLoader) + + model_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-{SEAMLESSM4T_MODEL_SIZE}/snapshots" + available_model_versions = os.listdir(model_dir) + latest_model_version = sorted(available_model_versions)[-1] + model_name = f"multitask_unity_{SEAMLESSM4T_MODEL_SIZE}.pt" + model_path = os.path.join(os.getcwd(), model_dir, latest_model_version, model_name) + + vocoder_dir = f"{SEAMLESS_MODEL_DIR}/models--facebook--seamless-m4t-vocoder/snapshots" + available_vocoder_versions = os.listdir(vocoder_dir) + latest_vocoder_version = sorted(available_vocoder_versions)[-1] + vocoder_name = "vocoder_36langs.pt" + vocoder_path = os.path.join(os.getcwd(), vocoder_dir, latest_vocoder_version, vocoder_name) + + tokenizer_name = "tokenizer.model" + tokenizer_path = os.path.join(os.getcwd(), model_dir, latest_model_version, tokenizer_name) + + model_yaml_data['checkpoint'] = f"file:/{model_path}" + vocoder_yaml_data['checkpoint'] = f"file:/{vocoder_path}" + unity_100_yaml_data['tokenizer'] = f"file:/{tokenizer_path}" + unity_200_yaml_data['tokenizer'] = f"file:/{tokenizer_path}" + + with open(f'{ASSETS_DIR}/seamlessM4T_{SEAMLESSM4T_MODEL_SIZE}.yaml', 'w') as file: + yaml.dump(model_yaml_data, file) + with open(f'{ASSETS_DIR}/vocoder_36langs.yaml', 'w') as file: + yaml.dump(vocoder_yaml_data, file) + with open(f'{ASSETS_DIR}/unity_nllb-100.yaml', 'w') as file: + yaml.dump(unity_100_yaml_data, file) + with open(f'{ASSETS_DIR}/unity_nllb-200.yaml', 'w') as file: + yaml.dump(unity_200_yaml_data, file) + + +transcriber_image = ( + Image.debian_slim(python_version="3.10.8") + .apt_install("git") + .apt_install("wget") + .apt_install("libsndfile-dev") + .pip_install( + "requests", + "torch", + "transformers==4.34.0", + "sentencepiece", + "protobuf", + "huggingface_hub==0.16.4", + "gitpython", + "torchaudio", + "fairseq2", + "pyyaml", + "hf-transfer~=0.1" + ) + .run_function(install_seamless_communication) + .run_function(download_seamlessm4t_model) + .run_function(configure_seamless_m4t) + .env( + { + "LD_LIBRARY_PATH": ( + "/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib/:" + "/opt/conda/lib/python3.10/site-packages/nvidia/cublas/lib/" + ) + } + ) +) + + +@stub.cls( + gpu="A10G", + timeout=60 * 5, + container_idle_timeout=60 * 5, + image=transcriber_image, +) +class Translator: + 
+    def __enter__(self):
+        import torch
+        from seamless_communication.models.inference.translator import Translator
+
+        self.use_gpu = torch.cuda.is_available()
+        self.device = "cuda" if self.use_gpu else "cpu"
+        self.translator = Translator(
+            SEAMLESSM4T_MODEL_CARD_NAME,
+            SEAMLESSM4T_VOCODER_CARD_NAME,
+            torch.device(self.device),
+            dtype=torch.float32
+        )
+
+    @method()
+    def warmup(self):
+        return {"status": "ok"}
+
+    def get_seamless_lang_code(self, lang_code: str):
+        """
+        SeamlessM4T language codes differ from the usual ISO 639-1 codes.
+        For example, French is "fra", not "fr".
+        """
+        # TODO: Enhance with complete list of lang codes
+        seamless_lang_code = {
+            "en": "eng",
+            "fr": "fra"
+        }
+        return seamless_lang_code.get(lang_code, "eng")
+
+    @method()
+    def translate_text(
+        self,
+        text: str,
+        source_language: str,
+        target_language: str
+    ):
+        translated_text, _, _ = self.translator.predict(
+            text,
+            "t2tt",
+            src_lang=self.get_seamless_lang_code(source_language),
+            tgt_lang=self.get_seamless_lang_code(target_language),
+            ngram_filtering=True
+        )
+        return {
+            "text": {
+                source_language: text,
+                target_language: str(translated_text)
+            }
+        }
+
+
+# -------------------------------------------------------------------
+# Web API
+# -------------------------------------------------------------------
+
+
+@stub.function(
+    container_idle_timeout=60,
+    timeout=60,
+    secrets=[
+        Secret.from_name("reflector-gpu"),
+    ],
+)
+@asgi_app()
+def web():
+    from fastapi import Body, Depends, FastAPI, HTTPException, status
+    from fastapi.security import OAuth2PasswordBearer
+    from typing_extensions import Annotated
+
+    translatorstub = Translator()
+
+    app = FastAPI()
+
+    oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
+
+    def apikey_auth(apikey: str = Depends(oauth2_scheme)):
+        if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Invalid API key",
+                headers={"WWW-Authenticate": "Bearer"},
+            )
+
+    class TranslateResponse(BaseModel):
+        # mirrors the {"text": {...}} payload returned by translate_text
+        text: dict
+
+    @app.post("/translate", dependencies=[Depends(apikey_auth)])
+    async def translate(
+        text: str,
+        source_language: Annotated[str, Body(...)] = "en",
+        target_language: Annotated[str, Body(...)] = "fr",
+    ) -> TranslateResponse:
+        func = translatorstub.translate_text.spawn(
+            text=text,
+            source_language=source_language,
+            target_language=target_language,
+        )
+        result = func.get()
+        return result
+
+    @app.post("/warmup", dependencies=[Depends(apikey_auth)])
+    async def warmup():
+        return translatorstub.warmup.spawn().get()
+
+    return app
diff --git a/server/reflector/processors/transcript_translator.py b/server/reflector/processors/transcript_translator.py
index ae2c68e1..77b8f5be 100644
--- a/server/reflector/processors/transcript_translator.py
+++ b/server/reflector/processors/transcript_translator.py
@@ -16,8 +16,8 @@ class TranscriptTranslatorProcessor(Processor):
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
-        self.transcript_url = settings.TRANSCRIPT_URL
-        self.timeout = settings.TRANSCRIPT_TIMEOUT
+        self.translate_url = settings.TRANSLATE_URL
+        self.timeout = settings.TRANSLATE_TIMEOUT
         self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
 
     async def _push(self, data: Transcript):
@@ -46,7 +46,7 @@ class TranscriptTranslatorProcessor(Processor):
 
         async with httpx.AsyncClient() as client:
             response = await retry(client.post)(
-                settings.TRANSCRIPT_URL + "/translate",
+                self.translate_url + "/translate",
                 headers=self.headers,
                 params=json_payload,
timeout=self.timeout, diff --git a/server/reflector/settings.py b/server/reflector/settings.py index 4cec6b96..fa2d1296 100644 --- a/server/reflector/settings.py +++ b/server/reflector/settings.py @@ -38,6 +38,10 @@ class Settings(BaseSettings): TRANSCRIPT_URL: str | None = None TRANSCRIPT_TIMEOUT: int = 90 + # Translate into the target language + TRANSLATE_URL: str | None = None + TRANSLATE_TIMEOUT: int = 90 + # Audio transcription banana.dev configuration TRANSCRIPT_BANANA_API_KEY: str | None = None TRANSCRIPT_BANANA_MODEL_KEY: str | None = None
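Deployment note: transcription and translation are now two separate Modal apps, so each must be deployed on its own (e.g. modal deploy server/gpu/modal/reflector_transcriber.py and modal deploy server/gpu/modal/reflector_translator.py), yielding the two URLs referenced in env.example.

For a quick smoke test of the new translator deployment, the sketch below mirrors how TranscriptTranslatorProcessor calls it: the text travels as a query parameter, the language pair as JSON body fields, and the API key as a Bearer token. This is a minimal sketch, not part of the patch; the URL and key values are placeholders for your own deployment.

    import httpx

    TRANSLATE_URL = "https://xxxxx--reflector-translator-web.modal.run"  # placeholder, see env.example
    API_KEY = "xxxxx"  # placeholder: the REFLECTOR_GPU_APIKEY stored in the reflector-gpu secret

    def translate(text: str, source_language: str = "en", target_language: str = "fr") -> dict:
        # POST /translate: `text` in the query string, the languages in the
        # JSON body, matching the FastAPI signature in reflector_translator.py
        response = httpx.post(
            TRANSLATE_URL + "/translate",
            headers={"Authorization": f"Bearer {API_KEY}"},
            params={"text": text},
            json={"source_language": source_language, "target_language": target_language},
            timeout=90,
        )
        response.raise_for_status()
        return response.json()

    # expected shape: {"text": {"en": "Hello", "fr": "Bonjour"}}
    print(translate("Hello"))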