From 012390d0aac7fe5bc88f390a7ee358a3c2d42132 Mon Sep 17 00:00:00 2001
From: Gokul Mohanarangan
Date: Wed, 30 Aug 2023 10:43:51 +0530
Subject: [PATCH] backup

---
 server/gpu/modal/reflector_llm.py         |  2 +-
 server/gpu/modal/reflector_transcriber.py | 42 +++++++++++++++++++----
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/server/gpu/modal/reflector_llm.py b/server/gpu/modal/reflector_llm.py
index 1a3f77d6..89580466 100644
--- a/server/gpu/modal/reflector_llm.py
+++ b/server/gpu/modal/reflector_llm.py
@@ -38,7 +38,7 @@ def migrate_cache_llm():
     from transformers.utils.hub import move_cache
 
     print("Moving LLM cache")
-    move_cache()
+    move_cache(cache_dir=IMAGE_MODEL_DIR)
     print("LLM cache moved")
 
 
diff --git a/server/gpu/modal/reflector_transcriber.py b/server/gpu/modal/reflector_transcriber.py
index f06706c8..84b24bb7 100644
--- a/server/gpu/modal/reflector_transcriber.py
+++ b/server/gpu/modal/reflector_transcriber.py
@@ -13,19 +13,40 @@ from pydantic import BaseModel
 WHISPER_MODEL: str = "large-v2"
 WHISPER_COMPUTE_TYPE: str = "float16"
 WHISPER_NUM_WORKERS: int = 1
-WHISPER_CACHE_DIR: str = "/cache/whisper"
+
+MODEL_DIR = "/model"
 
 # Translation Model
 TRANSLATION_MODEL = "facebook/m2m100_418M"
 
-stub = Stub(name="reflector-transcriber")
+stub = Stub(name="reflector-transtest")
 
 
-def download_whisper():
+def download_models():
     from faster_whisper.utils import download_model
+    from huggingface_hub import snapshot_download
 
-    download_model(WHISPER_MODEL, local_files_only=False)
+    print("Downloading Whisper model")
+    download_model(WHISPER_MODEL)
+    print("Whisper model downloaded")
 
+    print("Downloading Translation model")
+    ignore_patterns = ["*.ot"]
+    snapshot_download(TRANSLATION_MODEL, cache_dir=MODEL_DIR, ignore_patterns=ignore_patterns)
+    print("Translation model downloaded")
+
+def migrate_cache_llm():
+    """
+    XXX The cache for model files in Transformers v4.22.0 has been updated.
+    Migrating your old cache. This is a one-time only operation. You can
+    interrupt this and resume the migration later on by calling
+    `transformers.utils.move_cache()`.
+    """
+    from transformers.utils.hub import move_cache
+
+    print("Moving LLM cache")
+    move_cache()
+    print("LLM cache moved")
 
 whisper_image = (
     Image.debian_slim(python_version="3.10.8")
@@ -38,7 +59,8 @@ whisper_image = (
         "sentencepiece",
         "protobuf",
     )
-    .run_function(download_whisper)
+    .run_function(download_models)
+    .run_function(migrate_cache_llm)
     .env(
         {
             "LD_LIBRARY_PATH": (
@@ -69,8 +91,14 @@ class Whisper:
             compute_type=WHISPER_COMPUTE_TYPE,
             num_workers=WHISPER_NUM_WORKERS,
         )
-        self.translation_model = M2M100ForConditionalGeneration.from_pretrained(TRANSLATION_MODEL).to(self.device)
-        self.translation_tokenizer = M2M100Tokenizer.from_pretrained(TRANSLATION_MODEL)
+        self.translation_model = M2M100ForConditionalGeneration.from_pretrained(
+            TRANSLATION_MODEL,
+            cache_dir=MODEL_DIR
+        ).to(self.device)
+        self.translation_tokenizer = M2M100Tokenizer.from_pretrained(
+            TRANSLATION_MODEL,
+            cache_dir=MODEL_DIR
+        )
 
     @method()
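
Note: the patch loads the M2M100 model and tokenizer into the Whisper class but
does not show them in use. A minimal sketch of what a translation call could
look like inside that class (the `translate` method name and language codes are
illustrative assumptions, not part of this patch):

    def translate(self, text: str, source_lang: str = "en", target_lang: str = "fr") -> str:
        # hypothetical helper; assumes self.translation_model and
        # self.translation_tokenizer were set up in __enter__ as in the hunk above
        self.translation_tokenizer.src_lang = source_lang
        encoded = self.translation_tokenizer(text, return_tensors="pt").to(self.device)
        # M2M100 selects the output language via a forced beginning-of-sentence token
        generated = self.translation_model.generate(
            **encoded,
            forced_bos_token_id=self.translation_tokenizer.get_lang_id(target_lang),
        )
        return self.translation_tokenizer.batch_decode(generated, skip_special_tokens=True)[0]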
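
Note: download_models writes the translation snapshot into MODEL_DIR at image
build time, and the from_pretrained calls read it back with cache_dir=MODEL_DIR
at runtime. A sanity check along these lines (illustrative only; the expected
path follows the standard huggingface_hub cache layout) can confirm the weights
were baked into the image rather than re-downloaded on cold start:

    import os

    # snapshot_download(..., cache_dir="/model") stores repos under
    # /model/models--<org>--<name>/snapshots/<revision>/
    expected = os.path.join("/model", "models--" + "facebook/m2m100_418M".replace("/", "--"))
    assert os.path.isdir(expected), f"translation weights not baked at {expected}"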