keep models in cache and load from cache

commit 9a7b89adaa
parent 2bed312e64
Author: Gokul Mohanarangan
Date:   2023-09-08 10:05:17 +05:30

2 changed files with 19 additions and 7 deletions

File 1 of 2: the Modal LLM stub (stub name "reflector-llm")

@@ -17,9 +17,8 @@ LLM_TORCH_DTYPE: str = "bfloat16"
 LLM_MAX_NEW_TOKENS: int = 300
 IMAGE_MODEL_DIR = "/root/llm_models"
-volume = modal.NetworkFileSystem.persisted("reflector-llm-models")
-stub = Stub(name="reflector-llmtest1")
+stub = Stub(name="reflector-llm")

 def download_llm():
@@ -40,7 +39,7 @@ def migrate_cache_llm():
     from transformers.utils.hub import move_cache

     print("Moving LLM cache")
-    move_cache()
+    move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)
     print("LLM cache moved")
@@ -60,6 +59,7 @@ llm_image = (
     )
     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
     .run_function(download_llm)
+    .run_function(migrate_cache_llm)
 )
@@ -69,7 +69,6 @@ llm_image = (
     container_idle_timeout=60 * 5,
     concurrency_limit=2,
     image=llm_image,
-    network_file_systems={IMAGE_MODEL_DIR: volume},
 )
 class LLM:
     def __enter__(self):

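For orientation, here is a minimal sketch of the shape the LLM stub ends up with after this change: the weights are downloaded and the Transformers cache migrated while the Modal image is built, so a cold container loads everything from the image filesystem instead of a mounted NetworkFileSystem. Only the names that appear in the diff (IMAGE_MODEL_DIR, download_llm, migrate_cache_llm, the stub name and image/class options) are taken from the commit; the model id, the pip package list, and the stub.cls decorator are placeholders and assumptions, not the repository's actual code.

from modal import Image, Stub

IMAGE_MODEL_DIR = "/root/llm_models"
LLM_MODEL = "gpt2"  # placeholder model id; the real stub uses its own LLM

stub = Stub(name="reflector-llm")


def download_llm():
    # Fetch the weights into a directory that is part of the image itself.
    from huggingface_hub import snapshot_download

    snapshot_download(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)


def migrate_cache_llm():
    # One-time Transformers cache-layout migration, run at build time so a
    # cold container never has to do it.
    from transformers.utils.hub import move_cache

    move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)


llm_image = (
    Image.debian_slim(python_version="3.10.8")
    .pip_install("transformers", "huggingface_hub", "hf-transfer")  # illustrative packages
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
    .run_function(download_llm)        # weights baked into the image layer
    .run_function(migrate_cache_llm)   # cache migrated in the same layer
)


@stub.cls(  # decorator name assumed; the diff only shows its keyword arguments
    container_idle_timeout=60 * 5,
    concurrency_limit=2,
    image=llm_image,
)
class LLM:
    def __enter__(self):
        # Load straight from the directory baked into the image; no
        # network_file_systems mount is needed anymore.
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)
        self.model = AutoModelForCausalLM.from_pretrained(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)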
File 2 of 2: the Modal transcriber stub (stub name "reflector-transcriber")

@@ -19,9 +19,8 @@ WHISPER_NUM_WORKERS: int = 1
 TRANSLATION_MODEL = "facebook/m2m100_418M"
 IMAGE_MODEL_DIR = "/root/transcription_models"
-volume = modal.NetworkFileSystem.persisted("reflector-transcribe-models")
-stub = Stub(name="reflector-transtest1")
+stub = Stub(name="reflector-transcriber")

 def download_whisper(cache_dir: str | None = None):
@@ -52,6 +51,20 @@ def download_models():
     print(f"Model downloads complete.")

+def migrate_cache_llm():
+    """
+    XXX The cache for model files in Transformers v4.22.0 has been updated.
+    Migrating your old cache. This is a one-time only operation. You can
+    interrupt this and resume the migration later on by calling
+    `transformers.utils.move_cache()`.
+    """
+    from transformers.utils.hub import move_cache
+
+    print("Moving LLM cache")
+    move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)
+    print("LLM cache moved")
+
 whisper_image = (
     Image.debian_slim(python_version="3.10.8")
     .apt_install("git")
@@ -65,6 +78,7 @@ whisper_image = (
         "huggingface_hub==0.16.4",
     )
     .run_function(download_models)
+    .run_function(migrate_cache_llm)
    .env(
         {
             "LD_LIBRARY_PATH": (
@@ -80,7 +94,6 @@ whisper_image = (
     gpu="A10G",
     container_idle_timeout=60,
     image=whisper_image,
-    network_file_systems={IMAGE_MODEL_DIR: volume},
 )
 class Whisper:
     def __enter__(self):
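The reason move_cache() now gets explicit cache_dir/new_cache_dir arguments is, presumably, that the models are downloaded into the custom IMAGE_MODEL_DIR rather than the default Hugging Face cache, so the one-time v4.22 layout migration has to be pointed at that directory; a bare move_cache() would only inspect the default cache location. A standalone sketch of that flow, using the translation model that is actually named in the diff (the download call and the config load are illustrative, not the repository's exact code):

from huggingface_hub import snapshot_download
from transformers import AutoConfig
from transformers.utils.hub import move_cache

IMAGE_MODEL_DIR = "/root/transcription_models"
MODEL_ID = "facebook/m2m100_418M"  # translation model named in the diff

# 1) Download into the custom cache directory (roughly what download_models() does).
snapshot_download(MODEL_ID, cache_dir=IMAGE_MODEL_DIR)

# 2) Migrate that directory in place at image-build time. Without arguments,
#    move_cache() only looks at the default ~/.cache/huggingface location and
#    would leave IMAGE_MODEL_DIR untouched.
move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)

# 3) At container start, loads resolve from the baked-in directory without
#    re-downloading (AutoConfig used here as a lightweight stand-in for the
#    full model load).
config = AutoConfig.from_pretrained(MODEL_ID, cache_dir=IMAGE_MODEL_DIR)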