From 5cb132cac7bc6a363e8e1ffce7c70841e9113111 Mon Sep 17 00:00:00 2001
From: projects-g <63178974+projects-g@users.noreply.github.com>
Date: Wed, 8 Nov 2023 22:02:48 +0530
Subject: [PATCH] fix loading shards from local cache (#313)

---
 server/gpu/modal/reflector_llm.py         | 6 ++++--
 server/gpu/modal/reflector_llm_zephyr.py  | 8 +++++---
 server/gpu/modal/reflector_transcriber.py | 3 ++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/server/gpu/modal/reflector_llm.py b/server/gpu/modal/reflector_llm.py
index 02feedb7..f1e9d166 100644
--- a/server/gpu/modal/reflector_llm.py
+++ b/server/gpu/modal/reflector_llm.py
@@ -81,7 +81,8 @@ class LLM:
             LLM_MODEL,
             torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
             low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
-            cache_dir=IMAGE_MODEL_DIR
+            cache_dir=IMAGE_MODEL_DIR,
+            local_files_only=True
         )
 
         # JSONFormer doesn't yet support generation configs
@@ -96,7 +97,8 @@ class LLM:
         print("Instance llm tokenizer")
         tokenizer = AutoTokenizer.from_pretrained(
             LLM_MODEL,
-            cache_dir=IMAGE_MODEL_DIR
+            cache_dir=IMAGE_MODEL_DIR,
+            local_files_only=True
         )
 
         # move model to gpu
diff --git a/server/gpu/modal/reflector_llm_zephyr.py b/server/gpu/modal/reflector_llm_zephyr.py
index cbb436b0..b101f5f2 100644
--- a/server/gpu/modal/reflector_llm_zephyr.py
+++ b/server/gpu/modal/reflector_llm_zephyr.py
@@ -17,7 +17,7 @@
 LLM_LOW_CPU_MEM_USAGE: bool = True
 LLM_TORCH_DTYPE: str = "bfloat16"
 LLM_MAX_NEW_TOKENS: int = 300
-IMAGE_MODEL_DIR = "/root/llm_models"
+IMAGE_MODEL_DIR = "/root/llm_models/zephyr"
 
 stub = Stub(name="reflector-llm-zephyr")
 
@@ -81,7 +81,8 @@ class LLM:
             LLM_MODEL,
             torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
             low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
-            cache_dir=IMAGE_MODEL_DIR
+            cache_dir=IMAGE_MODEL_DIR,
+            local_files_only=True
         )
 
         # JSONFormer doesn't yet support generation configs
@@ -96,7 +97,8 @@ class LLM:
         print("Instance llm tokenizer")
         tokenizer = AutoTokenizer.from_pretrained(
             LLM_MODEL,
-            cache_dir=IMAGE_MODEL_DIR
+            cache_dir=IMAGE_MODEL_DIR,
+            local_files_only=True
         )
         gen_cfg.pad_token_id = tokenizer.eos_token_id
         gen_cfg.eos_token_id = tokenizer.eos_token_id
diff --git a/server/gpu/modal/reflector_transcriber.py b/server/gpu/modal/reflector_transcriber.py
index bee9ccd1..4f746ded 100644
--- a/server/gpu/modal/reflector_transcriber.py
+++ b/server/gpu/modal/reflector_transcriber.py
@@ -95,7 +95,8 @@ class Transcriber:
             device=self.device,
             compute_type=WHISPER_COMPUTE_TYPE,
             num_workers=WHISPER_NUM_WORKERS,
-            download_root=WHISPER_MODEL_DIR
+            download_root=WHISPER_MODEL_DIR,
+            local_files_only=True
         )
 
     @method()
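
For context, a minimal sketch of the load pattern this patch relies on, assuming the Modal image populates IMAGE_MODEL_DIR with the model weights at image-build time (the download step, function names, and model id below are illustrative, not taken from the repo; local_files_only is the actual transformers keyword). With local_files_only=True, from_pretrained resolves every shard from the local cache and raises an error if one is missing, instead of reaching out to the Hugging Face Hub at container start:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    IMAGE_MODEL_DIR = "/root/llm_models"
    LLM_MODEL = "some-org/some-llm"  # hypothetical model id

    def download_model():
        # Image-build step: network is available, populate the baked-in cache.
        AutoModelForCausalLM.from_pretrained(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)
        AutoTokenizer.from_pretrained(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)

    def load_model():
        # Container start: read shards from the image-local cache only.
        # local_files_only=True fails fast rather than re-downloading.
        model = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL,
            torch_dtype=torch.bfloat16,
            cache_dir=IMAGE_MODEL_DIR,
            local_files_only=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            LLM_MODEL,
            cache_dir=IMAGE_MODEL_DIR,
            local_files_only=True,
        )
        return model, tokenizer

faster-whisper's WhisperModel accepts the same local_files_only keyword (with download_root playing the role of cache_dir), which is why reflector_transcriber.py gets the analogous one-line change. The zephyr image additionally moves its cache to /root/llm_models/zephyr, presumably so its snapshot does not collide with the cache layout of the other LLM image.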