upgrade modal

2026-02-04 18:06:48 +00:00 · 2024-08-12 12:24:14 +02:00
parent a4077005b2
commit 004787c055
5 changed files with 36 additions and 31 deletions
--- a/server/gpu/modal_deployments/reflector_llm_zephyr.py
+++ b/server/gpu/modal_deployments/reflector_llm_zephyr.py
@@ -9,7 +9,7 @@ import threading
 from typing import Optional

 import modal
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter, exit

 # LLM
 LLM_MODEL: str = "HuggingFaceH4/zephyr-7b-alpha"
@@ -19,7 +19,7 @@ LLM_MAX_NEW_TOKENS: int = 300

 IMAGE_MODEL_DIR = "/root/llm_models/zephyr"

-stub = Stub(name="reflector-llm-zephyr")
+app = App(name="reflector-llm-zephyr")


 def download_llm():
@@ -64,7 +64,7 @@ llm_image = (
 )


-@stub.cls(
+@app.cls(
    gpu="A10G",
    timeout=60 * 5,
    container_idle_timeout=60 * 5,
@@ -72,7 +72,8 @@ llm_image = (
    image=llm_image,
 )
 class LLM:
-    def __enter__(self):
+    @enter()
+    def enter(self):
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

@@ -116,7 +117,8 @@ class LLM:
        self.GenerationConfig = GenerationConfig
        self.lock = threading.Lock()

-    def __exit__(self, *args):
+    @exit()
+    def exit(self):
        print("Exit llm")

    @method()
@@ -169,7 +171,7 @@ class LLM:
 # -------------------------------------------------------------------


-@stub.function(
+@app.function(
    container_idle_timeout=60 * 10,
    timeout=60 * 5,
    allow_concurrent_inputs=30,