mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
upgrade modal
This commit is contained in:
@@ -9,7 +9,7 @@ import threading
|
||||
from typing import Optional
|
||||
|
||||
import modal
|
||||
from modal import Image, Secret, Stub, asgi_app, method
|
||||
from modal import Image, Secret, App, asgi_app, method, enter, exit
|
||||
|
||||
# LLM
|
||||
LLM_MODEL: str = "HuggingFaceH4/zephyr-7b-alpha"
|
||||
@@ -19,7 +19,7 @@ LLM_MAX_NEW_TOKENS: int = 300
|
||||
|
||||
IMAGE_MODEL_DIR = "/root/llm_models/zephyr"
|
||||
|
||||
stub = Stub(name="reflector-llm-zephyr")
|
||||
app = App(name="reflector-llm-zephyr")
|
||||
|
||||
|
||||
def download_llm():
|
||||
@@ -64,7 +64,7 @@ llm_image = (
|
||||
)
|
||||
|
||||
|
||||
@stub.cls(
|
||||
@app.cls(
|
||||
gpu="A10G",
|
||||
timeout=60 * 5,
|
||||
container_idle_timeout=60 * 5,
|
||||
@@ -72,7 +72,8 @@ llm_image = (
|
||||
image=llm_image,
|
||||
)
|
||||
class LLM:
|
||||
def __enter__(self):
|
||||
@enter()
|
||||
def enter(self):
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
||||
|
||||
@@ -116,7 +117,8 @@ class LLM:
|
||||
self.GenerationConfig = GenerationConfig
|
||||
self.lock = threading.Lock()
|
||||
|
||||
def __exit__(self, *args):
|
||||
@exit()
|
||||
def exit():
|
||||
print("Exit llm")
|
||||
|
||||
@method()
|
||||
@@ -169,7 +171,7 @@ class LLM:
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
|
||||
@stub.function(
|
||||
@app.function(
|
||||
container_idle_timeout=60 * 10,
|
||||
timeout=60 * 5,
|
||||
allow_concurrent_inputs=30,
|
||||
|
||||
Reference in New Issue
Block a user