mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 12:49:06 +00:00
server: remove warmup methods everywhere
This commit is contained in:
@@ -1,17 +1,15 @@
|
||||
import importlib
|
||||
import json
|
||||
import re
|
||||
from time import monotonic
|
||||
from typing import TypeVar
|
||||
|
||||
import nltk
|
||||
from prometheus_client import Counter, Histogram
|
||||
from transformers import GenerationConfig
|
||||
|
||||
from reflector.llm.llm_params import TaskParams
|
||||
from reflector.logger import logger as reflector_logger
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.retry import retry
|
||||
from transformers import GenerationConfig
|
||||
|
||||
T = TypeVar("T", bound="LLM")
|
||||
|
||||
@@ -112,20 +110,6 @@ class LLM:
|
||||
self.m_generate_success = self.m_generate_success.labels(name)
|
||||
self.m_generate_failure = self.m_generate_failure.labels(name)
|
||||
|
||||
async def warmup(self, logger: reflector_logger):
    """
    Run the warmup hook and log how long it took.

    Awaits `self._warmup`, passing the logger as a keyword argument, and
    measures the elapsed time with a monotonic clock. Any `Exception`
    raised inside the attempt is logged through `logger.exception` and
    then swallowed, so it is not re-raised to the caller.
    """
    started_at = monotonic()
    llm_name = self.__class__.__name__
    logger.info(f"LLM[{llm_name}] warming up...")
    try:
        await self._warmup(logger=logger)
        # The duration is only reported when the hook finished without raising.
        elapsed = monotonic() - started_at
        logger.info(f"LLM[{llm_name}] warmup took {elapsed:.2f} seconds")
    except Exception:
        # Deliberate best-effort: record the failure and carry on.
        logger.exception(f"LLM[{llm_name}] warmup failed, ignoring")
|
||||
|
||||
async def _warmup(self, logger: reflector_logger):
    """
    Warmup hook awaited by `warmup`; this implementation does nothing.

    `logger` is accepted but not used here.
    """
    return None
|
||||
|
||||
@property
|
||||
def tokenizer(self):
|
||||
"""
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import httpx
|
||||
from transformers import AutoTokenizer, GenerationConfig
|
||||
|
||||
from reflector.llm.base import LLM
|
||||
from reflector.logger import logger as reflector_logger
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.retry import retry
|
||||
from transformers import AutoTokenizer, GenerationConfig
|
||||
|
||||
|
||||
class ModalLLM(LLM):
|
||||
@@ -12,7 +11,6 @@ class ModalLLM(LLM):
|
||||
super().__init__()
|
||||
self.timeout = settings.LLM_TIMEOUT
|
||||
self.llm_url = settings.LLM_URL + "/llm"
|
||||
self.llm_warmup_url = settings.LLM_URL + "/warmup"
|
||||
self.headers = {
|
||||
"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
|
||||
}
|
||||
@@ -27,15 +25,6 @@ class ModalLLM(LLM):
|
||||
# Replace this with a HTTP call
|
||||
return ["lmsys/vicuna-13b-v1.5"]
|
||||
|
||||
async def _warmup(self, logger):
    """
    POST to the warmup URL and raise if the response reports an error.

    The request is sent to `self.llm_warmup_url` with `self.headers`,
    through a short-lived `httpx.AsyncClient`. `logger` is accepted but
    not used here.
    """
    # 60 * 5 -- presumably seconds (five minutes); confirm against httpx.
    warmup_timeout = 60 * 5
    async with httpx.AsyncClient() as http:
        reply = await http.post(
            self.llm_warmup_url,
            headers=self.headers,
            timeout=warmup_timeout,
        )
        reply.raise_for_status()
|
||||
|
||||
async def _generate(
|
||||
self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
|
||||
):
|
||||
|
||||
Reference in New Issue
Block a user