server: remove warmup methods everywhere

2023-10-05 23:44:50 +02:00
committed by Mathieu Virbel
parent 50b89ecdda
commit 47f7e1836e
11 changed files with 2 additions and 134 deletions
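For context, the pattern being deleted: the base LLM class exposed a best-effort warmup() that timed a provider-specific _warmup() hook, logged the duration, and swallowed any failure, while ModalLLM overrode the hook with a POST to a dedicated /warmup endpoint. Below is a condensed, self-contained sketch of the removed code; the constructor arguments are illustrative stand-ins for settings.LLM_URL and settings.LLM_MODAL_API_KEY, and the type hints are normalized to logging.Logger (the original annotates with the reflector logger instance).

import logging
from time import monotonic

import httpx


class LLM:
    async def warmup(self, logger: logging.Logger) -> None:
        # Best-effort: time the provider hook and swallow any failure.
        start = monotonic()
        name = self.__class__.__name__
        logger.info(f"LLM[{name}] warming up...")
        try:
            await self._warmup(logger=logger)
            duration = monotonic() - start
            logger.info(f"LLM[{name}] warmup took {duration:.2f} seconds")
        except Exception:
            logger.exception(f"LLM[{name}] warmup failed, ignoring")

    async def _warmup(self, logger: logging.Logger) -> None:
        # Default hook is a no-op; providers override it.
        pass


class ModalLLM(LLM):
    def __init__(self, base_url: str, api_key: str) -> None:
        # Illustrative stand-ins for settings.LLM_URL / LLM_MODAL_API_KEY.
        self.llm_warmup_url = base_url + "/warmup"
        self.headers = {"Authorization": f"Bearer {api_key}"}

    async def _warmup(self, logger: logging.Logger) -> None:
        # Hit the Modal /warmup endpoint so the container is hot before
        # the first real generation request; allow up to 5 minutes.
        async with httpx.AsyncClient() as client:
            response = await client.post(
                self.llm_warmup_url,
                headers=self.headers,
                timeout=60 * 5,
            )
            response.raise_for_status()

A caller would have invoked this once at startup, e.g. await llm.warmup(logger=logging.getLogger("reflector")).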


@@ -1,17 +1,15 @@
 import importlib
 import json
 import re
-from time import monotonic
 from typing import TypeVar
 
 import nltk
 from prometheus_client import Counter, Histogram
+from transformers import GenerationConfig
 
 from reflector.llm.llm_params import TaskParams
 from reflector.logger import logger as reflector_logger
 from reflector.settings import settings
 from reflector.utils.retry import retry
-
-from transformers import GenerationConfig
 
 T = TypeVar("T", bound="LLM")
@@ -112,20 +110,6 @@ class LLM:
         self.m_generate_success = self.m_generate_success.labels(name)
         self.m_generate_failure = self.m_generate_failure.labels(name)
 
-    async def warmup(self, logger: reflector_logger):
-        start = monotonic()
-        name = self.__class__.__name__
-        logger.info(f"LLM[{name}] warming up...")
-        try:
-            await self._warmup(logger=logger)
-            duration = monotonic() - start
-            logger.info(f"LLM[{name}] warmup took {duration:.2f} seconds")
-        except Exception:
-            logger.exception(f"LLM[{name}] warmup failed, ignoring")
-
-    async def _warmup(self, logger: reflector_logger):
-        pass
-
     @property
     def tokenizer(self):
         """


@@ -1,10 +1,9 @@
 import httpx
+from transformers import AutoTokenizer, GenerationConfig
 
 from reflector.llm.base import LLM
 from reflector.logger import logger as reflector_logger
 from reflector.settings import settings
 from reflector.utils.retry import retry
-
-from transformers import AutoTokenizer, GenerationConfig
 
 class ModalLLM(LLM):
@@ -12,7 +11,6 @@ class ModalLLM(LLM):
         super().__init__()
         self.timeout = settings.LLM_TIMEOUT
         self.llm_url = settings.LLM_URL + "/llm"
-        self.llm_warmup_url = settings.LLM_URL + "/warmup"
         self.headers = {
             "Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
         }
@@ -27,15 +25,6 @@ class ModalLLM(LLM):
         # Replace this with a HTTP call
         return ["lmsys/vicuna-13b-v1.5"]
 
-    async def _warmup(self, logger):
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                self.llm_warmup_url,
-                headers=self.headers,
-                timeout=60 * 5,
-            )
-            response.raise_for_status()
-
     async def _generate(
         self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
     ):
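With the hook gone, ModalLLM only talks to settings.LLM_URL + "/llm" through _generate, so any caller that warmed models at startup simply drops that step, and the first generation request absorbs whatever cold-start cost the backend has. A hypothetical before/after for such a startup hook (the hook itself and its wiring are assumptions for illustration, not code from this repo):

import logging

logger = logging.getLogger("reflector")


async def on_startup(llms: list) -> None:
    # Before this commit (hypothetical caller code):
    #     for llm in llms:
    #         await llm.warmup(logger=logger)
    #
    # After this commit: warmup() no longer exists, so there is nothing to
    # pre-heat here; the first /llm request pays any cold-start cost.
    pass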