server: implement warmup event for llm and transcription

This commit is contained in:
Mathieu Virbel
2023-08-11 15:32:41 +02:00
parent a2518df3bd
commit 38a5ee0da2
8 changed files with 85 additions and 5 deletions

View File

@@ -9,10 +9,20 @@ class ModalLLM(LLM):
super().__init__()
self.timeout = settings.LLM_TIMEOUT
self.llm_url = settings.LLM_URL + "/llm"
self.llm_warmup_url = settings.LLM_URL + "/warmup"
self.headers = {
"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
}
async def _warmup(self, logger):
    """Send one POST to the LLM warmup endpoint and check the HTTP status.

    The request goes to ``self.llm_warmup_url`` using the instance's
    ``headers`` and ``timeout``. ``logger`` is accepted for interface
    compatibility but is not used here.

    Raises whatever ``raise_for_status()`` raises for an unsuccessful
    response; errors from the request itself propagate unchanged.
    """
    request_options = {"headers": self.headers, "timeout": self.timeout}
    async with httpx.AsyncClient() as http:
        warmup_reply = await http.post(self.llm_warmup_url, **request_options)
        # Surface a failed warmup to the caller instead of ignoring it.
        warmup_reply.raise_for_status()
async def _generate(self, prompt: str, **kwargs):
async with httpx.AsyncClient() as client:
response = await retry(client.post)(