server: remove warmup, increase LLM timeout for now

Mathieu Virbel
2023-08-11 19:56:39 +02:00
parent 82ce8202bd
commit 01806ce037
4 changed files with 9 additions and 5 deletions

@@ -19,7 +19,7 @@ class ModalLLM(LLM):
         response = await client.post(
             self.llm_warmup_url,
             headers=self.headers,
-            timeout=60**5,
+            timeout=60 * 5,
         )
         response.raise_for_status()
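
Note: 60**5 is exponentiation, so the old value asked for a 777,600,000-second timeout (roughly 24 years), while 60 * 5 is the intended 300 seconds. Quick arithmetic check in Python:

    >>> 60 ** 5
    777600000
    >>> 60 * 5
    300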

@@ -39,7 +39,10 @@ class TranscriptTopicDetectorProcessor(Processor):
             self.transcript = data
         else:
             self.transcript.merge(data)
-        if len(self.transcript.text) < self.min_transcript_length:
+        text_length = len(self.transcript.text)
+        required_length = self.min_transcript_length
+        if text_length <= required_length:
+            self.logger.info(f"Topic detector {text_length}/{required_length}")
             return
         await self.flush()
@@ -47,7 +50,7 @@ class TranscriptTopicDetectorProcessor(Processor):
         if not self.transcript:
             return
         text = self.transcript.text
-        self.logger.info(f"Detect topic on {len(text)} length transcript")
+        self.logger.info(f"Topic detector got {len(text)} length transcript")
         prompt = self.PROMPT.format(input_text=text)
         result = await retry(self.llm.generate)(prompt=prompt, logger=self.logger)
         summary = TitleSummary(
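
Two behavioural details in the hunk above: the comparison changed from < to <=, so a transcript exactly at the minimum length is now skipped as well, and the skip is logged instead of returning silently. A minimal standalone sketch of the guard (the default min_transcript_length value is an assumption, not the project's setting):

    def should_run_topic_detection(text: str, min_transcript_length: int = 100) -> bool:
        # Run topic detection only once the transcript is strictly longer
        # than the configured minimum.
        return len(text) > min_transcript_length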

@@ -58,7 +58,7 @@ class Settings(BaseSettings):
     LLM_OPENAI_KEY: str | None = None
     LLM_OPENAI_MODEL: str = "gpt-3.5-turbo"
     LLM_OPENAI_TEMPERATURE: float = 0.7
-    LLM_TIMEOUT: int = 90
+    LLM_TIMEOUT: int = 60 * 5  # take cold start into account
     LLM_MAX_TOKENS: int = 1024
     LLM_TEMPERATURE: float = 0.7
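
Because Settings is a BaseSettings class, the new five-minute default can still be overridden per deployment through an environment variable of the same name; a minimal sketch assuming pydantic v1-style BaseSettings (the field subset shown is illustrative):

    from pydantic import BaseSettings

    class Settings(BaseSettings):
        LLM_TIMEOUT: int = 60 * 5  # take cold start into account
        LLM_MAX_TOKENS: int = 1024

    # setting LLM_TIMEOUT=90 in the environment restores the previous 90-second limit
    settings = Settings()
    print(settings.LLM_TIMEOUT)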

@@ -159,7 +159,8 @@ async def rtc_offer_base(
         TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
         TranscriptFinalSummaryProcessor.as_threaded(callback=on_final_summary),
     )
-    await ctx.pipeline.warmup()
+    # FIXME: warmup is not working well yet
+    # await ctx.pipeline.warmup()
     # handle RTC peer connection
     pc = RTCPeerConnection()
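
With the pipeline warmup disabled, the first LLM request now has to sit through the backend cold start on its own, which is what the larger LLM_TIMEOUT covers. A rough sketch of such a call with httpx; the helper name, URL, and payload shape are assumptions, not the project's actual code:

    import httpx

    async def generate(prompt: str, url: str, timeout: int = 60 * 5) -> str:
        # The generous per-request timeout absorbs the cold start,
        # since there is no explicit warmup request anymore.
        async with httpx.AsyncClient() as client:
            response = await client.post(url, json={"prompt": prompt}, timeout=timeout)
            response.raise_for_status()
            return response.text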