server: remove warmup, increase LLM timeout for now

Author: Mathieu Virbel
Date: 2023-08-11 19:56:39 +02:00
Parent: 82ce8202bd
Commit: 01806ce037
4 changed files with 9 additions and 5 deletions

@@ -19,7 +19,7 @@ class ModalLLM(LLM):
         response = await client.post(
             self.llm_warmup_url,
             headers=self.headers,
-            timeout=60**5,
+            timeout=60 * 5,
         )
         response.raise_for_status()
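The original `timeout=60**5` evaluates to 777,600,000 seconds (roughly 24 years); `60 * 5` is the intended five minutes. A minimal sketch of the corrected call, assuming `client` is an `httpx.AsyncClient` (the client construction is not shown in this hunk, and `llm_warmup_url`/`headers` stand in for the ModalLLM instance attributes):

```python
import httpx

async def warmup(llm_warmup_url: str, headers: dict) -> None:
    async with httpx.AsyncClient() as client:
        response = await client.post(
            llm_warmup_url,
            headers=headers,
            timeout=60 * 5,  # five minutes, enough to cover a cold start
        )
        response.raise_for_status()  # surface warmup failures early
```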

@@ -39,7 +39,10 @@ class TranscriptTopicDetectorProcessor(Processor):
             self.transcript = data
         else:
             self.transcript.merge(data)
-        if len(self.transcript.text) < self.min_transcript_length:
+        text_length = len(self.transcript.text)
+        required_length = self.min_transcript_length
+        if text_length <= required_length:
+            self.logger.info(f"Topic detector {text_length}/{required_length}")
             return
         await self.flush()
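Note the comparison also tightened from `<` to `<=`, so a transcript exactly at the minimum length now keeps buffering. The gate in isolation, as a hedged sketch outside the Processor class (`min_transcript_length` is assumed to be a plain int setting):

```python
def ready_for_topic_detection(text: str, min_transcript_length: int) -> bool:
    # Buffer until the merged transcript is strictly longer than the minimum.
    return len(text) > min_transcript_length
```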
@@ -47,7 +50,7 @@ class TranscriptTopicDetectorProcessor(Processor):
         if not self.transcript:
             return
         text = self.transcript.text
-        self.logger.info(f"Detect topic on {len(text)} length transcript")
+        self.logger.info(f"Topic detector got {len(text)} length transcript")
         prompt = self.PROMPT.format(input_text=text)
         result = await retry(self.llm.generate)(prompt=prompt, logger=self.logger)
         summary = TitleSummary(
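The `retry` helper wrapping `self.llm.generate` is project code not shown in this diff. A hypothetical sketch of such a wrapper, keeping the same call shape (`retry(fn)(prompt=..., logger=...)`) and using linear backoff; the real implementation may differ:

```python
import asyncio

def retry(func, attempts: int = 3, base_delay: float = 1.0):
    async def wrapper(*args, **kwargs):
        logger = kwargs.get("logger")
        for attempt in range(1, attempts + 1):
            try:
                return await func(*args, **kwargs)
            except Exception as exc:
                if attempt == attempts:
                    raise  # out of attempts, propagate the last error
                if logger:
                    logger.warning(f"retry {attempt}/{attempts} after: {exc}")
                await asyncio.sleep(base_delay * attempt)  # linear backoff
    return wrapper
```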

@@ -58,7 +58,7 @@ class Settings(BaseSettings):
     LLM_OPENAI_KEY: str | None = None
     LLM_OPENAI_MODEL: str = "gpt-3.5-turbo"
     LLM_OPENAI_TEMPERATURE: float = 0.7
-    LLM_TIMEOUT: int = 90
+    LLM_TIMEOUT: int = 60 * 5  # take cold start into account
     LLM_MAX_TOKENS: int = 1024
     LLM_TEMPERATURE: float = 0.7
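Since `Settings` extends `BaseSettings`, the raised default can still be overridden per deployment through the environment. A minimal sketch, assuming pydantic v1 (in pydantic v2, `BaseSettings` lives in the separate `pydantic-settings` package):

```python
from pydantic import BaseSettings  # v2: from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    # 5 minutes by default, to absorb serverless cold starts
    LLM_TIMEOUT: int = 60 * 5

settings = Settings()
# Running with `LLM_TIMEOUT=90 python app.py` restores the old 90s value.
```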

@@ -159,7 +159,8 @@ async def rtc_offer_base(
         TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
         TranscriptFinalSummaryProcessor.as_threaded(callback=on_final_summary),
     )
-    await ctx.pipeline.warmup()
+    # FIXME: warmup is not working well yet
+    # await ctx.pipeline.warmup()
     # handle RTC peer connection
     pc = RTCPeerConnection()
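The warmup call is kept as a commented-out FIXME rather than deleted. One hypothetical way to re-enable it later without letting a broken warmup stall the RTC offer would be a best-effort wrapper; names here are illustrative, not project API:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)

async def safe_warmup(pipeline, timeout: float = 60 * 5) -> None:
    # Best-effort: a slow or failing warmup is logged, never fatal.
    try:
        await asyncio.wait_for(pipeline.warmup(), timeout=timeout)
    except Exception as exc:
        logger.warning(f"pipeline warmup skipped: {exc}")
```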