mirror of https://github.com/Monadical-SAS/reflector.git (synced 2025-12-20 20:29:06 +00:00)
server: remove warmup, increase LLM timeout for now
@@ -19,7 +19,7 @@ class ModalLLM(LLM):
         response = await client.post(
             self.llm_warmup_url,
             headers=self.headers,
-            timeout=60**5,
+            timeout=60 * 5,
         )
         response.raise_for_status()
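The old value was an operator typo rather than an intentionally huge timeout; comparing the two expressions in plain Python:

    >>> 60**5        # exponentiation: roughly 24.6 years, expressed in seconds
    777600000
    >>> 60 * 5       # the intended value: 5 minutes
    300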
@@ -39,7 +39,10 @@ class TranscriptTopicDetectorProcessor(Processor):
             self.transcript = data
         else:
             self.transcript.merge(data)
-        if len(self.transcript.text) < self.min_transcript_length:
+        text_length = len(self.transcript.text)
+        required_length = self.min_transcript_length
+        if text_length <= required_length:
+            self.logger.info(f"Topic detector {text_length}/{required_length}")
             return
         await self.flush()
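Besides adding the progress log line, the guard flips from < to <=, so a transcript exactly at the minimum length is now still skipped. A minimal standalone illustration of the new condition (plain Python; the default of 200 is only an example value, not the project's setting):

    def should_flush(text: str, min_transcript_length: int = 200) -> bool:
        # mirrors the updated guard: only flush once the accumulated
        # transcript is strictly longer than the minimum
        return len(text) > min_transcript_length

    assert should_flush("x" * 200) is False   # exactly at the minimum: keep waiting
    assert should_flush("x" * 201) is True    # past the minimum: flush and detect topics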
@@ -47,7 +50,7 @@ class TranscriptTopicDetectorProcessor(Processor):
         if not self.transcript:
             return
         text = self.transcript.text
-        self.logger.info(f"Detect topic on {len(text)} length transcript")
+        self.logger.info(f"Topic detector got {len(text)} length transcript")
         prompt = self.PROMPT.format(input_text=text)
         result = await retry(self.llm.generate)(prompt=prompt, logger=self.logger)
         summary = TitleSummary(
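The flush path calls the LLM through a retry(...) wrapper. That helper is not part of this diff, so the following is only a sketch of what such a wrapper could look like; the real signature, attempt count, and handling of the logger keyword may differ:

    import asyncio
    import functools

    def retry(func, attempts=3, delay=1.0):
        # return an async callable that retries func on failure;
        # here the logger keyword is consumed for retry reporting,
        # the real helper may instead forward it to func
        @functools.wraps(func)
        async def wrapper(*args, logger=None, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return await func(*args, **kwargs)
                except Exception as exc:
                    if logger:
                        logger.warning(f"attempt {attempt}/{attempts} failed: {exc}")
                    if attempt == attempts:
                        raise
                    await asyncio.sleep(delay)
        return wrapper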
@@ -58,7 +58,7 @@ class Settings(BaseSettings):
     LLM_OPENAI_KEY: str | None = None
     LLM_OPENAI_MODEL: str = "gpt-3.5-turbo"
     LLM_OPENAI_TEMPERATURE: float = 0.7
-    LLM_TIMEOUT: int = 90
+    LLM_TIMEOUT: int = 60 * 5  # take cold start into account
     LLM_MAX_TOKENS: int = 1024
     LLM_TEMPERATURE: float = 0.7
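The new default matches the 5-minute warmup timeout above. As an illustration of how a setting like LLM_TIMEOUT is typically threaded into the HTTP request (a sketch only; call_llm, the payload shape, and the httpx client usage here are assumptions, not code from the repository):

    import httpx

    LLM_TIMEOUT = 60 * 5  # mirrors the new Settings default

    async def call_llm(url: str, headers: dict, payload: dict) -> dict:
        # a single request gets the full 5-minute budget so that a
        # Modal cold start does not surface as a spurious timeout error
        async with httpx.AsyncClient() as client:
            response = await client.post(
                url, headers=headers, json=payload, timeout=LLM_TIMEOUT
            )
            response.raise_for_status()
            return response.json()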
@@ -159,7 +159,8 @@ async def rtc_offer_base(
         TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
         TranscriptFinalSummaryProcessor.as_threaded(callback=on_final_summary),
     )
-    await ctx.pipeline.warmup()
+    # FIXME: warmup is not working well yet
+    # await ctx.pipeline.warmup()

     # handle RTC peer connection
     pc = RTCPeerConnection()
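If warmup is reinstated later, one option (purely a sketch, not something this commit does; ctx.pipeline comes from the surrounding rtc_offer_base) is to schedule it in the background so it cannot delay the offer:

    import asyncio

    # fire-and-forget: a slow or failing warmup no longer blocks the
    # WebRTC answer; failures would still need to be logged somewhere
    warmup_task = asyncio.create_task(ctx.pipeline.warmup())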