From 01806ce037543ef1940f0a683342c85bcfa3552f Mon Sep 17 00:00:00 2001
From: Mathieu Virbel
Date: Fri, 11 Aug 2023 19:56:39 +0200
Subject: [PATCH] server: remove warmup, increase LLM timeout for now

---
 server/reflector/llm/llm_modal.py                        | 2 +-
 server/reflector/processors/transcript_topic_detector.py | 7 +++++--
 server/reflector/settings.py                             | 2 +-
 server/reflector/views/rtc_offer.py                      | 3 ++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/server/reflector/llm/llm_modal.py b/server/reflector/llm/llm_modal.py
index a1c9c61b..7f23aa0d 100644
--- a/server/reflector/llm/llm_modal.py
+++ b/server/reflector/llm/llm_modal.py
@@ -19,7 +19,7 @@ class ModalLLM(LLM):
             response = await client.post(
                 self.llm_warmup_url,
                 headers=self.headers,
-                timeout=60**5,
+                timeout=60 * 5,
             )
             response.raise_for_status()
 
diff --git a/server/reflector/processors/transcript_topic_detector.py b/server/reflector/processors/transcript_topic_detector.py
index f4a9286a..b626e8a2 100644
--- a/server/reflector/processors/transcript_topic_detector.py
+++ b/server/reflector/processors/transcript_topic_detector.py
@@ -39,7 +39,10 @@ class TranscriptTopicDetectorProcessor(Processor):
             self.transcript = data
         else:
             self.transcript.merge(data)
-        if len(self.transcript.text) < self.min_transcript_length:
+        text_length = len(self.transcript.text)
+        required_length = self.min_transcript_length
+        if text_length <= required_length:
+            self.logger.info(f"Topic detector {text_length}/{required_length}")
             return
         await self.flush()
 
@@ -47,7 +50,7 @@ class TranscriptTopicDetectorProcessor(Processor):
         if not self.transcript:
             return
         text = self.transcript.text
-        self.logger.info(f"Detect topic on {len(text)} length transcript")
+        self.logger.info(f"Topic detector got {len(text)} length transcript")
         prompt = self.PROMPT.format(input_text=text)
         result = await retry(self.llm.generate)(prompt=prompt, logger=self.logger)
         summary = TitleSummary(
diff --git a/server/reflector/settings.py b/server/reflector/settings.py
index af2c02b2..5d049191 100644
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -58,7 +58,7 @@ class Settings(BaseSettings):
     LLM_OPENAI_KEY: str | None = None
     LLM_OPENAI_MODEL: str = "gpt-3.5-turbo"
     LLM_OPENAI_TEMPERATURE: float = 0.7
-    LLM_TIMEOUT: int = 90
+    LLM_TIMEOUT: int = 60 * 5  # take cold start into account
     LLM_MAX_TOKENS: int = 1024
     LLM_TEMPERATURE: float = 0.7
 
diff --git a/server/reflector/views/rtc_offer.py b/server/reflector/views/rtc_offer.py
index 2e9ed1b6..aef00580 100644
--- a/server/reflector/views/rtc_offer.py
+++ b/server/reflector/views/rtc_offer.py
@@ -159,7 +159,8 @@ async def rtc_offer_base(
         TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
         TranscriptFinalSummaryProcessor.as_threaded(callback=on_final_summary),
     )
-    await ctx.pipeline.warmup()
+    # FIXME: warmup is not working well yet
+    # await ctx.pipeline.warmup()
 
     # handle RTC peer connection
     pc = RTCPeerConnection()
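
Note on the llm_modal.py hunk: the old value `timeout=60**5` was Python
exponentiation, i.e. 777,600,000 seconds (roughly 24.6 years), not the
intended 5 minutes; `60 * 5` gives a 300-second timeout. Below is a minimal
standalone sketch of the corrected call, assuming the httpx client used in
the diff; the URL and headers are placeholders, not the real reflector
values:

    import asyncio

    import httpx

    async def warmup() -> None:
        # 60 * 5 == 300 seconds; 60**5 would be 777_600_000 seconds (~24.6 years)
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://example.com/llm/warmup",  # placeholder warmup URL
                headers={},  # placeholder headers
                timeout=60 * 5,
            )
            response.raise_for_status()

    if __name__ == "__main__":
        asyncio.run(warmup())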