server: remove warmup, increase LLM timeout for now

Mathieu Virbel
2023-08-11 19:56:39 +02:00
parent 82ce8202bd
commit 01806ce037
4 changed files with 9 additions and 5 deletions

@@ -19,7 +19,7 @@ class ModalLLM(LLM):
         response = await client.post(
             self.llm_warmup_url,
             headers=self.headers,
-            timeout=60**5,
+            timeout=60 * 5,
         )
         response.raise_for_status()
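
Note: 60**5 is exponentiation, so the old value asked for a 777,600,000-second timeout (roughly 24 years), while 60 * 5 is the intended 300 seconds. Quick arithmetic check in Python:

    >>> 60 ** 5
    777600000
    >>> 60 * 5
    300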

@@ -39,7 +39,10 @@ class TranscriptTopicDetectorProcessor(Processor):
             self.transcript = data
         else:
             self.transcript.merge(data)
-        if len(self.transcript.text) < self.min_transcript_length:
+        text_length = len(self.transcript.text)
+        required_length = self.min_transcript_length
+        if text_length <= required_length:
+            self.logger.info(f"Topic detector {text_length}/{required_length}")
             return
         await self.flush()
@@ -47,7 +50,7 @@ class TranscriptTopicDetectorProcessor(Processor):
         if not self.transcript:
             return
         text = self.transcript.text
-        self.logger.info(f"Detect topic on {len(text)} length transcript")
+        self.logger.info(f"Topic detector got {len(text)} length transcript")
         prompt = self.PROMPT.format(input_text=text)
         result = await retry(self.llm.generate)(prompt=prompt, logger=self.logger)
         summary = TitleSummary(
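
Two behavioural details in the hunk above: the comparison changed from < to <=, so a transcript exactly at the minimum length is now skipped as well, and the skip is logged instead of returning silently. A minimal standalone sketch of the guard (the default min_transcript_length value is an assumption, not the project's setting):

    def should_run_topic_detection(text: str, min_transcript_length: int = 100) -> bool:
        # Run topic detection only once the transcript is strictly longer
        # than the configured minimum.
        return len(text) > min_transcript_length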

@@ -58,7 +58,7 @@ class Settings(BaseSettings):
     LLM_OPENAI_KEY: str | None = None
     LLM_OPENAI_MODEL: str = "gpt-3.5-turbo"
     LLM_OPENAI_TEMPERATURE: float = 0.7
-    LLM_TIMEOUT: int = 90
+    LLM_TIMEOUT: int = 60 * 5  # take cold start into account
     LLM_MAX_TOKENS: int = 1024
     LLM_TEMPERATURE: float = 0.7
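
Because Settings is a BaseSettings class, the new five-minute default can still be overridden per deployment through an environment variable of the same name; a minimal sketch assuming pydantic v1-style BaseSettings (the field subset shown is illustrative):

    from pydantic import BaseSettings

    class Settings(BaseSettings):
        LLM_TIMEOUT: int = 60 * 5  # take cold start into account
        LLM_MAX_TOKENS: int = 1024

    # setting LLM_TIMEOUT=90 in the environment restores the previous 90-second limit
    settings = Settings()
    print(settings.LLM_TIMEOUT)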

@@ -159,7 +159,8 @@ async def rtc_offer_base(
         TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
         TranscriptFinalSummaryProcessor.as_threaded(callback=on_final_summary),
     )
-    await ctx.pipeline.warmup()
+    # FIXME: warmup is not working well yet
+    # await ctx.pipeline.warmup()
     # handle RTC peer connection
     pc = RTCPeerConnection()
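
With the pipeline warmup disabled, the first LLM request now has to sit through the backend cold start on its own, which is what the larger LLM_TIMEOUT covers. A rough sketch of such a call with httpx; the helper name, URL, and payload shape are assumptions, not the project's actual code:

    import httpx

    async def generate(prompt: str, url: str, timeout: int = 60 * 5) -> str:
        # The generous per-request timeout absorbs the cold start,
        # since there is no explicit warmup request anymore.
        async with httpx.AsyncClient() as client:
            response = await client.post(url, json={"prompt": prompt}, timeout=timeout)
            response.raise_for_status()
            return response.text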