Mirror of https://github.com/Monadical-SAS/reflector.git, synced 2025-12-20 20:29:06 +00:00
server: remove warmup, increase LLM timeout for now
@@ -19,7 +19,7 @@ class ModalLLM(LLM):
             response = await client.post(
                 self.llm_warmup_url,
                 headers=self.headers,
-                timeout=60**5,
+                timeout=60 * 5,
             )
             response.raise_for_status()
 
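The one-character fix above matters more than it looks: ** is exponentiation in Python, so the old value was an astronomically long timeout rather than the intended five minutes:

60**5   # exponentiation: 777,600,000 seconds (~24.6 years)
60 * 5  # multiplication: 300 seconds (5 minutes)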
@@ -39,7 +39,10 @@ class TranscriptTopicDetectorProcessor(Processor):
             self.transcript = data
         else:
             self.transcript.merge(data)
-        if len(self.transcript.text) < self.min_transcript_length:
+        text_length = len(self.transcript.text)
+        required_length = self.min_transcript_length
+        if text_length <= required_length:
+            self.logger.info(f"Topic detector {text_length}/{required_length}")
             return
         await self.flush()
 
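Beyond adding a progress log line, the rewrite also changes the comparison from < to <=, so a transcript exactly at the threshold now keeps buffering instead of flushing. For example, with a hypothetical threshold of 200 characters:

text_length = 200
required_length = 200
text_length < required_length   # old guard: False, flush would run
text_length <= required_length  # new guard: True, processor waits for more text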
@@ -47,7 +50,7 @@ class TranscriptTopicDetectorProcessor(Processor):
         if not self.transcript:
             return
         text = self.transcript.text
-        self.logger.info(f"Detect topic on {len(text)} length transcript")
+        self.logger.info(f"Topic detector got {len(text)} length transcript")
         prompt = self.PROMPT.format(input_text=text)
         result = await retry(self.llm.generate)(prompt=prompt, logger=self.logger)
         summary = TitleSummary(
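The retry(self.llm.generate)(prompt=..., logger=...) call shape implies a helper that wraps an async callable and returns a retrying version of it. A minimal sketch of such a wrapper, with hypothetical attempt count and backoff (the repository's actual retry implementation may differ):

import asyncio

def retry(fn, attempts=3, base_delay=1.0):
    # Wrap an async callable; re-invoke it on failure with linear backoff.
    async def wrapper(*args, **kwargs):
        for attempt in range(attempts):
            try:
                return await fn(*args, **kwargs)
            except Exception:
                if attempt == attempts - 1:
                    raise
                await asyncio.sleep(base_delay * (attempt + 1))
    return wrapper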
@@ -58,7 +58,7 @@ class Settings(BaseSettings):
     LLM_OPENAI_KEY: str | None = None
     LLM_OPENAI_MODEL: str = "gpt-3.5-turbo"
     LLM_OPENAI_TEMPERATURE: float = 0.7
-    LLM_TIMEOUT: int = 90
+    LLM_TIMEOUT: int = 60 * 5  # take cold start into account
     LLM_MAX_TOKENS: int = 1024
     LLM_TEMPERATURE: float = 0.7
 
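Because Settings extends BaseSettings, the new 300-second default can still be overridden per deployment through an environment variable. A minimal sketch, assuming pydantic v1 semantics (in v2, BaseSettings moved to the pydantic-settings package):

import os
from pydantic import BaseSettings  # pydantic v2: from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    LLM_TIMEOUT: int = 60 * 5  # take cold start into account

os.environ["LLM_TIMEOUT"] = "90"  # hypothetical override for a warm deployment
print(Settings().LLM_TIMEOUT)     # 90 with the variable set; 300 otherwise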
@@ -159,7 +159,8 @@ async def rtc_offer_base(
         TranscriptTopicDetectorProcessor.as_threaded(callback=on_topic),
         TranscriptFinalSummaryProcessor.as_threaded(callback=on_final_summary),
     )
-    await ctx.pipeline.warmup()
+    # FIXME: warmup is not working well yet
+    # await ctx.pipeline.warmup()
 
     # handle RTC peer connection
     pc = RTCPeerConnection()
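With pipeline.warmup() commented out, the first LLM request of a session absorbs the Modal cold start directly, which appears to be what the raised LLM_TIMEOUT in this same commit budgets for; per the FIXME, warmup is intended to return once it works reliably.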