diff --git a/server/gpu/modal/README.md b/server/gpu/modal/README.md
index 9491a00c..dee4052e 100644
--- a/server/gpu/modal/README.md
+++ b/server/gpu/modal/README.md
@@ -43,17 +43,6 @@ Authentication must be passed with the `Authorization` header, using the `bearer
 Authorization: bearer
 ```
 
-### Warmup (both)
-
-`POST /warmup`
-
-**response**
-```
-{
-  "status": "ok"
-}
-```
-
 ### LLM
 
 `POST /llm`
diff --git a/server/gpu/modal/reflector_llm.py b/server/gpu/modal/reflector_llm.py
index a4e88aae..7d7bb57d 100644
--- a/server/gpu/modal/reflector_llm.py
+++ b/server/gpu/modal/reflector_llm.py
@@ -111,11 +111,6 @@ class LLM:
     def __exit__(self, *args):
         print("Exit llm")
 
-    @method()
-    def warmup(self):
-        print("Warmup ok")
-        return {"status": "ok"}
-
     @method()
     def generate(self, prompt: str, gen_schema: str | None, gen_cfg: str | None) -> dict:
         """
@@ -201,8 +196,4 @@ def web():
         result = func.get()
         return result
 
-    @app.post("/warmup", dependencies=[Depends(apikey_auth)])
-    async def warmup():
-        return llmstub.warmup.spawn().get()
-
     return app
diff --git a/server/gpu/modal/reflector_transcriber.py b/server/gpu/modal/reflector_transcriber.py
index 81e073c1..3404cfe4 100644
--- a/server/gpu/modal/reflector_transcriber.py
+++ b/server/gpu/modal/reflector_transcriber.py
@@ -185,10 +185,6 @@ class Transcriber:
             dtype=torch.float32
         )
 
-    @method()
-    def warmup(self):
-        return {"status": "ok"}
-
     @method()
     def transcribe_segment(
         self,
@@ -334,8 +330,4 @@ def web():
         result = func.get()
         return result
 
-    @app.post("/warmup", dependencies=[Depends(apikey_auth)])
-    async def warmup():
-        return transcriberstub.warmup.spawn().get()
-
     return app
diff --git a/server/reflector/llm/base.py b/server/reflector/llm/base.py
index 5eb2f15d..603a0e65 100644
--- a/server/reflector/llm/base.py
+++ b/server/reflector/llm/base.py
@@ -1,17 +1,15 @@
 import importlib
 import json
 import re
-from time import monotonic
 from typing import TypeVar
 
 import nltk
 from prometheus_client import Counter, Histogram
-from transformers import GenerationConfig
-
 from reflector.llm.llm_params import TaskParams
 from reflector.logger import logger as reflector_logger
 from reflector.settings import settings
 from reflector.utils.retry import retry
+from transformers import GenerationConfig
 
 T = TypeVar("T", bound="LLM")
 
@@ -112,20 +110,6 @@ class LLM:
         self.m_generate_success = self.m_generate_success.labels(name)
         self.m_generate_failure = self.m_generate_failure.labels(name)
 
-    async def warmup(self, logger: reflector_logger):
-        start = monotonic()
-        name = self.__class__.__name__
-        logger.info(f"LLM[{name}] warming up...")
-        try:
-            await self._warmup(logger=logger)
-            duration = monotonic() - start
-            logger.info(f"LLM[{name}] warmup took {duration:.2f} seconds")
-        except Exception:
-            logger.exception(f"LLM[{name}] warmup failed, ignoring")
-
-    async def _warmup(self, logger: reflector_logger):
-        pass
-
     @property
     def tokenizer(self):
         """
diff --git a/server/reflector/llm/llm_modal.py b/server/reflector/llm/llm_modal.py
index b427833b..2c9d9bc9 100644
--- a/server/reflector/llm/llm_modal.py
+++ b/server/reflector/llm/llm_modal.py
@@ -1,10 +1,9 @@
 import httpx
-from transformers import AutoTokenizer, GenerationConfig
-
 from reflector.llm.base import LLM
 from reflector.logger import logger as reflector_logger
 from reflector.settings import settings
 from reflector.utils.retry import retry
+from transformers import AutoTokenizer, GenerationConfig
 
 
 class ModalLLM(LLM):
@@ -12,7 +11,6 @@ class ModalLLM(LLM):
     def __init__(self):
         super().__init__()
         self.timeout = settings.LLM_TIMEOUT
         self.llm_url = settings.LLM_URL + "/llm"
-        self.llm_warmup_url = settings.LLM_URL + "/warmup"
         self.headers = {
             "Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
         }
@@ -27,15 +25,6 @@ class ModalLLM(LLM):
         # Replace this with a HTTP call
         return ["lmsys/vicuna-13b-v1.5"]
 
-    async def _warmup(self, logger):
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                self.llm_warmup_url,
-                headers=self.headers,
-                timeout=60 * 5,
-            )
-            response.raise_for_status()
-
     async def _generate(
         self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
     ):
diff --git a/server/reflector/processors/audio_transcript_auto.py b/server/reflector/processors/audio_transcript_auto.py
index 3bc10102..f223a52d 100644
--- a/server/reflector/processors/audio_transcript_auto.py
+++ b/server/reflector/processors/audio_transcript_auto.py
@@ -52,9 +52,6 @@ class AudioTranscriptAutoProcessor(AudioTranscriptProcessor):
     def off(self, callback):
         self.processor.off(callback)
 
-    async def _warmup(self):
-        return await self.processor._warmup()
-
     async def _push(self, data: AudioFile):
         return await self.processor._push(data)
 
diff --git a/server/reflector/processors/audio_transcript_modal.py b/server/reflector/processors/audio_transcript_modal.py
index f3f36e61..201ed9d4 100644
--- a/server/reflector/processors/audio_transcript_modal.py
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -12,10 +12,7 @@ API will be a POST request to TRANSCRIPT_URL:
 
 """
 
-from time import monotonic
-
 import httpx
-
 from reflector.processors.audio_transcript import AudioTranscriptProcessor
 from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
 from reflector.processors.types import AudioFile, Transcript, Word
@@ -27,26 +24,9 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
     def __init__(self, modal_api_key: str):
         super().__init__()
         self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe"
-        self.warmup_url = settings.TRANSCRIPT_URL + "/warmup"
         self.timeout = settings.TRANSCRIPT_TIMEOUT
         self.headers = {"Authorization": f"Bearer {modal_api_key}"}
 
-    async def _warmup(self):
-        try:
-            async with httpx.AsyncClient() as client:
-                start = monotonic()
-                self.logger.debug("Transcribe modal: warming up...")
-                response = await client.post(
-                    self.warmup_url,
-                    headers=self.headers,
-                    timeout=self.timeout,
-                )
-                response.raise_for_status()
-                duration = monotonic() - start
-                self.logger.debug(f"Transcribe modal: warmup took {duration:.2f}s")
-        except Exception:
-            self.logger.exception("Transcribe modal: warmup failed")
-
     async def _transcript(self, data: AudioFile):
         async with httpx.AsyncClient() as client:
             self.logger.debug(f"Try to transcribe audio {data.name}")
diff --git a/server/reflector/processors/base.py b/server/reflector/processors/base.py
index 646a1846..6771e11e 100644
--- a/server/reflector/processors/base.py
+++ b/server/reflector/processors/base.py
@@ -5,7 +5,6 @@
 from uuid import uuid4
 
 from prometheus_client import Counter, Gauge, Histogram
 from pydantic import BaseModel
-
 from reflector.logger import logger
 
@@ -18,7 +17,6 @@ class PipelineEvent(BaseModel):
 class Processor:
     INPUT_TYPE: type = None
     OUTPUT_TYPE: type = None
-    WARMUP_EVENT: str = "WARMUP_EVENT"
 
     m_processor = Histogram(
         "processor",
@@ -172,21 +170,12 @@ class Processor:
     def describe(self, level=0):
         logger.info(" " * level + self.__class__.__name__)
 
-    async def warmup(self):
-        """
-        Warmup the processor
-        """
-        await self._warmup()
-
     async def _push(self, data):
         raise NotImplementedError
 
     async def _flush(self):
         pass
 
-    async def _warmup(self):
-        pass
-
     @classmethod
     def as_threaded(cls, *args, **kwargs):
         """
@@ -242,12 +231,6 @@ class ThreadedProcessor(Processor):
                     if data is None:
                         await self.processor.flush()
                         break
-                    if data == self.WARMUP_EVENT:
-                        self.logger.debug(
-                            f"Warming up {self.processor.__class__.__name__}"
-                        )
-                        await self.processor.warmup()
-                        continue
                     try:
                         await self.processor.push(data)
                     except Exception:
@@ -258,9 +241,6 @@
                 finally:
                     self.queue.task_done()
 
-    async def _warmup(self):
-        await self.queue.put(self.WARMUP_EVENT)
-
     async def _push(self, data):
         await self.queue.put(data)
 
@@ -309,10 +289,6 @@ class BroadcastProcessor(Processor):
         for processor in self.processors:
             processor.set_pipeline(pipeline)
 
-    async def _warmup(self):
-        for processor in self.processors:
-            await processor.warmup()
-
     async def _push(self, data):
         for processor in self.processors:
            await processor.push(data)
@@ -352,7 +328,6 @@ class Pipeline(Processor):
     OUTPUT_TYPE = None
 
     def __init__(self, *processors: Processor):
-        self._warmed_up = False
         super().__init__()
         self.logger = logger.bind(pipeline=self.uid)
         self.logger.info("Pipeline created")
@@ -369,11 +344,6 @@
         self.INPUT_TYPE = processors[0].INPUT_TYPE
         self.OUTPUT_TYPE = processors[-1].OUTPUT_TYPE
 
-    async def _warmup(self):
-        for processor in self.processors:
-            self.logger.debug(f"Warming up {processor.__class__.__name__}")
-            await processor.warmup()
-
     async def _push(self, data):
         await self.processors[0].push(data)
 
diff --git a/server/reflector/processors/transcript_topic_detector.py b/server/reflector/processors/transcript_topic_detector.py
index 297c55a5..dd14ce93 100644
--- a/server/reflector/processors/transcript_topic_detector.py
+++ b/server/reflector/processors/transcript_topic_detector.py
@@ -22,9 +22,6 @@ class TranscriptTopicDetectorProcessor(Processor):
         self.llm = LLM.get_instance()
         self.params = LLMTaskParams.get_instance(self.TASK).task_params
 
-    async def _warmup(self):
-        await self.llm.warmup(logger=self.logger)
-
     async def _push(self, data: Transcript):
         if self.transcript is None:
             self.transcript = data
diff --git a/server/reflector/processors/transcript_translator.py b/server/reflector/processors/transcript_translator.py
index f6f2e521..ae2c68e1 100644
--- a/server/reflector/processors/transcript_translator.py
+++ b/server/reflector/processors/transcript_translator.py
@@ -1,5 +1,3 @@
-from time import monotonic
-
 import httpx
 from reflector.processors.base import Processor
 from reflector.processors.types import Transcript, TranslationLanguages
@@ -22,22 +20,6 @@ class TranscriptTranslatorProcessor(Processor):
         self.timeout = settings.TRANSCRIPT_TIMEOUT
         self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
 
-    async def _warmup(self):
-        try:
-            async with httpx.AsyncClient() as client:
-                start = monotonic()
-                self.logger.debug("Translate modal: warming up...")
-                response = await client.post(
-                    settings.TRANSCRIPT_URL + "/warmup",
-                    headers=self.headers,
-                    timeout=self.timeout,
-                )
-                response.raise_for_status()
-                duration = monotonic() - start
-                self.logger.debug(f"Translate modal: warmup took {duration:.2f}s")
-        except Exception:
-            self.logger.exception("Translate modal: warmup failed")
-
     async def _push(self, data: Transcript):
         self.transcript = data
         await self.flush()
diff --git a/server/reflector/views/rtc_offer.py b/server/reflector/views/rtc_offer.py
index d767153e..5662d989 100644
--- a/server/reflector/views/rtc_offer.py
+++ b/server/reflector/views/rtc_offer.py
@@ -8,7 +8,6 @@ from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
 from fastapi import APIRouter, Request
 from prometheus_client import Gauge
 from pydantic import BaseModel
-
 from reflector.events import subscribers_shutdown
 from reflector.logger import logger
 from reflector.processors import (
@@ -239,8 +238,6 @@ async def rtc_offer_base(
     ctx.pipeline = Pipeline(*processors)
     ctx.pipeline.set_pref("audio:source_language", source_language)
     ctx.pipeline.set_pref("audio:target_language", target_language)
-    # FIXME: warmup is not working well yet
-    # await ctx.pipeline.warmup()
 
     # handle RTC peer connection
     pc = RTCPeerConnection()