mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
server: remove warmup methods everywhere
This commit is contained in:
@@ -43,17 +43,6 @@ Authentication must be passed with the `Authorization` header, using the `bearer
|
|||||||
Authorization: bearer <REFLECTOR_APIKEY>
|
Authorization: bearer <REFLECTOR_APIKEY>
|
||||||
```
|
```
|
||||||
|
|
||||||
### Warmup (both)
|
|
||||||
|
|
||||||
`POST /warmup`
|
|
||||||
|
|
||||||
**response**
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"status": "ok"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### LLM
|
### LLM
|
||||||
|
|
||||||
`POST /llm`
|
`POST /llm`
|
||||||
|
|||||||
@@ -111,11 +111,6 @@ class LLM:
|
|||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
print("Exit llm")
|
print("Exit llm")
|
||||||
|
|
||||||
@method()
|
|
||||||
def warmup(self):
|
|
||||||
print("Warmup ok")
|
|
||||||
return {"status": "ok"}
|
|
||||||
|
|
||||||
@method()
|
@method()
|
||||||
def generate(self, prompt: str, gen_schema: str | None, gen_cfg: str | None) -> dict:
|
def generate(self, prompt: str, gen_schema: str | None, gen_cfg: str | None) -> dict:
|
||||||
"""
|
"""
|
||||||
@@ -201,8 +196,4 @@ def web():
|
|||||||
result = func.get()
|
result = func.get()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@app.post("/warmup", dependencies=[Depends(apikey_auth)])
|
|
||||||
async def warmup():
|
|
||||||
return llmstub.warmup.spawn().get()
|
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|||||||
@@ -185,10 +185,6 @@ class Transcriber:
|
|||||||
dtype=torch.float32
|
dtype=torch.float32
|
||||||
)
|
)
|
||||||
|
|
||||||
@method()
|
|
||||||
def warmup(self):
|
|
||||||
return {"status": "ok"}
|
|
||||||
|
|
||||||
@method()
|
@method()
|
||||||
def transcribe_segment(
|
def transcribe_segment(
|
||||||
self,
|
self,
|
||||||
@@ -334,8 +330,4 @@ def web():
|
|||||||
result = func.get()
|
result = func.get()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@app.post("/warmup", dependencies=[Depends(apikey_auth)])
|
|
||||||
async def warmup():
|
|
||||||
return transcriberstub.warmup.spawn().get()
|
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|||||||
@@ -1,17 +1,15 @@
|
|||||||
import importlib
|
import importlib
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from time import monotonic
|
|
||||||
from typing import TypeVar
|
from typing import TypeVar
|
||||||
|
|
||||||
import nltk
|
import nltk
|
||||||
from prometheus_client import Counter, Histogram
|
from prometheus_client import Counter, Histogram
|
||||||
from transformers import GenerationConfig
|
|
||||||
|
|
||||||
from reflector.llm.llm_params import TaskParams
|
from reflector.llm.llm_params import TaskParams
|
||||||
from reflector.logger import logger as reflector_logger
|
from reflector.logger import logger as reflector_logger
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from reflector.utils.retry import retry
|
from reflector.utils.retry import retry
|
||||||
|
from transformers import GenerationConfig
|
||||||
|
|
||||||
T = TypeVar("T", bound="LLM")
|
T = TypeVar("T", bound="LLM")
|
||||||
|
|
||||||
@@ -112,20 +110,6 @@ class LLM:
|
|||||||
self.m_generate_success = self.m_generate_success.labels(name)
|
self.m_generate_success = self.m_generate_success.labels(name)
|
||||||
self.m_generate_failure = self.m_generate_failure.labels(name)
|
self.m_generate_failure = self.m_generate_failure.labels(name)
|
||||||
|
|
||||||
async def warmup(self, logger: reflector_logger):
|
|
||||||
start = monotonic()
|
|
||||||
name = self.__class__.__name__
|
|
||||||
logger.info(f"LLM[{name}] warming up...")
|
|
||||||
try:
|
|
||||||
await self._warmup(logger=logger)
|
|
||||||
duration = monotonic() - start
|
|
||||||
logger.info(f"LLM[{name}] warmup took {duration:.2f} seconds")
|
|
||||||
except Exception:
|
|
||||||
logger.exception(f"LLM[{name}] warmup failed, ignoring")
|
|
||||||
|
|
||||||
async def _warmup(self, logger: reflector_logger):
|
|
||||||
pass
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tokenizer(self):
|
def tokenizer(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
import httpx
|
import httpx
|
||||||
from transformers import AutoTokenizer, GenerationConfig
|
|
||||||
|
|
||||||
from reflector.llm.base import LLM
|
from reflector.llm.base import LLM
|
||||||
from reflector.logger import logger as reflector_logger
|
from reflector.logger import logger as reflector_logger
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from reflector.utils.retry import retry
|
from reflector.utils.retry import retry
|
||||||
|
from transformers import AutoTokenizer, GenerationConfig
|
||||||
|
|
||||||
|
|
||||||
class ModalLLM(LLM):
|
class ModalLLM(LLM):
|
||||||
@@ -12,7 +11,6 @@ class ModalLLM(LLM):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.timeout = settings.LLM_TIMEOUT
|
self.timeout = settings.LLM_TIMEOUT
|
||||||
self.llm_url = settings.LLM_URL + "/llm"
|
self.llm_url = settings.LLM_URL + "/llm"
|
||||||
self.llm_warmup_url = settings.LLM_URL + "/warmup"
|
|
||||||
self.headers = {
|
self.headers = {
|
||||||
"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
|
"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
|
||||||
}
|
}
|
||||||
@@ -27,15 +25,6 @@ class ModalLLM(LLM):
|
|||||||
# Replace this with a HTTP call
|
# Replace this with a HTTP call
|
||||||
return ["lmsys/vicuna-13b-v1.5"]
|
return ["lmsys/vicuna-13b-v1.5"]
|
||||||
|
|
||||||
async def _warmup(self, logger):
|
|
||||||
async with httpx.AsyncClient() as client:
|
|
||||||
response = await client.post(
|
|
||||||
self.llm_warmup_url,
|
|
||||||
headers=self.headers,
|
|
||||||
timeout=60 * 5,
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
async def _generate(
|
async def _generate(
|
||||||
self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
|
self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
|
||||||
):
|
):
|
||||||
|
|||||||
@@ -52,9 +52,6 @@ class AudioTranscriptAutoProcessor(AudioTranscriptProcessor):
|
|||||||
def off(self, callback):
|
def off(self, callback):
|
||||||
self.processor.off(callback)
|
self.processor.off(callback)
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
return await self.processor._warmup()
|
|
||||||
|
|
||||||
async def _push(self, data: AudioFile):
|
async def _push(self, data: AudioFile):
|
||||||
return await self.processor._push(data)
|
return await self.processor._push(data)
|
||||||
|
|
||||||
|
|||||||
@@ -12,10 +12,7 @@ API will be a POST request to TRANSCRIPT_URL:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from time import monotonic
|
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from reflector.processors.audio_transcript import AudioTranscriptProcessor
|
from reflector.processors.audio_transcript import AudioTranscriptProcessor
|
||||||
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
|
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
|
||||||
from reflector.processors.types import AudioFile, Transcript, Word
|
from reflector.processors.types import AudioFile, Transcript, Word
|
||||||
@@ -27,26 +24,9 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
|||||||
def __init__(self, modal_api_key: str):
|
def __init__(self, modal_api_key: str):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe"
|
self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe"
|
||||||
self.warmup_url = settings.TRANSCRIPT_URL + "/warmup"
|
|
||||||
self.timeout = settings.TRANSCRIPT_TIMEOUT
|
self.timeout = settings.TRANSCRIPT_TIMEOUT
|
||||||
self.headers = {"Authorization": f"Bearer {modal_api_key}"}
|
self.headers = {"Authorization": f"Bearer {modal_api_key}"}
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient() as client:
|
|
||||||
start = monotonic()
|
|
||||||
self.logger.debug("Transcribe modal: warming up...")
|
|
||||||
response = await client.post(
|
|
||||||
self.warmup_url,
|
|
||||||
headers=self.headers,
|
|
||||||
timeout=self.timeout,
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
duration = monotonic() - start
|
|
||||||
self.logger.debug(f"Transcribe modal: warmup took {duration:.2f}s")
|
|
||||||
except Exception:
|
|
||||||
self.logger.exception("Transcribe modal: warmup failed")
|
|
||||||
|
|
||||||
async def _transcript(self, data: AudioFile):
|
async def _transcript(self, data: AudioFile):
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
self.logger.debug(f"Try to transcribe audio {data.name}")
|
self.logger.debug(f"Try to transcribe audio {data.name}")
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ from uuid import uuid4
|
|||||||
|
|
||||||
from prometheus_client import Counter, Gauge, Histogram
|
from prometheus_client import Counter, Gauge, Histogram
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
|
|
||||||
|
|
||||||
@@ -18,7 +17,6 @@ class PipelineEvent(BaseModel):
|
|||||||
class Processor:
|
class Processor:
|
||||||
INPUT_TYPE: type = None
|
INPUT_TYPE: type = None
|
||||||
OUTPUT_TYPE: type = None
|
OUTPUT_TYPE: type = None
|
||||||
WARMUP_EVENT: str = "WARMUP_EVENT"
|
|
||||||
|
|
||||||
m_processor = Histogram(
|
m_processor = Histogram(
|
||||||
"processor",
|
"processor",
|
||||||
@@ -172,21 +170,12 @@ class Processor:
|
|||||||
def describe(self, level=0):
|
def describe(self, level=0):
|
||||||
logger.info(" " * level + self.__class__.__name__)
|
logger.info(" " * level + self.__class__.__name__)
|
||||||
|
|
||||||
async def warmup(self):
|
|
||||||
"""
|
|
||||||
Warmup the processor
|
|
||||||
"""
|
|
||||||
await self._warmup()
|
|
||||||
|
|
||||||
async def _push(self, data):
|
async def _push(self, data):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
async def _flush(self):
|
async def _flush(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def as_threaded(cls, *args, **kwargs):
|
def as_threaded(cls, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
@@ -242,12 +231,6 @@ class ThreadedProcessor(Processor):
|
|||||||
if data is None:
|
if data is None:
|
||||||
await self.processor.flush()
|
await self.processor.flush()
|
||||||
break
|
break
|
||||||
if data == self.WARMUP_EVENT:
|
|
||||||
self.logger.debug(
|
|
||||||
f"Warming up {self.processor.__class__.__name__}"
|
|
||||||
)
|
|
||||||
await self.processor.warmup()
|
|
||||||
continue
|
|
||||||
try:
|
try:
|
||||||
await self.processor.push(data)
|
await self.processor.push(data)
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -258,9 +241,6 @@ class ThreadedProcessor(Processor):
|
|||||||
finally:
|
finally:
|
||||||
self.queue.task_done()
|
self.queue.task_done()
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
await self.queue.put(self.WARMUP_EVENT)
|
|
||||||
|
|
||||||
async def _push(self, data):
|
async def _push(self, data):
|
||||||
await self.queue.put(data)
|
await self.queue.put(data)
|
||||||
|
|
||||||
@@ -309,10 +289,6 @@ class BroadcastProcessor(Processor):
|
|||||||
for processor in self.processors:
|
for processor in self.processors:
|
||||||
processor.set_pipeline(pipeline)
|
processor.set_pipeline(pipeline)
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
for processor in self.processors:
|
|
||||||
await processor.warmup()
|
|
||||||
|
|
||||||
async def _push(self, data):
|
async def _push(self, data):
|
||||||
for processor in self.processors:
|
for processor in self.processors:
|
||||||
await processor.push(data)
|
await processor.push(data)
|
||||||
@@ -352,7 +328,6 @@ class Pipeline(Processor):
|
|||||||
OUTPUT_TYPE = None
|
OUTPUT_TYPE = None
|
||||||
|
|
||||||
def __init__(self, *processors: Processor):
|
def __init__(self, *processors: Processor):
|
||||||
self._warmed_up = False
|
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.logger = logger.bind(pipeline=self.uid)
|
self.logger = logger.bind(pipeline=self.uid)
|
||||||
self.logger.info("Pipeline created")
|
self.logger.info("Pipeline created")
|
||||||
@@ -369,11 +344,6 @@ class Pipeline(Processor):
|
|||||||
self.INPUT_TYPE = processors[0].INPUT_TYPE
|
self.INPUT_TYPE = processors[0].INPUT_TYPE
|
||||||
self.OUTPUT_TYPE = processors[-1].OUTPUT_TYPE
|
self.OUTPUT_TYPE = processors[-1].OUTPUT_TYPE
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
for processor in self.processors:
|
|
||||||
self.logger.debug(f"Warming up {processor.__class__.__name__}")
|
|
||||||
await processor.warmup()
|
|
||||||
|
|
||||||
async def _push(self, data):
|
async def _push(self, data):
|
||||||
await self.processors[0].push(data)
|
await self.processors[0].push(data)
|
||||||
|
|
||||||
|
|||||||
@@ -22,9 +22,6 @@ class TranscriptTopicDetectorProcessor(Processor):
|
|||||||
self.llm = LLM.get_instance()
|
self.llm = LLM.get_instance()
|
||||||
self.params = LLMTaskParams.get_instance(self.TASK).task_params
|
self.params = LLMTaskParams.get_instance(self.TASK).task_params
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
await self.llm.warmup(logger=self.logger)
|
|
||||||
|
|
||||||
async def _push(self, data: Transcript):
|
async def _push(self, data: Transcript):
|
||||||
if self.transcript is None:
|
if self.transcript is None:
|
||||||
self.transcript = data
|
self.transcript = data
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
from time import monotonic
|
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from reflector.processors.base import Processor
|
from reflector.processors.base import Processor
|
||||||
from reflector.processors.types import Transcript, TranslationLanguages
|
from reflector.processors.types import Transcript, TranslationLanguages
|
||||||
@@ -22,22 +20,6 @@ class TranscriptTranslatorProcessor(Processor):
|
|||||||
self.timeout = settings.TRANSCRIPT_TIMEOUT
|
self.timeout = settings.TRANSCRIPT_TIMEOUT
|
||||||
self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
|
self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
|
||||||
|
|
||||||
async def _warmup(self):
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient() as client:
|
|
||||||
start = monotonic()
|
|
||||||
self.logger.debug("Translate modal: warming up...")
|
|
||||||
response = await client.post(
|
|
||||||
settings.TRANSCRIPT_URL + "/warmup",
|
|
||||||
headers=self.headers,
|
|
||||||
timeout=self.timeout,
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
duration = monotonic() - start
|
|
||||||
self.logger.debug(f"Translate modal: warmup took {duration:.2f}s")
|
|
||||||
except Exception:
|
|
||||||
self.logger.exception("Translate modal: warmup failed")
|
|
||||||
|
|
||||||
async def _push(self, data: Transcript):
|
async def _push(self, data: Transcript):
|
||||||
self.transcript = data
|
self.transcript = data
|
||||||
await self.flush()
|
await self.flush()
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
|
|||||||
from fastapi import APIRouter, Request
|
from fastapi import APIRouter, Request
|
||||||
from prometheus_client import Gauge
|
from prometheus_client import Gauge
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from reflector.events import subscribers_shutdown
|
from reflector.events import subscribers_shutdown
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
from reflector.processors import (
|
from reflector.processors import (
|
||||||
@@ -239,8 +238,6 @@ async def rtc_offer_base(
|
|||||||
ctx.pipeline = Pipeline(*processors)
|
ctx.pipeline = Pipeline(*processors)
|
||||||
ctx.pipeline.set_pref("audio:source_language", source_language)
|
ctx.pipeline.set_pref("audio:source_language", source_language)
|
||||||
ctx.pipeline.set_pref("audio:target_language", target_language)
|
ctx.pipeline.set_pref("audio:target_language", target_language)
|
||||||
# FIXME: warmup is not working well yet
|
|
||||||
# await ctx.pipeline.warmup()
|
|
||||||
|
|
||||||
# handle RTC peer connection
|
# handle RTC peer connection
|
||||||
pc = RTCPeerConnection()
|
pc = RTCPeerConnection()
|
||||||
|
|||||||
Reference in New Issue
Block a user