mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
server: implement modal backend for llm and transcription
This commit is contained in:
43
server/reflector/llm/llm_modal.py
Normal file
43
server/reflector/llm/llm_modal.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
from reflector.llm.base import LLM
|
||||||
|
from reflector.settings import settings
|
||||||
|
from reflector.utils.retry import retry
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
class ModalLLM(LLM):
    """LLM backend that generates text by POSTing prompts to an HTTP endpoint.

    The endpoint is ``settings.LLM_URL`` with ``/llm`` appended. Every request
    carries ``settings.LLM_MODAL_API_KEY`` as a bearer token and uses
    ``settings.LLM_TIMEOUT`` as the HTTP timeout.
    """

    def __init__(self):
        super().__init__()
        self.timeout = settings.LLM_TIMEOUT
        self.llm_url = settings.LLM_URL + "/llm"
        # Authentication header attached to every request.
        self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}

    async def _generate(self, prompt: str, **kwargs):
        """Send ``prompt`` to the endpoint and return the text that follows it.

        The ``"text"`` field of the JSON response has its first
        ``len(prompt)`` characters dropped before being returned.
        NOTE(review): this presumes the service echoes the prompt at the
        start of ``"text"`` -- confirm against the server implementation.

        Args:
            prompt: Text sent as the ``"prompt"`` field of the JSON payload.
            **kwargs: Accepted but not used by this backend.

        Returns:
            The response text with the leading prompt-length slice removed.

        Raises:
            httpx.HTTPStatusError: Raised by ``raise_for_status()`` when the
                final response has an error status.
        """
        payload = {"prompt": prompt}
        async with httpx.AsyncClient() as client:
            # `retry` wraps the POST; `retry_timeout` is forwarded to that wrapper.
            post_with_retry = retry(client.post)
            response = await post_with_retry(
                self.llm_url,
                headers=self.headers,
                json=payload,
                timeout=self.timeout,
                retry_timeout=60 * 5,
            )
            response.raise_for_status()
            generated = response.json()["text"]
            # remove prompt
            return generated[len(prompt):]
|
||||||
|
|
||||||
|
|
||||||
|
# Register ModalLLM with the LLM base class under the name "modal".
LLM.register("modal", ModalLLM)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: run this module directly to send one fixed prompt
    # through ModalLLM and print whatever comes back.
    import asyncio

    from reflector.logger import logger

    async def main():
        """Generate a completion for a fixed prompt and print it."""
        print(await ModalLLM().generate("Hello, my name is", logger=logger))

    asyncio.run(main())
|
||||||
62
server/reflector/processors/audio_transcript_modal.py
Normal file
62
server/reflector/processors/audio_transcript_modal.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""
|
||||||
|
Implementation using the GPU service from modal.com
|
||||||
|
|
||||||
|
API will be a POST request to `TRANSCRIPT_URL` + `/transcribe`, sent as a multipart form:
|
||||||
|
|
||||||
|
```form
|
||||||
|
"timestamp": 123.456
|
||||||
|
"language": "en"
|
||||||
|
"file": <audio file>
|
||||||
|
```
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from reflector.processors.audio_transcript import AudioTranscriptProcessor
|
||||||
|
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
|
||||||
|
from reflector.processors.types import AudioFile, Transcript, Word
|
||||||
|
from reflector.settings import settings
|
||||||
|
from reflector.utils.retry import retry
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
    """Audio transcription processor that delegates to a remote HTTP service.

    Audio is uploaded as a multipart form to ``settings.TRANSCRIPT_URL`` with
    ``/transcribe`` appended, authenticated with a bearer token, and the JSON
    response is converted into a ``Transcript``.
    """

    def __init__(self, modal_api_key: str):
        """Configure the endpoint, timeout and authentication header.

        Args:
            modal_api_key: Token sent as ``Authorization: Bearer <token>``.
        """
        super().__init__()
        self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe"
        self.timeout = settings.TRANSCRIPT_TIMEOUT
        self.headers = {
            "Authorization": f"Bearer {modal_api_key}",
        }

    async def _transcript(self, data: AudioFile) -> Transcript:
        """Upload one audio file and return its transcription.

        Args:
            data: Audio to transcribe. ``data.path`` must provide ``.name`` and
                ``.open("rb")``; ``data.timestamp`` is rounded to two decimals
                and sent as the ``timestamp`` form field.

        Returns:
            A ``Transcript`` built from the response's ``"text"`` field and
            one ``Word`` per entry of its ``"words"`` list (each entry
            providing ``"text"``, ``"start"`` and ``"end"``).

        Raises:
            httpx.HTTPStatusError: Raised by ``raise_for_status()`` when the
                final response has an error status.
        """
        async with httpx.AsyncClient() as client:
            print(f"Try to transcribe audio {data.path.name}")
            form = {
                "timestamp": float(round(data.timestamp, 2)),
            }
            # Open the audio in a context manager so the file handle is always
            # closed, whether the upload succeeds or raises; previously the
            # handle was opened inline and never closed.
            with data.path.open("rb") as audio:
                files = {
                    "file": (data.path.name, audio),
                }
                response = await retry(client.post)(
                    self.transcript_url,
                    files=files,
                    data=form,
                    timeout=self.timeout,
                    headers=self.headers,
                )

            print(f"Transcript response: {response.status_code} {response.content}")
            response.raise_for_status()
            result = response.json()
            transcript = Transcript(
                text=result["text"],
                words=[
                    Word(text=word["text"], start=word["start"], end=word["end"])
                    for word in result["words"]
                ],
            )

        return transcript
|
||||||
|
|
||||||
|
|
||||||
|
AudioTranscriptAutoProcessor.register("modal", AudioTranscriptModalProcessor)
|
||||||
@@ -36,6 +36,9 @@ class Settings(BaseSettings):
|
|||||||
TRANSCRIPT_BANANA_API_KEY: str | None = None
|
TRANSCRIPT_BANANA_API_KEY: str | None = None
|
||||||
TRANSCRIPT_BANANA_MODEL_KEY: str | None = None
|
TRANSCRIPT_BANANA_MODEL_KEY: str | None = None
|
||||||
|
|
||||||
|
# Audio transcription modal.com configuration
|
||||||
|
TRANSCRIPT_MODAL_API_KEY: str | None = None
|
||||||
|
|
||||||
# Audio transcription storage
|
# Audio transcription storage
|
||||||
TRANSCRIPT_STORAGE_BACKEND: str = "aws"
|
TRANSCRIPT_STORAGE_BACKEND: str = "aws"
|
||||||
|
|
||||||
@@ -63,6 +66,9 @@ class Settings(BaseSettings):
|
|||||||
LLM_BANANA_API_KEY: str | None = None
|
LLM_BANANA_API_KEY: str | None = None
|
||||||
LLM_BANANA_MODEL_KEY: str | None = None
|
LLM_BANANA_MODEL_KEY: str | None = None
|
||||||
|
|
||||||
|
# LLM Modal configuration
|
||||||
|
LLM_MODAL_API_KEY: str | None = None
|
||||||
|
|
||||||
# Sentry
|
# Sentry
|
||||||
SENTRY_DSN: str | None = None
|
SENTRY_DSN: str | None = None
|
||||||
|
|
||||||
|
|||||||
@@ -58,12 +58,14 @@ def retry(fn):
|
|||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
except HTTPStatusError as e:
|
except HTTPStatusError as e:
|
||||||
|
logger.exception(e)
|
||||||
status_code = e.response.status_code
|
status_code = e.response.status_code
|
||||||
logger.debug(f"HTTP status {status_code} - {e}")
|
logger.debug(f"HTTP status {status_code} - {e}")
|
||||||
if status_code in retry_httpx_status_stop:
|
if status_code in retry_httpx_status_stop:
|
||||||
message = f"HTTP status {status_code} is in retry_httpx_status_stop"
|
message = f"HTTP status {status_code} is in retry_httpx_status_stop"
|
||||||
raise RetryHTTPException(message) from e
|
raise RetryHTTPException(message) from e
|
||||||
except retry_ignore_exc_types as e:
|
except retry_ignore_exc_types as e:
|
||||||
|
logger.exception(e)
|
||||||
last_exception = e
|
last_exception = e
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|||||||
Reference in New Issue
Block a user