server: implement modal backend for llm and transcription

Author: Mathieu Virbel
Date: 2023-08-11 12:34:11 +02:00
Parent: a822c9d482
Commit: 445d3c1221
4 changed files with 113 additions and 0 deletions


@@ -0,0 +1,43 @@
from reflector.llm.base import LLM
from reflector.settings import settings
from reflector.utils.retry import retry
import httpx


class ModalLLM(LLM):
    def __init__(self):
        super().__init__()
        self.timeout = settings.LLM_TIMEOUT
        self.llm_url = settings.LLM_URL + "/llm"
        self.headers = {
            "Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
        }

    async def _generate(self, prompt: str, **kwargs):
        async with httpx.AsyncClient() as client:
            response = await retry(client.post)(
                self.llm_url,
                headers=self.headers,
                json={"prompt": prompt},
                timeout=self.timeout,
                retry_timeout=60 * 5,
            )
            response.raise_for_status()
            text = response.json()["text"]
            text = text[len(prompt) :]  # remove prompt
            return text


LLM.register("modal", ModalLLM)


if __name__ == "__main__":
    from reflector.logger import logger

    async def main():
        llm = ModalLLM()
        result = await llm.generate("Hello, my name is", logger=logger)
        print(result)

    import asyncio

    asyncio.run(main())
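For context, here is a minimal sketch of the server-side contract that ModalLLM assumes. The Modal deployment itself is not part of this commit, so the framework choice (FastAPI) and the `run_model` stub below are illustrative assumptions; only the request/response shape is taken from the client code, which POSTs JSON `{"prompt": ...}` with a Bearer token and reads a JSON reply whose `"text"` field starts with the prompt.

```python
# Hypothetical sketch of the endpoint ModalLLM talks to; not part of this commit.
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class LLMRequest(BaseModel):
    prompt: str


def run_model(prompt: str) -> str:
    # Placeholder for whatever text-generation call the Modal service runs.
    return " a placeholder completion."


@app.post("/llm")
async def llm(req: LLMRequest):
    # ModalLLM slices the prompt back off, so the reply echoes it as a prefix.
    return {"text": req.prompt + run_model(req.prompt)}
```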


@@ -0,0 +1,62 @@
"""
Implementation using the GPU service from modal.com
API will be a POST request to TRANSCRIPT_URL:
```form
"timestamp": 123.456
"language": "en"
"file": <audio file>
```
"""
from reflector.processors.audio_transcript import AudioTranscriptProcessor
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
from reflector.processors.types import AudioFile, Transcript, Word
from reflector.settings import settings
from reflector.utils.retry import retry
import httpx
class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
def __init__(self, modal_api_key: str):
super().__init__()
self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe"
self.timeout = settings.TRANSCRIPT_TIMEOUT
self.headers = {
"Authorization": f"Bearer {modal_api_key}",
}
async def _transcript(self, data: AudioFile):
async with httpx.AsyncClient() as client:
print(f"Try to transcribe audio {data.path.name}")
files = {
"file": (data.path.name, data.path.open("rb")),
}
form = {
"timestamp": float(round(data.timestamp, 2)),
}
response = await retry(client.post)(
self.transcript_url,
files=files,
data=form,
timeout=self.timeout,
headers=self.headers,
)
print(f"Transcript response: {response.status_code} {response.content}")
response.raise_for_status()
result = response.json()
transcript = Transcript(
text=result["text"],
words=[
Word(text=word["text"], start=word["start"], end=word["end"])
for word in result["words"]
],
)
return transcript
AudioTranscriptAutoProcessor.register("modal", AudioTranscriptModalProcessor)
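The module docstring and the parsing code above pin down the /transcribe contract: a multipart POST carrying timestamp, language and file form fields, answered with JSON containing the full text plus word-level timings. A standalone illustration follows; the URL, token and file name are placeholders, and note that the processor itself only sends timestamp and file.

```python
# Illustration of the /transcribe contract used by AudioTranscriptModalProcessor.
# Deployment URL, API key and audio file below are placeholders, not from the commit.
import httpx

response = httpx.post(
    "https://example--whisper.modal.run/transcribe",
    headers={"Authorization": "Bearer <TRANSCRIPT_MODAL_API_KEY>"},
    data={"timestamp": 123.46, "language": "en"},
    files={"file": ("audio.wav", open("audio.wav", "rb"))},
    timeout=60,
)
response.raise_for_status()
result = response.json()
# Shape consumed by the processor:
# {
#     "text": "full transcript of the audio chunk",
#     "words": [{"text": "full", "start": 123.46, "end": 123.71}, ...]
# }
```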


@@ -36,6 +36,9 @@ class Settings(BaseSettings):
    TRANSCRIPT_BANANA_API_KEY: str | None = None
    TRANSCRIPT_BANANA_MODEL_KEY: str | None = None

    # Audio transcription modal.com configuration
    TRANSCRIPT_MODAL_API_KEY: str | None = None

    # Audio transcription storage
    TRANSCRIPT_STORAGE_BACKEND: str = "aws"
@@ -63,6 +66,9 @@ class Settings(BaseSettings):
    LLM_BANANA_API_KEY: str | None = None
    LLM_BANANA_MODEL_KEY: str | None = None

    # LLM Modal configuration
    LLM_MODAL_API_KEY: str | None = None

    # Sentry
    SENTRY_DSN: str | None = None
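With the two keys above, wiring up the backends registered in this commit presumably comes down to pointing the existing URL and timeout settings at the Modal deployments and supplying the keys. A hedged sketch: the values are placeholders, and how the "modal" backend is selected by name (e.g. an LLM_BACKEND / TRANSCRIPT_BACKEND setting) is not shown in this diff.

```python
# Only the field names below appear in the code added by this commit;
# the values are placeholders.
from reflector.settings import Settings

settings = Settings(
    LLM_URL="https://example--llm.modal.run",
    LLM_MODAL_API_KEY="<modal-llm-key>",
    TRANSCRIPT_URL="https://example--whisper.modal.run",
    TRANSCRIPT_MODAL_API_KEY="<modal-transcribe-key>",
)
```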


@@ -58,12 +58,14 @@ def retry(fn):
                if result:
                    return result
            except HTTPStatusError as e:
                logger.exception(e)
                status_code = e.response.status_code
                logger.debug(f"HTTP status {status_code} - {e}")
                if status_code in retry_httpx_status_stop:
                    message = f"HTTP status {status_code} is in retry_httpx_status_stop"
                    raise RetryHTTPException(message) from e
            except retry_ignore_exc_types as e:
                logger.exception(e)
                last_exception = e
                logger.debug(
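The hunk is cut off above. For orientation, here is a rough sketch of the behaviour it implies, inferred from this hunk and the call sites (`retry(client.post)(..., retry_timeout=...)`). The loop shape, backoff, default timeout and the contents of retry_httpx_status_stop are assumptions, not the actual implementation.

```python
# Rough sketch only, not the real reflector.utils.retry implementation.
import asyncio
import time

from httpx import HTTPStatusError

retry_httpx_status_stop = {401, 403, 404}  # assumed contents


class RetryHTTPException(Exception):
    pass


def retry(fn):
    # Call fn until it yields a truthy result or retry_timeout runs out,
    # giving up immediately on HTTP statuses that cannot be retried.
    async def wrapper(*args, retry_timeout: float = 60, **kwargs):
        started = time.monotonic()
        last_exception = None
        while time.monotonic() - started < retry_timeout:
            try:
                result = await fn(*args, **kwargs)
                if hasattr(result, "raise_for_status"):
                    # Assumption: HTTP errors are surfaced here so the except
                    # branch below can inspect the status code.
                    result.raise_for_status()
                if result:
                    return result
            except HTTPStatusError as e:
                status_code = e.response.status_code
                if status_code in retry_httpx_status_stop:
                    raise RetryHTTPException(
                        f"HTTP status {status_code} is in retry_httpx_status_stop"
                    ) from e
                last_exception = e
            await asyncio.sleep(1)  # assumed fixed backoff
        if last_exception:
            raise last_exception

    return wrapper
```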