server: implement modal backend for llm and transcription

2026-04-25 14:45:19 +00:00 · 2023-08-11 12:34:11 +02:00
parent a822c9d482
commit 445d3c1221
4 changed files with 113 additions and 0 deletions
--- a/server/reflector/llm/llm_modal.py
+++ b/server/reflector/llm/llm_modal.py
@@ -0,0 +1,43 @@
 from reflector.llm.base import LLM
 from reflector.settings import settings
 from reflector.utils.retry import retry
 import httpx
 class ModalLLM(LLM):
    """
    LLM backend that sends prompts to an HTTP endpoint configured in settings.

    The endpoint is `settings.LLM_URL + "/llm"`; every request carries a
    bearer token built from `settings.LLM_MODAL_API_KEY`.
    """

    def __init__(self):
        super().__init__()
        self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
        self.llm_url = settings.LLM_URL + "/llm"
        self.timeout = settings.LLM_TIMEOUT

    async def _generate(self, prompt: str, **kwargs):
        """
        POST the prompt as JSON and return the generated continuation.

        The response must be JSON with a "text" field. An HTTP error status
        raises through `raise_for_status()`. Extra keyword arguments are
        accepted but not used here.
        """
        payload = {"prompt": prompt}
        async with httpx.AsyncClient() as http:
            post_with_retry = retry(http.post)
            reply = await post_with_retry(
                self.llm_url,
                headers=self.headers,
                json=payload,
                timeout=self.timeout,
                # forwarded to the retry wrapper; presumably seconds - confirm
                retry_timeout=60 * 5,
            )
            reply.raise_for_status()
            generated = reply.json()["text"]
            # Drop the first len(prompt) characters: this assumes the service
            # echoes the prompt at the start of "text".
            return generated[len(prompt) :]
 # Register ModalLLM with the LLM base class under the name "modal".
 LLM.register("modal", ModalLLM)
 if __name__ == "__main__":
    # Manual check when the module is run as a script: send one prompt
    # through ModalLLM and print whatever comes back.
    import asyncio

    from reflector.logger import logger

    async def main():
        """Generate a completion for a fixed prompt and print it."""
        backend = ModalLLM()
        completion = await backend.generate("Hello, my name is", logger=logger)
        print(completion)

    asyncio.run(main())
--- a/server/reflector/processors/audio_transcript_modal.py
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -0,0 +1,62 @@
 """
 Implementation using the GPU service from modal.com
 API will be a POST request to TRANSCRIPT_URL + "/transcribe":
 ```form
 "timestamp": 123.456
 "language": "en"
 "file": <audio file>
 ```
 """
 from reflector.processors.audio_transcript import AudioTranscriptProcessor
 from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
 from reflector.processors.types import AudioFile, Transcript, Word
 from reflector.settings import settings
 from reflector.utils.retry import retry
 import httpx
 class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
    """
    Audio transcription processor that uploads audio to an HTTP service.

    The audio file is posted as multipart form data to
    `settings.TRANSCRIPT_URL + "/transcribe"` with a bearer token, and the
    JSON response is converted into a `Transcript`.
    """

    def __init__(self, modal_api_key: str | None = None):
        """
        :param modal_api_key: bearer token sent with every request. When
            omitted (or None), `settings.TRANSCRIPT_MODAL_API_KEY` is used.
        """
        super().__init__()
        if modal_api_key is None:
            # Fall back to the configured key, mirroring how the Modal LLM
            # backend reads its own key from settings.
            modal_api_key = settings.TRANSCRIPT_MODAL_API_KEY
        self.transcript_url = settings.TRANSCRIPT_URL + "/transcribe"
        self.timeout = settings.TRANSCRIPT_TIMEOUT
        self.headers = {
            "Authorization": f"Bearer {modal_api_key}",
        }

    async def _transcript(self, data: AudioFile):
        """
        Upload the audio file and build a `Transcript` from the response.

        The response must be JSON with a "text" field and a "words" list
        whose items each have "text", "start" and "end". An HTTP error
        status raises through `raise_for_status()`.

        :param data: audio to transcribe; `data.path` is opened for reading
            and `data.timestamp` is sent rounded to two decimals.
        :return: the `Transcript` built from the service response.
        """
        async with httpx.AsyncClient() as client:
            print(f"Try to transcribe audio {data.path.name}")
            # Open the audio inside a context manager so the file handle is
            # closed once the upload (including any retries) has finished,
            # whether it succeeded or raised.
            with data.path.open("rb") as audio:
                files = {
                    "file": (data.path.name, audio),
                }
                form = {
                    "timestamp": float(round(data.timestamp, 2)),
                }
                response = await retry(client.post)(
                    self.transcript_url,
                    files=files,
                    data=form,
                    timeout=self.timeout,
                    headers=self.headers,
                )
            print(f"Transcript response: {response.status_code} {response.content}")
            response.raise_for_status()
            result = response.json()
            transcript = Transcript(
                text=result["text"],
                words=[
                    Word(text=word["text"], start=word["start"], end=word["end"])
                    for word in result["words"]
                ],
            )
        return transcript
 # Register this processor with AudioTranscriptAutoProcessor under the name "modal".
 AudioTranscriptAutoProcessor.register("modal", AudioTranscriptModalProcessor)
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -36,6 +36,9 @@ class Settings(BaseSettings):
    TRANSCRIPT_BANANA_API_KEY: str | None = None
    TRANSCRIPT_BANANA_MODEL_KEY: str | None = None
    # Audio transcription modal.com configuration
    TRANSCRIPT_MODAL_API_KEY: str | None = None
    # Audio transcription storage
    TRANSCRIPT_STORAGE_BACKEND: str = "aws"
@@ -63,6 +66,9 @@ class Settings(BaseSettings):
    LLM_BANANA_API_KEY: str | None = None
    LLM_BANANA_MODEL_KEY: str | None = None
    # LLM Modal configuration
    LLM_MODAL_API_KEY: str | None = None
    # Sentry
    SENTRY_DSN: str | None = None
--- a/server/reflector/utils/retry.py
+++ b/server/reflector/utils/retry.py
@@ -58,12 +58,14 @@ def retry(fn):
                if result:
                    return result
            except HTTPStatusError as e:
                logger.exception(e)
                status_code = e.response.status_code
                logger.debug(f"HTTP status {status_code} - {e}")
                if status_code in retry_httpx_status_stop:
                    message = f"HTTP status {status_code} is in retry_httpx_status_stop"
                    raise RetryHTTPException(message) from e
            except retry_ignore_exc_types as e:
                logger.exception(e)
                last_exception = e
            logger.debug(