From 445d3c12216c9d701ec4f601e87247586c4add2b Mon Sep 17 00:00:00 2001
From: Mathieu Virbel
Date: Fri, 11 Aug 2023 12:34:11 +0200
Subject: [PATCH] server: implement modal backend for llm and transcription

---
 server/reflector/llm/llm_modal.py             | 43 +++++++++++++
 .../processors/audio_transcript_modal.py      | 62 +++++++++++++++++++
 server/reflector/settings.py                  |  6 ++
 server/reflector/utils/retry.py               |  2 +
 4 files changed, 113 insertions(+)
 create mode 100644 server/reflector/llm/llm_modal.py
 create mode 100644 server/reflector/processors/audio_transcript_modal.py

diff --git a/server/reflector/llm/llm_modal.py b/server/reflector/llm/llm_modal.py
new file mode 100644
index 00000000..b971153b
--- /dev/null
+++ b/server/reflector/llm/llm_modal.py
@@ -0,0 +1,43 @@
+from reflector.llm.base import LLM
+from reflector.settings import settings
+from reflector.utils.retry import retry
+import httpx
+
+
+class ModalLLM(LLM):
+    def __init__(self):
+        super().__init__()
+        self.timeout = settings.LLM_TIMEOUT
+        self.llm_url = settings.LLM_URL + "/llm"
+        self.headers = {
+            "Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}",
+        }
+
+    async def _generate(self, prompt: str, **kwargs):  # extra kwargs ignored here
+        async with httpx.AsyncClient() as client:
+            response = await retry(client.post)(
+                self.llm_url,
+                headers=self.headers,
+                json={"prompt": prompt},
+                timeout=self.timeout,
+                retry_timeout=60 * 5,  # keep retrying for up to 5 minutes
+            )
+            response.raise_for_status()
+            text = response.json()["text"]
+            text = text.removeprefix(prompt)  # strip echoed prompt only if present
+            return text
+
+
+LLM.register("modal", ModalLLM)
+
+if __name__ == "__main__":
+    from reflector.logger import logger
+
+    async def main():
+        llm = ModalLLM()
+        result = await llm.generate("Hello, my name is", logger=logger)
+        print(result)
+
+    import asyncio
+
+    asyncio.run(main())
diff --git a/server/reflector/processors/audio_transcript_modal.py b/server/reflector/processors/audio_transcript_modal.py
new file mode 100644
index 00000000..71cbdadb
--- /dev/null
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -0,0 +1,62 @@
+"""
+Implementation using the GPU service from modal.com
+
+API will be a POST request to TRANSCRIPT_URL:
+
+```form
+"timestamp": 123.456
+"language": "en"
+"file":