Merge pull request #377 from Monadical-SAS/setup-and-upgrade

Setup and upgrade
2026-02-04 09:56:47 +00:00 · 2024-08-21 11:30:23 +02:00
parent c24b42656b 918daff66d
commit ad228e4e4b
10 changed files with 90 additions and 76 deletions
--- a/server/.env_template
+++ b/server/.env_template
@@ -4,7 +4,7 @@ TRANSCRIPT_MODAL_API_KEY=***REMOVED***

 LLM_BACKEND=modal
 LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=<ask in zulip>
+LLM_MODAL_API_KEY=***REMOVED***

 AUTH_BACKEND=fief
 AUTH_FIEF_URL=https://auth.reflector.media/reflector-local
--- a/server/README.md
+++ b/server/README.md
--- a/server/env.example
+++ b/server/env.example
@@ -3,36 +3,15 @@
 # All the settings are described here: reflector/settings.py
 #

-## =======================================================
-## Database
-## =======================================================
-
-#DATABASE_URL=sqlite://./reflector.db
-#DATABASE_URL=postgresql://reflector:reflector@localhost:5432/reflector
-
-
 ## =======================================================
 ## User authentication
 ## =======================================================

-## No authentication
-#AUTH_BACKEND=none
-
 ## Using fief (fief.dev)
-#AUTH_BACKEND=fief
-#AUTH_FIEF_URL=https://your-fief-instance....
-#AUTH_FIEF_CLIENT_ID=xxx
-#AUTH_FIEF_CLIENT_SECRET=xxx
-
-
-## =======================================================
-## Public mode
-## =======================================================
-## If set to true, anonymous transcripts will be
-## accessible to anybody.
-
-#PUBLIC_MODE=false
-
+AUTH_BACKEND=fief
+AUTH_FIEF_URL=https://auth.reflector.media/reflector-local
+AUTH_FIEF_CLIENT_ID=***REMOVED***
+AUTH_FIEF_CLIENT_SECRET=<ask in zulip>

 ## =======================================================
 ## Transcription backend
@@ -41,7 +20,7 @@
 ## full list of available transcription backend
 ## =======================================================

-## Using local whisper (default)
+## Using local whisper
 #TRANSCRIPT_BACKEND=whisper
 #WHISPER_MODEL_SIZE=tiny

@@ -51,21 +30,31 @@
 #TRANSLATE_URL=https://xxxxx--reflector-translator-web.modal.run
 #TRANSCRIPT_MODAL_API_KEY=xxxxx

+TRANSCRIPT_BACKEND=modal
+TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
+TRANSCRIPT_MODAL_API_KEY=***REMOVED***
+
+## =======================================================
+## Translation backend
+##
+## Only available on modal at the moment
+## =======================================================
+TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
+
 ## =======================================================
 ## LLM backend
 ##
+## Responsible for generating titles and short summaries
 ## Check reflector/llm/* for the full list of available
 ## llm backend implementation
 ## =======================================================

-## Use oobabooga (default)
-#LLM_BACKEND=oobabooga
-#LLM_URL=http://xxx:7860/api/generate/v1
-
 ## Using serverless modal.com (require reflector-gpu-modal deployed)
-#LLM_BACKEND=modal
-#LLM_URL=https://xxxxxx--reflector-llm-web.modal.run
-#LLM_MODAL_API_KEY=xxx
+LLM_BACKEND=modal
+LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
+LLM_MODAL_API_KEY=***REMOVED***
+ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
+

 ## Using OpenAI
 #LLM_BACKEND=openai
@@ -78,11 +67,21 @@
 #LLM_OPENAI_MODEL="GPT4All Falcon"

 ## Default LLM MODEL NAME
-DEFAULT_LLM=lmsys/vicuna-13b-v1.5
+#DEFAULT_LLM=lmsys/vicuna-13b-v1.5

 ## Cache directory to store models
 CACHE_DIR=data

+## =======================================================
+## Diarization
+##
+## Only available on modal
+## To allow diarization, you need to expose the files to be downloaded by the pipeline
+## =======================================================
+DIARIZATION_ENABLED=false
+DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
+
+
 ## =======================================================
 ## Sentry
 ## =======================================================
--- a/server/gpu/modal_deployments/reflector_diarizer.py
+++ b/server/gpu/modal_deployments/reflector_diarizer.py
@@ -6,12 +6,12 @@ Reflector GPU backend - diarizer
 import os

 import modal.gpu
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter
 from pydantic import BaseModel

-PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
+PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.1"
 MODEL_DIR = "/root/diarization_models"
-stub = Stub(name="reflector-diarizer")
+app = App(name="reflector-diarizer")


 def migrate_cache_llm():
@@ -33,7 +33,6 @@ def download_pyannote_audio():
    Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.0",
        cache_dir=MODEL_DIR,
-        use_auth_token=os.environ["HF_TOKEN"]
    )


@@ -54,7 +53,7 @@ diarizer_image = (
        "hf-transfer"
    )
    .run_function(migrate_cache_llm)
-    .run_function(download_pyannote_audio, secrets=[modal.Secret.from_name("my-huggingface-secret")])
+    .run_function(download_pyannote_audio)
    .env(
        {
            "LD_LIBRARY_PATH": (
@@ -66,16 +65,16 @@ diarizer_image = (
 )


-@stub.cls(
+@app.cls(
    gpu=modal.gpu.A100(memory=40),
    timeout=60 * 30,
    container_idle_timeout=60,
    allow_concurrent_inputs=1,
    image=diarizer_image,
-    secrets=[modal.Secret.from_name("my-huggingface-secret")],
 )
 class Diarizer:
-    def __enter__(self):
+    @enter()
+    def enter(self):
        import torch
        from pyannote.audio import Pipeline

@@ -124,7 +123,7 @@ class Diarizer:
 # -------------------------------------------------------------------


-@stub.function(
+@app.function(
    timeout=60 * 10,
    container_idle_timeout=60 * 3,
    allow_concurrent_inputs=40,
--- a/server/gpu/modal_deployments/reflector_llm.py
+++ b/server/gpu/modal_deployments/reflector_llm.py
@@ -9,7 +9,7 @@ import threading
 from typing import Optional

 import modal
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter, exit

 # LLM
 LLM_MODEL: str = "lmsys/vicuna-13b-v1.5"
@@ -19,7 +19,7 @@ LLM_MAX_NEW_TOKENS: int = 300

 IMAGE_MODEL_DIR = "/root/llm_models"

-stub = Stub(name="reflector-llm")
+app = App(name="reflector-llm")


 def download_llm():
@@ -64,7 +64,7 @@ llm_image = (
 )


-@stub.cls(
+@app.cls(
    gpu="A100",
    timeout=60 * 5,
    container_idle_timeout=60 * 5,
@@ -72,7 +72,8 @@ llm_image = (
    image=llm_image,
 )
 class LLM:
-    def __enter__(self):
+    @enter()
+    def enter(self):
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

@@ -113,7 +114,8 @@ class LLM:

        self.lock = threading.Lock()

-    def __exit__(self, *args):
+    @exit()
+    def exit():
        print("Exit llm")

    @method()
@@ -161,7 +163,7 @@ class LLM:
 # -------------------------------------------------------------------


-@stub.function(
+@app.function(
    container_idle_timeout=60 * 10,
    timeout=60 * 5,
    allow_concurrent_inputs=45,
--- a/server/gpu/modal_deployments/reflector_llm_zephyr.py
+++ b/server/gpu/modal_deployments/reflector_llm_zephyr.py
@@ -9,7 +9,7 @@ import threading
 from typing import Optional

 import modal
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter, exit

 # LLM
 LLM_MODEL: str = "HuggingFaceH4/zephyr-7b-alpha"
@@ -19,7 +19,7 @@ LLM_MAX_NEW_TOKENS: int = 300

 IMAGE_MODEL_DIR = "/root/llm_models/zephyr"

-stub = Stub(name="reflector-llm-zephyr")
+app = App(name="reflector-llm-zephyr")


 def download_llm():
@@ -64,7 +64,7 @@ llm_image = (
 )


-@stub.cls(
+@app.cls(
    gpu="A10G",
    timeout=60 * 5,
    container_idle_timeout=60 * 5,
@@ -72,7 +72,8 @@ llm_image = (
    image=llm_image,
 )
 class LLM:
-    def __enter__(self):
+    @enter()
+    def enter(self):
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

@@ -116,7 +117,8 @@ class LLM:
        self.GenerationConfig = GenerationConfig
        self.lock = threading.Lock()

-    def __exit__(self, *args):
+    @exit()
+    def exit():
        print("Exit llm")

    @method()
@@ -169,7 +171,7 @@ class LLM:
 # -------------------------------------------------------------------


-@stub.function(
+@app.function(
    container_idle_timeout=60 * 10,
    timeout=60 * 5,
    allow_concurrent_inputs=30,
--- a/server/gpu/modal_deployments/reflector_transcriber.py
+++ b/server/gpu/modal_deployments/reflector_transcriber.py
@@ -7,7 +7,7 @@ import os
 import tempfile
 import threading

-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter
 from pydantic import BaseModel

 # Whisper
@@ -18,7 +18,7 @@ WHISPER_NUM_WORKERS: int = 1

 WHISPER_MODEL_DIR = "/root/transcription_models"

-stub = Stub(name="reflector-transcriber")
+app = App(name="reflector-transcriber")


 def download_whisper():
@@ -75,7 +75,7 @@ transcriber_image = (
 )


-@stub.cls(
+@app.cls(
    gpu="A10G",
    timeout=60 * 5,
    container_idle_timeout=60 * 5,
@@ -83,7 +83,8 @@ transcriber_image = (
    image=transcriber_image,
 )
 class Transcriber:
-    def __enter__(self):
+    @enter()
+    def enter(self):
        import faster_whisper
        import torch

@@ -149,7 +150,7 @@ class Transcriber:
 # -------------------------------------------------------------------


-@stub.function(
+@app.function(
    container_idle_timeout=60,
    timeout=60,
    allow_concurrent_inputs=40,
--- a/server/gpu/modal_deployments/reflector_translator.py
+++ b/server/gpu/modal_deployments/reflector_translator.py
@@ -6,7 +6,7 @@ Reflector GPU backend - transcriber
 import os
 import threading

-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter
 from pydantic import BaseModel

 # Seamless M4T
@@ -20,7 +20,7 @@ HF_SEAMLESS_M4T_VOCODEREPO: str = "facebook/seamless-m4t-vocoder"
 SEAMLESS_GITEPO: str = "https://github.com/facebookresearch/seamless_communication.git"
 SEAMLESS_MODEL_DIR: str = "m4t"

-stub = Stub(name="reflector-translator")
+app = App(name="reflector-translator")


 def install_seamless_communication():
@@ -134,7 +134,7 @@ transcriber_image = (
 )


-@stub.cls(
+@app.cls(
    gpu="A10G",
    timeout=60 * 5,
    container_idle_timeout=60 * 5,
@@ -142,7 +142,8 @@ transcriber_image = (
    image=transcriber_image,
 )
 class Translator:
-    def __enter__(self):
+    @enter()
+    def enter(self):
        import torch
        from seamless_communication.inference.translator import Translator

@@ -379,7 +380,7 @@ class Translator:
 # -------------------------------------------------------------------


-@stub.function(
+@app.function(
    container_idle_timeout=60,
    timeout=60,
    allow_concurrent_inputs=40,
--- a/server/reflector/views/rtc_offer.py
+++ b/server/reflector/views/rtc_offer.py
@@ -71,7 +71,7 @@ async def rtc_offer_base(

    async def flush_pipeline_and_quit(close=True):
        # may be called twice
-        # 1. either the client ask to sotp the meeting
+        # 1. either the client asked to stop the meeting
        #    - we flush and close
        #    - when we receive the close event, we do nothing.
        # 2. or the client close the connection