From a4077005b28081433b835ac954d3fe04a7134a13 Mon Sep 17 00:00:00 2001
From: Sara
Date: Mon, 12 Aug 2024 12:22:21 +0200
Subject: [PATCH 1/3] Setup instructions

---
 README.md                           | 39 +++++++++++++++++-----
 server/.env_template                |  2 +-
 server/env.example                  | 67 +++++++++++++++---------------
 server/reflector/views/rtc_offer.py |  2 +-
 4 files changed, 65 insertions(+), 45 deletions(-)

diff --git a/README.md b/README.md
index 83db0f2f..2a5600ce 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ The project architecture consists of three primary components:
 
 - **Front-End**: NextJS React project hosted on Vercel, located in `www/`.
 - **Back-End**: Python server that offers an API and data persistence, found in `server/`.
-- **GPU implementation**: Providing services such as speech-to-text transcription, topic generation, automated summaries, and translations.
+- **GPU implementation**: Providing services such as speech-to-text transcription, topic generation, automated summaries, and translations. The most reliable option is the Modal deployment.
 
 It also uses https://github.com/fief-dev for authentication, and Vercel for deployment and configuration of the front-end.
 
@@ -40,15 +40,23 @@ It also uses https://github.com/fief-dev for authentication, and Vercel for depl
 
 All new contributions should be made in a separate branch. Before any code is merged into `main`, it requires a code review.
 
-### How to Install Blackhole (Mac Only)
+### Usage Instructions
 
 To record both your voice and the meeting you're taking part in, you need:
 
 - For an in-person meeting, make sure your microphone is in range of all participants.
-- If using several miscrophones, make sure to merge the audio feeds into one with an external tool.
+- If using several microphones, make sure to merge the audio feeds into one with an external tool.
 - For an online meeting, if you do not use headphones, your microphone should be able to pick up both your voice and the audio feed of the meeting.
 - If you want to use headphones, you need to merge the audio feeds with an external tool.
 
+Permissions:
+
+You may have to grant the browser permission to record audio under
+`System Preferences -> Privacy & Security -> Microphone` and
+`System Preferences -> Privacy & Security -> Accessibility`. You will be prompted to provide these when you try to connect.
+
+### How to Install Blackhole (Mac Only)
+
 This is an external tool for merging the audio feeds as explained in the previous section of this document.
 
 Note: We currently do not have instructions for Windows users.
@@ -58,12 +66,6 @@ Note: We currently do not have instructions for Windows users.
 - Then go to `System Preferences -> Sound` and choose the devices created from the Output and Input tabs.
 - If everything is configured properly, the input from your local microphone and the browser-run meeting are aggregated into one virtual stream you can listen to, and the output is fed back to your specified output devices.
-
-Permissions:
-
-You may have to add permission for browser's microphone access to record audio in
-`System Preferences -> Privacy & Security -> Microphone`
-`System Preferences -> Privacy & Security -> Accessibility`. You will be prompted to provide these when you try to connect.
 
 ## Front-End
 
 Start with `cd www`.
@@ -208,4 +210,23 @@ poetry run python -m reflector.tools.process path/to/audio.wav
 
 ## AI Models
 
+### Modal
+
+To deploy LLM changes to Modal, you need (an example session follows this list):
+
+- a Modal account
+- the required secret (`REFLECTOR_GPU_APIKEY`) set up in your Modal account
+- the Modal CLI installed
+- the Modal CLI connected to your account, if not done previously
+- `modal run path/to/required/llm` to deploy
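+
+A typical first-time session might look like the sketch below; the secret value is a placeholder, and the target file shown is just one of the deployable apps in `server/gpu/modal_deployments/`:
+
+```bash
+pip install modal
+modal token new
+modal secret create REFLECTOR_GPU_APIKEY REFLECTOR_GPU_APIKEY=<your-api-key>
+modal run server/gpu/modal_deployments/reflector_llm.py
+```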
+
 _(Documentation for this section is pending.)_
diff --git a/server/.env_template b/server/.env_template
index d25797d3..8252dfdd 100644
--- a/server/.env_template
+++ b/server/.env_template
@@ -4,7 +4,7 @@ TRANSCRIPT_MODAL_API_KEY=***REMOVED***
 
 LLM_BACKEND=modal
 LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=
+LLM_MODAL_API_KEY=***REMOVED***
 
 AUTH_BACKEND=fief
 AUTH_FIEF_URL=https://auth.reflector.media/reflector-local
diff --git a/server/env.example b/server/env.example
index c5a38bf5..e36145d5 100644
--- a/server/env.example
+++ b/server/env.example
@@ -3,36 +3,15 @@
 # All the settings are described here: reflector/settings.py
 #
 
-## =======================================================
-## Database
-## =======================================================
-
-#DATABASE_URL=sqlite://./reflector.db
-#DATABASE_URL=postgresql://reflector:reflector@localhost:5432/reflector
-
 ## =======================================================
 ## User authentication
 ## =======================================================
 
-## No authentication
-#AUTH_BACKEND=none
-
 ## Using fief (fief.dev)
-#AUTH_BACKEND=fief
-#AUTH_FIEF_URL=https://your-fief-instance....
-#AUTH_FIEF_CLIENT_ID=xxx
-#AUTH_FIEF_CLIENT_SECRET=xxx
-
-## =======================================================
-## Public mode
-## =======================================================
-## If set to true, anonymous transcripts will be
-## accessible to anybody.
-
-#PUBLIC_MODE=false
-
+AUTH_BACKEND=fief
+AUTH_FIEF_URL=https://auth.reflector.media/reflector-local
+AUTH_FIEF_CLIENT_ID=***REMOVED***
+AUTH_FIEF_CLIENT_SECRET=
 
 ## =======================================================
 ## Transcription backend
 ##
@@ -41,7 +20,7 @@
 ## full list of available transcription backend
 ## =======================================================
 
-## Using local whisper (default)
+## Using local whisper
 #TRANSCRIPT_BACKEND=whisper
 #WHISPER_MODEL_SIZE=tiny
@@ -51,21 +30,31 @@
 #TRANSLATE_URL=https://xxxxx--reflector-translator-web.modal.run
 #TRANSCRIPT_MODAL_API_KEY=xxxxx
 
+TRANSCRIPT_BACKEND=modal
+TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
+TRANSCRIPT_MODAL_API_KEY=***REMOVED***
+
+## =======================================================
+## Translation backend
+##
+## Currently only available via Modal
+## =======================================================
+TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
+
 ## =======================================================
 ## LLM backend
 ##
+## Responsible for titles and short summaries
 ## Check reflector/llm/* for the full list of available
 ## llm backend implementation
 ## =======================================================
 
-## Use oobabooga (default)
-#LLM_BACKEND=oobabooga
-#LLM_URL=http://xxx:7860/api/generate/v1
-
 ## Using serverless modal.com (requires reflector-gpu-modal deployed)
-#LLM_BACKEND=modal
-#LLM_URL=https://xxxxxx--reflector-llm-web.modal.run
-#LLM_MODAL_API_KEY=xxx
+LLM_BACKEND=modal
+LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
+LLM_MODAL_API_KEY=***REMOVED***
+ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
 
 ## Using OpenAI
 #LLM_BACKEND=openai
@@ -78,11 +67,21 @@
 #LLM_OPENAI_MODEL="GPT4All Falcon"
 
 ## Default LLM MODEL NAME
-DEFAULT_LLM=lmsys/vicuna-13b-v1.5
+#DEFAULT_LLM=lmsys/vicuna-13b-v1.5
 
 ## Cache directory to store models
 CACHE_DIR=data
 
+## =======================================================
+## Diarization
+##
+## Only available on Modal
+## To enable diarization, you need to expose the files to be downloaded by the pipeline
+## =======================================================
+DIARIZATION_ENABLED=false
+DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
+
+
 ## =======================================================
 ## Sentry
 ## =======================================================
diff --git a/server/reflector/views/rtc_offer.py b/server/reflector/views/rtc_offer.py
index 3f0ca1ac..c537e878 100644
--- a/server/reflector/views/rtc_offer.py
+++ b/server/reflector/views/rtc_offer.py
@@ -71,7 +71,7 @@ async def rtc_offer_base(
 
     async def flush_pipeline_and_quit(close=True):
         # may be called twice
-        # 1. either the client ask to sotp the meeting
+        # 1. either the client asked to stop the meeting
         #    - we flush and close
         #    - when we receive the close event, we do nothing.
         # 2. or the client closes the connection

From 004787c055b17b8081738b3dbffd6ccd18013215 Mon Sep 17 00:00:00 2001
From: Sara
Date: Mon, 12 Aug 2024 12:24:14 +0200
Subject: [PATCH 2/3] upgrade modal
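
The Modal SDK renamed `Stub` to `App` and replaced the container
lifecycle context manager with decorators. The pattern applied to every
deployment below is roughly the following sketch (class and method
names are illustrative, not the actual reflector code):

    from modal import App, enter, exit, method

    app = App(name="example")  # was: stub = Stub(name="example")

    @app.cls(gpu="A10G")
    class Model:
        @enter()  # was: def __enter__(self); runs once per container start
        def load(self):
            self.ready = True  # load models / allocate resources here

        @exit()  # was: def __exit__(self, *args); runs at container shutdown
        def close(self):
            self.ready = False  # release resources here

        @method()
        def run(self, x):
            return x

The decorator form lets Modal run the setup and teardown exactly once
per container rather than per request.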
---
 .../gpu/modal_deployments/reflector_diarizer.py    | 19 +++++++++----------
 server/gpu/modal_deployments/reflector_llm.py      | 14 ++++++++------
 .../modal_deployments/reflector_llm_zephyr.py      | 14 ++++++++------
 .../modal_deployments/reflector_transcriber.py     | 11 ++++++-----
 .../modal_deployments/reflector_translator.py      | 11 ++++++-----
 5 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/server/gpu/modal_deployments/reflector_diarizer.py b/server/gpu/modal_deployments/reflector_diarizer.py
index ddab9950..db46b83c 100644
--- a/server/gpu/modal_deployments/reflector_diarizer.py
+++ b/server/gpu/modal_deployments/reflector_diarizer.py
@@ -6,12 +6,12 @@ Reflector GPU backend - diarizer
 import os
 
 import modal.gpu
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter
 from pydantic import BaseModel
 
-PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.0"
+PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.1"
 MODEL_DIR = "/root/diarization_models"
-stub = Stub(name="reflector-diarizer")
+app = App(name="reflector-diarizer")
 
 
 def migrate_cache_llm():
@@ -33,7 +33,6 @@ def download_pyannote_audio():
     Pipeline.from_pretrained(
-        "pyannote/speaker-diarization-3.0",
+        PYANNOTE_MODEL_NAME,
         cache_dir=MODEL_DIR,
-        use_auth_token=os.environ["HF_TOKEN"]
     )
@@ -54,7 +53,7 @@ diarizer_image = (
         "hf-transfer"
     )
     .run_function(migrate_cache_llm)
-    .run_function(download_pyannote_audio, secrets=[modal.Secret.from_name("my-huggingface-secret")])
+    .run_function(download_pyannote_audio)
     .env(
         {
             "LD_LIBRARY_PATH": (
@@ -66,16 +65,16 @@ diarizer_image = (
 )
 
 
-@stub.cls(
+@app.cls(
     gpu=modal.gpu.A100(memory=40),
     timeout=60 * 30,
     container_idle_timeout=60,
     allow_concurrent_inputs=1,
     image=diarizer_image,
-    secrets=[modal.Secret.from_name("my-huggingface-secret")],
 )
 class Diarizer:
-    def __enter__(self):
+    @enter()
+    def enter(self):
         import torch
         from pyannote.audio import Pipeline
@@ -124,7 +123,7 @@ class Diarizer:
 # -------------------------------------------------------------------
 
 
-@stub.function(
+@app.function(
     timeout=60 * 10,
     container_idle_timeout=60 * 3,
     allow_concurrent_inputs=40,
diff --git a/server/gpu/modal_deployments/reflector_llm.py b/server/gpu/modal_deployments/reflector_llm.py
index f1e9d166..8faf5909 100644
--- a/server/gpu/modal_deployments/reflector_llm.py
+++ b/server/gpu/modal_deployments/reflector_llm.py
@@ -9,7 +9,7 @@ import threading
 from typing import Optional
 
 import modal
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter, exit
 
 # LLM
 LLM_MODEL: str = "lmsys/vicuna-13b-v1.5"
@@ -19,7 +19,7 @@ LLM_MAX_NEW_TOKENS: int = 300
 
 IMAGE_MODEL_DIR = "/root/llm_models"
 
-stub = Stub(name="reflector-llm")
+app = App(name="reflector-llm")
 
 
 def download_llm():
@@ -64,7 +64,7 @@ llm_image = (
 )
 
 
-@stub.cls(
+@app.cls(
     gpu="A100",
     timeout=60 * 5,
     container_idle_timeout=60 * 5,
@@ -72,7 +72,8 @@ llm_image = (
     image=llm_image,
 )
 class LLM:
-    def __enter__(self):
+    @enter()
+    def enter(self):
         import torch
         from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 
@@ -113,7 +114,8 @@ class LLM:
 
         self.lock = threading.Lock()
 
-    def __exit__(self, *args):
+    @exit()
+    def exit(self):
         print("Exit llm")
 
     @method()
@@ -161,7 +163,7 @@ class LLM:
 # -------------------------------------------------------------------
 
 
-@stub.function(
+@app.function(
     container_idle_timeout=60 * 10,
     timeout=60 * 5,
     allow_concurrent_inputs=45,
diff --git a/server/gpu/modal_deployments/reflector_llm_zephyr.py b/server/gpu/modal_deployments/reflector_llm_zephyr.py
index b101f5f2..18608acd 100644
--- a/server/gpu/modal_deployments/reflector_llm_zephyr.py
+++ b/server/gpu/modal_deployments/reflector_llm_zephyr.py
@@ -9,7 +9,7 @@ import threading
 from typing import Optional
 
 import modal
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter, exit
 
 # LLM
 LLM_MODEL: str = "HuggingFaceH4/zephyr-7b-alpha"
@@ -19,7 +19,7 @@ LLM_MAX_NEW_TOKENS: int = 300
 
 IMAGE_MODEL_DIR = "/root/llm_models/zephyr"
 
-stub = Stub(name="reflector-llm-zephyr")
+app = App(name="reflector-llm-zephyr")
 
 
 def download_llm():
@@ -64,7 +64,7 @@ llm_image = (
 )
 
 
-@stub.cls(
+@app.cls(
     gpu="A10G",
     timeout=60 * 5,
     container_idle_timeout=60 * 5,
@@ -72,7 +72,8 @@ llm_image = (
     image=llm_image,
 )
 class LLM:
-    def __enter__(self):
+    @enter()
+    def enter(self):
         import torch
         from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 
@@ -116,7 +117,8 @@ class LLM:
         self.GenerationConfig = GenerationConfig
         self.lock = threading.Lock()
 
-    def __exit__(self, *args):
+    @exit()
+    def exit(self):
         print("Exit llm")
 
     @method()
@@ -169,7 +171,7 @@ class LLM:
 # -------------------------------------------------------------------
 
 
-@stub.function(
+@app.function(
     container_idle_timeout=60 * 10,
     timeout=60 * 5,
     allow_concurrent_inputs=30,
diff --git a/server/gpu/modal_deployments/reflector_transcriber.py b/server/gpu/modal_deployments/reflector_transcriber.py
index 4f746ded..5b7cb351 100644
--- a/server/gpu/modal_deployments/reflector_transcriber.py
+++ b/server/gpu/modal_deployments/reflector_transcriber.py
@@ -7,7 +7,7 @@ import os
 import tempfile
 import threading
 
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter
 from pydantic import BaseModel
 
 # Whisper
@@ -18,7 +18,7 @@ WHISPER_NUM_WORKERS: int = 1
 
 WHISPER_MODEL_DIR = "/root/transcription_models"
 
-stub = Stub(name="reflector-transcriber")
+app = App(name="reflector-transcriber")
 
 
 def download_whisper():
@@ -75,7 +75,7 @@ transcriber_image = (
 )
 
 
-@stub.cls(
+@app.cls(
     gpu="A10G",
     timeout=60 * 5,
     container_idle_timeout=60 * 5,
@@ -83,7 +83,8 @@ transcriber_image = (
     image=transcriber_image,
 )
 class Transcriber:
-    def __enter__(self):
+    @enter()
+    def enter(self):
         import faster_whisper
         import torch
 
@@ -149,7 +150,7 @@ class Transcriber:
 # -------------------------------------------------------------------
 
 
-@stub.function(
+@app.function(
     container_idle_timeout=60,
     timeout=60,
     allow_concurrent_inputs=40,
diff --git a/server/gpu/modal_deployments/reflector_translator.py b/server/gpu/modal_deployments/reflector_translator.py
index 8e920a5a..a21c33fe 100644
--- a/server/gpu/modal_deployments/reflector_translator.py
+++ b/server/gpu/modal_deployments/reflector_translator.py
@@ -6,7 +6,7 @@ Reflector GPU backend - transcriber
 import os
 import threading
 
-from modal import Image, Secret, Stub, asgi_app, method
+from modal import Image, Secret, App, asgi_app, method, enter
 from pydantic import BaseModel
 
 # Seamless M4T
@@ -20,7 +20,7 @@ HF_SEAMLESS_M4T_VOCODEREPO: str = "facebook/seamless-m4t-vocoder"
 SEAMLESS_GITEPO: str = "https://github.com/facebookresearch/seamless_communication.git"
 SEAMLESS_MODEL_DIR: str = "m4t"
 
-stub = Stub(name="reflector-translator")
+app = App(name="reflector-translator")
 
 
 def install_seamless_communication():
@@ -134,7 +134,7 @@ transcriber_image = (
 )
 
 
-@stub.cls(
+@app.cls(
     gpu="A10G",
     timeout=60 * 5,
     container_idle_timeout=60 * 5,
@@ -142,7 +142,8 @@ transcriber_image = (
     image=transcriber_image,
 )
 class Translator:
-    def __enter__(self):
+    @enter()
+    def enter(self):
         import torch
         from seamless_communication.inference.translator import Translator
 
@@ -379,7 +380,7 @@ class Translator:
 # -------------------------------------------------------------------
 
 
-@stub.function(
+@app.function(
     container_idle_timeout=60,
     timeout=60,
     allow_concurrent_inputs=40,

From 918daff66db59ca31df2ec98c16389de99c69bb8 Mon Sep 17 00:00:00 2001
From: Sara
Date: Mon, 12 Aug 2024 12:24:32 +0200
Subject: [PATCH 3/3] more flexible poetry
---
 server/README.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 server/README.md

diff --git a/server/README.md b/server/README.md
new file mode 100644
index 00000000..e69de29b