fix: remove unused settings and utils files (#522)

* fix: remove unused settings and utils files
* fix: remove migration done
* fix: remove outdated scripts
* fix: removing deployment of hermes, not used anymore
* fix: partially remove secret, still have to understand frontend.
2026-02-04 09:56:47 +00:00 · 2025-07-31 17:45:48 -06:00
parent 4ee19ed015
commit ad56165b54
17 changed files with 8 additions and 1046 deletions
--- a/server/.env_template
+++ b/server/.env_template
@@ -1,21 +0,0 @@
-TRANSCRIPT_BACKEND=modal
-TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
-
-LLM_BACKEND=modal
-LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
-
-TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
-ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
-DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
-
-BASE_URL=https://xxxxx.ngrok.app
-DIARIZATION_ENABLED=false
-
-SQS_POLLING_TIMEOUT_SECONDS=60
-
-# Summary LLM configuration
-SUMMARY_MODEL=monadical/private/smart
-SUMMARY_LLM_URL=
-SUMMARY_LLM_API_KEY=
--- a/server/env.example
+++ b/server/env.example
@@ -20,7 +20,6 @@ AUTH_JWT_AUDIENCE=

 ## Using local whisper
 #TRANSCRIPT_BACKEND=whisper
-#WHISPER_MODEL_SIZE=tiny

 ## Using serverless modal.com (require reflector-gpu-modal deployed)
 #TRANSCRIPT_BACKEND=modal
@@ -30,7 +29,7 @@ AUTH_JWT_AUDIENCE=

 TRANSCRIPT_BACKEND=modal
 TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
+TRANSCRIPT_MODAL_API_KEY=

 ## =======================================================
 ## Transcription backend
@@ -50,7 +49,7 @@ TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
 ## Using serverless modal.com (require reflector-gpu-modal deployed)
 LLM_BACKEND=modal
 LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
+LLM_MODAL_API_KEY=
 ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run


--- a/server/gpu/modal_deployments/reflector_vllm_hermes3.py
+++ b/server/gpu/modal_deployments/reflector_vllm_hermes3.py
@@ -1,171 +0,0 @@
-# # Run an OpenAI-Compatible vLLM Server
-
-import modal
-
-MODELS_DIR = "/llamas"
-MODEL_NAME = "NousResearch/Hermes-3-Llama-3.1-8B"
-N_GPU = 1
-
-
-def download_llm():
-    from huggingface_hub import snapshot_download
-
-    print("Downloading LLM model")
-    snapshot_download(
-        MODEL_NAME,
-        local_dir=f"{MODELS_DIR}/{MODEL_NAME}",
-        ignore_patterns=[
-            "*.pt",
-            "*.bin",
-            "*.pth",
-            "original/*",
-        ],  # Ensure safetensors
-    )
-    print("LLM model downloaded")
-
-
-def move_cache():
-    from transformers.utils import move_cache as transformers_move_cache
-
-    transformers_move_cache()
-
-
-vllm_image = (
-    modal.Image.debian_slim(python_version="3.10")
-    .pip_install("vllm==0.5.3post1")
-    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
-    .pip_install(
-        # "accelerate==0.34.2",
-        "einops==0.8.0",
-        "hf-transfer~=0.1",
-    )
-    .run_function(download_llm)
-    .run_function(move_cache)
-    .pip_install(
-        "bitsandbytes>=0.42.9",
-    )
-)
-
-app = modal.App("reflector-vllm-hermes3")
-
-
-@app.function(
-    image=vllm_image,
-    gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
-    timeout=60 * 5,
-    scaledown_window=60 * 5,
-    allow_concurrent_inputs=100,
-    secrets=[
-        modal.Secret.from_name("reflector-gpu"),
-    ],
-)
-@modal.asgi_app()
-def serve():
-    import os
-
-    import fastapi
-    import vllm.entrypoints.openai.api_server as api_server
-    from vllm.engine.arg_utils import AsyncEngineArgs
-    from vllm.engine.async_llm_engine import AsyncLLMEngine
-    from vllm.entrypoints.logger import RequestLogger
-    from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
-    from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
-    from vllm.usage.usage_lib import UsageContext
-
-    TOKEN = os.environ["REFLECTOR_GPU_APIKEY"]
-
-    # create a fastAPI app that uses vLLM's OpenAI-compatible router
-    web_app = fastapi.FastAPI(
-        title=f"OpenAI-compatible {MODEL_NAME} server",
-        description="Run an OpenAI-compatible LLM server with vLLM on modal.com",
-        version="0.0.1",
-        docs_url="/docs",
-    )
-
-    # security: CORS middleware for external requests
-    http_bearer = fastapi.security.HTTPBearer(
-        scheme_name="Bearer Token",
-        description="See code for authentication details.",
-    )
-    web_app.add_middleware(
-        fastapi.middleware.cors.CORSMiddleware,
-        allow_origins=["*"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
-    # security: inject dependency on authed routes
-    async def is_authenticated(api_key: str = fastapi.Security(http_bearer)):
-        if api_key.credentials != TOKEN:
-            raise fastapi.HTTPException(
-                status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
-                detail="Invalid authentication credentials",
-            )
-        return {"username": "authenticated_user"}
-
-    router = fastapi.APIRouter(dependencies=[fastapi.Depends(is_authenticated)])
-
-    # wrap vllm's router in auth router
-    router.include_router(api_server.router)
-    # add authed vllm to our fastAPI app
-    web_app.include_router(router)
-
-    engine_args = AsyncEngineArgs(
-        model=MODELS_DIR + "/" + MODEL_NAME,
-        tensor_parallel_size=N_GPU,
-        gpu_memory_utilization=0.90,
-        # max_model_len=8096,
-        enforce_eager=False,  # capture the graph for faster inference, but slower cold starts (30s > 20s)
-        # --- 4 bits load
-        # quantization="bitsandbytes",
-        # load_format="bitsandbytes",
-    )
-
-    engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER
-    )
-
-    model_config = get_model_config(engine)
-
-    request_logger = RequestLogger(max_log_len=2048)
-
-    api_server.openai_serving_chat = OpenAIServingChat(
-        engine,
-        model_config=model_config,
-        served_model_names=[MODEL_NAME],
-        chat_template=None,
-        response_role="assistant",
-        lora_modules=[],
-        prompt_adapters=[],
-        request_logger=request_logger,
-    )
-    api_server.openai_serving_completion = OpenAIServingCompletion(
-        engine,
-        model_config=model_config,
-        served_model_names=[MODEL_NAME],
-        lora_modules=[],
-        prompt_adapters=[],
-        request_logger=request_logger,
-    )
-
-    return web_app
-
-
-def get_model_config(engine):
-    import asyncio
-
-    try:  # adapted from vLLM source -- https://github.com/vllm-project/vllm/blob/507ef787d85dec24490069ffceacbd6b161f4f72/vllm/entrypoints/openai/api_server.py#L235C1-L247C1
-        event_loop = asyncio.get_running_loop()
-    except RuntimeError:
-        event_loop = None
-
-    if event_loop is not None and event_loop.is_running():
-        # If the current is instanced by Ray Serve,
-        # there is already a running event loop
-        model_config = event_loop.run_until_complete(engine.get_model_config())
-    else:
-        # When using single vLLM without engine_use_ray
-        model_config = asyncio.run(engine.get_model_config())
-
-    return model_config
--- a/server/migration.load
+++ b/server/migration.load
@@ -1,16 +0,0 @@
-LOAD DATABASE
-     FROM sqlite:///app/reflector.sqlite3
-     INTO pgsql://reflector:reflector@postgres:5432/reflector
-WITH
-    include drop,
-    create tables,
-    create indexes,
-    reset sequences,
-    preserve index names,
-    prefetch rows = 10
-SET
-    work_mem to '512MB',
-    maintenance_work_mem to '1024MB'
-CAST
-    column transcript.duration to float using (lambda (val) (when val (format nil "~f" val)))
-;
--- a/server/reflector/llm/base.py
+++ b/server/reflector/llm/base.py
@@ -61,7 +61,7 @@ class LLM:
        Return an instance depending on the settings.
        Settings used:

-        - `LLM_BACKEND`: key of the backend, defaults to `oobabooga`
+        - `LLM_BACKEND`: key of the backend
        - `LLM_URL`: url of the backend
        """
        if name is None:
--- a/server/reflector/llm/llm_oobabooga.py
+++ b/server/reflector/llm/llm_oobabooga.py
@@ -1,29 +0,0 @@
-import httpx
-
-from reflector.llm.base import LLM
-from reflector.settings import settings
-
-
-class OobaboogaLLM(LLM):
-    def __init__(self, model_name: str | None = None):
-        super().__init__()
-
-    async def _generate(
-        self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
-    ):
-        json_payload = {"prompt": prompt}
-        if gen_schema:
-            json_payload["gen_schema"] = gen_schema
-        if gen_cfg:
-            json_payload.update(gen_cfg)
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                settings.LLM_URL,
-                headers={"Content-Type": "application/json"},
-                json=json_payload,
-            )
-            response.raise_for_status()
-            return response.json()
-
-
-LLM.register("oobabooga", OobaboogaLLM)
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -8,8 +8,6 @@ class Settings(BaseSettings):
        extra="ignore",
    )

-    OPENMP_KMP_DUPLICATE_LIB_OK: bool = False
-
    # CORS
    CORS_ORIGIN: str = "*"
    CORS_ALLOW_CREDENTIALS: bool = False
@@ -20,26 +18,6 @@ class Settings(BaseSettings):
    # local data directory (audio for no)
    DATA_DIR: str = "./data"

-    # Whisper
-    WHISPER_MODEL_SIZE: str = "tiny"
-    WHISPER_REAL_TIME_MODEL_SIZE: str = "tiny"
-
-    # Summarizer
-    SUMMARIZER_MODEL: str = "facebook/bart-large-cnn"
-    SUMMARIZER_INPUT_ENCODING_MAX_LENGTH: int = 1024
-    SUMMARIZER_MAX_LENGTH: int = 2048
-    SUMMARIZER_BEAM_SIZE: int = 6
-    SUMMARIZER_MAX_CHUNK_LENGTH: int = 1024
-    SUMMARIZER_USING_CHUNKS: bool = True
-
-    # Audio
-    AUDIO_BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME: str = "aggregator"
-    AUDIO_AV_FOUNDATION_DEVICE_ID: int = 1
-    AUDIO_CHANNELS: int = 2
-    AUDIO_SAMPLING_RATE: int = 48000
-    AUDIO_SAMPLING_WIDTH: int = 2
-    AUDIO_BUFFER_SIZE: int = 256 * 960
-
    # Audio Transcription
    # backends: whisper, modal
    TRANSCRIPT_BACKEND: str = "whisper"
@@ -63,8 +41,8 @@ class Settings(BaseSettings):
    TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None

    # LLM
-    # available backend: openai, modal, oobabooga
-    LLM_BACKEND: str = "oobabooga"
+    # available backend: openai, modal
+    LLM_BACKEND: str = "modal"

    # LLM common configuration
    LLM_URL: str | None = None
--- a/server/reflector/stream_client.py
+++ b/server/reflector/stream_client.py
@@ -1,14 +1,13 @@
 import asyncio
 import time
 import uuid
+from os import environ

 import httpx
 import stamina
 from aiortc import RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaPlayer, MediaRelay
-
 from reflector.logger import logger
-from reflector.settings import settings


 class StreamClient:
@@ -43,8 +42,9 @@ class StreamClient:
        else:
            if self.relay is None:
                self.relay = MediaRelay()
+            audio_device_id = int(environ.get("AUDIO_AV_FOUNDATION_DEVICE_ID", 1))
            self.player = MediaPlayer(
-                f":{settings.AUDIO_AV_FOUNDATION_DEVICE_ID}",
+                f":{audio_device_id}",
                format="avfoundation",
                options={"channels": "2"},
            )
--- a/server/reflector/utils/file_utils.py
+++ b/server/reflector/utils/file_utils.py
@@ -1,59 +0,0 @@
-"""
-Utility file for file handling related functions, including file downloads and
-uploads to cloud storage
-"""
-
-import sys
-from typing import List, NoReturn
-
-import boto3
-import botocore
-
-from .log_utils import LOGGER
-from .run_utils import SECRETS
-
-BUCKET_NAME = SECRETS["AWS-S3"]["BUCKET_NAME"]
-
-s3 = boto3.client(
-    "s3",
-    aws_access_key_id=SECRETS["AWS-S3"]["AWS_ACCESS_KEY"],
-    aws_secret_access_key=SECRETS["AWS-S3"]["AWS_SECRET_KEY"],
-)
-
-
-def upload_files(files_to_upload: List[str]) -> NoReturn:
-    """
-    Upload a list of files to the configured S3 bucket
-    :param files_to_upload: List of files to upload
-    :return: None
-    """
-    for key in files_to_upload:
-        LOGGER.info("Uploading file " + key)
-        try:
-            s3.upload_file(key, BUCKET_NAME, key)
-        except botocore.exceptions.ClientError as exception:
-            print(exception.response)
-
-
-def download_files(files_to_download: List[str]) -> NoReturn:
-    """
-    Download a list of files from the configured S3 bucket
-    :param files_to_download: List of files to download
-    :return: None
-    """
-    for key in files_to_download:
-        LOGGER.info("Downloading file " + key)
-        try:
-            s3.download_file(BUCKET_NAME, key, key)
-        except botocore.exceptions.ClientError as exception:
-            if exception.response["Error"]["Code"] == "404":
-                print("The object does not exist.")
-            else:
-                raise
-
-
-if __name__ == "__main__":
-    if sys.argv[1] == "download":
-        download_files([sys.argv[2]])
-    elif sys.argv[1] == "upload":
-        upload_files([sys.argv[2]])
--- a/server/reflector/utils/format_output.py
+++ b/server/reflector/utils/format_output.py
@@ -1,38 +0,0 @@
-"""
-Utility function to format the artefacts created during Reflector run
-"""
-
-import json
-
-with open("../artefacts/meeting_titles_and_summaries.txt", "r", encoding="utf-8") as f:
-    outputs = f.read()
-
-outputs = json.loads(outputs)
-
-transcript_file = open("../artefacts/meeting_transcript.txt", "a", encoding="utf-8")
-title_desc_file = open(
-    "../artefacts/meeting_title_description.txt", "a", encoding="utf-8"
-)
-summary_file = open("../artefacts/meeting_summary.txt", "a", encoding="utf-8")
-
-for item in outputs["topics"]:
-    transcript_file.write(item["transcript"])
-    summary_file.write(item["description"])
-
-    title_desc_file.write("TITLE: \n")
-    title_desc_file.write(item["title"])
-    title_desc_file.write("\n")
-
-    title_desc_file.write("DESCRIPTION: \n")
-    title_desc_file.write(item["description"])
-    title_desc_file.write("\n")
-
-    title_desc_file.write("TRANSCRIPT: \n")
-    title_desc_file.write(item["transcript"])
-    title_desc_file.write("\n")
-
-    title_desc_file.write("---------------------------------------- \n\n")
-
-transcript_file.close()
-title_desc_file.close()
-summary_file.close()
--- a/server/reflector/utils/run_utils.py
+++ b/server/reflector/utils/run_utils.py
@@ -1,55 +0,0 @@
-"""
-Utility file for server side asynchronous task running and config objects
-"""
-
-import asyncio
-import contextlib
-from functools import partial
-from threading import Lock
-from typing import ContextManager, Generic, TypeVar
-
-
-def run_in_executor(func, *args, executor=None, **kwargs):
-    """
-    Run the function in an executor, unblocking the main loop
-    :param func: Function to be run in executor
-    :param args: function parameters
-    :param executor: executor instance [Thread | Process]
-    :param kwargs: Additional parameters
-    :return: Future of function result upon completion
-    """
-    callback = partial(func, *args, **kwargs)
-    loop = asyncio.get_event_loop()
-    return loop.run_in_executor(executor, callback)
-
-
-# Genetic type template
-T = TypeVar("T")
-
-
-class Mutex(Generic[T]):
-    """
-    Mutex class to implement lock/release of a shared
-    protected variable
-    """
-
-    def __init__(self, value: T):
-        """
-        Create an instance of Mutex wrapper for the given resource
-        :param value: Shared resources to be thread protected
-        """
-        self.__value = value
-        self.__lock = Lock()
-
-    @contextlib.contextmanager
-    def lock(self) -> ContextManager[T]:
-        """
-        Lock the resource with a mutex to be used within a context block
-        The lock is automatically released on context exit
-        :return: Shared resource
-        """
-        self.__lock.acquire()
-        try:
-            yield self.__value
-        finally:
-            self.__lock.release()
--- a/server/reflector/utils/text_utils.py
+++ b/server/reflector/utils/text_utils.py
@@ -1,262 +0,0 @@
-"""
-Utility file for all text processing related functionalities
-"""
-
-import datetime
-from typing import List
-
-import nltk
-import torch
-from log_utils import LOGGER
-from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
-from run_utils import CONFIG
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
-from transformers import BartForConditionalGeneration, BartTokenizer
-
-nltk.download("punkt", quiet=True)
-
-
-def preprocess_sentence(sentence: str) -> str:
-    """
-    Filter out undesirable tokens from thr sentence
-    :param sentence:
-    :return:
-    """
-    stop_words = set(stopwords.words("english"))
-    tokens = word_tokenize(sentence.lower())
-    tokens = [token for token in tokens if token.isalnum() and token not in stop_words]
-    return " ".join(tokens)
-
-
-def compute_similarity(sent1: str, sent2: str) -> float:
-    """
-    Compute the similarity
-    """
-    tfidf_vectorizer = TfidfVectorizer()
-    if sent1 is not None and sent2 is not None:
-        tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2])
-        return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
-    return 0.0
-
-
-def remove_almost_alike_sentences(sentences: List[str], threshold=0.7) -> List[str]:
-    """
-    Filter sentences that are similar beyond a set threshold
-    :param sentences:
-    :param threshold:
-    :return:
-    """
-    num_sentences = len(sentences)
-    removed_indices = set()
-
-    for i in range(num_sentences):
-        if i not in removed_indices:
-            for j in range(i + 1, num_sentences):
-                if j not in removed_indices:
-                    l_i = len(sentences[i])
-                    l_j = len(sentences[j])
-                    if l_i == 0 or l_j == 0:
-                        if l_i == 0:
-                            removed_indices.add(i)
-                        if l_j == 0:
-                            removed_indices.add(j)
-                    else:
-                        sentence1 = preprocess_sentence(sentences[i])
-                        sentence2 = preprocess_sentence(sentences[j])
-                        if len(sentence1) != 0 and len(sentence2) != 0:
-                            similarity = compute_similarity(sentence1, sentence2)
-
-                            if similarity >= threshold:
-                                removed_indices.add(max(i, j))
-
-    filtered_sentences = [
-        sentences[i] for i in range(num_sentences) if i not in removed_indices
-    ]
-    return filtered_sentences
-
-
-def remove_outright_duplicate_sentences_from_chunk(chunk: str) -> List[str]:
-    """
-    Remove repetitive sentences
-    :param chunk:
-    :return:
-    """
-    chunk_text = chunk["text"]
-    sentences = nltk.sent_tokenize(chunk_text)
-    nonduplicate_sentences = list(dict.fromkeys(sentences))
-    return nonduplicate_sentences
-
-
-def remove_whisper_repetitive_hallucination(
-    nonduplicate_sentences: List[str],
-) -> List[str]:
-    """
-    Remove sentences that are repeated as a result of Whisper
-    hallucinations
-    :param nonduplicate_sentences:
-    :return:
-    """
-    chunk_sentences = []
-
-    for sent in nonduplicate_sentences:
-        temp_result = ""
-        seen = {}
-        words = nltk.word_tokenize(sent)
-        n_gram_filter = 3
-        for i in range(len(words)):
-            if (
-                str(words[i : i + n_gram_filter]) in seen
-                and seen[str(words[i : i + n_gram_filter])]
-                == words[i + 1 : i + n_gram_filter + 2]
-            ):
-                pass
-            else:
-                seen[str(words[i : i + n_gram_filter])] = words[
-                    i + 1 : i + n_gram_filter + 2
-                ]
-                temp_result += words[i]
-                temp_result += " "
-        chunk_sentences.append(temp_result)
-    return chunk_sentences
-
-
-def post_process_transcription(whisper_result: dict) -> dict:
-    """
-    Parent function to perform post-processing on the transcription result
-    :param whisper_result:
-    :return:
-    """
-    transcript_text = ""
-    for chunk in whisper_result["chunks"]:
-        nonduplicate_sentences = remove_outright_duplicate_sentences_from_chunk(chunk)
-        chunk_sentences = remove_whisper_repetitive_hallucination(
-            nonduplicate_sentences
-        )
-        similarity_matched_sentences = remove_almost_alike_sentences(chunk_sentences)
-        chunk["text"] = " ".join(similarity_matched_sentences)
-        transcript_text += chunk["text"]
-    whisper_result["text"] = transcript_text
-    return whisper_result
-
-
-def summarize_chunks(chunks: List[str], tokenizer, model) -> List[str]:
-    """
-    Summarize each chunk using a summarizer model
-    :param chunks:
-    :param tokenizer:
-    :param model:
-    :return:
-    """
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    summaries = []
-    for c in chunks:
-        input_ids = tokenizer.encode(c, return_tensors="pt")
-        input_ids = input_ids.to(device)
-        with torch.no_grad():
-            summary_ids = model.generate(
-                input_ids,
-                num_beams=int(CONFIG["SUMMARIZER"]["BEAM_SIZE"]),
-                length_penalty=2.0,
-                max_length=int(CONFIG["SUMMARIZER"]["MAX_LENGTH"]),
-                early_stopping=True,
-            )
-            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-            summaries.append(summary)
-    return summaries
-
-
-def chunk_text(
-    text: str, max_chunk_length: int = int(CONFIG["SUMMARIZER"]["MAX_CHUNK_LENGTH"])
-) -> List[str]:
-    """
-    Split text into smaller chunks.
-    :param text: Text to be chunked
-    :param max_chunk_length: length of chunk
-    :return: chunked texts
-    """
-    sentences = nltk.sent_tokenize(text)
-    chunks = []
-    current_chunk = ""
-    for sentence in sentences:
-        if len(current_chunk) + len(sentence) < max_chunk_length:
-            current_chunk += f" {sentence.strip()}"
-        else:
-            chunks.append(current_chunk.strip())
-            current_chunk = f"{sentence.strip()}"
-    chunks.append(current_chunk.strip())
-    return chunks
-
-
-def summarize(
-    transcript_text: str,
-    timestamp: datetime.datetime.timestamp,
-    real_time: bool = False,
-    chunk_summarize: str = CONFIG["SUMMARIZER"]["SUMMARIZE_USING_CHUNKS"],
-):
-    """
-    Summarize the given text either as a whole or as chunks as needed
-    :param transcript_text:
-    :param timestamp:
-    :param real_time:
-    :param chunk_summarize:
-    :return:
-    """
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    summary_model = CONFIG["SUMMARIZER"]["SUMMARY_MODEL"]
-    if not summary_model:
-        summary_model = "facebook/bart-large-cnn"
-
-    # Summarize the generated transcript using the BART model
-    LOGGER.info(f"Loading BART model: {summary_model}")
-    tokenizer = BartTokenizer.from_pretrained(summary_model)
-    model = BartForConditionalGeneration.from_pretrained(summary_model)
-    model = model.to(device)
-
-    output_file = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
-    if real_time:
-        output_file = "real_time_" + output_file
-
-    if chunk_summarize != "YES":
-        max_length = int(CONFIG["SUMMARIZER"]["INPUT_ENCODING_MAX_LENGTH"])
-        inputs = tokenizer.batch_encode_plus(
-            [transcript_text],
-            truncation=True,
-            padding="longest",
-            max_length=max_length,
-            return_tensors="pt",
-        )
-        inputs = inputs.to(device)
-
-        with torch.no_grad():
-            num_beans = int(CONFIG["SUMMARIZER"]["BEAM_SIZE"])
-            max_length = int(CONFIG["SUMMARIZER"]["MAX_LENGTH"])
-            summaries = model.generate(
-                inputs["input_ids"],
-                num_beams=num_beans,
-                length_penalty=2.0,
-                max_length=max_length,
-                early_stopping=True,
-            )
-
-        decoded_summaries = [
-            tokenizer.decode(
-                summary, skip_special_tokens=True, clean_up_tokenization_spaces=False
-            )
-            for summary in summaries
-        ]
-        summary = " ".join(decoded_summaries)
-        with open("./artefacts/" + output_file, "w", encoding="utf-8") as file:
-            file.write(summary.strip() + "\n")
-    else:
-        LOGGER.info("Breaking transcript into smaller chunks")
-        chunks = chunk_text(transcript_text)
-
-        LOGGER.info(f"Transcript broken into {len(chunks)} chunks of at most 500 words")
-
-        LOGGER.info(f"Writing summary text to: {output_file}")
-        with open(output_file, "w") as f:
-            summaries = summarize_chunks(chunks, tokenizer, model)
-            for summary in summaries:
-                f.write(summary.strip() + " ")
--- a/server/reflector/utils/viz_utils.py
+++ b/server/reflector/utils/viz_utils.py
@@ -1,283 +0,0 @@
-"""
-Utility file for all visualization related functions
-"""
-
-import ast
-import collections
-import datetime
-import os
-import pickle
-from typing import NoReturn
-
-import matplotlib.pyplot as plt
-import pandas as pd
-import scattertext as st
-import spacy
-from nltk.corpus import stopwords
-from wordcloud import STOPWORDS, WordCloud
-
-en = spacy.load("en_core_web_md")
-spacy_stopwords = en.Defaults.stop_words
-
-STOPWORDS = (
-    set(STOPWORDS).union(set(stopwords.words("english"))).union(set(spacy_stopwords))
-)
-
-
-def create_wordcloud(
-    timestamp: datetime.datetime.timestamp, real_time: bool = False
-) -> NoReturn:
-    """
-    Create a basic word cloud visualization of transcribed text
-    :return: None. The wordcloud image is saved locally
-    """
-    filename = "transcript"
-    if real_time:
-        filename = (
-            "real_time_"
-            + filename
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".txt"
-        )
-    else:
-        filename += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
-
-    with open("./artefacts/" + filename, "r") as f:
-        transcription_text = f.read()
-
-    # python_mask = np.array(PIL.Image.open("download1.png"))
-
-    wordcloud = WordCloud(
-        height=800,
-        width=800,
-        background_color="white",
-        stopwords=STOPWORDS,
-        min_font_size=8,
-    ).generate(transcription_text)
-
-    # Plot wordcloud and save image
-    plt.figure(facecolor=None)
-    plt.imshow(wordcloud, interpolation="bilinear")
-    plt.axis("off")
-    plt.tight_layout(pad=0)
-
-    wordcloud = "wordcloud"
-    if real_time:
-        wordcloud = (
-            "real_time_"
-            + wordcloud
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".png"
-        )
-    else:
-        wordcloud += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
-
-    plt.savefig("./artefacts/" + wordcloud)
-
-
-def create_talk_diff_scatter_viz(
-    timestamp: datetime.datetime.timestamp, real_time: bool = False
-) -> NoReturn:
-    """
-    Perform agenda vs transcription diff to see covered topics.
-    Create a scatter plot of words in topics.
-    :return: None. Saved locally.
-    """
-    spacy_model = "en_core_web_md"
-    nlp = spacy.load(spacy_model)
-    nlp.add_pipe("sentencizer")
-
-    agenda_topics = []
-    agenda = []
-    # Load the agenda
-    with open(os.path.join(os.getcwd(), "agenda-headers.txt"), "r") as f:
-        for line in f.readlines():
-            if line.strip():
-                agenda.append(line.strip())
-                agenda_topics.append(line.split(":")[0])
-
-    # Load the transcription with timestamp
-    if real_time:
-        filename = (
-            "./artefacts/real_time_transcript_with_timestamp_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".txt"
-        )
-    else:
-        filename = (
-            "./artefacts/transcript_with_timestamp_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".txt"
-        )
-    with open(filename) as file:
-        transcription_timestamp_text = file.read()
-
-    res = ast.literal_eval(transcription_timestamp_text)
-    chunks = res["chunks"]
-
-    # create df for processing
-    df = pd.DataFrame.from_dict(res["chunks"])
-
-    covered_items = {}
-    # ts: timestamp
-    # Map each timestamped chunk with top1 and top2 matched agenda
-    ts_to_topic_mapping_top_1 = {}
-    ts_to_topic_mapping_top_2 = {}
-
-    # Also create a mapping of the different timestamps
-    # in which each topic was covered
-    topic_to_ts_mapping_top_1 = collections.defaultdict(list)
-    topic_to_ts_mapping_top_2 = collections.defaultdict(list)
-
-    similarity_threshold = 0.7
-
-    for c in chunks:
-        doc_transcription = nlp(c["text"])
-        topic_similarities = []
-        for item in range(len(agenda)):
-            item_doc = nlp(agenda[item])
-            # if not doc_transcription or not all
-            # (token.has_vector for token in doc_transcription):
-            if not doc_transcription:
-                continue
-            similarity = doc_transcription.similarity(item_doc)
-            topic_similarities.append((item, similarity))
-        topic_similarities.sort(key=lambda x: x[1], reverse=True)
-        for i in range(2):
-            if topic_similarities[i][1] >= similarity_threshold:
-                covered_items[agenda[topic_similarities[i][0]]] = True
-            # top1 match
-            if i == 0:
-                ts_to_topic_mapping_top_1[c["timestamp"]] = agenda_topics[
-                    topic_similarities[i][0]
-                ]
-                topic_to_ts_mapping_top_1[
-                    agenda_topics[topic_similarities[i][0]]
-                ].append(c["timestamp"])
-            # top2 match
-            else:
-                ts_to_topic_mapping_top_2[c["timestamp"]] = agenda_topics[
-                    topic_similarities[i][0]
-                ]
-                topic_to_ts_mapping_top_2[
-                    agenda_topics[topic_similarities[i][0]]
-                ].append(c["timestamp"])
-
-    def create_new_columns(record: dict) -> dict:
-        """
-        Accumulate the mapping information into the df
-        :param record:
-        :return:
-        """
-        record["ts_to_topic_mapping_top_1"] = ts_to_topic_mapping_top_1[
-            record["timestamp"]
-        ]
-        record["ts_to_topic_mapping_top_2"] = ts_to_topic_mapping_top_2[
-            record["timestamp"]
-        ]
-        return record
-
-    df = df.apply(create_new_columns, axis=1)
-
-    # Count the number of items covered and calculate the percentage
-    num_covered_items = sum(covered_items.values())
-    percentage_covered = num_covered_items / len(agenda) * 100
-
-    # Print the results
-    print("💬 Agenda items covered in the transcription:")
-    for item in agenda:
-        if item in covered_items and covered_items[item]:
-            print("✅ ", item)
-        else:
-            print("❌ ", item)
-    print("📊 Coverage: {:.2f}%".format(percentage_covered))
-
-    # Save df, mappings for further experimentation
-    df_name = "df"
-    if real_time:
-        df_name = (
-            "real_time_"
-            + df_name
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".pkl"
-        )
-    else:
-        df_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
-    df.to_pickle("./artefacts/" + df_name)
-
-    my_mappings = [
-        ts_to_topic_mapping_top_1,
-        ts_to_topic_mapping_top_2,
-        topic_to_ts_mapping_top_1,
-        topic_to_ts_mapping_top_2,
-    ]
-
-    mappings_name = "mappings"
-    if real_time:
-        mappings_name = (
-            "real_time_"
-            + mappings_name
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".pkl"
-        )
-    else:
-        mappings_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
-    pickle.dump(my_mappings, open("./artefacts/" + mappings_name, "wb"))
-
-    # to load,  my_mappings = pickle.load( open ("mappings.pkl", "rb") )
-
-    # pick the 2 most matched topic to be used for plotting
-    topic_times = collections.defaultdict(int)
-    for key in ts_to_topic_mapping_top_1.keys():
-        if key[0] is None or key[1] is None:
-            continue
-        duration = key[1] - key[0]
-        topic_times[ts_to_topic_mapping_top_1[key]] += duration
-
-    topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
-
-    if len(topic_times) > 1:
-        cat_1 = topic_times[0][0]
-        cat_1_name = topic_times[0][0]
-        cat_2_name = topic_times[1][0]
-
-        # Scatter plot of topics
-        df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
-        corpus = (
-            st.CorpusFromParsedDocuments(
-                df, category_col="ts_to_topic_mapping_top_1", parsed_col="parse"
-            )
-            .build()
-            .get_unigram_corpus()
-            .compact(st.AssociationCompactor(2000))
-        )
-        html = st.produce_scattertext_explorer(
-            corpus,
-            category=cat_1,
-            category_name=cat_1_name,
-            not_category_name=cat_2_name,
-            minimum_term_frequency=0,
-            pmi_threshold_coefficient=0,
-            width_in_pixels=1000,
-            transform=st.Scalers.dense_rank,
-        )
-        if real_time:
-            with open(
-                "./artefacts/real_time_scatter_"
-                + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-                + ".html",
-                "w",
-            ) as file:
-                file.write(html)
-        else:
-            with open(
-                "./artefacts/scatter_"
-                + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-                + ".html",
-                "w",
-            ) as file:
-                file.write(html)
--- a/server/scripts/clear_artefacts.sh
+++ b/server/scripts/clear_artefacts.sh
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-# Directory to search for Python files
-cwd=$(pwd)
-last_component="${cwd##*/}"
-
-if [ "$last_component" = "reflector" ]; then
-    directory="./artefacts"
-elif [ "$last_component" = "scripts" ]; then
-    directory="../artefacts"
-fi
-
-# Pattern to match Python files (e.g., "*.py" for all .py files)
-transcript_file_pattern="transcript_*.txt"
-summary_file_pattern="summary_*.txt"
-pickle_file_pattern="*.pkl"
-html_file_pattern="*.html"
-png_file_pattern="wordcloud*.png"
-mp3_file_pattern="*.mp3"
-mp4_file_pattern="*.mp4"
-m4a_file_pattern="*.m4a"
-
-find "$directory" -type f -name "$transcript_file_pattern" -delete
-find "$directory" -type f -name "$summary_file_pattern" -delete
-find "$directory" -type f -name "$pickle_file_pattern" -delete
-find "$directory" -type f -name "$html_file_pattern" -delete
-find "$directory" -type f -name "$png_file_pattern" -delete
-find "$directory" -type f -name "$mp3_file_pattern" -delete
-find "$directory" -type f -name "$mp4_file_pattern" -delete
-find "$directory" -type f -name "$m4a_file_pattern" -delete
--- a/server/scripts/setup_pipeline_dependencies.sh
+++ b/server/scripts/setup_pipeline_dependencies.sh
@@ -1,39 +0,0 @@
-#!/bin/sh
-
-# Upgrade pip
-pip install --upgrade pip
-
-# Default to CPU Installation of JAX
-jax_mode="jax[cpu]"
-
-# Install JAX
-if [ "$1" == "cpu" ]
-then
-  jax_mode="jax[cpu]"
-elif [ "$1" == "cuda11" ]
-then
-  jax_mode="jax[cuda11_pip]"
-elif [ "$1" == "cuda12" ]
-then
-  jax_mode="jax[cuda12_pip]"
-fi
-
-pip install --upgrade "$jax_mode"
-
-# Install Whisper-JAX base
-pip install git+https://github.com/sanchit-gandhi/whisper-jax.git
-
-# Update to latest version
-pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git
-
-cwd=$(pwd)
-last_component="${cwd##*/}"
-if [ "$last_component" = "reflector" ]; then
-    pip install -r pipeline-requirements.txt
-elif [ "$last_component" = "scripts" ]; then
-    pip install -r ../pipeline-requirements.txt
-fi
-
-# download spacy models
-spacy download en_core_web_sm
-spacy download en_core_web_md
--- a/server/scripts/setup_server_dependencies.sh
+++ b/server/scripts/setup_server_dependencies.sh
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-pip install --upgrade pip
-
-cwd=$(pwd)
-last_component="${cwd##*/}"
-if [ "$last_component" = "reflector" ]; then
-    pip install -r server-requirements.txt
-elif [ "$last_component" = "scripts" ]; then
-    pip install -r ../server-requirements.txt
-fi
--- a/www/.env_template
+++ b/www/.env_template
@@ -1 +0,0 @@
-ZULIP_API_KEY=<omitted, ask in zulip>