mirror of https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
fix: remove unused settings and utils files (#522)
* fix: remove unused settings and utils files
* fix: remove migration done
* fix: remove outdated scripts
* fix: removing deployment of hermes, not used anymore
* fix: partially remove secret, still have to understand frontend.
@@ -1,21 +0,0 @@
-TRANSCRIPT_BACKEND=modal
-TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
-
-LLM_BACKEND=modal
-LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
-
-TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
-ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
-DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
-
-BASE_URL=https://xxxxx.ngrok.app
-DIARIZATION_ENABLED=false
-
-SQS_POLLING_TIMEOUT_SECONDS=60
-
-# Summary LLM configuration
-SUMMARY_MODEL=monadical/private/smart
-SUMMARY_LLM_URL=
-SUMMARY_LLM_API_KEY=
@@ -20,7 +20,6 @@ AUTH_JWT_AUDIENCE=
 
 ## Using local whisper
 #TRANSCRIPT_BACKEND=whisper
-#WHISPER_MODEL_SIZE=tiny
 
 ## Using serverless modal.com (require reflector-gpu-modal deployed)
 #TRANSCRIPT_BACKEND=modal
@@ -30,7 +29,7 @@ AUTH_JWT_AUDIENCE=
 
 TRANSCRIPT_BACKEND=modal
 TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
+TRANSCRIPT_MODAL_API_KEY=
 
 ## =======================================================
 ## Transcription backend
@@ -50,7 +49,7 @@ TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
 ## Using serverless modal.com (require reflector-gpu-modal deployed)
 LLM_BACKEND=modal
 LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
+LLM_MODAL_API_KEY=
 ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
 
 
@@ -1,171 +0,0 @@
-# # Run an OpenAI-Compatible vLLM Server
-
-import modal
-
-MODELS_DIR = "/llamas"
-MODEL_NAME = "NousResearch/Hermes-3-Llama-3.1-8B"
-N_GPU = 1
-
-
-def download_llm():
-    from huggingface_hub import snapshot_download
-
-    print("Downloading LLM model")
-    snapshot_download(
-        MODEL_NAME,
-        local_dir=f"{MODELS_DIR}/{MODEL_NAME}",
-        ignore_patterns=[
-            "*.pt",
-            "*.bin",
-            "*.pth",
-            "original/*",
-        ],  # Ensure safetensors
-    )
-    print("LLM model downloaded")
-
-
-def move_cache():
-    from transformers.utils import move_cache as transformers_move_cache
-
-    transformers_move_cache()
-
-
-vllm_image = (
-    modal.Image.debian_slim(python_version="3.10")
-    .pip_install("vllm==0.5.3post1")
-    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
-    .pip_install(
-        # "accelerate==0.34.2",
-        "einops==0.8.0",
-        "hf-transfer~=0.1",
-    )
-    .run_function(download_llm)
-    .run_function(move_cache)
-    .pip_install(
-        "bitsandbytes>=0.42.9",
-    )
-)
-
-app = modal.App("reflector-vllm-hermes3")
-
-
-@app.function(
-    image=vllm_image,
-    gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
-    timeout=60 * 5,
-    scaledown_window=60 * 5,
-    allow_concurrent_inputs=100,
-    secrets=[
-        modal.Secret.from_name("reflector-gpu"),
-    ],
-)
-@modal.asgi_app()
-def serve():
-    import os
-
-    import fastapi
-    import vllm.entrypoints.openai.api_server as api_server
-    from vllm.engine.arg_utils import AsyncEngineArgs
-    from vllm.engine.async_llm_engine import AsyncLLMEngine
-    from vllm.entrypoints.logger import RequestLogger
-    from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
-    from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
-    from vllm.usage.usage_lib import UsageContext
-
-    TOKEN = os.environ["REFLECTOR_GPU_APIKEY"]
-
-    # create a fastAPI app that uses vLLM's OpenAI-compatible router
-    web_app = fastapi.FastAPI(
-        title=f"OpenAI-compatible {MODEL_NAME} server",
-        description="Run an OpenAI-compatible LLM server with vLLM on modal.com",
-        version="0.0.1",
-        docs_url="/docs",
-    )
-
-    # security: CORS middleware for external requests
-    http_bearer = fastapi.security.HTTPBearer(
-        scheme_name="Bearer Token",
-        description="See code for authentication details.",
-    )
-    web_app.add_middleware(
-        fastapi.middleware.cors.CORSMiddleware,
-        allow_origins=["*"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
-    # security: inject dependency on authed routes
-    async def is_authenticated(api_key: str = fastapi.Security(http_bearer)):
-        if api_key.credentials != TOKEN:
-            raise fastapi.HTTPException(
-                status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
-                detail="Invalid authentication credentials",
-            )
-        return {"username": "authenticated_user"}
-
-    router = fastapi.APIRouter(dependencies=[fastapi.Depends(is_authenticated)])
-
-    # wrap vllm's router in auth router
-    router.include_router(api_server.router)
-    # add authed vllm to our fastAPI app
-    web_app.include_router(router)
-
-    engine_args = AsyncEngineArgs(
-        model=MODELS_DIR + "/" + MODEL_NAME,
-        tensor_parallel_size=N_GPU,
-        gpu_memory_utilization=0.90,
-        # max_model_len=8096,
-        enforce_eager=False,  # capture the graph for faster inference, but slower cold starts (30s > 20s)
-        # --- 4 bits load
-        # quantization="bitsandbytes",
-        # load_format="bitsandbytes",
-    )
-
-    engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER
-    )
-
-    model_config = get_model_config(engine)
-
-    request_logger = RequestLogger(max_log_len=2048)
-
-    api_server.openai_serving_chat = OpenAIServingChat(
-        engine,
-        model_config=model_config,
-        served_model_names=[MODEL_NAME],
-        chat_template=None,
-        response_role="assistant",
-        lora_modules=[],
-        prompt_adapters=[],
-        request_logger=request_logger,
-    )
-    api_server.openai_serving_completion = OpenAIServingCompletion(
-        engine,
-        model_config=model_config,
-        served_model_names=[MODEL_NAME],
-        lora_modules=[],
-        prompt_adapters=[],
-        request_logger=request_logger,
-    )
-
-    return web_app
-
-
-def get_model_config(engine):
-    import asyncio
-
-    try:  # adapted from vLLM source -- https://github.com/vllm-project/vllm/blob/507ef787d85dec24490069ffceacbd6b161f4f72/vllm/entrypoints/openai/api_server.py#L235C1-L247C1
-        event_loop = asyncio.get_running_loop()
-    except RuntimeError:
-        event_loop = None
-
-    if event_loop is not None and event_loop.is_running():
-        # If the current is instanced by Ray Serve,
-        # there is already a running event loop
-        model_config = event_loop.run_until_complete(engine.get_model_config())
-    else:
-        # When using single vLLM without engine_use_ray
-        model_config = asyncio.run(engine.get_model_config())
-
-    return model_config
@@ -1,16 +0,0 @@
-LOAD DATABASE
-    FROM sqlite:///app/reflector.sqlite3
-    INTO pgsql://reflector:reflector@postgres:5432/reflector
-WITH
-    include drop,
-    create tables,
-    create indexes,
-    reset sequences,
-    preserve index names,
-    prefetch rows = 10
-SET
-    work_mem to '512MB',
-    maintenance_work_mem to '1024MB'
-CAST
-    column transcript.duration to float using (lambda (val) (when val (format nil "~f" val)))
-;
@@ -61,7 +61,7 @@ class LLM:
         Return an instance depending on the settings.
         Settings used:
 
-        - `LLM_BACKEND`: key of the backend, defaults to `oobabooga`
+        - `LLM_BACKEND`: key of the backend
        - `LLM_URL`: url of the backend
         """
         if name is None:
@@ -1,29 +0,0 @@
-import httpx
-
-from reflector.llm.base import LLM
-from reflector.settings import settings
-
-
-class OobaboogaLLM(LLM):
-    def __init__(self, model_name: str | None = None):
-        super().__init__()
-
-    async def _generate(
-        self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
-    ):
-        json_payload = {"prompt": prompt}
-        if gen_schema:
-            json_payload["gen_schema"] = gen_schema
-        if gen_cfg:
-            json_payload.update(gen_cfg)
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                settings.LLM_URL,
-                headers={"Content-Type": "application/json"},
-                json=json_payload,
-            )
-            response.raise_for_status()
-            return response.json()
-
-
-LLM.register("oobabooga", OobaboogaLLM)
@@ -8,8 +8,6 @@ class Settings(BaseSettings):
         extra="ignore",
     )
 
-    OPENMP_KMP_DUPLICATE_LIB_OK: bool = False
-
     # CORS
     CORS_ORIGIN: str = "*"
     CORS_ALLOW_CREDENTIALS: bool = False
@@ -20,26 +18,6 @@ class Settings(BaseSettings):
     # local data directory (audio for now)
     DATA_DIR: str = "./data"
 
-    # Whisper
-    WHISPER_MODEL_SIZE: str = "tiny"
-    WHISPER_REAL_TIME_MODEL_SIZE: str = "tiny"
-
-    # Summarizer
-    SUMMARIZER_MODEL: str = "facebook/bart-large-cnn"
-    SUMMARIZER_INPUT_ENCODING_MAX_LENGTH: int = 1024
-    SUMMARIZER_MAX_LENGTH: int = 2048
-    SUMMARIZER_BEAM_SIZE: int = 6
-    SUMMARIZER_MAX_CHUNK_LENGTH: int = 1024
-    SUMMARIZER_USING_CHUNKS: bool = True
-
-    # Audio
-    AUDIO_BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME: str = "aggregator"
-    AUDIO_AV_FOUNDATION_DEVICE_ID: int = 1
-    AUDIO_CHANNELS: int = 2
-    AUDIO_SAMPLING_RATE: int = 48000
-    AUDIO_SAMPLING_WIDTH: int = 2
-    AUDIO_BUFFER_SIZE: int = 256 * 960
-
     # Audio Transcription
     # backends: whisper, modal
     TRANSCRIPT_BACKEND: str = "whisper"
@@ -63,8 +41,8 @@ class Settings(BaseSettings):
     TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
 
     # LLM
-    # available backend: openai, modal, oobabooga
-    LLM_BACKEND: str = "oobabooga"
+    # available backend: openai, modal
+    LLM_BACKEND: str = "modal"
 
     # LLM common configuration
     LLM_URL: str | None = None
@@ -1,14 +1,13 @@
 import asyncio
 import time
 import uuid
+from os import environ
 
 import httpx
 import stamina
 from aiortc import RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaPlayer, MediaRelay
 
 from reflector.logger import logger
-from reflector.settings import settings
-
 
 class StreamClient:
@@ -43,8 +42,9 @@ class StreamClient:
         else:
             if self.relay is None:
                 self.relay = MediaRelay()
+            audio_device_id = int(environ.get("AUDIO_AV_FOUNDATION_DEVICE_ID", 1))
             self.player = MediaPlayer(
-                f":{settings.AUDIO_AV_FOUNDATION_DEVICE_ID}",
+                f":{audio_device_id}",
                 format="avfoundation",
                 options={"channels": "2"},
             )
@@ -1,59 +0,0 @@
-"""
-Utility file for file handling related functions, including file downloads and
-uploads to cloud storage
-"""
-
-import sys
-from typing import List, NoReturn
-
-import boto3
-import botocore
-
-from .log_utils import LOGGER
-from .run_utils import SECRETS
-
-BUCKET_NAME = SECRETS["AWS-S3"]["BUCKET_NAME"]
-
-s3 = boto3.client(
-    "s3",
-    aws_access_key_id=SECRETS["AWS-S3"]["AWS_ACCESS_KEY"],
-    aws_secret_access_key=SECRETS["AWS-S3"]["AWS_SECRET_KEY"],
-)
-
-
-def upload_files(files_to_upload: List[str]) -> NoReturn:
-    """
-    Upload a list of files to the configured S3 bucket
-    :param files_to_upload: List of files to upload
-    :return: None
-    """
-    for key in files_to_upload:
-        LOGGER.info("Uploading file " + key)
-        try:
-            s3.upload_file(key, BUCKET_NAME, key)
-        except botocore.exceptions.ClientError as exception:
-            print(exception.response)
-
-
-def download_files(files_to_download: List[str]) -> NoReturn:
-    """
-    Download a list of files from the configured S3 bucket
-    :param files_to_download: List of files to download
-    :return: None
-    """
-    for key in files_to_download:
-        LOGGER.info("Downloading file " + key)
-        try:
-            s3.download_file(BUCKET_NAME, key, key)
-        except botocore.exceptions.ClientError as exception:
-            if exception.response["Error"]["Code"] == "404":
-                print("The object does not exist.")
-            else:
-                raise
-
-
-if __name__ == "__main__":
-    if sys.argv[1] == "download":
-        download_files([sys.argv[2]])
-    elif sys.argv[1] == "upload":
-        upload_files([sys.argv[2]])
@@ -1,38 +0,0 @@
-"""
-Utility function to format the artefacts created during Reflector run
-"""
-
-import json
-
-with open("../artefacts/meeting_titles_and_summaries.txt", "r", encoding="utf-8") as f:
-    outputs = f.read()
-
-outputs = json.loads(outputs)
-
-transcript_file = open("../artefacts/meeting_transcript.txt", "a", encoding="utf-8")
-title_desc_file = open(
-    "../artefacts/meeting_title_description.txt", "a", encoding="utf-8"
-)
-summary_file = open("../artefacts/meeting_summary.txt", "a", encoding="utf-8")
-
-for item in outputs["topics"]:
-    transcript_file.write(item["transcript"])
-    summary_file.write(item["description"])
-
-    title_desc_file.write("TITLE: \n")
-    title_desc_file.write(item["title"])
-    title_desc_file.write("\n")
-
-    title_desc_file.write("DESCRIPTION: \n")
-    title_desc_file.write(item["description"])
-    title_desc_file.write("\n")
-
-    title_desc_file.write("TRANSCRIPT: \n")
-    title_desc_file.write(item["transcript"])
-    title_desc_file.write("\n")
-
-    title_desc_file.write("---------------------------------------- \n\n")
-
-transcript_file.close()
-title_desc_file.close()
-summary_file.close()
@@ -1,55 +0,0 @@
-"""
-Utility file for server side asynchronous task running and config objects
-"""
-
-import asyncio
-import contextlib
-from functools import partial
-from threading import Lock
-from typing import ContextManager, Generic, TypeVar
-
-
-def run_in_executor(func, *args, executor=None, **kwargs):
-    """
-    Run the function in an executor, unblocking the main loop
-    :param func: Function to be run in executor
-    :param args: function parameters
-    :param executor: executor instance [Thread | Process]
-    :param kwargs: Additional parameters
-    :return: Future of function result upon completion
-    """
-    callback = partial(func, *args, **kwargs)
-    loop = asyncio.get_event_loop()
-    return loop.run_in_executor(executor, callback)
-
-
-# Generic type template
-T = TypeVar("T")
-
-
-class Mutex(Generic[T]):
-    """
-    Mutex class to implement lock/release of a shared
-    protected variable
-    """
-
-    def __init__(self, value: T):
-        """
-        Create an instance of Mutex wrapper for the given resource
-        :param value: Shared resources to be thread protected
-        """
-        self.__value = value
-        self.__lock = Lock()
-
-    @contextlib.contextmanager
-    def lock(self) -> ContextManager[T]:
-        """
-        Lock the resource with a mutex to be used within a context block
-        The lock is automatically released on context exit
-        :return: Shared resource
-        """
-        self.__lock.acquire()
-        try:
-            yield self.__value
-        finally:
-            self.__lock.release()
@@ -1,262 +0,0 @@
-"""
-Utility file for all text processing related functionalities
-"""
-
-import datetime
-from typing import List
-
-import nltk
-import torch
-from log_utils import LOGGER
-from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
-from run_utils import CONFIG
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
-from transformers import BartForConditionalGeneration, BartTokenizer
-
-nltk.download("punkt", quiet=True)
-
-
-def preprocess_sentence(sentence: str) -> str:
-    """
-    Filter out undesirable tokens from the sentence
-    :param sentence:
-    :return:
-    """
-    stop_words = set(stopwords.words("english"))
-    tokens = word_tokenize(sentence.lower())
-    tokens = [token for token in tokens if token.isalnum() and token not in stop_words]
-    return " ".join(tokens)
-
-
-def compute_similarity(sent1: str, sent2: str) -> float:
-    """
-    Compute the similarity
-    """
-    tfidf_vectorizer = TfidfVectorizer()
-    if sent1 is not None and sent2 is not None:
-        tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2])
-        return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
-    return 0.0
-
-
-def remove_almost_alike_sentences(sentences: List[str], threshold=0.7) -> List[str]:
-    """
-    Filter sentences that are similar beyond a set threshold
-    :param sentences:
-    :param threshold:
-    :return:
-    """
-    num_sentences = len(sentences)
-    removed_indices = set()
-
-    for i in range(num_sentences):
-        if i not in removed_indices:
-            for j in range(i + 1, num_sentences):
-                if j not in removed_indices:
-                    l_i = len(sentences[i])
-                    l_j = len(sentences[j])
-                    if l_i == 0 or l_j == 0:
-                        if l_i == 0:
-                            removed_indices.add(i)
-                        if l_j == 0:
-                            removed_indices.add(j)
-                    else:
-                        sentence1 = preprocess_sentence(sentences[i])
-                        sentence2 = preprocess_sentence(sentences[j])
-                        if len(sentence1) != 0 and len(sentence2) != 0:
-                            similarity = compute_similarity(sentence1, sentence2)
-
-                            if similarity >= threshold:
-                                removed_indices.add(max(i, j))
-
-    filtered_sentences = [
-        sentences[i] for i in range(num_sentences) if i not in removed_indices
-    ]
-    return filtered_sentences
-
-
-def remove_outright_duplicate_sentences_from_chunk(chunk: str) -> List[str]:
-    """
-    Remove repetitive sentences
-    :param chunk:
-    :return:
-    """
-    chunk_text = chunk["text"]
-    sentences = nltk.sent_tokenize(chunk_text)
-    nonduplicate_sentences = list(dict.fromkeys(sentences))
-    return nonduplicate_sentences
-
-
-def remove_whisper_repetitive_hallucination(
-    nonduplicate_sentences: List[str],
-) -> List[str]:
-    """
-    Remove sentences that are repeated as a result of Whisper
-    hallucinations
-    :param nonduplicate_sentences:
-    :return:
-    """
-    chunk_sentences = []
-
-    for sent in nonduplicate_sentences:
-        temp_result = ""
-        seen = {}
-        words = nltk.word_tokenize(sent)
-        n_gram_filter = 3
-        for i in range(len(words)):
-            if (
-                str(words[i : i + n_gram_filter]) in seen
-                and seen[str(words[i : i + n_gram_filter])]
-                == words[i + 1 : i + n_gram_filter + 2]
-            ):
-                pass
-            else:
-                seen[str(words[i : i + n_gram_filter])] = words[
-                    i + 1 : i + n_gram_filter + 2
-                ]
-                temp_result += words[i]
-                temp_result += " "
-        chunk_sentences.append(temp_result)
-    return chunk_sentences
-
-
-def post_process_transcription(whisper_result: dict) -> dict:
-    """
-    Parent function to perform post-processing on the transcription result
-    :param whisper_result:
-    :return:
-    """
-    transcript_text = ""
-    for chunk in whisper_result["chunks"]:
-        nonduplicate_sentences = remove_outright_duplicate_sentences_from_chunk(chunk)
-        chunk_sentences = remove_whisper_repetitive_hallucination(
-            nonduplicate_sentences
-        )
-        similarity_matched_sentences = remove_almost_alike_sentences(chunk_sentences)
-        chunk["text"] = " ".join(similarity_matched_sentences)
-        transcript_text += chunk["text"]
-    whisper_result["text"] = transcript_text
-    return whisper_result
-
-
-def summarize_chunks(chunks: List[str], tokenizer, model) -> List[str]:
-    """
-    Summarize each chunk using a summarizer model
-    :param chunks:
-    :param tokenizer:
-    :param model:
-    :return:
-    """
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    summaries = []
-    for c in chunks:
-        input_ids = tokenizer.encode(c, return_tensors="pt")
-        input_ids = input_ids.to(device)
-        with torch.no_grad():
-            summary_ids = model.generate(
-                input_ids,
-                num_beams=int(CONFIG["SUMMARIZER"]["BEAM_SIZE"]),
-                length_penalty=2.0,
-                max_length=int(CONFIG["SUMMARIZER"]["MAX_LENGTH"]),
-                early_stopping=True,
-            )
-        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-        summaries.append(summary)
-    return summaries
-
-
-def chunk_text(
-    text: str, max_chunk_length: int = int(CONFIG["SUMMARIZER"]["MAX_CHUNK_LENGTH"])
-) -> List[str]:
-    """
-    Split text into smaller chunks.
-    :param text: Text to be chunked
-    :param max_chunk_length: length of chunk
-    :return: chunked texts
-    """
-    sentences = nltk.sent_tokenize(text)
-    chunks = []
-    current_chunk = ""
-    for sentence in sentences:
-        if len(current_chunk) + len(sentence) < max_chunk_length:
-            current_chunk += f" {sentence.strip()}"
-        else:
-            chunks.append(current_chunk.strip())
-            current_chunk = f"{sentence.strip()}"
-    chunks.append(current_chunk.strip())
-    return chunks
-
-
-def summarize(
-    transcript_text: str,
-    timestamp: datetime.datetime.timestamp,
-    real_time: bool = False,
-    chunk_summarize: str = CONFIG["SUMMARIZER"]["SUMMARIZE_USING_CHUNKS"],
-):
-    """
-    Summarize the given text either as a whole or as chunks as needed
-    :param transcript_text:
-    :param timestamp:
-    :param real_time:
-    :param chunk_summarize:
-    :return:
-    """
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    summary_model = CONFIG["SUMMARIZER"]["SUMMARY_MODEL"]
-    if not summary_model:
-        summary_model = "facebook/bart-large-cnn"
-
-    # Summarize the generated transcript using the BART model
-    LOGGER.info(f"Loading BART model: {summary_model}")
-    tokenizer = BartTokenizer.from_pretrained(summary_model)
-    model = BartForConditionalGeneration.from_pretrained(summary_model)
-    model = model.to(device)
-
-    output_file = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
-    if real_time:
-        output_file = "real_time_" + output_file
-
-    if chunk_summarize != "YES":
-        max_length = int(CONFIG["SUMMARIZER"]["INPUT_ENCODING_MAX_LENGTH"])
-        inputs = tokenizer.batch_encode_plus(
-            [transcript_text],
-            truncation=True,
-            padding="longest",
-            max_length=max_length,
-            return_tensors="pt",
-        )
-        inputs = inputs.to(device)
-
-        with torch.no_grad():
-            num_beams = int(CONFIG["SUMMARIZER"]["BEAM_SIZE"])
-            max_length = int(CONFIG["SUMMARIZER"]["MAX_LENGTH"])
-            summaries = model.generate(
-                inputs["input_ids"],
-                num_beams=num_beams,
-                length_penalty=2.0,
-                max_length=max_length,
-                early_stopping=True,
-            )
-
-        decoded_summaries = [
-            tokenizer.decode(
-                summary, skip_special_tokens=True, clean_up_tokenization_spaces=False
-            )
-            for summary in summaries
-        ]
-        summary = " ".join(decoded_summaries)
-        with open("./artefacts/" + output_file, "w", encoding="utf-8") as file:
-            file.write(summary.strip() + "\n")
-    else:
-        LOGGER.info("Breaking transcript into smaller chunks")
-        chunks = chunk_text(transcript_text)
-
-        LOGGER.info(f"Transcript broken into {len(chunks)} chunks of at most 500 words")
-
-        LOGGER.info(f"Writing summary text to: {output_file}")
-        with open(output_file, "w") as f:
-            summaries = summarize_chunks(chunks, tokenizer, model)
-            for summary in summaries:
-                f.write(summary.strip() + " ")
@@ -1,283 +0,0 @@
-"""
-Utility file for all visualization related functions
-"""
-
-import ast
-import collections
-import datetime
-import os
-import pickle
-from typing import NoReturn
-
-import matplotlib.pyplot as plt
-import pandas as pd
-import scattertext as st
-import spacy
-from nltk.corpus import stopwords
-from wordcloud import STOPWORDS, WordCloud
-
-en = spacy.load("en_core_web_md")
-spacy_stopwords = en.Defaults.stop_words
-
-STOPWORDS = (
-    set(STOPWORDS).union(set(stopwords.words("english"))).union(set(spacy_stopwords))
-)
-
-
-def create_wordcloud(
-    timestamp: datetime.datetime.timestamp, real_time: bool = False
-) -> NoReturn:
-    """
-    Create a basic word cloud visualization of transcribed text
-    :return: None. The wordcloud image is saved locally
-    """
-    filename = "transcript"
-    if real_time:
-        filename = (
-            "real_time_"
-            + filename
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".txt"
-        )
-    else:
-        filename += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
-
-    with open("./artefacts/" + filename, "r") as f:
-        transcription_text = f.read()
-
-    # python_mask = np.array(PIL.Image.open("download1.png"))
-
-    wordcloud = WordCloud(
-        height=800,
-        width=800,
-        background_color="white",
-        stopwords=STOPWORDS,
-        min_font_size=8,
-    ).generate(transcription_text)
-
-    # Plot wordcloud and save image
-    plt.figure(facecolor=None)
-    plt.imshow(wordcloud, interpolation="bilinear")
-    plt.axis("off")
-    plt.tight_layout(pad=0)
-
-    wordcloud = "wordcloud"
-    if real_time:
-        wordcloud = (
-            "real_time_"
-            + wordcloud
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".png"
-        )
-    else:
-        wordcloud += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
-
-    plt.savefig("./artefacts/" + wordcloud)
-
-
-def create_talk_diff_scatter_viz(
-    timestamp: datetime.datetime.timestamp, real_time: bool = False
-) -> NoReturn:
-    """
-    Perform agenda vs transcription diff to see covered topics.
-    Create a scatter plot of words in topics.
-    :return: None. Saved locally.
-    """
-    spacy_model = "en_core_web_md"
-    nlp = spacy.load(spacy_model)
-    nlp.add_pipe("sentencizer")
-
-    agenda_topics = []
-    agenda = []
-    # Load the agenda
-    with open(os.path.join(os.getcwd(), "agenda-headers.txt"), "r") as f:
-        for line in f.readlines():
-            if line.strip():
-                agenda.append(line.strip())
-                agenda_topics.append(line.split(":")[0])
-
-    # Load the transcription with timestamp
-    if real_time:
-        filename = (
-            "./artefacts/real_time_transcript_with_timestamp_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".txt"
-        )
-    else:
-        filename = (
-            "./artefacts/transcript_with_timestamp_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".txt"
-        )
-    with open(filename) as file:
-        transcription_timestamp_text = file.read()
-
-    res = ast.literal_eval(transcription_timestamp_text)
-    chunks = res["chunks"]
-
-    # create df for processing
-    df = pd.DataFrame.from_dict(res["chunks"])
-
-    covered_items = {}
-    # ts: timestamp
-    # Map each timestamped chunk with top1 and top2 matched agenda
-    ts_to_topic_mapping_top_1 = {}
-    ts_to_topic_mapping_top_2 = {}
-
-    # Also create a mapping of the different timestamps
-    # in which each topic was covered
-    topic_to_ts_mapping_top_1 = collections.defaultdict(list)
-    topic_to_ts_mapping_top_2 = collections.defaultdict(list)
-
-    similarity_threshold = 0.7
-
-    for c in chunks:
-        doc_transcription = nlp(c["text"])
-        topic_similarities = []
-        for item in range(len(agenda)):
-            item_doc = nlp(agenda[item])
-            # if not doc_transcription or not all
-            # (token.has_vector for token in doc_transcription):
-            if not doc_transcription:
-                continue
-            similarity = doc_transcription.similarity(item_doc)
-            topic_similarities.append((item, similarity))
-        topic_similarities.sort(key=lambda x: x[1], reverse=True)
-        for i in range(2):
-            if topic_similarities[i][1] >= similarity_threshold:
-                covered_items[agenda[topic_similarities[i][0]]] = True
-                # top1 match
-                if i == 0:
-                    ts_to_topic_mapping_top_1[c["timestamp"]] = agenda_topics[
-                        topic_similarities[i][0]
-                    ]
-                    topic_to_ts_mapping_top_1[
-                        agenda_topics[topic_similarities[i][0]]
-                    ].append(c["timestamp"])
-                # top2 match
-                else:
-                    ts_to_topic_mapping_top_2[c["timestamp"]] = agenda_topics[
-                        topic_similarities[i][0]
-                    ]
-                    topic_to_ts_mapping_top_2[
-                        agenda_topics[topic_similarities[i][0]]
-                    ].append(c["timestamp"])
-
-    def create_new_columns(record: dict) -> dict:
-        """
-        Accumulate the mapping information into the df
-        :param record:
-        :return:
-        """
-        record["ts_to_topic_mapping_top_1"] = ts_to_topic_mapping_top_1[
-            record["timestamp"]
-        ]
-        record["ts_to_topic_mapping_top_2"] = ts_to_topic_mapping_top_2[
-            record["timestamp"]
-        ]
-        return record
-
-    df = df.apply(create_new_columns, axis=1)
-
-    # Count the number of items covered and calculate the percentage
-    num_covered_items = sum(covered_items.values())
-    percentage_covered = num_covered_items / len(agenda) * 100
-
-    # Print the results
-    print("💬 Agenda items covered in the transcription:")
-    for item in agenda:
-        if item in covered_items and covered_items[item]:
-            print("✅ ", item)
-        else:
-            print("❌ ", item)
-    print("📊 Coverage: {:.2f}%".format(percentage_covered))
-
-    # Save df, mappings for further experimentation
-    df_name = "df"
-    if real_time:
-        df_name = (
-            "real_time_"
-            + df_name
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".pkl"
-        )
-    else:
-        df_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
-    df.to_pickle("./artefacts/" + df_name)
-
-    my_mappings = [
-        ts_to_topic_mapping_top_1,
-        ts_to_topic_mapping_top_2,
-        topic_to_ts_mapping_top_1,
-        topic_to_ts_mapping_top_2,
-    ]
-
-    mappings_name = "mappings"
-    if real_time:
-        mappings_name = (
-            "real_time_"
-            + mappings_name
-            + "_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".pkl"
-        )
-    else:
-        mappings_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
-    pickle.dump(my_mappings, open("./artefacts/" + mappings_name, "wb"))
-
-    # to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") )
-
-    # pick the 2 most matched topic to be used for plotting
-    topic_times = collections.defaultdict(int)
-    for key in ts_to_topic_mapping_top_1.keys():
-        if key[0] is None or key[1] is None:
-            continue
-        duration = key[1] - key[0]
-        topic_times[ts_to_topic_mapping_top_1[key]] += duration
-
-    topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
-
-    if len(topic_times) > 1:
-        cat_1 = topic_times[0][0]
-        cat_1_name = topic_times[0][0]
-        cat_2_name = topic_times[1][0]
-
-    # Scatter plot of topics
-    df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
-    corpus = (
-        st.CorpusFromParsedDocuments(
-            df, category_col="ts_to_topic_mapping_top_1", parsed_col="parse"
-        )
-        .build()
-        .get_unigram_corpus()
-        .compact(st.AssociationCompactor(2000))
-    )
-    html = st.produce_scattertext_explorer(
-        corpus,
-        category=cat_1,
-        category_name=cat_1_name,
-        not_category_name=cat_2_name,
-        minimum_term_frequency=0,
-        pmi_threshold_coefficient=0,
-        width_in_pixels=1000,
-        transform=st.Scalers.dense_rank,
-    )
-    if real_time:
-        with open(
-            "./artefacts/real_time_scatter_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".html",
-            "w",
-        ) as file:
-            file.write(html)
-    else:
-        with open(
-            "./artefacts/scatter_"
-            + timestamp.strftime("%m-%d-%Y_%H:%M:%S")
-            + ".html",
-            "w",
-        ) as file:
-            file.write(html)
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-# Directory to search for Python files
-cwd=$(pwd)
-last_component="${cwd##*/}"
-
-if [ "$last_component" = "reflector" ]; then
-    directory="./artefacts"
-elif [ "$last_component" = "scripts" ]; then
-    directory="../artefacts"
-fi
-
-# Pattern to match Python files (e.g., "*.py" for all .py files)
-transcript_file_pattern="transcript_*.txt"
-summary_file_pattern="summary_*.txt"
-pickle_file_pattern="*.pkl"
-html_file_pattern="*.html"
-png_file_pattern="wordcloud*.png"
-mp3_file_pattern="*.mp3"
-mp4_file_pattern="*.mp4"
-m4a_file_pattern="*.m4a"
-
-find "$directory" -type f -name "$transcript_file_pattern" -delete
-find "$directory" -type f -name "$summary_file_pattern" -delete
-find "$directory" -type f -name "$pickle_file_pattern" -delete
-find "$directory" -type f -name "$html_file_pattern" -delete
-find "$directory" -type f -name "$png_file_pattern" -delete
-find "$directory" -type f -name "$mp3_file_pattern" -delete
-find "$directory" -type f -name "$mp4_file_pattern" -delete
-find "$directory" -type f -name "$m4a_file_pattern" -delete
@@ -1,39 +0,0 @@
-#!/bin/sh
-
-# Upgrade pip
-pip install --upgrade pip
-
-# Default to CPU Installation of JAX
-jax_mode="jax[cpu]"
-
-# Install JAX
-if [ "$1" == "cpu" ]
-then
-    jax_mode="jax[cpu]"
-elif [ "$1" == "cuda11" ]
-then
-    jax_mode="jax[cuda11_pip]"
-elif [ "$1" == "cuda12" ]
-then
-    jax_mode="jax[cuda12_pip]"
-fi
-
-pip install --upgrade "$jax_mode"
-
-# Install Whisper-JAX base
-pip install git+https://github.com/sanchit-gandhi/whisper-jax.git
-
-# Update to latest version
-pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git
-
-cwd=$(pwd)
-last_component="${cwd##*/}"
-if [ "$last_component" = "reflector" ]; then
-    pip install -r pipeline-requirements.txt
-elif [ "$last_component" = "scripts" ]; then
-    pip install -r ../pipeline-requirements.txt
-fi
-
-# download spacy models
-spacy download en_core_web_sm
-spacy download en_core_web_md
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-pip install --upgrade pip
-
-cwd=$(pwd)
-last_component="${cwd##*/}"
-if [ "$last_component" = "reflector" ]; then
-    pip install -r server-requirements.txt
-elif [ "$last_component" = "scripts" ]; then
-    pip install -r ../server-requirements.txt
-fi
@@ -1 +0,0 @@
-ZULIP_API_KEY=<omitted, ask in zulip>