diff --git a/server/.env_template b/server/.env_template
deleted file mode 100644
index f59c1ffe..00000000
--- a/server/.env_template
+++ /dev/null
@@ -1,21 +0,0 @@
-TRANSCRIPT_BACKEND=modal
-TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
-
-LLM_BACKEND=modal
-LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
-
-TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
-ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
-DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
-
-BASE_URL=https://xxxxx.ngrok.app
-DIARIZATION_ENABLED=false
-
-SQS_POLLING_TIMEOUT_SECONDS=60
-
-# Summary LLM configuration
-SUMMARY_MODEL=monadical/private/smart
-SUMMARY_LLM_URL=
-SUMMARY_LLM_API_KEY=
diff --git a/server/env.example b/server/env.example
index ce70cfb4..10079105 100644
--- a/server/env.example
+++ b/server/env.example
@@ -20,7 +20,6 @@ AUTH_JWT_AUDIENCE=
 
 ## Using local whisper
 #TRANSCRIPT_BACKEND=whisper
-#WHISPER_MODEL_SIZE=tiny
 
 ## Using serverless modal.com (require reflector-gpu-modal deployed)
 #TRANSCRIPT_BACKEND=modal
@@ -30,7 +29,7 @@ AUTH_JWT_AUDIENCE=
 
 TRANSCRIPT_BACKEND=modal
 TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
+TRANSCRIPT_MODAL_API_KEY=
 
 ## =======================================================
 ## Transcription backend
@@ -50,7 +49,7 @@ TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
 ## Using serverless modal.com (require reflector-gpu-modal deployed)
 LLM_BACKEND=modal
 LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
+LLM_MODAL_API_KEY=
 
 ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
 
diff --git a/server/gpu/modal_deployments/reflector_vllm_hermes3.py b/server/gpu/modal_deployments/reflector_vllm_hermes3.py
deleted file mode 100644
index 5eebf5c0..00000000
--- a/server/gpu/modal_deployments/reflector_vllm_hermes3.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# # Run an OpenAI-Compatible vLLM Server
-
-import modal
-
-MODELS_DIR = "/llamas"
-MODEL_NAME = "NousResearch/Hermes-3-Llama-3.1-8B"
-N_GPU = 1
-
-
-def download_llm():
-    from huggingface_hub import snapshot_download
-
-    print("Downloading LLM model")
-    snapshot_download(
-        MODEL_NAME,
-        local_dir=f"{MODELS_DIR}/{MODEL_NAME}",
-        ignore_patterns=[
-            "*.pt",
-            "*.bin",
-            "*.pth",
-            "original/*",
-        ],  # Ensure safetensors
-    )
-    print("LLM model downloaded")
-
-
-def move_cache():
-    from transformers.utils import move_cache as transformers_move_cache
-
-    transformers_move_cache()
-
-
-vllm_image = (
-    modal.Image.debian_slim(python_version="3.10")
-    .pip_install("vllm==0.5.3post1")
-    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
-    .pip_install(
-        # "accelerate==0.34.2",
-        "einops==0.8.0",
-        "hf-transfer~=0.1",
-    )
-    .run_function(download_llm)
-    .run_function(move_cache)
-    .pip_install(
-        "bitsandbytes>=0.42.9",
-    )
-)
-
-app = modal.App("reflector-vllm-hermes3")
-
-
-@app.function(
-    image=vllm_image,
-    gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
-    timeout=60 * 5,
-    scaledown_window=60 * 5,
-    allow_concurrent_inputs=100,
-    secrets=[
-        modal.Secret.from_name("reflector-gpu"),
-    ],
-)
-@modal.asgi_app()
-def serve():
-    import os
-
-    import fastapi
-    import vllm.entrypoints.openai.api_server as api_server
-    from vllm.engine.arg_utils import AsyncEngineArgs
-    from vllm.engine.async_llm_engine import AsyncLLMEngine
-    from vllm.entrypoints.logger import RequestLogger
-    from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
-    from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
-    from vllm.usage.usage_lib import UsageContext
-
-    TOKEN = os.environ["REFLECTOR_GPU_APIKEY"]
-
-    # create a fastAPI app that uses vLLM's OpenAI-compatible router
-    web_app = fastapi.FastAPI(
-        title=f"OpenAI-compatible {MODEL_NAME} server",
-        description="Run an OpenAI-compatible LLM server with vLLM on modal.com",
-        version="0.0.1",
-        docs_url="/docs",
-    )
-
-    # security: CORS middleware for external requests
-    http_bearer = fastapi.security.HTTPBearer(
-        scheme_name="Bearer Token",
-        description="See code for authentication details.",
-    )
-    web_app.add_middleware(
-        fastapi.middleware.cors.CORSMiddleware,
-        allow_origins=["*"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
-    # security: inject dependency on authed routes
-    async def is_authenticated(api_key: str = fastapi.Security(http_bearer)):
-        if api_key.credentials != TOKEN:
-            raise fastapi.HTTPException(
-                status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
-                detail="Invalid authentication credentials",
-            )
-        return {"username": "authenticated_user"}
-
-    router = fastapi.APIRouter(dependencies=[fastapi.Depends(is_authenticated)])
-
-    # wrap vllm's router in auth router
-    router.include_router(api_server.router)
-    # add authed vllm to our fastAPI app
-    web_app.include_router(router)
-
-    engine_args = AsyncEngineArgs(
-        model=MODELS_DIR + "/" + MODEL_NAME,
-        tensor_parallel_size=N_GPU,
-        gpu_memory_utilization=0.90,
-        # max_model_len=8096,
-        enforce_eager=False,  # capture the graph for faster inference, but slower cold starts (30s > 20s)
-        # --- 4 bits load
-        # quantization="bitsandbytes",
-        # load_format="bitsandbytes",
-    )
-
-    engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER
-    )
-
-    model_config = get_model_config(engine)
-
-    request_logger = RequestLogger(max_log_len=2048)
-
-    api_server.openai_serving_chat = OpenAIServingChat(
-        engine,
-        model_config=model_config,
-        served_model_names=[MODEL_NAME],
-        chat_template=None,
-        response_role="assistant",
-        lora_modules=[],
-        prompt_adapters=[],
-        request_logger=request_logger,
-    )
-    api_server.openai_serving_completion = OpenAIServingCompletion(
-        engine,
-        model_config=model_config,
-        served_model_names=[MODEL_NAME],
-        lora_modules=[],
-        prompt_adapters=[],
-        request_logger=request_logger,
-    )
-
-    return web_app
-
-
-def get_model_config(engine):
-    import asyncio
-
-    try:  # adapted from vLLM source -- https://github.com/vllm-project/vllm/blob/507ef787d85dec24490069ffceacbd6b161f4f72/vllm/entrypoints/openai/api_server.py#L235C1-L247C1
-        event_loop = asyncio.get_running_loop()
-    except RuntimeError:
-        event_loop = None
-
-    if event_loop is not None and event_loop.is_running():
-        # If the current is instanced by Ray Serve,
-        # there is already a running event loop
-        model_config = event_loop.run_until_complete(engine.get_model_config())
-    else:
-        # When using single vLLM without engine_use_ray
-        model_config = asyncio.run(engine.get_model_config())
-
-    return model_config
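Context for the deletion above: the removed Modal app exposed an OpenAI-compatible vLLM endpoint guarded by a bearer token (`REFLECTOR_GPU_APIKEY`). The sketch below shows, under stated assumptions, how a client would typically call such an endpoint; it reuses the `LLM_URL` / `LLM_MODAL_API_KEY` variable names from `env.example`, and the `/v1/chat/completions` path is the standard route exposed by vLLM's OpenAI-compatible server. It is an illustrative sketch, not code from this repository.

```python
# Minimal client sketch (assumptions: LLM_URL points at the deployed Modal web
# endpoint and LLM_MODAL_API_KEY holds the bearer token expected by serve()).
import os

import httpx


async def chat(prompt: str) -> str:
    base_url = os.environ["LLM_URL"]
    api_key = os.environ["LLM_MODAL_API_KEY"]
    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(
            f"{base_url}/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": "NousResearch/Hermes-3-Llama-3.1-8B",
                "messages": [{"role": "user", "content": prompt}],
            },
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
```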
diff --git a/server/migration.load b/server/migration.load
deleted file mode 100644
index aa44238e..00000000
--- a/server/migration.load
+++ /dev/null
@@ -1,16 +0,0 @@
-LOAD DATABASE
-    FROM sqlite:///app/reflector.sqlite3
-    INTO pgsql://reflector:reflector@postgres:5432/reflector
-WITH
-    include drop,
-    create tables,
-    create indexes,
-    reset sequences,
-    preserve index names,
-    prefetch rows = 10
-SET
-    work_mem to '512MB',
-    maintenance_work_mem to '1024MB'
-CAST
-    column transcript.duration to float using (lambda (val) (when val (format nil "~f" val)))
-;
diff --git a/server/reflector/llm/base.py b/server/reflector/llm/base.py
index 1527ec63..e5dd220b 100644
--- a/server/reflector/llm/base.py
+++ b/server/reflector/llm/base.py
@@ -61,7 +61,7 @@ class LLM:
         Return an instance depending on the settings.
 
         Settings used:
-        - `LLM_BACKEND`: key of the backend, defaults to `oobabooga`
+        - `LLM_BACKEND`: key of the backend
         - `LLM_URL`: url of the backend
         """
         if name is None:
diff --git a/server/reflector/llm/llm_oobabooga.py b/server/reflector/llm/llm_oobabooga.py
deleted file mode 100644
index 36d3480b..00000000
--- a/server/reflector/llm/llm_oobabooga.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import httpx
-
-from reflector.llm.base import LLM
-from reflector.settings import settings
-
-
-class OobaboogaLLM(LLM):
-    def __init__(self, model_name: str | None = None):
-        super().__init__()
-
-    async def _generate(
-        self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
-    ):
-        json_payload = {"prompt": prompt}
-        if gen_schema:
-            json_payload["gen_schema"] = gen_schema
-        if gen_cfg:
-            json_payload.update(gen_cfg)
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                settings.LLM_URL,
-                headers={"Content-Type": "application/json"},
-                json=json_payload,
-            )
-            response.raise_for_status()
-            return response.json()
-
-
-LLM.register("oobabooga", OobaboogaLLM)
diff --git a/server/reflector/settings.py b/server/reflector/settings.py
index d300d449..6cd54be5 100644
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -8,8 +8,6 @@ class Settings(BaseSettings):
         extra="ignore",
     )
 
-    OPENMP_KMP_DUPLICATE_LIB_OK: bool = False
-
     # CORS
     CORS_ORIGIN: str = "*"
    CORS_ALLOW_CREDENTIALS: bool = False
@@ -20,26 +18,6 @@ class Settings(BaseSettings):
     # local data directory (audio for no)
     DATA_DIR: str = "./data"
 
-    # Whisper
-    WHISPER_MODEL_SIZE: str = "tiny"
-    WHISPER_REAL_TIME_MODEL_SIZE: str = "tiny"
-
-    # Summarizer
-    SUMMARIZER_MODEL: str = "facebook/bart-large-cnn"
-    SUMMARIZER_INPUT_ENCODING_MAX_LENGTH: int = 1024
-    SUMMARIZER_MAX_LENGTH: int = 2048
-    SUMMARIZER_BEAM_SIZE: int = 6
-    SUMMARIZER_MAX_CHUNK_LENGTH: int = 1024
-    SUMMARIZER_USING_CHUNKS: bool = True
-
-    # Audio
-    AUDIO_BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME: str = "aggregator"
-    AUDIO_AV_FOUNDATION_DEVICE_ID: int = 1
-    AUDIO_CHANNELS: int = 2
-    AUDIO_SAMPLING_RATE: int = 48000
-    AUDIO_SAMPLING_WIDTH: int = 2
-    AUDIO_BUFFER_SIZE: int = 256 * 960
-
     # Audio Transcription
     # backends: whisper, modal
     TRANSCRIPT_BACKEND: str = "whisper"
@@ -63,8 +41,8 @@ class Settings(BaseSettings):
     TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
 
     # LLM
-    # available backend: openai, modal, oobabooga
-    LLM_BACKEND: str = "oobabooga"
+    # available backend: openai, modal
+    LLM_BACKEND: str = "modal"
 
     # LLM common configuration
     LLM_URL: str | None = None
diff --git a/server/reflector/stream_client.py b/server/reflector/stream_client.py
index 99534609..ff3076d8 100644
--- a/server/reflector/stream_client.py
+++ b/server/reflector/stream_client.py
@@ -1,14 +1,13 @@
 import asyncio
 import time
 import uuid
+from os import environ
 
 import httpx
 import stamina
 from aiortc import RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaPlayer, MediaRelay
-
 from reflector.logger import logger
-from reflector.settings import settings
 
 
 class
StreamClient: @@ -43,8 +42,9 @@ class StreamClient: else: if self.relay is None: self.relay = MediaRelay() + audio_device_id = int(environ.get("AUDIO_AV_FOUNDATION_DEVICE_ID", 1)) self.player = MediaPlayer( - f":{settings.AUDIO_AV_FOUNDATION_DEVICE_ID}", + f":{audio_device_id}", format="avfoundation", options={"channels": "2"}, ) diff --git a/server/reflector/utils/file_utils.py b/server/reflector/utils/file_utils.py deleted file mode 100644 index ba9e4fec..00000000 --- a/server/reflector/utils/file_utils.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -Utility file for file handling related functions, including file downloads and -uploads to cloud storage -""" - -import sys -from typing import List, NoReturn - -import boto3 -import botocore - -from .log_utils import LOGGER -from .run_utils import SECRETS - -BUCKET_NAME = SECRETS["AWS-S3"]["BUCKET_NAME"] - -s3 = boto3.client( - "s3", - aws_access_key_id=SECRETS["AWS-S3"]["AWS_ACCESS_KEY"], - aws_secret_access_key=SECRETS["AWS-S3"]["AWS_SECRET_KEY"], -) - - -def upload_files(files_to_upload: List[str]) -> NoReturn: - """ - Upload a list of files to the configured S3 bucket - :param files_to_upload: List of files to upload - :return: None - """ - for key in files_to_upload: - LOGGER.info("Uploading file " + key) - try: - s3.upload_file(key, BUCKET_NAME, key) - except botocore.exceptions.ClientError as exception: - print(exception.response) - - -def download_files(files_to_download: List[str]) -> NoReturn: - """ - Download a list of files from the configured S3 bucket - :param files_to_download: List of files to download - :return: None - """ - for key in files_to_download: - LOGGER.info("Downloading file " + key) - try: - s3.download_file(BUCKET_NAME, key, key) - except botocore.exceptions.ClientError as exception: - if exception.response["Error"]["Code"] == "404": - print("The object does not exist.") - else: - raise - - -if __name__ == "__main__": - if sys.argv[1] == "download": - download_files([sys.argv[2]]) - elif sys.argv[1] == "upload": - upload_files([sys.argv[2]]) diff --git a/server/reflector/utils/format_output.py b/server/reflector/utils/format_output.py deleted file mode 100644 index adf2ff67..00000000 --- a/server/reflector/utils/format_output.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Utility function to format the artefacts created during Reflector run -""" - -import json - -with open("../artefacts/meeting_titles_and_summaries.txt", "r", encoding="utf-8") as f: - outputs = f.read() - -outputs = json.loads(outputs) - -transcript_file = open("../artefacts/meeting_transcript.txt", "a", encoding="utf-8") -title_desc_file = open( - "../artefacts/meeting_title_description.txt", "a", encoding="utf-8" -) -summary_file = open("../artefacts/meeting_summary.txt", "a", encoding="utf-8") - -for item in outputs["topics"]: - transcript_file.write(item["transcript"]) - summary_file.write(item["description"]) - - title_desc_file.write("TITLE: \n") - title_desc_file.write(item["title"]) - title_desc_file.write("\n") - - title_desc_file.write("DESCRIPTION: \n") - title_desc_file.write(item["description"]) - title_desc_file.write("\n") - - title_desc_file.write("TRANSCRIPT: \n") - title_desc_file.write(item["transcript"]) - title_desc_file.write("\n") - - title_desc_file.write("---------------------------------------- \n\n") - -transcript_file.close() -title_desc_file.close() -summary_file.close() diff --git a/server/reflector/utils/run_utils.py b/server/reflector/utils/run_utils.py deleted file mode 100644 index f99c2549..00000000 --- 
a/server/reflector/utils/run_utils.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -Utility file for server side asynchronous task running and config objects -""" - -import asyncio -import contextlib -from functools import partial -from threading import Lock -from typing import ContextManager, Generic, TypeVar - - -def run_in_executor(func, *args, executor=None, **kwargs): - """ - Run the function in an executor, unblocking the main loop - :param func: Function to be run in executor - :param args: function parameters - :param executor: executor instance [Thread | Process] - :param kwargs: Additional parameters - :return: Future of function result upon completion - """ - callback = partial(func, *args, **kwargs) - loop = asyncio.get_event_loop() - return loop.run_in_executor(executor, callback) - - -# Genetic type template -T = TypeVar("T") - - -class Mutex(Generic[T]): - """ - Mutex class to implement lock/release of a shared - protected variable - """ - - def __init__(self, value: T): - """ - Create an instance of Mutex wrapper for the given resource - :param value: Shared resources to be thread protected - """ - self.__value = value - self.__lock = Lock() - - @contextlib.contextmanager - def lock(self) -> ContextManager[T]: - """ - Lock the resource with a mutex to be used within a context block - The lock is automatically released on context exit - :return: Shared resource - """ - self.__lock.acquire() - try: - yield self.__value - finally: - self.__lock.release() diff --git a/server/reflector/utils/text_utils.py b/server/reflector/utils/text_utils.py deleted file mode 100644 index da2260c5..00000000 --- a/server/reflector/utils/text_utils.py +++ /dev/null @@ -1,262 +0,0 @@ -""" -Utility file for all text processing related functionalities -""" - -import datetime -from typing import List - -import nltk -import torch -from log_utils import LOGGER -from nltk.corpus import stopwords -from nltk.tokenize import word_tokenize -from run_utils import CONFIG -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.metrics.pairwise import cosine_similarity -from transformers import BartForConditionalGeneration, BartTokenizer - -nltk.download("punkt", quiet=True) - - -def preprocess_sentence(sentence: str) -> str: - """ - Filter out undesirable tokens from thr sentence - :param sentence: - :return: - """ - stop_words = set(stopwords.words("english")) - tokens = word_tokenize(sentence.lower()) - tokens = [token for token in tokens if token.isalnum() and token not in stop_words] - return " ".join(tokens) - - -def compute_similarity(sent1: str, sent2: str) -> float: - """ - Compute the similarity - """ - tfidf_vectorizer = TfidfVectorizer() - if sent1 is not None and sent2 is not None: - tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2]) - return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0] - return 0.0 - - -def remove_almost_alike_sentences(sentences: List[str], threshold=0.7) -> List[str]: - """ - Filter sentences that are similar beyond a set threshold - :param sentences: - :param threshold: - :return: - """ - num_sentences = len(sentences) - removed_indices = set() - - for i in range(num_sentences): - if i not in removed_indices: - for j in range(i + 1, num_sentences): - if j not in removed_indices: - l_i = len(sentences[i]) - l_j = len(sentences[j]) - if l_i == 0 or l_j == 0: - if l_i == 0: - removed_indices.add(i) - if l_j == 0: - removed_indices.add(j) - else: - sentence1 = preprocess_sentence(sentences[i]) - sentence2 = preprocess_sentence(sentences[j]) - if 
len(sentence1) != 0 and len(sentence2) != 0: - similarity = compute_similarity(sentence1, sentence2) - - if similarity >= threshold: - removed_indices.add(max(i, j)) - - filtered_sentences = [ - sentences[i] for i in range(num_sentences) if i not in removed_indices - ] - return filtered_sentences - - -def remove_outright_duplicate_sentences_from_chunk(chunk: str) -> List[str]: - """ - Remove repetitive sentences - :param chunk: - :return: - """ - chunk_text = chunk["text"] - sentences = nltk.sent_tokenize(chunk_text) - nonduplicate_sentences = list(dict.fromkeys(sentences)) - return nonduplicate_sentences - - -def remove_whisper_repetitive_hallucination( - nonduplicate_sentences: List[str], -) -> List[str]: - """ - Remove sentences that are repeated as a result of Whisper - hallucinations - :param nonduplicate_sentences: - :return: - """ - chunk_sentences = [] - - for sent in nonduplicate_sentences: - temp_result = "" - seen = {} - words = nltk.word_tokenize(sent) - n_gram_filter = 3 - for i in range(len(words)): - if ( - str(words[i : i + n_gram_filter]) in seen - and seen[str(words[i : i + n_gram_filter])] - == words[i + 1 : i + n_gram_filter + 2] - ): - pass - else: - seen[str(words[i : i + n_gram_filter])] = words[ - i + 1 : i + n_gram_filter + 2 - ] - temp_result += words[i] - temp_result += " " - chunk_sentences.append(temp_result) - return chunk_sentences - - -def post_process_transcription(whisper_result: dict) -> dict: - """ - Parent function to perform post-processing on the transcription result - :param whisper_result: - :return: - """ - transcript_text = "" - for chunk in whisper_result["chunks"]: - nonduplicate_sentences = remove_outright_duplicate_sentences_from_chunk(chunk) - chunk_sentences = remove_whisper_repetitive_hallucination( - nonduplicate_sentences - ) - similarity_matched_sentences = remove_almost_alike_sentences(chunk_sentences) - chunk["text"] = " ".join(similarity_matched_sentences) - transcript_text += chunk["text"] - whisper_result["text"] = transcript_text - return whisper_result - - -def summarize_chunks(chunks: List[str], tokenizer, model) -> List[str]: - """ - Summarize each chunk using a summarizer model - :param chunks: - :param tokenizer: - :param model: - :return: - """ - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - summaries = [] - for c in chunks: - input_ids = tokenizer.encode(c, return_tensors="pt") - input_ids = input_ids.to(device) - with torch.no_grad(): - summary_ids = model.generate( - input_ids, - num_beams=int(CONFIG["SUMMARIZER"]["BEAM_SIZE"]), - length_penalty=2.0, - max_length=int(CONFIG["SUMMARIZER"]["MAX_LENGTH"]), - early_stopping=True, - ) - summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True) - summaries.append(summary) - return summaries - - -def chunk_text( - text: str, max_chunk_length: int = int(CONFIG["SUMMARIZER"]["MAX_CHUNK_LENGTH"]) -) -> List[str]: - """ - Split text into smaller chunks. 
- :param text: Text to be chunked - :param max_chunk_length: length of chunk - :return: chunked texts - """ - sentences = nltk.sent_tokenize(text) - chunks = [] - current_chunk = "" - for sentence in sentences: - if len(current_chunk) + len(sentence) < max_chunk_length: - current_chunk += f" {sentence.strip()}" - else: - chunks.append(current_chunk.strip()) - current_chunk = f"{sentence.strip()}" - chunks.append(current_chunk.strip()) - return chunks - - -def summarize( - transcript_text: str, - timestamp: datetime.datetime.timestamp, - real_time: bool = False, - chunk_summarize: str = CONFIG["SUMMARIZER"]["SUMMARIZE_USING_CHUNKS"], -): - """ - Summarize the given text either as a whole or as chunks as needed - :param transcript_text: - :param timestamp: - :param real_time: - :param chunk_summarize: - :return: - """ - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - summary_model = CONFIG["SUMMARIZER"]["SUMMARY_MODEL"] - if not summary_model: - summary_model = "facebook/bart-large-cnn" - - # Summarize the generated transcript using the BART model - LOGGER.info(f"Loading BART model: {summary_model}") - tokenizer = BartTokenizer.from_pretrained(summary_model) - model = BartForConditionalGeneration.from_pretrained(summary_model) - model = model.to(device) - - output_file = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt" - if real_time: - output_file = "real_time_" + output_file - - if chunk_summarize != "YES": - max_length = int(CONFIG["SUMMARIZER"]["INPUT_ENCODING_MAX_LENGTH"]) - inputs = tokenizer.batch_encode_plus( - [transcript_text], - truncation=True, - padding="longest", - max_length=max_length, - return_tensors="pt", - ) - inputs = inputs.to(device) - - with torch.no_grad(): - num_beans = int(CONFIG["SUMMARIZER"]["BEAM_SIZE"]) - max_length = int(CONFIG["SUMMARIZER"]["MAX_LENGTH"]) - summaries = model.generate( - inputs["input_ids"], - num_beams=num_beans, - length_penalty=2.0, - max_length=max_length, - early_stopping=True, - ) - - decoded_summaries = [ - tokenizer.decode( - summary, skip_special_tokens=True, clean_up_tokenization_spaces=False - ) - for summary in summaries - ] - summary = " ".join(decoded_summaries) - with open("./artefacts/" + output_file, "w", encoding="utf-8") as file: - file.write(summary.strip() + "\n") - else: - LOGGER.info("Breaking transcript into smaller chunks") - chunks = chunk_text(transcript_text) - - LOGGER.info(f"Transcript broken into {len(chunks)} chunks of at most 500 words") - - LOGGER.info(f"Writing summary text to: {output_file}") - with open(output_file, "w") as f: - summaries = summarize_chunks(chunks, tokenizer, model) - for summary in summaries: - f.write(summary.strip() + " ") diff --git a/server/reflector/utils/viz_utils.py b/server/reflector/utils/viz_utils.py deleted file mode 100644 index d26afdca..00000000 --- a/server/reflector/utils/viz_utils.py +++ /dev/null @@ -1,283 +0,0 @@ -""" -Utility file for all visualization related functions -""" - -import ast -import collections -import datetime -import os -import pickle -from typing import NoReturn - -import matplotlib.pyplot as plt -import pandas as pd -import scattertext as st -import spacy -from nltk.corpus import stopwords -from wordcloud import STOPWORDS, WordCloud - -en = spacy.load("en_core_web_md") -spacy_stopwords = en.Defaults.stop_words - -STOPWORDS = ( - set(STOPWORDS).union(set(stopwords.words("english"))).union(set(spacy_stopwords)) -) - - -def create_wordcloud( - timestamp: datetime.datetime.timestamp, real_time: bool = False -) -> 
NoReturn: - """ - Create a basic word cloud visualization of transcribed text - :return: None. The wordcloud image is saved locally - """ - filename = "transcript" - if real_time: - filename = ( - "real_time_" - + filename - + "_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".txt" - ) - else: - filename += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt" - - with open("./artefacts/" + filename, "r") as f: - transcription_text = f.read() - - # python_mask = np.array(PIL.Image.open("download1.png")) - - wordcloud = WordCloud( - height=800, - width=800, - background_color="white", - stopwords=STOPWORDS, - min_font_size=8, - ).generate(transcription_text) - - # Plot wordcloud and save image - plt.figure(facecolor=None) - plt.imshow(wordcloud, interpolation="bilinear") - plt.axis("off") - plt.tight_layout(pad=0) - - wordcloud = "wordcloud" - if real_time: - wordcloud = ( - "real_time_" - + wordcloud - + "_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".png" - ) - else: - wordcloud += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png" - - plt.savefig("./artefacts/" + wordcloud) - - -def create_talk_diff_scatter_viz( - timestamp: datetime.datetime.timestamp, real_time: bool = False -) -> NoReturn: - """ - Perform agenda vs transcription diff to see covered topics. - Create a scatter plot of words in topics. - :return: None. Saved locally. - """ - spacy_model = "en_core_web_md" - nlp = spacy.load(spacy_model) - nlp.add_pipe("sentencizer") - - agenda_topics = [] - agenda = [] - # Load the agenda - with open(os.path.join(os.getcwd(), "agenda-headers.txt"), "r") as f: - for line in f.readlines(): - if line.strip(): - agenda.append(line.strip()) - agenda_topics.append(line.split(":")[0]) - - # Load the transcription with timestamp - if real_time: - filename = ( - "./artefacts/real_time_transcript_with_timestamp_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".txt" - ) - else: - filename = ( - "./artefacts/transcript_with_timestamp_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".txt" - ) - with open(filename) as file: - transcription_timestamp_text = file.read() - - res = ast.literal_eval(transcription_timestamp_text) - chunks = res["chunks"] - - # create df for processing - df = pd.DataFrame.from_dict(res["chunks"]) - - covered_items = {} - # ts: timestamp - # Map each timestamped chunk with top1 and top2 matched agenda - ts_to_topic_mapping_top_1 = {} - ts_to_topic_mapping_top_2 = {} - - # Also create a mapping of the different timestamps - # in which each topic was covered - topic_to_ts_mapping_top_1 = collections.defaultdict(list) - topic_to_ts_mapping_top_2 = collections.defaultdict(list) - - similarity_threshold = 0.7 - - for c in chunks: - doc_transcription = nlp(c["text"]) - topic_similarities = [] - for item in range(len(agenda)): - item_doc = nlp(agenda[item]) - # if not doc_transcription or not all - # (token.has_vector for token in doc_transcription): - if not doc_transcription: - continue - similarity = doc_transcription.similarity(item_doc) - topic_similarities.append((item, similarity)) - topic_similarities.sort(key=lambda x: x[1], reverse=True) - for i in range(2): - if topic_similarities[i][1] >= similarity_threshold: - covered_items[agenda[topic_similarities[i][0]]] = True - # top1 match - if i == 0: - ts_to_topic_mapping_top_1[c["timestamp"]] = agenda_topics[ - topic_similarities[i][0] - ] - topic_to_ts_mapping_top_1[ - agenda_topics[topic_similarities[i][0]] - ].append(c["timestamp"]) - # top2 match - else: - ts_to_topic_mapping_top_2[c["timestamp"]] = 
agenda_topics[ - topic_similarities[i][0] - ] - topic_to_ts_mapping_top_2[ - agenda_topics[topic_similarities[i][0]] - ].append(c["timestamp"]) - - def create_new_columns(record: dict) -> dict: - """ - Accumulate the mapping information into the df - :param record: - :return: - """ - record["ts_to_topic_mapping_top_1"] = ts_to_topic_mapping_top_1[ - record["timestamp"] - ] - record["ts_to_topic_mapping_top_2"] = ts_to_topic_mapping_top_2[ - record["timestamp"] - ] - return record - - df = df.apply(create_new_columns, axis=1) - - # Count the number of items covered and calculate the percentage - num_covered_items = sum(covered_items.values()) - percentage_covered = num_covered_items / len(agenda) * 100 - - # Print the results - print("💬 Agenda items covered in the transcription:") - for item in agenda: - if item in covered_items and covered_items[item]: - print("✅ ", item) - else: - print("❌ ", item) - print("📊 Coverage: {:.2f}%".format(percentage_covered)) - - # Save df, mappings for further experimentation - df_name = "df" - if real_time: - df_name = ( - "real_time_" - + df_name - + "_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".pkl" - ) - else: - df_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl" - df.to_pickle("./artefacts/" + df_name) - - my_mappings = [ - ts_to_topic_mapping_top_1, - ts_to_topic_mapping_top_2, - topic_to_ts_mapping_top_1, - topic_to_ts_mapping_top_2, - ] - - mappings_name = "mappings" - if real_time: - mappings_name = ( - "real_time_" - + mappings_name - + "_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".pkl" - ) - else: - mappings_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl" - pickle.dump(my_mappings, open("./artefacts/" + mappings_name, "wb")) - - # to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") ) - - # pick the 2 most matched topic to be used for plotting - topic_times = collections.defaultdict(int) - for key in ts_to_topic_mapping_top_1.keys(): - if key[0] is None or key[1] is None: - continue - duration = key[1] - key[0] - topic_times[ts_to_topic_mapping_top_1[key]] += duration - - topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True) - - if len(topic_times) > 1: - cat_1 = topic_times[0][0] - cat_1_name = topic_times[0][0] - cat_2_name = topic_times[1][0] - - # Scatter plot of topics - df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)) - corpus = ( - st.CorpusFromParsedDocuments( - df, category_col="ts_to_topic_mapping_top_1", parsed_col="parse" - ) - .build() - .get_unigram_corpus() - .compact(st.AssociationCompactor(2000)) - ) - html = st.produce_scattertext_explorer( - corpus, - category=cat_1, - category_name=cat_1_name, - not_category_name=cat_2_name, - minimum_term_frequency=0, - pmi_threshold_coefficient=0, - width_in_pixels=1000, - transform=st.Scalers.dense_rank, - ) - if real_time: - with open( - "./artefacts/real_time_scatter_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".html", - "w", - ) as file: - file.write(html) - else: - with open( - "./artefacts/scatter_" - + timestamp.strftime("%m-%d-%Y_%H:%M:%S") - + ".html", - "w", - ) as file: - file.write(html) diff --git a/server/scripts/clear_artefacts.sh b/server/scripts/clear_artefacts.sh deleted file mode 100755 index 9e98f50e..00000000 --- a/server/scripts/clear_artefacts.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# Directory to search for Python files -cwd=$(pwd) -last_component="${cwd##*/}" - -if [ "$last_component" = "reflector" ]; then - directory="./artefacts" -elif [ 
"$last_component" = "scripts" ]; then - directory="../artefacts" -fi - -# Pattern to match Python files (e.g., "*.py" for all .py files) -transcript_file_pattern="transcript_*.txt" -summary_file_pattern="summary_*.txt" -pickle_file_pattern="*.pkl" -html_file_pattern="*.html" -png_file_pattern="wordcloud*.png" -mp3_file_pattern="*.mp3" -mp4_file_pattern="*.mp4" -m4a_file_pattern="*.m4a" - -find "$directory" -type f -name "$transcript_file_pattern" -delete -find "$directory" -type f -name "$summary_file_pattern" -delete -find "$directory" -type f -name "$pickle_file_pattern" -delete -find "$directory" -type f -name "$html_file_pattern" -delete -find "$directory" -type f -name "$png_file_pattern" -delete -find "$directory" -type f -name "$mp3_file_pattern" -delete -find "$directory" -type f -name "$mp4_file_pattern" -delete -find "$directory" -type f -name "$m4a_file_pattern" -delete diff --git a/server/scripts/setup_pipeline_dependencies.sh b/server/scripts/setup_pipeline_dependencies.sh deleted file mode 100644 index b0279ab2..00000000 --- a/server/scripts/setup_pipeline_dependencies.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh - -# Upgrade pip -pip install --upgrade pip - -# Default to CPU Installation of JAX -jax_mode="jax[cpu]" - -# Install JAX -if [ "$1" == "cpu" ] -then - jax_mode="jax[cpu]" -elif [ "$1" == "cuda11" ] -then - jax_mode="jax[cuda11_pip]" -elif [ "$1" == "cuda12" ] -then - jax_mode="jax[cuda12_pip]" -fi - -pip install --upgrade "$jax_mode" - -# Install Whisper-JAX base -pip install git+https://github.com/sanchit-gandhi/whisper-jax.git - -# Update to latest version -pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git - -cwd=$(pwd) -last_component="${cwd##*/}" -if [ "$last_component" = "reflector" ]; then - pip install -r pipeline-requirements.txt -elif [ "$last_component" = "scripts" ]; then - pip install -r ../pipeline-requirements.txt -fi - -# download spacy models -spacy download en_core_web_sm -spacy download en_core_web_md diff --git a/server/scripts/setup_server_dependencies.sh b/server/scripts/setup_server_dependencies.sh deleted file mode 100755 index 0ca5c1e4..00000000 --- a/server/scripts/setup_server_dependencies.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -pip install --upgrade pip - -cwd=$(pwd) -last_component="${cwd##*/}" -if [ "$last_component" = "reflector" ]; then - pip install -r server-requirements.txt -elif [ "$last_component" = "scripts" ]; then - pip install -r ../server-requirements.txt -fi diff --git a/www/.env_template b/www/.env_template deleted file mode 100644 index d207c44f..00000000 --- a/www/.env_template +++ /dev/null @@ -1 +0,0 @@ -ZULIP_API_KEY=