Merge pull request #40 from Monadical-SAS/feat/gokul

Code refactor and cleanup from Feat/gokul
projects-g
2023-07-25 13:56:14 +05:30
committed by GitHub
33 changed files with 700 additions and 242 deletions

.gitignore vendored

@@ -165,7 +165,7 @@ cython_debug/
 transcript_*.txt
 test_*.txt
 wordcloud*.png
-*.ini
+utils/config.ini
 test_samples/
 *.wav
 *.mp3


@@ -5,15 +5,15 @@ import signal
 from aiortc.contrib.signaling import (add_signaling_arguments,
                                       create_signaling)
+from stream_client import StreamClient
 from utils.log_utils import logger
-from stream_client import StreamClient


 async def main():
     parser = argparse.ArgumentParser(description="Data channels ping/pong")
     parser.add_argument(
-        "--url", type=str, nargs="?", default="http://127.0.0.1:1250/offer"
+        "--url", type=str, nargs="?", default="http://0.0.0.0:1250/offer"
     )
     parser.add_argument(


@@ -2,8 +2,6 @@ pyaudio==0.2.13
 keyboard==0.13.5
 pynput==1.7.6
 wave==0.0.2
-aiohttp==3.8.4
-aiosignal==1.3.1
 async-timeout==4.0.2
 attrs==23.1.0
 certifi==2023.5.7
@@ -51,11 +49,8 @@ matplotlib==3.7.2
 matplotlib-inline==0.1.6
 termcolor==2.3.0
 ffmpeg==1.4
-aiortc==1.5.0
 cached_property==1.5.2
 stamina==23.1.0
 httpx==0.24.1
-sortedcontainers==2.4.0
 https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
 gpt4all==1.0.5
-aiohttp_cors==0.7.0


@@ -26,7 +26,7 @@ pip install git+https://github.com/sanchit-gandhi/whisper-jax.git
 # Update to latest version
 pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git
-pip install -r ../requirements.txt
+pip install -r ../server-requirements.txt
 # download spacy models
 spacy download en_core_web_sm


@@ -0,0 +1,4 @@
#!/bin/sh
pip install --upgrade pip
pip install -r ../server-requirements.txt

server-requirements.txt Normal file

@@ -0,0 +1,50 @@
aiohttp==3.8.5
aiohttp-cors==0.7.0
aioice==0.9.0
aiortc==1.5.0
aiosignal==1.3.1
anyio==3.7.1
async-timeout==4.0.2
attrs==23.1.0
av==10.0.0
certifi==2023.7.22
cffi==1.15.1
charset-normalizer==3.2.0
coloredlogs==15.0.1
cryptography==41.0.2
ctranslate2==3.17.1
dnspython==2.4.0
faster-whisper==0.7.1
filelock==3.12.2
flatbuffers==23.5.26
frozenlist==1.4.0
fsspec==2023.6.0
google-crc32c==1.5.0
h11==0.14.0
httpcore==0.17.3
huggingface-hub==0.16.4
humanfriendly==10.0
idna==3.4
ifaddr==0.2.0
loguru==0.7.0
mpmath==1.3.0
multidict==6.0.4
numpy==1.25.1
onnxruntime==1.15.1
packaging==23.1
protobuf==4.23.4
pycparser==2.21
pyee==11.0.0
pylibsrtp==0.8.0
pyOpenSSL==23.2.0
PyYAML==6.0.1
requests==2.31.0
sniffio==1.3.0
sortedcontainers==2.4.0
sympy==1.12
tokenizers==0.13.3
tqdm==4.65.0
typing_extensions==4.7.1
urllib3==2.0.4
yarl==1.9.2
wave==0.0.2


@@ -1,29 +1,30 @@
+import argparse
 import asyncio
 import datetime
-import io
 import json
+import os
 import uuid
 import wave
 from concurrent.futures import ThreadPoolExecutor

 import aiohttp_cors
-import jax.numpy as jnp
 import requests
 from aiohttp import web
 from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaRelay
 from av import AudioFifo
+from faster_whisper import WhisperModel
 from loguru import logger
-from whisper_jax import FlaxWhisperPipline
-from utils.run_utils import run_in_executor
 from sortedcontainers import SortedDict
+from utils.run_utils import run_in_executor, config

 pcs = set()
 relay = MediaRelay()
 data_channel = None

-pipeline = FlaxWhisperPipline("openai/whisper-tiny",
-                              dtype=jnp.float16,
-                              batch_size=16)
+model = WhisperModel("tiny", device="cpu",
+                     compute_type="float32",
+                     num_workers=12)

 CHANNELS = 2
 RATE = 48000
@@ -31,8 +32,8 @@ audio_buffer = AudioFifo()
 executor = ThreadPoolExecutor()
 transcription_text = ""
 last_transcribed_time = 0.0
-LLM_MACHINE_IP = "216.153.52.83"
-LLM_MACHINE_PORT = "5000"
+LLM_MACHINE_IP = config["DEFAULT"]["LLM_MACHINE_IP"]
+LLM_MACHINE_PORT = config["DEFAULT"]["LLM_MACHINE_PORT"]
 LLM_URL = f"http://{LLM_MACHINE_IP}:{LLM_MACHINE_PORT}/api/v1/generate"
 incremental_responses = []
 sorted_transcripts = SortedDict()
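The LLM endpoint is no longer hard-coded; it is read from utils/config.ini, which the .gitignore change above keeps untracked. A minimal sketch of how such a config could be loaded with configparser — the path and the placeholder values are assumptions, only the key names appear in this diff:

# utils/run_utils.py (sketch): exposes `config`, consumed as config["DEFAULT"][...]
import configparser
import os

config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), "config.ini"))

# utils/config.ini would then carry entries such as (placeholder values):
# [DEFAULT]
# LLM_MACHINE_IP = 127.0.0.1
# LLM_MACHINE_PORT = 5000
# WHISPER_MODEL_SIZE = tiny
# WHISPER_REAL_TIME_MODEL_SIZE = tiny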
@@ -43,7 +44,7 @@ blacklisted_messages = [" Thank you.", " See you next time!",
 def get_title_and_summary(llm_input_text, last_timestamp):
-    print("Generating title and summary")
+    logger.info("Generating title and summary")
     # output = llm.generate(prompt)
     # Use monadical-ml to fire this query to an LLM and get result
@@ -67,7 +68,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
         "prompt": prompt
     }
-    # To-do: Handle unexpected output formats from the model
+    # TODO : Handle unexpected output formats from the model
     try:
         response = requests.post(LLM_URL, headers=headers, json=data)
         output = json.loads(response.json()["results"][0]["text"])
@@ -80,14 +81,15 @@ def get_title_and_summary(llm_input_text, last_timestamp):
             "cmd": "UPDATE_TOPICS",
             "topics": incremental_responses,
         }
     except Exception as e:
-        print("Exception" + str(e))
+        logger.info("Exception" + str(e))
         result = None
     return result


 def channel_log(channel, t, message):
-    print("channel(%s) %s %s" % (channel.label, t, message))
+    logger.info("channel(%s) %s %s" % (channel.label, t, message))


 def channel_send(channel, message):
@@ -113,18 +115,25 @@ def channel_send_transcript(channel):
         # Due to exceptions if one of the earlier batches can't return
         # a transcript, we don't want to be stuck waiting for the result
         # With the threshold size of 3, we pop the first(lost) element
-        elif len(sorted_transcripts) >= 3:
-            del sorted_transcripts[least_time]
+        else:
+            if len(sorted_transcripts) >= 3:
+                del sorted_transcripts[least_time]
     except Exception as e:
-        print("Exception", str(e))
+        logger.info("Exception", str(e))
         pass


 def get_transcription(frames):
-    print("Transcribing..")
+    logger.info("Transcribing..")
     sorted_transcripts[frames[0].time] = None
-    out_file = io.BytesIO()
-    wf = wave.open(out_file, "wb")
+    # TODO:
+    # Passing IO objects instead of temporary files throws an error
+    # Passing ndarrays (typecasted with float) does not give any
+    # transcription. Refer issue,
+    # https://github.com/guillaumekln/faster-whisper/issues/369
+    audiofilename = "test" + str(datetime.datetime.now())
+    wf = wave.open(audiofilename, "wb")
     wf.setnchannels(CHANNELS)
     wf.setframerate(RATE)
     wf.setsampwidth(2)
@@ -133,22 +142,40 @@ def get_transcription(frames):
         wf.writeframes(b"".join(frame.to_ndarray()))
     wf.close()

-    # To-Do: Look into WhisperTimeStampLogitsProcessor exception
-    try:
-        whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
-    except Exception as e:
-        return
-    global transcription_text, last_transcribed_time
-    transcription_text += whisper_result["text"]
-    duration = whisper_result["chunks"][0]["timestamp"][1]
-    if not duration:
-        duration = 5.0
-    last_transcribed_time += duration
+    result_text = ""
+    try:
+        segments, _ = \
+            model.transcribe(audiofilename,
+                             language="en",
+                             beam_size=5,
+                             vad_filter=True,
+                             vad_parameters=dict(min_silence_duration_ms=500))
+        os.remove(audiofilename)
+        segments = list(segments)
+        result_text = ""
+        duration = 0.0
+        for segment in segments:
+            result_text += segment.text
+            start_time = segment.start
+            end_time = segment.end
+            if not segment.start:
+                start_time = 0.0
+            if not segment.end:
+                end_time = 5.5
+            duration += (end_time - start_time)
+        global last_transcribed_time, transcription_text
+        last_transcribed_time += duration
+        transcription_text += result_text
+    except Exception as e:
+        logger.info("Exception" + str(e))
+        pass
     result = {
         "cmd": "SHOW_TRANSCRIPTION",
-        "text": whisper_result["text"]
+        "text": result_text
     }
     sorted_transcripts[frames[0].time] = result
     return result
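Outside the server, the faster-whisper call this hunk switches to can be exercised on its own. A minimal standalone sketch using the same parameters as above (the audio file name is illustrative):

from faster_whisper import WhisperModel

model = WhisperModel("tiny", device="cpu", compute_type="float32", num_workers=12)
# vad_filter drops long silences before decoding, mirroring the server settings
segments, info = model.transcribe("sample.wav",
                                  language="en",
                                  beam_size=5,
                                  vad_filter=True,
                                  vad_parameters=dict(min_silence_duration_ms=500))
for segment in segments:
    print(segment.start, segment.end, segment.text)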
@@ -167,6 +194,9 @@ def get_final_summary_response():
             seconds=round(last_transcribed_time))),
         "summary": final_summary
     }
+    with open("./artefacts/meeting_titles_and_summaries.txt", "a") as f:
+        f.write(json.dumps(incremental_responses))
     return response
@@ -196,7 +226,7 @@ class AudioStreamTrack(MediaStreamTrack):
             else None
         )
-        if len(transcription_text) > 500:
+        if len(transcription_text) > 750:
             llm_input_text = transcription_text
             transcription_text = ""
             llm_result = run_in_executor(get_title_and_summary,
@@ -245,7 +275,6 @@ async def offer(request):
             if isinstance(message, str) and message.startswith("ping"):
                 channel_send(channel, "pong" + message[4:])

     @pc.on("connectionstatechange")
     async def on_connectionstatechange():
         log_info("Connection state is " + pc.connectionState)
@@ -278,6 +307,16 @@ async def on_shutdown(app):
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="WebRTC based server for Reflector"
+    )
+    parser.add_argument(
+        "--host", default="0.0.0.0", help="Server host IP (def: 0.0.0.0)"
+    )
+    parser.add_argument(
+        "--port", type=int, default=1250, help="Server port (def: 1250)"
+    )
+    args = parser.parse_args()
     app = web.Application()
     cors = aiohttp_cors.setup(
         app,
@@ -293,4 +332,4 @@ if __name__ == "__main__":
     offer_resource = cors.add(app.router.add_resource("/offer"))
     cors.add(offer_resource.add_route("POST", offer))
     app.on_shutdown.append(on_shutdown)
-    web.run_app(app, access_log=None, host="127.0.0.1", port=1250)
+    web.run_app(app, access_log=None, host=args.host, port=args.port)
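With the new arguments the bind address and port are no longer hard-coded. Assuming the module is launched directly (its file name is not shown in this view; server.py is only illustrative), a run looks like:

python server.py --host 0.0.0.0 --port 1250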


@@ -17,7 +17,7 @@ class StreamClient:
     def __init__(
         self,
         signaling,
-        url="http://127.0.0.1:1250",
+        url="http://0.0.0.0:1250",
         play_from=None,
         ping_pong=False
     ):

trials/__init__.py Normal file

@@ -0,0 +1,24 @@
# Steps to prepare data and submit/check OpenAI finetuning
# import subprocess
# subprocess.run("openai tools fine_tunes.prepare_data -f " + "finetuning_dataset.jsonl")
# export OPENAI_API_KEY=
# openai api fine_tunes.create -t <TRAIN_FILE_ID_OR_PATH> -m <BASE_MODEL>
# openai api fine_tunes.list
import openai
# Use your OpenAI API Key
openai.api_key = ""
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . -> ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas . - > "]
# Give your finetuned model name here
# "davinci:ft-personal-2023-07-14-10-43-51"
model_name = ""
response = openai.Completion.create(
model=model_name,
prompt=sample_chunks[0])
print(response)


@@ -0,0 +1,98 @@
import json
import yt_dlp as youtube_dl
from whisper_jax import FlaxWhisperPipline
import jax.numpy as jnp
# Function to extract chapter information from a YouTube video URL
def get_youtube_chapters(video_id):
video_url = "https://www.youtube.com/watch?v=" + video_id
ydl_opts = {
'extract_flat': 'in_playlist',
'skip_download': True,
'quiet': True,
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
video_info = ydl.extract_info(video_url, download=False)
chapters = []
if 'chapters' in video_info:
for chapter in video_info['chapters']:
start_time = chapter['start_time']
end_time = chapter['end_time']
title = chapter['title']
chapters.append({
'start': start_time,
'end': end_time,
'title': title
})
return chapters
# Function to extract video transcription using yt_dlp
def get_youtube_transcription(video_id):
ydl_opts = {
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'outtmpl': './artefacts/audio', # Specify output file path and name
}
# Download the audio
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(["https://www.youtube.com/watch?v=" + video_id])
media_file = "./artefacts/audio.mp3"
pipeline = FlaxWhisperPipline("openai/whisper-" + "tiny",
dtype=jnp.float16,
batch_size=16)
whisper_result = pipeline(media_file, return_timestamps=True)
return whisper_result["chunks"]
# Function to scrape YouTube video transcripts and chapter information
def scrape_youtube_data(video_id):
transcript_text = get_youtube_transcription(video_id)
chapters = get_youtube_chapters(video_id)
print("transcript_text", transcript_text)
print("chapters", chapters)
return transcript_text, chapters
# Function to generate fine-tuning dataset from YouTube data
def generate_finetuning_dataset(video_ids):
prompt_completion_pairs = []
for video_id in video_ids:
transcript_text, chapters = scrape_youtube_data(video_id)
if transcript_text is not None and chapters is not None:
for chapter in chapters:
start_time = chapter["start"]
end_time = chapter["end"]
chapter_text = chapter["title"]
prompt = ""
for transcript in transcript_text:
if transcript["timestamp"][0] >= start_time and transcript["timestamp"][1] < end_time:
prompt += transcript["text"]
if prompt is not None:
completion = chapter_text
prompt_completion_pairs.append({"prompt": prompt, "completion": completion})
return prompt_completion_pairs
# Add all the video ids here, the videos must have captions [chapters]
video_ids = ["yTnSEZIwnkU"]
dataset = generate_finetuning_dataset(video_ids)
with open("finetuning_dataset.jsonl", "w") as f:
for example in dataset:
f.write(json.dumps(example) + "\n")
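Each line that generate_finetuning_dataset() writes to finetuning_dataset.jsonl is a single prompt/completion pair: the transcript text that falls inside a chapter becomes the prompt and the chapter title becomes the completion. Schematically (placeholder values, not output from a real run):

{"prompt": "<transcript text falling inside the chapter>", "completion": "<chapter title>"}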


@@ -1,98 +0,0 @@
# # Approach 1
# from transformers import GPTNeoForCausalLM, GPT2Tokenizer
#
# model_name = 'EleutherAI/gpt-neo-1.3B'
# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# model = GPTNeoForCausalLM.from_pretrained(model_name)
#
# conversation = """
# Summarize the following conversation in 3 key sentences:
#
# We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
# Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
# Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
# Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
# Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
# Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
# """
#
# input_ids = tokenizer.encode(conversation, return_tensors='pt')
#
# output = model.generate(input_ids,
# max_length=30,
# num_return_sequences=1)
#
# caption = tokenizer.decode(output[0], skip_special_tokens=True)
# print("Caption:", caption[len(input_ids):])
#
# # Approach 2
# import torch
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
#
# model_name = "gpt2"
# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# model = GPT2LMHeadModel.from_pretrained(model_name)
#
# model.eval()
#
# text = """
# You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
# """
#
# tokenizer.pad_token = tokenizer.eos_token
# input_ids = tokenizer.encode(text,
# max_length=100,
# truncation=True,
# return_tensors="pt")
# attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
# output = model.generate(input_ids,
# max_new_tokens=20,
# num_return_sequences=1,
# num_beams=2,
# attention_mask=attention_mask)
#
# chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
# for i, title in enumerate(chapter_titles):
# print("Caption: ", title)
# Approach 3
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
def generate_response(conversation, max_length=100):
input_text = ""
for entry in conversation:
role = entry["role"]
content = entry["content"]
input_text += f"{role}: {content}\n"
# Tokenize the entire conversation
input_ids = tokenizer.encode(input_text, return_tensors="pt")
# Generate text based on the entire conversation
with torch.no_grad():
output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)
# Decode the generated text and return it
response = tokenizer.decode(output[0], skip_special_tokens=True)
return response
if __name__ == "__main__":
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
]
conversation = [
{"role": "system", "content": "Summarize this text" },
{"role": "user", "content": " text : " + sample_chunks[0]},
]
response = generate_response(conversation)
print("Response:", response)


@@ -16,8 +16,8 @@ from av import AudioFifo
 from sortedcontainers import SortedDict
 from whisper_jax import FlaxWhisperPipline

-from utils.log_utils import logger
-from utils.run_utils import config, Mutex
+from reflector.utils.log_utils import logger
+from reflector.utils.run_utils import config, Mutex

 WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_REAL_TIME_MODEL_SIZE"]

 pcs = set()


@@ -0,0 +1,57 @@
import requests
import spacy
# Enter the Machine where the LLM is hosted
LLM_MACHINE_IP = ""
# This is the URL of text-generation-webui
URL = f"http://{LLM_MACHINE_IP}:5000/api/v1/generate"
headers = {
"Content-Type": "application/json"
}
def split_text_file(filename, token_count):
nlp = spacy.load('en_core_web_md')
with open(filename, 'r') as file:
text = file.read()
doc = nlp(text)
total_tokens = len(doc)
parts = []
start_index = 0
while start_index < total_tokens:
end_index = start_index + token_count
part_tokens = doc[start_index:end_index - 5]
part = ' '.join(token.text for token in part_tokens)
parts.append(part)
start_index = end_index
return parts
final_summary = []
parts = split_text_file("transcript.txt", 1600)
for part in parts:
prompt = f"""
### Human:
Given the following text, distill the most important information
into a short summary: {part}
### Assistant:
"""
data = {
"prompt": prompt
}
    try:
        response = requests.post(URL, headers=headers, json=data)
        print(response.json())
        # Collect each chunk's generated text so the final write below has content
        final_summary.append(response.json()["results"][0]["text"])
    except Exception as e:
        print(str(e))
with open("summary.txt", "w") as sum:
sum.write(" ".join(final_summary))


@@ -0,0 +1,43 @@
import torch
from transformers import BertTokenizer, BertModel
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Load the pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
model = BertModel.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)
# Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Load the SentenceTransformer model
sentence_transformer_model = SentenceTransformer('average_word_embeddings_glove.6B.300d')
# Define the input text
text = "Your input text to be summarized goes here."
# Tokenize the text
tokens = tokenizer.tokenize(text)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = torch.tensor([input_ids]).to(device)
# Get the BERT model output
with torch.no_grad():
outputs = model(input_ids)[0] # Extract the last hidden states
# Calculate sentence embeddings
sentence_embeddings = outputs.mean(dim=1).squeeze().cpu().numpy()
input_text_embedding = sentence_transformer_model.encode([text])[0]
# Calculate cosine similarity between sentences and input text
similarity_scores = cosine_similarity([input_text_embedding], sentence_embeddings)
# Sort the sentences by similarity scores in descending order
sorted_sentences = [sent for _, sent in sorted(zip(similarity_scores[0], sentences), reverse=True)]
# Choose the top sentences as the summary
num_summary_sentences = 2 # Adjust as needed
summary = ". ".join(sorted_sentences[:num_summary_sentences])
print("Summary:", summary)
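Note that the ranking step above assumes a sentences list that the snippet never defines. One way to supply it (an assumption, not part of this commit) is NLTK's sentence tokenizer; each sentence would then also need its own embedding (for example via sentence_transformer_model.encode(sentences)) rather than the single mean-pooled vector computed above:

from nltk.tokenize import sent_tokenize

# Candidate sentences for the cosine-similarity ranking
sentences = sent_tokenize(text)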


@@ -0,0 +1,101 @@
# Approach 1
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
model_name = 'EleutherAI/gpt-neo-1.3B'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPTNeoForCausalLM.from_pretrained(model_name)
conversation = """
Summarize the following conversation in 3 key sentences:
We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
"""
input_ids = tokenizer.encode(conversation, return_tensors='pt')
output = model.generate(input_ids,
max_length=30,
num_return_sequences=1)
caption = tokenizer.decode(output[0], skip_special_tokens=True)
print("Caption:", caption[len(input_ids):])
# Approach 2
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()
text = """
You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
"""
tokenizer.pad_token = tokenizer.eos_token
input_ids = tokenizer.encode(text,
max_length=100,
truncation=True,
return_tensors="pt")
attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
output = model.generate(input_ids,
max_new_tokens=20,
num_return_sequences=1,
num_beams=2,
attention_mask=attention_mask)
chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
for i, title in enumerate(chapter_titles):
print("Caption: ", title)
# Approach 3
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
def generate_response(conversation, max_length=100):
input_text = ""
for entry in conversation:
role = entry["role"]
content = entry["content"]
input_text += f"{role}: {content}\n"
# Tokenize the entire conversation
input_ids = tokenizer.encode(input_text, return_tensors="pt")
# Generate text based on the entire conversation
with torch.no_grad():
output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)
# Decode the generated text and return it
response = tokenizer.decode(output[0], skip_special_tokens=True)
return response
if __name__ == "__main__":
# Call appropriate approach from the main while experimenting
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
]
conversation = [
{"role": "system", "content": "Summarize this text"},
{"role": "user", "content": " text : " + sample_chunks[0]},
]
response = generate_response(conversation)
print("Response:", response)


@@ -1,9 +1,11 @@
+import spacy
+import sys

 # Observe the incremental summaries by performing summaries in chunks
 with open("transcript.txt") as f:
     transcription = f.read()

-import spacy

 def split_text_file(filename, token_count):
     nlp = spacy.load('en_core_web_md')
@@ -26,6 +28,7 @@ def split_text_file(filename, token_count):
     return parts


 # Set the chunk length here to split the transcript and test
 MAX_CHUNK_LENGTH = 1000
@@ -41,13 +44,11 @@ with open("chunks" + str(MAX_CHUNK_LENGTH) + ".txt", "a") as f:
 # ex. python incsum.py 1 => will run approach 1
 # If no input, will run all approaches
-import sys
 try:
     index = sys.argv[1]
 except:
     index = None

 # Approach 1 : facebook/bart-large-cnn
 if index == "1" or index is None:
     SUMMARY_MODEL = "facebook/bart-large-cnn"
@@ -81,7 +82,6 @@ if index == "1" or index is None:
         for summary in summaries:
             f.write(summary + "\n\n")

 # Approach 2
 if index == "2" or index is None:
     print("Performing chunk summary : " + "gpt-neo-1.3B")
@@ -155,4 +155,3 @@ if index == "3" or index is None:
     with open("mpt-7b-summaries.txt", "a") as f:
         for summary in summaries:
             f.write(summary + "\n\n")


@@ -0,0 +1,37 @@
# Use OpenAI API endpoint to send data to OpenAI
# along with prompts to caption/summarize the conversation
import openai
openai.api_key = ""
# to caption, user prompt used : "caption this conversation"
# max_tokens=20
# to incremental summarize, user prompt used : "summarize this conversation in a few sentences by taking key points"
# max_tokens=300
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
conversation = [
{"role": "system",
"content": sample_chunks[1]},
{"role": "user",
"content": "summarize this conversation in a few sentences by taking key points"}
]
model = "gpt-3.5-turbo"
response = openai.ChatCompletion.create(model=model,
messages=conversation,
n=1,
max_tokens=300)
# Try fine tuned model
# model = "davinci:ft-personal-2023-07-14-10-43-51"
# response = openai.Completion.create(model=model,
# prompt=sample_chunks[0] + " -> ")
caption = response.choices[0]
print(caption)
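print(caption) above dumps the entire first choice object. If only the generated text is needed, the 0.x openai SDK used here also allows dict-style access (a small addition, not in the original trial):

# Pull just the assistant message text out of the response
caption_text = response["choices"][0]["message"]["content"]
print(caption_text)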


@@ -0,0 +1,33 @@
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import torch
# Load the Pegasus model and tokenizer
model_name = "google/pegasus-large"
model = PegasusForConditionalGeneration.from_pretrained(model_name)
tokenizer = PegasusTokenizer.from_pretrained(model_name)
# Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
# Define the input text for summarization
text = sample_chunks[1]
inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt").to(device)
# Generate the summary
summary_ids = model.generate(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
max_length=200,
num_beams=4,
length_penalty=2.0,
early_stopping=True,
)
# Decode and print the summary
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Summary:", summary)


@@ -1,36 +1,27 @@
# Use OpenAI API endpoint to send data to OpenAI from transformers import T5ForConditionalGeneration, T5Tokenizer
# along with prompts to caption/summarize the conversation import torch
# Load the T5 model and tokenizer
model_name = "t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
import openai # Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
openai.api_key = "" model.to(device)
# to caption, user prompt used : "caption this conversation"
# max_tokens=20
# to incremental summarize, user prompt used : "summarize this conversation in a few sentences by taking key points"
# max_tokens=300
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ", sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . 
And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."] " We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
conversation = [
{"role": "system",
"content": sample_chunks[1]},
{"role": "user",
"content": "summarize this conversation in a few sentences by taking key points"}
]
model = "gpt-3.5-turbo" # Define the input text for summarization
response = openai.ChatCompletion.create(model=model, text = "Summarize the following text in 3 key points. text : " + sample_chunks[1]
messages=conversation,
n=1,
max_tokens=300)
# Try finetuned model # Tokenize the input text
# model = "davinci:ft-personal-2023-07-14-10-43-51" inputs = tokenizer.encode(text, return_tensors="pt").to(device)
# response = openai.Completion.create(model=model,
# prompt=sample_chunks[0] + " -> ")
caption = response.choices[0] # Generate the summary
print(caption) summary_ids = model.generate(inputs, max_length=1000, num_beams=4, early_stopping=True)
# Decode and print the summary
summary = tokenizer.decode(summary_ids.squeeze(), skip_special_tokens=True)
print("Summary:", summary)

View File

@@ -0,0 +1,44 @@
from gpt4all import GPT4All

import spacy

# Load a local GPT4All model (path is machine-specific)
model = GPT4All("/Users/gokulmohanarangan/Library/Application Support/nomic.ai/GPT4All/ggml-vicuna-13b-1.1-q4_2.bin")


def split_text_file(filename, token_count):
    """Split a text file into chunks of roughly `token_count` spaCy tokens."""
    nlp = spacy.load('en_core_web_md')
    with open(filename, 'r') as file:
        text = file.read()
    doc = nlp(text)
    total_tokens = len(doc)
    parts = []
    start_index = 0
    while start_index < total_tokens:
        end_index = start_index + token_count
        part_tokens = doc[start_index:end_index]
        part = ' '.join(token.text for token in part_tokens)
        parts.append(part)
        start_index = end_index
    return parts


# Summarize the transcript chunk by chunk, then stitch the results together
parts = split_text_file("transcript.txt", 1800)
final_summary = []
for part in parts:
    prompt = f"""
    ### Human:
    Summarize the following text without missing any key points and action items.
    {part}
    ### Assistant:
    """
    output = model.generate(prompt)
    final_summary.append(output)

with open("sum.txt", "w") as out_file:
    out_file.write(" ".join(final_summary))

View File

View File

@@ -18,11 +18,11 @@ import nltk
import yt_dlp as youtube_dl import yt_dlp as youtube_dl
from whisper_jax import FlaxWhisperPipline from whisper_jax import FlaxWhisperPipline
from utils.file_utils import download_files, upload_files from ...utils.file_utils import download_files, upload_files
from utils.log_utils import logger from ...utils.log_utils import logger
from utils.run_utils import config from ...utils.run_utils import config
from utils.text_utilities import post_process_transcription, summarize from ...utils.text_utils import post_process_transcription, summarize
from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud from ...utils.viz_utils import create_talk_diff_scatter_viz, create_wordcloud
nltk.download('punkt', quiet=True) nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True) nltk.download('stopwords', quiet=True)
@@ -30,8 +30,8 @@ nltk.download('stopwords', quiet=True)
WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"] WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]
NOW = datetime.now() NOW = datetime.now()
if not os.path.exists('./artefacts'): if not os.path.exists('../../artefacts'):
os.makedirs('./artefacts') os.makedirs('../../artefacts')
def init_argparse() -> argparse.ArgumentParser: def init_argparse() -> argparse.ArgumentParser:
@@ -91,7 +91,7 @@ def main():
# Download the audio # Download the audio
with youtube_dl.YoutubeDL(ydl_opts) as ydl: with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([args.location]) ydl.download([args.location])
media_file = "./artefacts/audio.mp3" media_file = "../artefacts/audio.mp3"
logger.info("Saved downloaded YouTube video to: " + media_file) logger.info("Saved downloaded YouTube video to: " + media_file)
else: else:

View File

@@ -10,11 +10,11 @@ from pynput import keyboard
from termcolor import colored from termcolor import colored
from whisper_jax import FlaxWhisperPipline from whisper_jax import FlaxWhisperPipline
from utils.file_utils import upload_files from ...utils.file_utils import upload_files
from utils.log_utils import logger from ...utils.log_utils import logger
from utils.run_utils import config from ...utils.run_utils import config
from utils.text_utilities import post_process_transcription, summarize from ...utils.text_utils import post_process_transcription, summarize
from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud from ...utils.viz_utils import create_talk_diff_scatter_viz, create_wordcloud
WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"] WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]

View File

@@ -1,5 +1,6 @@
[DEFAULT] [DEFAULT]
#SetexceptionruleforOpenMPerrortoallowduplicatelibinitialization #Set exception rule for OpenMP error
#to allow duplicate lib initialization
KMP_DUPLICATE_LIB_OK=TRUE KMP_DUPLICATE_LIB_OK=TRUE
#ExportOpenAIAPIKey #ExportOpenAIAPIKey
OPENAI_APIKEY= OPENAI_APIKEY=
@@ -7,8 +8,8 @@ OPENAI_APIKEY=
WHISPER_MODEL_SIZE=tiny WHISPER_MODEL_SIZE=tiny
WHISPER_REAL_TIME_MODEL_SIZE=tiny WHISPER_REAL_TIME_MODEL_SIZE=tiny
#AWSconfig #AWSconfig
AWS_ACCESS_KEY=***REMOVED*** AWS_ACCESS_KEY=
AWS_SECRET_KEY=***REMOVED*** AWS_SECRET_KEY=
BUCKET_NAME=reflector-bucket BUCKET_NAME=reflector-bucket
#Summarizerconfig #Summarizerconfig
SUMMARY_MODEL=facebook/bart-large-cnn SUMMARY_MODEL=facebook/bart-large-cnn
@@ -20,5 +21,6 @@ SUMMARIZE_USING_CHUNKS=YES
# Audiodevice # Audiodevice
BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME=aggregator BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME=aggregator
AV_FOUNDATION_DEVICE_ID=1 AV_FOUNDATION_DEVICE_ID=1
# LLM PATH # LLM configs
LLM_PATH= LLM_MACHINE_IP=
LLM_MACHINE_PORT=
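The new LLM_MACHINE_IP / LLM_MACHINE_PORT keys would presumably be read through the same configparser-backed config object used for the other settings; a minimal sketch (the URL shape is an assumption, not taken from this commit):
# Sketch only: reading the new LLM keys the same way the repo reads the rest
# of utils/config.ini via configparser.
import configparser

config = configparser.ConfigParser()
config.read("utils/config.ini")

llm_ip = config["DEFAULT"]["LLM_MACHINE_IP"]
llm_port = config["DEFAULT"]["LLM_MACHINE_PORT"]
llm_base_url = f"http://{llm_ip}:{llm_port}"  # endpoint path depends on the LLM server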

32
utils/format_output.py Normal file
View File

@@ -0,0 +1,32 @@
import json

# Load the topics produced by the summarizer
with open("../artefacts/meeting_titles_and_summaries.txt", "r") as f:
    outputs = json.loads(f.read())

# Split the combined output into separate transcript, summary and
# title/description files
with open("../artefacts/meeting_transcript.txt", "a") as transcript_file, \
        open("../artefacts/meeting_title_description.txt", "a") as title_desc_file, \
        open("../artefacts/meeting_summary.txt", "a") as summary_file:
    for item in outputs["topics"]:
        transcript_file.write(item["transcript"])
        summary_file.write(item["description"])
        title_desc_file.write("TITLE: \n")
        title_desc_file.write(item["title"])
        title_desc_file.write("\n")
        title_desc_file.write("DESCRIPTION: \n")
        title_desc_file.write(item["description"])
        title_desc_file.write("\n")
        title_desc_file.write("TRANSCRIPT: \n")
        title_desc_file.write(item["transcript"])
        title_desc_file.write("\n")
        title_desc_file.write("---------------------------------------- \n\n")
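For reference, the script assumes meeting_titles_and_summaries.txt contains JSON shaped roughly like the literal below (inferred from the keys the loop reads; values are placeholders only):
# Assumed input shape, inferred from the keys accessed above:
example_outputs = {
    "topics": [
        {
            "title": "Intro and agenda",
            "description": "Short summary of the segment",
            "transcript": "Raw transcript text for the segment",
        },
    ],
}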

View File

@@ -6,8 +6,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
from transformers import BartForConditionalGeneration, BartTokenizer from transformers import BartForConditionalGeneration, BartTokenizer
from utils.log_utils import logger from log_utils import logger
from utils.run_utils import config from run_utils import config
nltk.download('punkt', quiet=True) nltk.download('punkt', quiet=True)
@@ -154,7 +154,7 @@ def chunk_text(text,
def summarize(transcript_text, timestamp, def summarize(transcript_text, timestamp,
real_time=False, real_time=False,
summarize_using_chunks=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]): chunk_summarize=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summary_model = config["DEFAULT"]["SUMMARY_MODEL"] summary_model = config["DEFAULT"]["SUMMARY_MODEL"]
if not summary_model: if not summary_model:
@@ -166,27 +166,35 @@ def summarize(transcript_text, timestamp,
model = BartForConditionalGeneration.from_pretrained(summary_model) model = BartForConditionalGeneration.from_pretrained(summary_model)
model = model.to(device) model = model.to(device)
output_filename = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt" output_file = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
if real_time: if real_time:
output_filename = "real_time_" + output_filename output_file = "real_time_" + output_file
if summarize_using_chunks != "YES": if chunk_summarize != "YES":
max_length = int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"])
inputs = tokenizer. \ inputs = tokenizer. \
batch_encode_plus([transcript_text], truncation=True, batch_encode_plus([transcript_text], truncation=True,
padding='longest', padding='longest',
max_length=int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"]), max_length=max_length,
return_tensors='pt') return_tensors='pt')
inputs = inputs.to(device) inputs = inputs.to(device)
with torch.no_grad(): with torch.no_grad():
num_beams = int(config["DEFAULT"]["BEAM_SIZE"])
max_length = int(config["DEFAULT"]["MAX_LENGTH"])
summaries = model.generate(inputs['input_ids'], summaries = model.generate(inputs['input_ids'],
num_beams=int(config["DEFAULT"]["BEAM_SIZE"]), length_penalty=2.0, num_beams=num_beams,
max_length=int(config["DEFAULT"]["MAX_LENGTH"]), early_stopping=True) length_penalty=2.0,
max_length=max_length,
early_stopping=True)
decoded_summaries = [tokenizer.decode(summary, skip_special_tokens=True, clean_up_tokenization_spaces=False) decoded_summaries = \
[tokenizer.decode(summary,
skip_special_tokens=True,
clean_up_tokenization_spaces=False)
for summary in summaries] for summary in summaries]
summary = " ".join(decoded_summaries) summary = " ".join(decoded_summaries)
with open("./artefacts/" + output_filename, 'w') as f: with open("./artefacts/" + output_file, 'w') as f:
f.write(summary.strip() + "\n") f.write(summary.strip() + "\n")
else: else:
logger.info("Breaking transcript into smaller chunks") logger.info("Breaking transcript into smaller chunks")
@@ -195,8 +203,8 @@ def summarize(transcript_text, timestamp,
logger.info(f"Transcript broken into {len(chunks)} " logger.info(f"Transcript broken into {len(chunks)} "
f"chunks of at most 500 words") f"chunks of at most 500 words")
logger.info(f"Writing summary text to: {output_filename}") logger.info(f"Writing summary text to: {output_file}")
with open(output_filename, 'w') as f: with open(output_file, 'w') as f:
summaries = summarize_chunks(chunks, tokenizer, model) summaries = summarize_chunks(chunks, tokenizer, model)
for summary in summaries: for summary in summaries:
f.write(summary.strip() + " ") f.write(summary.strip() + " ")
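A minimal call into the updated summarize() might look like the sketch below (the transcript path and the chunking flag are illustrative; the function reads its remaining settings from config.ini):
# Illustrative usage of the updated summarize() signature; assumes a
# transcript file and the ./artefacts directory already exist.
from datetime import datetime

with open("./artefacts/transcript.txt") as f:
    transcript_text = f.read()

# chunk_summarize mirrors the SUMMARIZE_USING_CHUNKS "YES"/"NO" config value
summarize(transcript_text, datetime.now(), real_time=False, chunk_summarize="NO")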

View File

@@ -45,24 +45,24 @@ def create_wordcloud(timestamp, real_time=False):
plt.axis("off") plt.axis("off")
plt.tight_layout(pad=0) plt.tight_layout(pad=0)
wordcloud_name = "wordcloud" wordcloud = "wordcloud"
if real_time: if real_time:
wordcloud_name = "real_time_" + wordcloud_name + "_" +\ wordcloud = "real_time_" + wordcloud + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png" timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
else: else:
wordcloud_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png" wordcloud += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
plt.savefig("./artefacts/" + wordcloud_name) plt.savefig("./artefacts/" + wordcloud)
def create_talk_diff_scatter_viz(timestamp, real_time=False): def create_talk_diff_scatter_viz(timestamp, real_time=False):
""" """
Perform agenda vs transription diff to see covered topics. Perform agenda vs transcription diff to see covered topics.
Create a scatter plot of words in topics. Create a scatter plot of words in topics.
:return: None. Saved locally. :return: None. Saved locally.
""" """
spaCy_model = "en_core_web_md" spacy_model = "en_core_web_md"
nlp = spacy.load(spaCy_model) nlp = spacy.load(spacy_model)
nlp.add_pipe('sentencizer') nlp.add_pipe('sentencizer')
agenda_topics = [] agenda_topics = []
@@ -75,7 +75,6 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
agenda_topics.append(line.split(":")[0]) agenda_topics.append(line.split(":")[0])
# Load the transcription with timestamp # Load the transcription with timestamp
filename = ""
if real_time: if real_time:
filename = "./artefacts/real_time_transcript_with_timestamp_" + \ filename = "./artefacts/real_time_transcript_with_timestamp_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt" timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -142,7 +141,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
df = df.apply(create_new_columns, axis=1) df = df.apply(create_new_columns, axis=1)
# Count the number of items covered and calculatre the percentage # Count the number of items covered and calculate the percentage
num_covered_items = sum(covered_items.values()) num_covered_items = sum(covered_items.values())
percentage_covered = num_covered_items / len(agenda) * 100 percentage_covered = num_covered_items / len(agenda) * 100