Merge pull request #40 from Monadical-SAS/feat/gokul

Code refactor and cleanup from Feat/gokul
Authored by projects-g on 2023-07-25 13:56:14 +05:30, committed by GitHub.
33 changed files with 700 additions and 242 deletions

.gitignore (2 changed lines)

@@ -165,7 +165,7 @@ cython_debug/
transcript_*.txt
test_*.txt
wordcloud*.png
*.ini
utils/config.ini
test_samples/
*.wav
*.mp3


@@ -5,15 +5,15 @@ import signal
from aiortc.contrib.signaling import (add_signaling_arguments,
create_signaling)
from stream_client import StreamClient
from utils.log_utils import logger
from stream_client import StreamClient
async def main():
parser = argparse.ArgumentParser(description="Data channels ping/pong")
parser.add_argument(
"--url", type=str, nargs="?", default="http://127.0.0.1:1250/offer"
"--url", type=str, nargs="?", default="http://0.0.0.0:1250/offer"
)
parser.add_argument(


@@ -2,8 +2,6 @@ pyaudio==0.2.13
keyboard==0.13.5
pynput==1.7.6
wave==0.0.2
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2023.5.7
@@ -51,11 +49,8 @@ matplotlib==3.7.2
matplotlib-inline==0.1.6
termcolor==2.3.0
ffmpeg==1.4
aiortc==1.5.0
cached_property==1.5.2
stamina==23.1.0
httpx==0.24.1
sortedcontainers==2.4.0
https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
gpt4all==1.0.5
aiohttp_cors==0.7.0


@@ -26,7 +26,7 @@ pip install git+https://github.com/sanchit-gandhi/whisper-jax.git
# Update to latest version
pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git
pip install -r ../requirements.txt
pip install -r ../server-requirements.txt
# download spacy models
spacy download en_core_web_sm


@@ -0,0 +1,4 @@
#!/bin/sh
pip install --upgrade pip
pip install -r ../server-requirements.txt

server-requirements.txt (new file, 50 lines)

@@ -0,0 +1,50 @@
aiohttp==3.8.5
aiohttp-cors==0.7.0
aioice==0.9.0
aiortc==1.5.0
aiosignal==1.3.1
anyio==3.7.1
async-timeout==4.0.2
attrs==23.1.0
av==10.0.0
certifi==2023.7.22
cffi==1.15.1
charset-normalizer==3.2.0
coloredlogs==15.0.1
cryptography==41.0.2
ctranslate2==3.17.1
dnspython==2.4.0
faster-whisper==0.7.1
filelock==3.12.2
flatbuffers==23.5.26
frozenlist==1.4.0
fsspec==2023.6.0
google-crc32c==1.5.0
h11==0.14.0
httpcore==0.17.3
huggingface-hub==0.16.4
humanfriendly==10.0
idna==3.4
ifaddr==0.2.0
loguru==0.7.0
mpmath==1.3.0
multidict==6.0.4
numpy==1.25.1
onnxruntime==1.15.1
packaging==23.1
protobuf==4.23.4
pycparser==2.21
pyee==11.0.0
pylibsrtp==0.8.0
pyOpenSSL==23.2.0
PyYAML==6.0.1
requests==2.31.0
sniffio==1.3.0
sortedcontainers==2.4.0
sympy==1.12
tokenizers==0.13.3
tqdm==4.65.0
typing_extensions==4.7.1
urllib3==2.0.4
yarl==1.9.2
wave==0.0.2


@@ -1,29 +1,30 @@
import argparse
import asyncio
import datetime
import io
import json
import os
import uuid
import wave
from concurrent.futures import ThreadPoolExecutor
import aiohttp_cors
import jax.numpy as jnp
import requests
from aiohttp import web
from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
from aiortc.contrib.media import MediaRelay
from av import AudioFifo
from faster_whisper import WhisperModel
from loguru import logger
from whisper_jax import FlaxWhisperPipline
from utils.run_utils import run_in_executor
from sortedcontainers import SortedDict
from utils.run_utils import run_in_executor, config
pcs = set()
relay = MediaRelay()
data_channel = None
pipeline = FlaxWhisperPipline("openai/whisper-tiny",
dtype=jnp.float16,
batch_size=16)
model = WhisperModel("tiny", device="cpu",
compute_type="float32",
num_workers=12)
CHANNELS = 2
RATE = 48000
@@ -31,8 +32,8 @@ audio_buffer = AudioFifo()
executor = ThreadPoolExecutor()
transcription_text = ""
last_transcribed_time = 0.0
LLM_MACHINE_IP = "216.153.52.83"
LLM_MACHINE_PORT = "5000"
LLM_MACHINE_IP = config["DEFAULT"]["LLM_MACHINE_IP"]
LLM_MACHINE_PORT = config["DEFAULT"]["LLM_MACHINE_PORT"]
LLM_URL = f"http://{LLM_MACHINE_IP}:{LLM_MACHINE_PORT}/api/v1/generate"
incremental_responses = []
sorted_transcripts = SortedDict()
@@ -43,7 +44,7 @@ blacklisted_messages = [" Thank you.", " See you next time!",
def get_title_and_summary(llm_input_text, last_timestamp):
print("Generating title and summary")
logger.info("Generating title and summary")
# output = llm.generate(prompt)
# Use monadical-ml to fire this query to an LLM and get result
@@ -53,11 +54,11 @@ def get_title_and_summary(llm_input_text, last_timestamp):
prompt = f"""
### Human:
Create a JSON object as response. The JSON object must have 2 fields:
i) title and ii) summary. For the title field,generate a short title
for the given text. For the summary field, summarize the given text
Create a JSON object as response. The JSON object must have 2 fields:
i) title and ii) summary. For the title field, generate a short title
for the given text. For the summary field, summarize the given text
in three sentences.
{llm_input_text}
### Assistant:
@@ -67,27 +68,28 @@ def get_title_and_summary(llm_input_text, last_timestamp):
"prompt": prompt
}
# To-do: Handle unexpected output formats from the model
# TODO: Handle unexpected output formats from the model
try:
response = requests.post(LLM_URL, headers=headers, json=data)
output = json.loads(response.json()["results"][0]["text"])
output["description"] = output.pop("summary")
output["transcript"] = llm_input_text
output["timestamp"] =\
output["timestamp"] = \
str(datetime.timedelta(seconds=round(last_timestamp)))
incremental_responses.append(output)
result = {
"cmd": "UPDATE_TOPICS",
"topics": incremental_responses,
}
except Exception as e:
print("Exception" + str(e))
logger.info("Exception" + str(e))
result = None
return result
def channel_log(channel, t, message):
print("channel(%s) %s %s" % (channel.label, t, message))
logger.info("channel(%s) %s %s" % (channel.label, t, message))
def channel_send(channel, message):
@@ -113,18 +115,25 @@ def channel_send_transcript(channel):
# Due to exceptions if one of the earlier batches can't return
# a transcript, we don't want to be stuck waiting for the result
# With the threshold size of 3, we pop the first(lost) element
elif len(sorted_transcripts) >= 3:
del sorted_transcripts[least_time]
else:
if len(sorted_transcripts) >= 3:
del sorted_transcripts[least_time]
except Exception as e:
print("Exception", str(e))
logger.info("Exception", str(e))
pass
def get_transcription(frames):
print("Transcribing..")
logger.info("Transcribing..")
sorted_transcripts[frames[0].time] = None
out_file = io.BytesIO()
wf = wave.open(out_file, "wb")
# TODO:
# Passing IO objects instead of temporary files throws an error
# Passing ndarrays (typecasted with float) does not give any
# transcription. Refer issue,
# https://github.com/guillaumekln/faster-whisper/issues/369
audiofilename = "test" + str(datetime.datetime.now())
wf = wave.open(audiofilename, "wb")
wf.setnchannels(CHANNELS)
wf.setframerate(RATE)
wf.setsampwidth(2)
@@ -133,22 +142,40 @@ def get_transcription(frames):
wf.writeframes(b"".join(frame.to_ndarray()))
wf.close()
# To-Do: Look into WhisperTimeStampLogitsProcessor exception
try:
whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
except Exception as e:
return
result_text = ""
global transcription_text, last_transcribed_time
transcription_text += whisper_result["text"]
duration = whisper_result["chunks"][0]["timestamp"][1]
if not duration:
duration = 5.0
last_transcribed_time += duration
try:
segments, _ = \
model.transcribe(audiofilename,
language="en",
beam_size=5,
vad_filter=True,
vad_parameters=dict(min_silence_duration_ms=500))
os.remove(audiofilename)
segments = list(segments)
result_text = ""
duration = 0.0
for segment in segments:
result_text += segment.text
start_time = segment.start
end_time = segment.end
if not segment.start:
start_time = 0.0
if not segment.end:
end_time = 5.5
duration += (end_time - start_time)
global last_transcribed_time, transcription_text
last_transcribed_time += duration
transcription_text += result_text
except Exception as e:
logger.info("Exception" + str(e))
pass
result = {
"cmd": "SHOW_TRANSCRIPTION",
"text": whisper_result["text"]
"text": result_text
}
sorted_transcripts[frames[0].time] = result
return result
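
The comment in channel_send_transcript above describes the ordering guarantee this relies on: results are flushed strictly in frame-time order, and a slot that never completes is dropped once three results are pending behind it. A minimal sketch of that pattern, with hypothetical names, not the server's exact code:

import sortedcontainers

pending = sortedcontainers.SortedDict()  # frame time -> result (None while in flight)

def start_batch(frame_time):
    # Reserve a slot as soon as transcription of a batch starts
    pending[frame_time] = None

def finish_batch(frame_time, result, send):
    pending[frame_time] = result
    # Flush completed results strictly in capture order
    while pending:
        least_time, value = pending.peekitem(0)
        if value is not None:
            send(value)
            del pending[least_time]
        elif len(pending) >= 3:
            # An earlier batch failed and will never complete;
            # drop it so later transcripts are not stuck behind it
            del pending[least_time]
        else:
            break
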
@@ -167,6 +194,9 @@ def get_final_summary_response():
seconds=round(last_transcribed_time))),
"summary": final_summary
}
with open("./artefacts/meeting_titles_and_summaries.txt", "a") as f:
f.write(json.dumps(incremental_responses))
return response
@@ -196,7 +226,7 @@ class AudioStreamTrack(MediaStreamTrack):
else None
)
if len(transcription_text) > 500:
if len(transcription_text) > 750:
llm_input_text = transcription_text
transcription_text = ""
llm_result = run_in_executor(get_title_and_summary,
@@ -245,7 +275,6 @@ async def offer(request):
if isinstance(message, str) and message.startswith("ping"):
channel_send(channel, "pong" + message[4:])
@pc.on("connectionstatechange")
async def on_connectionstatechange():
log_info("Connection state is " + pc.connectionState)
@@ -278,6 +307,16 @@ async def on_shutdown(app):
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="WebRTC based server for Reflector"
)
parser.add_argument(
"--host", default="0.0.0.0", help="Server host IP (def: 0.0.0.0)"
)
parser.add_argument(
"--port", type=int, default=1250, help="Server port (def: 1250)"
)
args = parser.parse_args()
app = web.Application()
cors = aiohttp_cors.setup(
app,
@@ -293,4 +332,4 @@ if __name__ == "__main__":
offer_resource = cors.add(app.router.add_resource("/offer"))
cors.add(offer_resource.add_route("POST", offer))
app.on_shutdown.append(on_shutdown)
web.run_app(app, access_log=None, host="127.0.0.1", port=1250)
web.run_app(app, access_log=None, host=args.host, port=args.port)
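
With these flags the bind address and port are no longer hard-coded. Assuming the module above is launched directly (file name hypothetical):

python server.py --host 127.0.0.1 --port 1250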


@@ -17,7 +17,7 @@ class StreamClient:
def __init__(
self,
signaling,
url="http://127.0.0.1:1250",
url="http://0.0.0.0:1250",
play_from=None,
ping_pong=False
):
@@ -114,7 +114,7 @@ class StreamClient:
self.channel_log(channel, "<", message)
if isinstance(message, str) and message.startswith("pong"):
elapsed_ms = (self.current_stamp() - int(message[5:]))\
elapsed_ms = (self.current_stamp() - int(message[5:])) \
/ 1000
print(" RTT %.2f ms" % elapsed_ms)

trials/__init__.py (new empty file)


@@ -0,0 +1,24 @@
# Steps to prepare data and submit/check OpenAI finetuning
# import subprocess
# subprocess.run("openai tools fine_tunes.prepare_data -f " + "finetuning_dataset.jsonl")
# export OPENAI_API_KEY=
# openai api fine_tunes.create -t <TRAIN_FILE_ID_OR_PATH> -m <BASE_MODEL>
# openai api fine_tunes.list
import openai
# Use your OpenAI API Key
openai.api_key = ""
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . -> ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas . - > "]
# Give your finetuned model name here
# "davinci:ft-personal-2023-07-14-10-43-51"
model_name = ""
response = openai.Completion.create(
model=model_name,
prompt=sample_chunks[0])
print(response)


@@ -0,0 +1,98 @@
import json
import yt_dlp as youtube_dl
from whisper_jax import FlaxWhisperPipline
import jax.numpy as jnp
# Function to extract chapter information from a YouTube video URL
def get_youtube_chapters(video_id):
video_url = "https://www.youtube.com/watch?v=" + video_id
ydl_opts = {
'extract_flat': 'in_playlist',
'skip_download': True,
'quiet': True,
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
video_info = ydl.extract_info(video_url, download=False)
chapters = []
if 'chapters' in video_info:
for chapter in video_info['chapters']:
start_time = chapter['start_time']
end_time = chapter['end_time']
title = chapter['title']
chapters.append({
'start': start_time,
'end': end_time,
'title': title
})
return chapters
# Function to extract video transcription using yt_dlp
def get_youtube_transcription(video_id):
ydl_opts = {
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'outtmpl': './artefacts/audio', # Specify output file path and name
}
# Download the audio
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(["https://www.youtube.com/watch?v=" + video_id])
media_file = "./artefacts/audio.mp3"
pipeline = FlaxWhisperPipline("openai/whisper-" + "tiny",
dtype=jnp.float16,
batch_size=16)
whisper_result = pipeline(media_file, return_timestamps=True)
return whisper_result["chunks"]
# Function to scrape YouTube video transcripts and chapter information
def scrape_youtube_data(video_id):
transcript_text = get_youtube_transcription(video_id)
chapters = get_youtube_chapters(video_id)
print("transcript_text", transcript_text)
print("chapters", chapters)
return transcript_text, chapters
# Function to generate fine-tuning dataset from YouTube data
def generate_finetuning_dataset(video_ids):
prompt_completion_pairs = []
for video_id in video_ids:
transcript_text, chapters = scrape_youtube_data(video_id)
if transcript_text is not None and chapters is not None:
for chapter in chapters:
start_time = chapter["start"]
end_time = chapter["end"]
chapter_text = chapter["title"]
prompt = ""
for transcript in transcript_text:
if transcript["timestamp"][0] >= start_time and transcript["timestamp"][1] < end_time:
prompt += transcript["text"]
if prompt:
completion = chapter_text
prompt_completion_pairs.append({"prompt": prompt, "completion": completion})
return prompt_completion_pairs
# Add all the video ids here, the videos must have captions [chapters]
video_ids = ["yTnSEZIwnkU"]
dataset = generate_finetuning_dataset(video_ids)
with open("finetuning_dataset.jsonl", "w") as f:
for example in dataset:
f.write(json.dumps(example) + "\n")
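
For reference, FlaxWhisperPipline with return_timestamps=True yields chunks shaped like {"timestamp": (start, end), "text": "..."}, which is what the timestamp filtering above iterates over. Each line of the resulting finetuning_dataset.jsonl is then one prompt/completion pair in the legacy OpenAI fine-tuning format; an illustrative line (values hypothetical):

{"prompt": "...chapter transcript text...", "completion": "Building LLMs on Google Cloud"}

The openai tools fine_tunes.prepare_data step referenced in the inference trial typically appends a separator such as " -> " to each prompt, which is why the sample prompts in that trial end with " -> ".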


@@ -1,98 +0,0 @@
# # Approach 1
# from transformers import GPTNeoForCausalLM, GPT2Tokenizer
#
# model_name = 'EleutherAI/gpt-neo-1.3B'
# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# model = GPTNeoForCausalLM.from_pretrained(model_name)
#
# conversation = """
# Summarize the following conversation in 3 key sentences:
#
# We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
# Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
# Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
# Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
# Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
# Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
# """
#
# input_ids = tokenizer.encode(conversation, return_tensors='pt')
#
# output = model.generate(input_ids,
# max_length=30,
# num_return_sequences=1)
#
# caption = tokenizer.decode(output[0], skip_special_tokens=True)
# print("Caption:", caption[len(input_ids):])
#
# # Approach 2
# import torch
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
#
# model_name = "gpt2"
# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# model = GPT2LMHeadModel.from_pretrained(model_name)
#
# model.eval()
#
# text = """
# You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
# """
#
# tokenizer.pad_token = tokenizer.eos_token
# input_ids = tokenizer.encode(text,
# max_length=100,
# truncation=True,
# return_tensors="pt")
# attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
# output = model.generate(input_ids,
# max_new_tokens=20,
# num_return_sequences=1,
# num_beams=2,
# attention_mask=attention_mask)
#
# chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
# for i, title in enumerate(chapter_titles):
# print("Caption: ", title)
# Approach 3
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
def generate_response(conversation, max_length=100):
input_text = ""
for entry in conversation:
role = entry["role"]
content = entry["content"]
input_text += f"{role}: {content}\n"
# Tokenize the entire conversation
input_ids = tokenizer.encode(input_text, return_tensors="pt")
# Generate text based on the entire conversation
with torch.no_grad():
output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)
# Decode the generated text and return it
response = tokenizer.decode(output[0], skip_special_tokens=True)
return response
if __name__ == "__main__":
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
]
conversation = [
{"role": "system", "content": "Summarize this text" },
{"role": "user", "content": " text : " + sample_chunks[0]},
]
response = generate_response(conversation)
print("Response:", response)


@@ -16,8 +16,8 @@ from av import AudioFifo
from sortedcontainers import SortedDict
from whisper_jax import FlaxWhisperPipline
from utils.log_utils import logger
from utils.run_utils import config, Mutex
from reflector.utils.log_utils import logger
from reflector.utils.run_utils import config, Mutex
WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_REAL_TIME_MODEL_SIZE"]
pcs = set()


@@ -0,0 +1,57 @@
import requests
import spacy
# Enter the Machine where the LLM is hosted
LLM_MACHINE_IP = ""
# This is the URL of text-generation-webui
URL = f"http://{LLM_MACHINE_IP}:5000/api/v1/generate"
headers = {
"Content-Type": "application/json"
}
def split_text_file(filename, token_count):
nlp = spacy.load('en_core_web_md')
with open(filename, 'r') as file:
text = file.read()
doc = nlp(text)
total_tokens = len(doc)
parts = []
start_index = 0
while start_index < total_tokens:
end_index = start_index + token_count
part_tokens = doc[start_index:end_index - 5]
part = ' '.join(token.text for token in part_tokens)
parts.append(part)
start_index = end_index
return parts
final_summary = ""
parts = split_text_file("transcript.txt", 1600)
for part in parts:
prompt = f"""
### Human:
Given the following text, distill the most important information
into a short summary: {part}
### Assistant:
"""
data = {
"prompt": prompt
}
try:
response = requests.post(URL, headers=headers, json=data)
part_summary = response.json()["results"][0]["text"]
print(part_summary)
final_summary.append(part_summary)
except Exception as e:
print(str(e))
with open("summary.txt", "w") as summary_file:
summary_file.write(" ".join(final_summary))
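
For reference, text-generation-webui's /api/v1/generate endpoint replies with JSON of the shape the server code parses, roughly:

{"results": [{"text": " ...generated summary text... "}]}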


@@ -0,0 +1,43 @@
import torch
from transformers import BertTokenizer, BertModel
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Load the pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
model = BertModel.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)
# Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Load the SentenceTransformer model
sentence_transformer_model = SentenceTransformer('average_word_embeddings_glove.6B.300d')
# Define the input text
text = "Your input text to be summarized goes here."
# Tokenize the text
tokens = tokenizer.tokenize(text)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = torch.tensor([input_ids]).to(device)
# Get the BERT model output
with torch.no_grad():
outputs = model(input_ids)[0] # Extract the last hidden states
# Split the input text into sentences and calculate sentence embeddings
# (the mean-pooled BERT output above is a single vector for the whole text,
# so the SentenceTransformer embeddings are used for the per-sentence ranking)
sentences = [s.strip() + "." for s in text.split(".") if s.strip()]
sentence_embeddings = sentence_transformer_model.encode(sentences)
input_text_embedding = sentence_transformer_model.encode([text])[0]
# Calculate cosine similarity between each sentence and the input text
similarity_scores = cosine_similarity([input_text_embedding], sentence_embeddings)
# Sort the sentences by similarity score in descending order
sorted_sentences = [sent for _, sent in
sorted(zip(similarity_scores[0], sentences), reverse=True)]
# Choose the top sentences as the summary
num_summary_sentences = 2 # Adjust as needed
summary = ". ".join(sorted_sentences[:num_summary_sentences])
print("Summary:", summary)


@@ -0,0 +1,101 @@
# Approach 1
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
model_name = 'EleutherAI/gpt-neo-1.3B'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPTNeoForCausalLM.from_pretrained(model_name)
conversation = """
Summarize the following conversation in 3 key sentences:
We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
"""
input_ids = tokenizer.encode(conversation, return_tensors='pt')
output = model.generate(input_ids,
max_length=30,
num_return_sequences=1)
caption = tokenizer.decode(output[0], skip_special_tokens=True)
print("Caption:", caption[len(input_ids):])
# Approach 2
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()
text = """
You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
"""
tokenizer.pad_token = tokenizer.eos_token
input_ids = tokenizer.encode(text,
max_length=100,
truncation=True,
return_tensors="pt")
attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
output = model.generate(input_ids,
max_new_tokens=20,
num_return_sequences=1,
num_beams=2,
attention_mask=attention_mask)
chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
for i, title in enumerate(chapter_titles):
print("Caption: ", title)
# Approach 3
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
def generate_response(conversation, max_length=100):
input_text = ""
for entry in conversation:
role = entry["role"]
content = entry["content"]
input_text += f"{role}: {content}\n"
# Tokenize the entire conversation
input_ids = tokenizer.encode(input_text, return_tensors="pt")
# Generate text based on the entire conversation
with torch.no_grad():
output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)
# Decode the generated text and return it
response = tokenizer.decode(output[0], skip_special_tokens=True)
return response
if __name__ == "__main__":
# Call appropriate approach from the main while experimenting
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
]
conversation = [
{"role": "system", "content": "Summarize this text"},
{"role": "user", "content": " text : " + sample_chunks[0]},
]
response = generate_response(conversation)
print("Response:", response)


@@ -1,9 +1,11 @@
import spacy
import sys
# Observe the incremental summaries by performing summaries in chunks
with open("transcript.txt") as f:
transcription = f.read()
import spacy
def split_text_file(filename, token_count):
nlp = spacy.load('en_core_web_md')
@@ -26,8 +28,9 @@ def split_text_file(filename, token_count):
return parts
# Set the chunk length here to split the transcript and test
MAX_CHUNK_LENGTH=1000
MAX_CHUNK_LENGTH = 1000
chunks = split_text_file("transcript.txt", MAX_CHUNK_LENGTH)
print("Number of chunks", len(chunks))
@@ -41,19 +44,17 @@ with open("chunks" + str(MAX_CHUNK_LENGTH) + ".txt", "a") as f:
# ex. python incsum.py 1 => will run approach 1
# If no input, will run all approaches
import sys
try:
index = sys.argv[1]
except:
index = None
# Approach 1 : facebook/bart-large-cnn
if index == "1" or index is None:
SUMMARY_MODEL="facebook/bart-large-cnn"
MIN_LENGTH=5
MAX_LENGTH=10
BEAM_SIZE=2
SUMMARY_MODEL = "facebook/bart-large-cnn"
MIN_LENGTH = 5
MAX_LENGTH = 10
BEAM_SIZE = 2
print("Performing chunk summary : " + SUMMARY_MODEL)
@@ -81,7 +82,6 @@ if index == "1" or index is None:
for summary in summaries:
f.write(summary + "\n\n")
# Approach 2
if index == "2" or index is None:
print("Performing chunk summary : " + "gpt-neo-1.3B")
@@ -108,14 +108,14 @@ if index == "2" or index is None:
max_length=max_length,
attention_mask=attention_mask,
pad_token_id=model.config.eos_token_id,
num_beams=4,
length_penalty=2.0,
early_stopping=True)
num_beams=4,
length_penalty=2.0,
early_stopping=True)
summary_ids = output[0, input_length:]
summary = tokenizer.decode(summary_ids, skip_special_tokens=True)
summaries.append(summary)
with open("gptneo1.3B-summaries.txt", "a") as f:
f.write(summary + "\n\n")
f.write(summary + "\n\n")
# Approach 3
if index == "3" or index is None:
@@ -155,4 +155,3 @@ if index == "3" or index is None:
with open("mpt-7b-summaries.txt", "a") as f:
for summary in summaries:
f.write(summary + "\n\n")


@@ -0,0 +1,37 @@
# Use OpenAI API endpoint to send data to OpenAI
# along with prompts to caption/summarize the conversation
import openai
openai.api_key = ""
# to caption, user prompt used : "caption this conversation"
# max_tokens=20
# to incremental summarize, user prompt used : "summarize this conversation in a few sentences by taking key points"
# max_tokens=300
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
conversation = [
{"role": "system",
"content": sample_chunks[1]},
{"role": "user",
"content": "summarize this conversation in a few sentences by taking key points"}
]
model = "gpt-3.5-turbo"
response = openai.ChatCompletion.create(model=model,
messages=conversation,
n=1,
max_tokens=300)
# Try fine tuned model
# model = "davinci:ft-personal-2023-07-14-10-43-51"
# response = openai.Completion.create(model=model,
# prompt=sample_chunks[0] + " -> ")
caption = response.choices[0]
print(caption)
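
Note that response.choices[0] is the full choice object; with the pre-1.0 openai client used here, the generated text itself sits one level deeper, roughly response.choices[0].message.content for ChatCompletion (or response.choices[0].text for the commented-out fine-tuned Completion variant).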


@@ -0,0 +1,33 @@
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import torch
# Load the Pegasus model and tokenizer
model_name = "google/pegasus-large"
model = PegasusForConditionalGeneration.from_pretrained(model_name)
tokenizer = PegasusTokenizer.from_pretrained(model_name)
# Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
# Define the input text for summarization
text = sample_chunks[1]
inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt").to(device)
# Generate the summary
summary_ids = model.generate(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
max_length=200,
num_beams=4,
length_penalty=2.0,
early_stopping=True,
)
# Decode and print the summary
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Summary:", summary)


@@ -1,36 +1,27 @@
# Use OpenAI API endpoint to send data to OpenAI
# along with prompts to caption/summarize the conversation
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
# Load the T5 model and tokenizer
model_name = "t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
import openai
openai.api_key = ""
# to caption, user prompt used : "caption this conversation"
# max_tokens=20
# to incremental summarize, user prompt used : "summarize this conversation in a few sentences by taking key points"
# max_tokens=300
# Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
conversation = [
{"role": "system",
"content": sample_chunks[1]},
{"role": "user",
"content": "summarize this conversation in a few sentences by taking key points"}
]
model = "gpt-3.5-turbo"
response = openai.ChatCompletion.create(model=model,
messages=conversation,
n=1,
max_tokens=300)
# Define the input text for summarization
text = "Summarize the following text in 3 key points. text : " + sample_chunks[1]
# Try finetuned model
# model = "davinci:ft-personal-2023-07-14-10-43-51"
# response = openai.Completion.create(model=model,
# prompt=sample_chunks[0] + " -> ")
# Tokenize the input text
inputs = tokenizer.encode(text, return_tensors="pt").to(device)
caption = response.choices[0]
print(caption)
# Generate the summary
summary_ids = model.generate(inputs, max_length=1000, num_beams=4, early_stopping=True)
# Decode and print the summary
summary = tokenizer.decode(summary_ids.squeeze(), skip_special_tokens=True)
print("Summary:", summary)


@@ -0,0 +1,44 @@
from gpt4all import GPT4All
model = GPT4All("/Users/gokulmohanarangan/Library/Application Support/nomic.ai/GPT4All/ggml-vicuna-13b-1.1-q4_2.bin")
import spacy
def split_text_file(filename, token_count):
nlp = spacy.load('en_core_web_md')
with open(filename, 'r') as file:
text = file.read()
doc = nlp(text)
total_tokens = len(doc)
parts = []
start_index = 0
while start_index < total_tokens:
end_index = start_index + token_count
part_tokens = doc[start_index:end_index]
part = ' '.join(token.text for token in part_tokens)
parts.append(part)
start_index = end_index
return parts
parts = split_text_file("transcript.txt", 1800)
final_summary = []
for part in parts:
prompt = f"""
### Human:
Summarize the following text without missing any key points and action items.
{part}
### Assistant:
"""
output = model.generate(prompt)
final_summary.append(output)
with open("sum.txt", "w") as sum:
sum.write(" ".join(final_summary))


@@ -18,11 +18,11 @@ import nltk
import yt_dlp as youtube_dl
from whisper_jax import FlaxWhisperPipline
from utils.file_utils import download_files, upload_files
from utils.log_utils import logger
from utils.run_utils import config
from utils.text_utilities import post_process_transcription, summarize
from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud
from ...utils.file_utils import download_files, upload_files
from ...utils.log_utils import logger
from ...utils.run_utils import config
from ...utils.text_utils import post_process_transcription, summarize
from ...utils.viz_utils import create_talk_diff_scatter_viz, create_wordcloud
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
@@ -30,8 +30,8 @@ nltk.download('stopwords', quiet=True)
WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]
NOW = datetime.now()
if not os.path.exists('./artefacts'):
os.makedirs('./artefacts')
if not os.path.exists('../../artefacts'):
os.makedirs('../../artefacts')
def init_argparse() -> argparse.ArgumentParser:
@@ -91,7 +91,7 @@ def main():
# Download the audio
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([args.location])
media_file = "./artefacts/audio.mp3"
media_file = "../artefacts/audio.mp3"
logger.info("Saved downloaded YouTube video to: " + media_file)
else:


@@ -10,11 +10,11 @@ from pynput import keyboard
from termcolor import colored
from whisper_jax import FlaxWhisperPipline
from utils.file_utils import upload_files
from utils.log_utils import logger
from utils.run_utils import config
from utils.text_utilities import post_process_transcription, summarize
from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud
from ...utils.file_utils import upload_files
from ...utils.log_utils import logger
from ...utils.run_utils import config
from ...utils.text_utils import post_process_transcription, summarize
from ...utils.viz_utils import create_talk_diff_scatter_viz, create_wordcloud
WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]


@@ -1,5 +1,6 @@
[DEFAULT]
#SetexceptionruleforOpenMPerrortoallowduplicatelibinitialization
# Set exception rule for OpenMP error
# to allow duplicate lib initialization
KMP_DUPLICATE_LIB_OK=TRUE
#ExportOpenAIAPIKey
OPENAI_APIKEY=
@@ -7,8 +8,8 @@ OPENAI_APIKEY=
WHISPER_MODEL_SIZE=tiny
WHISPER_REAL_TIME_MODEL_SIZE=tiny
#AWSconfig
AWS_ACCESS_KEY=***REMOVED***
AWS_SECRET_KEY=***REMOVED***
AWS_ACCESS_KEY=
AWS_SECRET_KEY=
BUCKET_NAME=reflector-bucket
#Summarizerconfig
SUMMARY_MODEL=facebook/bart-large-cnn
@@ -17,8 +18,9 @@ MAX_LENGTH=2048
BEAM_SIZE=6
MAX_CHUNK_LENGTH=1024
SUMMARIZE_USING_CHUNKS=YES
#Audiodevice
# Audio device
BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME=aggregator
AV_FOUNDATION_DEVICE_ID=1
# LLM PATH
LLM_PATH=
# LLM configs
LLM_MACHINE_IP=
LLM_MACHINE_PORT=
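
The config object imported from utils.run_utils throughout the codebase is presumably a standard configparser instance over this file; a minimal sketch of that assumption:

import configparser

config = configparser.ConfigParser()
config.read("utils/config.ini")

llm_ip = config["DEFAULT"]["LLM_MACHINE_IP"]
llm_port = config["DEFAULT"]["LLM_MACHINE_PORT"]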

utils/format_output.py (new file, 32 lines)

@@ -0,0 +1,32 @@
import json
with open("../artefacts/meeting_titles_and_summaries.txt", "r") as f:
outputs = f.read()
outputs = json.loads(outputs)
transcript_file = open("../artefacts/meeting_transcript.txt", "a")
title_desc_file = open("../artefacts/meeting_title_description.txt", "a")
summary_file = open("../artefacts/meeting_summary.txt", "a")
for item in outputs["topics"]:
transcript_file.write(item["transcript"])
summary_file.write(item["description"])
title_desc_file.write("TITLE: \n")
title_desc_file.write(item["title"])
title_desc_file.write("\n")
title_desc_file.write("DESCRIPTION: \n")
title_desc_file.write(item["description"])
title_desc_file.write("\n")
title_desc_file.write("TRANSCRIPT: \n")
title_desc_file.write(item["transcript"])
title_desc_file.write("\n")
title_desc_file.write("---------------------------------------- \n\n")
transcript_file.close()
title_desc_file.close()
summary_file.close()
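
One shape mismatch to watch for: this script expects the artefact file to hold a JSON object with a "topics" key (the UPDATE_TOPICS payload built in the server), while the server's final-summary path shown earlier dumps the bare incremental_responses list, so the writer and reader would need to agree. Illustrative expected contents (values hypothetical):

{"topics": [{"title": "...", "description": "...", "transcript": "...", "timestamp": "0:05:12"}]}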


@@ -6,8 +6,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BartForConditionalGeneration, BartTokenizer
from utils.log_utils import logger
from utils.run_utils import config
from log_utils import logger
from run_utils import config
nltk.download('punkt', quiet=True)
@@ -154,7 +154,7 @@ def chunk_text(text,
def summarize(transcript_text, timestamp,
real_time=False,
summarize_using_chunks=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]):
chunk_summarize=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summary_model = config["DEFAULT"]["SUMMARY_MODEL"]
if not summary_model:
@@ -166,27 +166,35 @@ def summarize(transcript_text, timestamp,
model = BartForConditionalGeneration.from_pretrained(summary_model)
model = model.to(device)
output_filename = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
output_file = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
if real_time:
output_filename = "real_time_" + output_filename
output_file = "real_time_" + output_file
if summarize_using_chunks != "YES":
inputs = tokenizer.\
if chunk_summarize != "YES":
max_length = int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"])
inputs = tokenizer. \
batch_encode_plus([transcript_text], truncation=True,
padding='longest',
max_length=int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"]),
max_length=max_length,
return_tensors='pt')
inputs = inputs.to(device)
with torch.no_grad():
num_beams = int(config["DEFAULT"]["BEAM_SIZE"])
max_length = int(config["DEFAULT"]["MAX_LENGTH"])
summaries = model.generate(inputs['input_ids'],
num_beams=int(config["DEFAULT"]["BEAM_SIZE"]), length_penalty=2.0,
max_length=int(config["DEFAULT"]["MAX_LENGTH"]), early_stopping=True)
num_beams=num_beams,
length_penalty=2.0,
max_length=max_length,
early_stopping=True)
decoded_summaries = [tokenizer.decode(summary, skip_special_tokens=True, clean_up_tokenization_spaces=False)
for summary in summaries]
decoded_summaries = \
[tokenizer.decode(summary,
skip_special_tokens=True,
clean_up_tokenization_spaces=False)
for summary in summaries]
summary = " ".join(decoded_summaries)
with open("./artefacts/" + output_filename, 'w') as f:
with open("./artefacts/" + output_file, 'w') as f:
f.write(summary.strip() + "\n")
else:
logger.info("Breaking transcript into smaller chunks")
@@ -195,8 +203,8 @@ def summarize(transcript_text, timestamp,
logger.info(f"Transcript broken into {len(chunks)} "
f"chunks of at most 500 words")
logger.info(f"Writing summary text to: {output_filename}")
with open(output_filename, 'w') as f:
logger.info(f"Writing summary text to: {output_file}")
with open(output_file, 'w') as f:
summaries = summarize_chunks(chunks, tokenizer, model)
for summary in summaries:
f.write(summary.strip() + " ")


@@ -13,7 +13,7 @@ from wordcloud import STOPWORDS, WordCloud
en = spacy.load('en_core_web_md')
spacy_stopwords = en.Defaults.stop_words
STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))).\
STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))). \
union(set(spacy_stopwords))
@@ -24,7 +24,7 @@ def create_wordcloud(timestamp, real_time=False):
"""
filename = "transcript"
if real_time:
filename = "real_time_" + filename + "_" +\
filename = "real_time_" + filename + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
else:
filename += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -45,24 +45,24 @@ def create_wordcloud(timestamp, real_time=False):
plt.axis("off")
plt.tight_layout(pad=0)
wordcloud_name = "wordcloud"
wordcloud = "wordcloud"
if real_time:
wordcloud_name = "real_time_" + wordcloud_name + "_" +\
wordcloud = "real_time_" + wordcloud + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
else:
wordcloud_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
wordcloud += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
plt.savefig("./artefacts/" + wordcloud_name)
plt.savefig("./artefacts/" + wordcloud)
def create_talk_diff_scatter_viz(timestamp, real_time=False):
"""
Perform agenda vs transription diff to see covered topics.
Perform agenda vs transcription diff to see covered topics.
Create a scatter plot of words in topics.
:return: None. Saved locally.
"""
spaCy_model = "en_core_web_md"
nlp = spacy.load(spaCy_model)
spacy_model = "en_core_web_md"
nlp = spacy.load(spacy_model)
nlp.add_pipe('sentencizer')
agenda_topics = []
@@ -75,12 +75,11 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
agenda_topics.append(line.split(":")[0])
# Load the transcription with timestamp
filename = ""
if real_time:
filename = "./artefacts/real_time_transcript_with_timestamp_" +\
filename = "./artefacts/real_time_transcript_with_timestamp_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
else:
filename = "./artefacts/transcript_with_timestamp_" +\
filename = "./artefacts/transcript_with_timestamp_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
with open(filename) as f:
transcription_timestamp_text = f.read()
@@ -142,7 +141,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
df = df.apply(create_new_columns, axis=1)
# Count the number of items covered and calculatre the percentage
# Count the number of items covered and calculate the percentage
num_covered_items = sum(covered_items.values())
percentage_covered = num_covered_items / len(agenda) * 100
@@ -158,7 +157,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
# Save df, mappings for further experimentation
df_name = "df"
if real_time:
df_name = "real_time_" + df_name + "_" +\
df_name = "real_time_" + df_name + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
else:
df_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
@@ -169,7 +168,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
mappings_name = "mappings"
if real_time:
mappings_name = "real_time_" + mappings_name + "_" +\
mappings_name = "real_time_" + mappings_name + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
else:
mappings_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"