Merge pull request #40 from Monadical-SAS/feat/gokul

Code refactor and cleanup from Feat/gokul
projects-g
2023-07-25 13:56:14 +05:30
committed by GitHub
33 changed files with 700 additions and 242 deletions

.gitignore vendored

@@ -165,7 +165,7 @@ cython_debug/
 transcript_*.txt
 test_*.txt
 wordcloud*.png
-*.ini
+utils/config.ini
 test_samples/
 *.wav
 *.mp3


@@ -5,15 +5,15 @@ import signal
 from aiortc.contrib.signaling import (add_signaling_arguments,
                                       create_signaling)
+from stream_client import StreamClient
 from utils.log_utils import logger
-from stream_client import StreamClient


 async def main():
     parser = argparse.ArgumentParser(description="Data channels ping/pong")
     parser.add_argument(
-        "--url", type=str, nargs="?", default="http://127.0.0.1:1250/offer"
+        "--url", type=str, nargs="?", default="http://0.0.0.0:1250/offer"
     )
     parser.add_argument(


@@ -2,8 +2,6 @@ pyaudio==0.2.13
 keyboard==0.13.5
 pynput==1.7.6
 wave==0.0.2
-aiohttp==3.8.4
-aiosignal==1.3.1
 async-timeout==4.0.2
 attrs==23.1.0
 certifi==2023.5.7
@@ -51,11 +49,8 @@ matplotlib==3.7.2
 matplotlib-inline==0.1.6
 termcolor==2.3.0
 ffmpeg==1.4
-aiortc==1.5.0
 cached_property==1.5.2
 stamina==23.1.0
 httpx==0.24.1
-sortedcontainers==2.4.0
 https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
 gpt4all==1.0.5
-aiohttp_cors==0.7.0


@@ -26,7 +26,7 @@ pip install git+https://github.com/sanchit-gandhi/whisper-jax.git
 # Update to latest version
 pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git
-pip install -r ../requirements.txt
+pip install -r ../server-requirements.txt
 # download spacy models
 spacy download en_core_web_sm


@@ -0,0 +1,4 @@
#!/bin/sh
pip install --upgrade pip
pip install -r ../server-requirements.txt

server-requirements.txt Normal file

@@ -0,0 +1,50 @@
aiohttp==3.8.5
aiohttp-cors==0.7.0
aioice==0.9.0
aiortc==1.5.0
aiosignal==1.3.1
anyio==3.7.1
async-timeout==4.0.2
attrs==23.1.0
av==10.0.0
certifi==2023.7.22
cffi==1.15.1
charset-normalizer==3.2.0
coloredlogs==15.0.1
cryptography==41.0.2
ctranslate2==3.17.1
dnspython==2.4.0
faster-whisper==0.7.1
filelock==3.12.2
flatbuffers==23.5.26
frozenlist==1.4.0
fsspec==2023.6.0
google-crc32c==1.5.0
h11==0.14.0
httpcore==0.17.3
huggingface-hub==0.16.4
humanfriendly==10.0
idna==3.4
ifaddr==0.2.0
loguru==0.7.0
mpmath==1.3.0
multidict==6.0.4
numpy==1.25.1
onnxruntime==1.15.1
packaging==23.1
protobuf==4.23.4
pycparser==2.21
pyee==11.0.0
pylibsrtp==0.8.0
pyOpenSSL==23.2.0
PyYAML==6.0.1
requests==2.31.0
sniffio==1.3.0
sortedcontainers==2.4.0
sympy==1.12
tokenizers==0.13.3
tqdm==4.65.0
typing_extensions==4.7.1
urllib3==2.0.4
yarl==1.9.2
wave==0.0.2


@@ -1,29 +1,30 @@
+import argparse
 import asyncio
 import datetime
-import io
 import json
+import os
 import uuid
 import wave
 from concurrent.futures import ThreadPoolExecutor

 import aiohttp_cors
-import jax.numpy as jnp
 import requests
 from aiohttp import web
 from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaRelay
 from av import AudioFifo
+from faster_whisper import WhisperModel
 from loguru import logger
-from whisper_jax import FlaxWhisperPipline
-from utils.run_utils import run_in_executor
 from sortedcontainers import SortedDict
+from utils.run_utils import run_in_executor, config

 pcs = set()
 relay = MediaRelay()
 data_channel = None

-pipeline = FlaxWhisperPipline("openai/whisper-tiny",
-                              dtype=jnp.float16,
-                              batch_size=16)
+model = WhisperModel("tiny", device="cpu",
+                     compute_type="float32",
+                     num_workers=12)

 CHANNELS = 2
 RATE = 48000
@@ -31,8 +32,8 @@ audio_buffer = AudioFifo()
 executor = ThreadPoolExecutor()
 transcription_text = ""
 last_transcribed_time = 0.0
-LLM_MACHINE_IP = "216.153.52.83"
-LLM_MACHINE_PORT = "5000"
+LLM_MACHINE_IP = config["DEFAULT"]["LLM_MACHINE_IP"]
+LLM_MACHINE_PORT = config["DEFAULT"]["LLM_MACHINE_PORT"]
 LLM_URL = f"http://{LLM_MACHINE_IP}:{LLM_MACHINE_PORT}/api/v1/generate"
 incremental_responses = []
 sorted_transcripts = SortedDict()
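The LLM endpoint is no longer hard-coded; it is read from utils/config.ini, which the .gitignore change above keeps untracked. A minimal sketch of how such a config could be loaded with configparser — the path and the placeholder values are assumptions, only the key names appear in this diff:

# utils/run_utils.py (sketch): exposes `config`, consumed as config["DEFAULT"][...]
import configparser
import os

config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), "config.ini"))

# utils/config.ini would then carry entries such as (placeholder values):
# [DEFAULT]
# LLM_MACHINE_IP = 127.0.0.1
# LLM_MACHINE_PORT = 5000
# WHISPER_MODEL_SIZE = tiny
# WHISPER_REAL_TIME_MODEL_SIZE = tiny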
@@ -43,7 +44,7 @@ blacklisted_messages = [" Thank you.", " See you next time!",
 def get_title_and_summary(llm_input_text, last_timestamp):
-    print("Generating title and summary")
+    logger.info("Generating title and summary")
     # output = llm.generate(prompt)
     # Use monadical-ml to fire this query to an LLM and get result
@@ -67,7 +68,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
         "prompt": prompt
     }
-    # To-do: Handle unexpected output formats from the model
+    # TODO : Handle unexpected output formats from the model
     try:
         response = requests.post(LLM_URL, headers=headers, json=data)
         output = json.loads(response.json()["results"][0]["text"])
@@ -80,14 +81,15 @@ def get_title_and_summary(llm_input_text, last_timestamp):
             "cmd": "UPDATE_TOPICS",
             "topics": incremental_responses,
         }
     except Exception as e:
-        print("Exception" + str(e))
+        logger.info("Exception" + str(e))
         result = None
     return result


 def channel_log(channel, t, message):
-    print("channel(%s) %s %s" % (channel.label, t, message))
+    logger.info("channel(%s) %s %s" % (channel.label, t, message))


 def channel_send(channel, message):
@@ -113,18 +115,25 @@ def channel_send_transcript(channel):
         # Due to exceptions if one of the earlier batches can't return
         # a transcript, we don't want to be stuck waiting for the result
         # With the threshold size of 3, we pop the first(lost) element
-        elif len(sorted_transcripts) >= 3:
-            del sorted_transcripts[least_time]
+        else:
+            if len(sorted_transcripts) >= 3:
+                del sorted_transcripts[least_time]
     except Exception as e:
-        print("Exception", str(e))
+        logger.info("Exception", str(e))
         pass


 def get_transcription(frames):
-    print("Transcribing..")
+    logger.info("Transcribing..")
     sorted_transcripts[frames[0].time] = None
-    out_file = io.BytesIO()
-    wf = wave.open(out_file, "wb")
+    # TODO:
+    # Passing IO objects instead of temporary files throws an error
+    # Passing ndarrays (typecasted with float) does not give any
+    # transcription. Refer issue,
+    # https://github.com/guillaumekln/faster-whisper/issues/369
+    audiofilename = "test" + str(datetime.datetime.now())
+    wf = wave.open(audiofilename, "wb")
     wf.setnchannels(CHANNELS)
     wf.setframerate(RATE)
     wf.setsampwidth(2)
@@ -133,22 +142,40 @@ def get_transcription(frames):
         wf.writeframes(b"".join(frame.to_ndarray()))
     wf.close()

-    # To-Do: Look into WhisperTimeStampLogitsProcessor exception
-    try:
-        whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
-    except Exception as e:
-        return
-    global transcription_text, last_transcribed_time
-    transcription_text += whisper_result["text"]
-    duration = whisper_result["chunks"][0]["timestamp"][1]
-    if not duration:
-        duration = 5.0
-    last_transcribed_time += duration
+    result_text = ""
+    try:
+        segments, _ = \
+            model.transcribe(audiofilename,
+                             language="en",
+                             beam_size=5,
+                             vad_filter=True,
+                             vad_parameters=dict(min_silence_duration_ms=500))
+        os.remove(audiofilename)
+        segments = list(segments)
+        result_text = ""
+        duration = 0.0
+        for segment in segments:
+            result_text += segment.text
+            start_time = segment.start
+            end_time = segment.end
+            if not segment.start:
+                start_time = 0.0
+            if not segment.end:
+                end_time = 5.5
+            duration += (end_time - start_time)
+        global last_transcribed_time, transcription_text
+        last_transcribed_time += duration
+        transcription_text += result_text
+    except Exception as e:
+        logger.info("Exception" + str(e))
+        pass
     result = {
         "cmd": "SHOW_TRANSCRIPTION",
-        "text": whisper_result["text"]
+        "text": result_text
     }
     sorted_transcripts[frames[0].time] = result
     return result
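Outside the server, the faster-whisper call this hunk switches to can be exercised on its own. A minimal standalone sketch using the same parameters as above (the audio file name is illustrative):

from faster_whisper import WhisperModel

model = WhisperModel("tiny", device="cpu", compute_type="float32", num_workers=12)
# vad_filter drops long silences before decoding, mirroring the server settings
segments, info = model.transcribe("sample.wav",
                                  language="en",
                                  beam_size=5,
                                  vad_filter=True,
                                  vad_parameters=dict(min_silence_duration_ms=500))
for segment in segments:
    print(segment.start, segment.end, segment.text)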
@@ -167,6 +194,9 @@ def get_final_summary_response():
             seconds=round(last_transcribed_time))),
         "summary": final_summary
     }
+    with open("./artefacts/meeting_titles_and_summaries.txt", "a") as f:
+        f.write(json.dumps(incremental_responses))
     return response
@@ -196,7 +226,7 @@ class AudioStreamTrack(MediaStreamTrack):
             else None
         )
-        if len(transcription_text) > 500:
+        if len(transcription_text) > 750:
             llm_input_text = transcription_text
             transcription_text = ""
             llm_result = run_in_executor(get_title_and_summary,
@@ -245,7 +275,6 @@ async def offer(request):
             if isinstance(message, str) and message.startswith("ping"):
                 channel_send(channel, "pong" + message[4:])

     @pc.on("connectionstatechange")
     async def on_connectionstatechange():
         log_info("Connection state is " + pc.connectionState)
@@ -278,6 +307,16 @@ async def on_shutdown(app):
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="WebRTC based server for Reflector"
+    )
+    parser.add_argument(
+        "--host", default="0.0.0.0", help="Server host IP (def: 0.0.0.0)"
+    )
+    parser.add_argument(
+        "--port", type=int, default=1250, help="Server port (def: 1250)"
+    )
+    args = parser.parse_args()
     app = web.Application()
     cors = aiohttp_cors.setup(
         app,
@@ -293,4 +332,4 @@ if __name__ == "__main__":
     offer_resource = cors.add(app.router.add_resource("/offer"))
     cors.add(offer_resource.add_route("POST", offer))
     app.on_shutdown.append(on_shutdown)
-    web.run_app(app, access_log=None, host="127.0.0.1", port=1250)
+    web.run_app(app, access_log=None, host=args.host, port=args.port)
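With the new arguments the bind address and port are no longer hard-coded. Assuming the module is launched directly (its file name is not shown in this view; server.py is only illustrative), a run looks like:

python server.py --host 0.0.0.0 --port 1250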


@@ -17,7 +17,7 @@ class StreamClient:
     def __init__(
         self,
         signaling,
-        url="http://127.0.0.1:1250",
+        url="http://0.0.0.0:1250",
         play_from=None,
         ping_pong=False
     ):

trials/__init__.py Normal file

@@ -0,0 +1,24 @@
# Steps to prepare data and submit/check OpenAI finetuning
# import subprocess
# subprocess.run("openai tools fine_tunes.prepare_data -f " + "finetuning_dataset.jsonl")
# export OPENAI_API_KEY=
# openai api fine_tunes.create -t <TRAIN_FILE_ID_OR_PATH> -m <BASE_MODEL>
# openai api fine_tunes.list
import openai
# Use your OpenAI API Key
openai.api_key = ""
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . -> ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas . - > "]
# Give your finetuned model name here
# "davinci:ft-personal-2023-07-14-10-43-51"
model_name = ""
response = openai.Completion.create(
model=model_name,
prompt=sample_chunks[0])
print(response)


@@ -0,0 +1,98 @@
import json
import yt_dlp as youtube_dl
from whisper_jax import FlaxWhisperPipline
import jax.numpy as jnp
# Function to extract chapter information from a YouTube video URL
def get_youtube_chapters(video_id):
video_url = "https://www.youtube.com/watch?v=" + video_id
ydl_opts = {
'extract_flat': 'in_playlist',
'skip_download': True,
'quiet': True,
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
video_info = ydl.extract_info(video_url, download=False)
chapters = []
if 'chapters' in video_info:
for chapter in video_info['chapters']:
start_time = chapter['start_time']
end_time = chapter['end_time']
title = chapter['title']
chapters.append({
'start': start_time,
'end': end_time,
'title': title
})
return chapters
# Function to extract video transcription using yt_dlp
def get_youtube_transcription(video_id):
ydl_opts = {
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'outtmpl': './artefacts/audio', # Specify output file path and name
}
# Download the audio
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(["https://www.youtube.com/watch?v=" + video_id])
media_file = "./artefacts/audio.mp3"
pipeline = FlaxWhisperPipline("openai/whisper-" + "tiny",
dtype=jnp.float16,
batch_size=16)
whisper_result = pipeline(media_file, return_timestamps=True)
return whisper_result["chunks"]
# Function to scrape YouTube video transcripts and chapter information
def scrape_youtube_data(video_id):
transcript_text = get_youtube_transcription(video_id)
chapters = get_youtube_chapters(video_id)
print("transcript_text", transcript_text)
print("chapters", chapters)
return transcript_text, chapters
# Function to generate fine-tuning dataset from YouTube data
def generate_finetuning_dataset(video_ids):
prompt_completion_pairs = []
for video_id in video_ids:
transcript_text, chapters = scrape_youtube_data(video_id)
if transcript_text is not None and chapters is not None:
for chapter in chapters:
start_time = chapter["start"]
end_time = chapter["end"]
chapter_text = chapter["title"]
prompt = ""
for transcript in transcript_text:
if transcript["timestamp"][0] >= start_time and transcript["timestamp"][1] < end_time:
prompt += transcript["text"]
if prompt is not None:
completion = chapter_text
prompt_completion_pairs.append({"prompt": prompt, "completion": completion})
return prompt_completion_pairs
# Add all the video ids here, the videos must have captions [chapters]
video_ids = ["yTnSEZIwnkU"]
dataset = generate_finetuning_dataset(video_ids)
with open("finetuning_dataset.jsonl", "w") as f:
for example in dataset:
f.write(json.dumps(example) + "\n")
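Each line that generate_finetuning_dataset() writes to finetuning_dataset.jsonl is a single prompt/completion pair: the transcript text that falls inside a chapter becomes the prompt and the chapter title becomes the completion. Schematically (placeholder values, not output from a real run):

{"prompt": "<transcript text falling inside the chapter>", "completion": "<chapter title>"}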


@@ -1,98 +0,0 @@
# # Approach 1
# from transformers import GPTNeoForCausalLM, GPT2Tokenizer
#
# model_name = 'EleutherAI/gpt-neo-1.3B'
# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# model = GPTNeoForCausalLM.from_pretrained(model_name)
#
# conversation = """
# Summarize the following conversation in 3 key sentences:
#
# We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
# Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
# Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
# Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
# Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
# Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
# """
#
# input_ids = tokenizer.encode(conversation, return_tensors='pt')
#
# output = model.generate(input_ids,
# max_length=30,
# num_return_sequences=1)
#
# caption = tokenizer.decode(output[0], skip_special_tokens=True)
# print("Caption:", caption[len(input_ids):])
#
# # Approach 2
# import torch
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
#
# model_name = "gpt2"
# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# model = GPT2LMHeadModel.from_pretrained(model_name)
#
# model.eval()
#
# text = """
# You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
# """
#
# tokenizer.pad_token = tokenizer.eos_token
# input_ids = tokenizer.encode(text,
# max_length=100,
# truncation=True,
# return_tensors="pt")
# attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
# output = model.generate(input_ids,
# max_new_tokens=20,
# num_return_sequences=1,
# num_beams=2,
# attention_mask=attention_mask)
#
# chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
# for i, title in enumerate(chapter_titles):
# print("Caption: ", title)
# Approach 3
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
def generate_response(conversation, max_length=100):
input_text = ""
for entry in conversation:
role = entry["role"]
content = entry["content"]
input_text += f"{role}: {content}\n"
# Tokenize the entire conversation
input_ids = tokenizer.encode(input_text, return_tensors="pt")
# Generate text based on the entire conversation
with torch.no_grad():
output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)
# Decode the generated text and return it
response = tokenizer.decode(output[0], skip_special_tokens=True)
return response
if __name__ == "__main__":
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
]
conversation = [
{"role": "system", "content": "Summarize this text" },
{"role": "user", "content": " text : " + sample_chunks[0]},
]
response = generate_response(conversation)
print("Response:", response)


@@ -16,8 +16,8 @@ from av import AudioFifo
 from sortedcontainers import SortedDict
 from whisper_jax import FlaxWhisperPipline

-from utils.log_utils import logger
-from utils.run_utils import config, Mutex
+from reflector.utils.log_utils import logger
+from reflector.utils.run_utils import config, Mutex

 WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_REAL_TIME_MODEL_SIZE"]

 pcs = set()


@@ -0,0 +1,57 @@
import requests
import spacy
# Enter the Machine where the LLM is hosted
LLM_MACHINE_IP = ""
# This is the URL of text-generation-webui
URL = f"http://{LLM_MACHINE_IP}:5000/api/v1/generate"
headers = {
"Content-Type": "application/json"
}
def split_text_file(filename, token_count):
nlp = spacy.load('en_core_web_md')
with open(filename, 'r') as file:
text = file.read()
doc = nlp(text)
total_tokens = len(doc)
parts = []
start_index = 0
while start_index < total_tokens:
end_index = start_index + token_count
part_tokens = doc[start_index:end_index - 5]
part = ' '.join(token.text for token in part_tokens)
parts.append(part)
start_index = end_index
return parts
final_summary = []
parts = split_text_file("transcript.txt", 1600)
for part in parts:
prompt = f"""
### Human:
Given the following text, distill the most important information
into a short summary: {part}
### Assistant:
"""
data = {
"prompt": prompt
}
    try:
        response = requests.post(URL, headers=headers, json=data)
        print(response.json())
        # Collect each chunk's generated text so the final write below has content
        final_summary.append(response.json()["results"][0]["text"])
    except Exception as e:
        print(str(e))
with open("summary.txt", "w") as sum:
sum.write(" ".join(final_summary))


@@ -0,0 +1,43 @@
import torch
from transformers import BertTokenizer, BertModel
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Load the pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
model = BertModel.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)
# Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Load the SentenceTransformer model
sentence_transformer_model = SentenceTransformer('average_word_embeddings_glove.6B.300d')
# Define the input text
text = "Your input text to be summarized goes here."
# Tokenize the text
tokens = tokenizer.tokenize(text)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = torch.tensor([input_ids]).to(device)
# Get the BERT model output
with torch.no_grad():
outputs = model(input_ids)[0] # Extract the last hidden states
# Calculate sentence embeddings
sentence_embeddings = outputs.mean(dim=1).squeeze().cpu().numpy()
input_text_embedding = sentence_transformer_model.encode([text])[0]
# Calculate cosine similarity between sentences and input text
similarity_scores = cosine_similarity([input_text_embedding], sentence_embeddings)
# Sort the sentences by similarity scores in descending order
sorted_sentences = [sent for _, sent in sorted(zip(similarity_scores[0], sentences), reverse=True)]
# Choose the top sentences as the summary
num_summary_sentences = 2 # Adjust as needed
summary = ". ".join(sorted_sentences[:num_summary_sentences])
print("Summary:", summary)
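Note that the ranking step above assumes a sentences list that the snippet never defines. One way to supply it (an assumption, not part of this commit) is NLTK's sentence tokenizer; each sentence would then also need its own embedding (for example via sentence_transformer_model.encode(sentences)) rather than the single mean-pooled vector computed above:

from nltk.tokenize import sent_tokenize

# Candidate sentences for the cosine-similarity ranking
sentences = sent_tokenize(text)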


@@ -0,0 +1,101 @@
# Approach 1
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
model_name = 'EleutherAI/gpt-neo-1.3B'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPTNeoForCausalLM.from_pretrained(model_name)
conversation = """
Summarize the following conversation in 3 key sentences:
We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
"""
input_ids = tokenizer.encode(conversation, return_tensors='pt')
output = model.generate(input_ids,
max_length=30,
num_return_sequences=1)
caption = tokenizer.decode(output[0], skip_special_tokens=True)
print("Caption:", caption[len(input_ids):])
# Approach 2
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()
text = """
You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
"""
tokenizer.pad_token = tokenizer.eos_token
input_ids = tokenizer.encode(text,
max_length=100,
truncation=True,
return_tensors="pt")
attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
output = model.generate(input_ids,
max_new_tokens=20,
num_return_sequences=1,
num_beams=2,
attention_mask=attention_mask)
chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
for i, title in enumerate(chapter_titles):
print("Caption: ", title)
# Approach 3
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
def generate_response(conversation, max_length=100):
input_text = ""
for entry in conversation:
role = entry["role"]
content = entry["content"]
input_text += f"{role}: {content}\n"
# Tokenize the entire conversation
input_ids = tokenizer.encode(input_text, return_tensors="pt")
# Generate text based on the entire conversation
with torch.no_grad():
output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)
# Decode the generated text and return it
response = tokenizer.decode(output[0], skip_special_tokens=True)
return response
if __name__ == "__main__":
# Call appropriate approach from the main while experimenting
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
]
conversation = [
{"role": "system", "content": "Summarize this text"},
{"role": "user", "content": " text : " + sample_chunks[0]},
]
response = generate_response(conversation)
print("Response:", response)


@@ -1,9 +1,11 @@
+import spacy
+import sys

 # Observe the incremental summaries by performing summaries in chunks
 with open("transcript.txt") as f:
     transcription = f.read()

-import spacy

 def split_text_file(filename, token_count):
     nlp = spacy.load('en_core_web_md')
@@ -26,6 +28,7 @@ def split_text_file(filename, token_count):
     return parts


 # Set the chunk length here to split the transcript and test
 MAX_CHUNK_LENGTH = 1000
@@ -41,13 +44,11 @@ with open("chunks" + str(MAX_CHUNK_LENGTH) + ".txt", "a") as f:
 # ex. python incsum.py 1 => will run approach 1
 # If no input, will run all approaches
-import sys
 try:
     index = sys.argv[1]
 except:
     index = None

 # Approach 1 : facebook/bart-large-cnn
 if index == "1" or index is None:
     SUMMARY_MODEL = "facebook/bart-large-cnn"
@@ -81,7 +82,6 @@ if index == "1" or index is None:
         for summary in summaries:
             f.write(summary + "\n\n")

 # Approach 2
 if index == "2" or index is None:
     print("Performing chunk summary : " + "gpt-neo-1.3B")
@@ -155,4 +155,3 @@ if index == "3" or index is None:
     with open("mpt-7b-summaries.txt", "a") as f:
         for summary in summaries:
             f.write(summary + "\n\n")


@@ -0,0 +1,37 @@
# Use OpenAI API endpoint to send data to OpenAI
# along with prompts to caption/summarize the conversation
import openai
openai.api_key = ""
# to caption, user prompt used : "caption this conversation"
# max_tokens=20
# to incremental summarize, user prompt used : "summarize this conversation in a few sentences by taking key points"
# max_tokens=300
sample_chunks = [
"You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
conversation = [
{"role": "system",
"content": sample_chunks[1]},
{"role": "user",
"content": "summarize this conversation in a few sentences by taking key points"}
]
model = "gpt-3.5-turbo"
response = openai.ChatCompletion.create(model=model,
messages=conversation,
n=1,
max_tokens=300)
# Try fine tuned model
# model = "davinci:ft-personal-2023-07-14-10-43-51"
# response = openai.Completion.create(model=model,
# prompt=sample_chunks[0] + " -> ")
caption = response.choices[0]
print(caption)
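print(caption) above dumps the entire first choice object. If only the generated text is needed, the 0.x openai SDK used here also allows dict-style access (a small addition, not in the original trial):

# Pull just the assistant message text out of the response
caption_text = response["choices"][0]["message"]["content"]
print(caption_text)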


@@ -0,0 +1,33 @@
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import torch
# Load the Pegasus model and tokenizer
model_name = "google/pegasus-large"
model = PegasusForConditionalGeneration.from_pretrained(model_name)
tokenizer = PegasusTokenizer.from_pretrained(model_name)
# Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
# Define the input text for summarization
text = sample_chunks[1]
inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt").to(device)
# Generate the summary
summary_ids = model.generate(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
max_length=200,
num_beams=4,
length_penalty=2.0,
early_stopping=True,
)
# Decode and print the summary
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Summary:", summary)


@@ -1,36 +1,27 @@
# Use OpenAI API endpoint to send data to OpenAI from transformers import T5ForConditionalGeneration, T5Tokenizer
# along with prompts to caption/summarize the conversation import torch
# Load the T5 model and tokenizer
model_name = "t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
import openai # Set the device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
openai.api_key = "" model.to(device)
# to caption, user prompt used : "caption this conversation"
# max_tokens=20
# to incremental summarize, user prompt used : "summarize this conversation in a few sentences by taking key points"
# max_tokens=300
sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ", sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . 
And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
" We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."] " We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
conversation = [
{"role": "system",
"content": sample_chunks[1]},
{"role": "user",
"content": "summarize this conversation in a few sentences by taking key points"}
]
model = "gpt-3.5-turbo" # Define the input text for summarization
response = openai.ChatCompletion.create(model=model, text = "Summarize the following text in 3 key points. text : " + sample_chunks[1]
messages=conversation,
n=1,
max_tokens=300)
# Try finetuned model # Tokenize the input text
# model = "davinci:ft-personal-2023-07-14-10-43-51" inputs = tokenizer.encode(text, return_tensors="pt").to(device)
# response = openai.Completion.create(model=model,
# prompt=sample_chunks[0] + " -> ")
caption = response.choices[0] # Generate the summary
print(caption) summary_ids = model.generate(inputs, max_length=1000, num_beams=4, early_stopping=True)
# Decode and print the summary
summary = tokenizer.decode(summary_ids.squeeze(), skip_special_tokens=True)
print("Summary:", summary)

View File

@@ -0,0 +1,44 @@
from gpt4all import GPT4All

import spacy

# Load a local GPT4All model (path is machine-specific)
model = GPT4All("/Users/gokulmohanarangan/Library/Application Support/nomic.ai/GPT4All/ggml-vicuna-13b-1.1-q4_2.bin")


def split_text_file(filename, token_count):
    """Split a text file into chunks of roughly `token_count` spaCy tokens."""
    nlp = spacy.load('en_core_web_md')
    with open(filename, 'r') as file:
        text = file.read()
    doc = nlp(text)
    total_tokens = len(doc)
    parts = []
    start_index = 0
    while start_index < total_tokens:
        end_index = start_index + token_count
        part_tokens = doc[start_index:end_index]
        part = ' '.join(token.text for token in part_tokens)
        parts.append(part)
        start_index = end_index
    return parts


# Summarize the transcript chunk by chunk, then stitch the results together
parts = split_text_file("transcript.txt", 1800)
final_summary = []
for part in parts:
    prompt = f"""
    ### Human:
    Summarize the following text without missing any key points and action items.
    {part}
    ### Assistant:
    """
    output = model.generate(prompt)
    final_summary.append(output)

with open("sum.txt", "w") as out_file:
    out_file.write(" ".join(final_summary))

View File

View File

@@ -18,11 +18,11 @@ import nltk
import yt_dlp as youtube_dl import yt_dlp as youtube_dl
from whisper_jax import FlaxWhisperPipline from whisper_jax import FlaxWhisperPipline
from utils.file_utils import download_files, upload_files from ...utils.file_utils import download_files, upload_files
from utils.log_utils import logger from ...utils.log_utils import logger
from utils.run_utils import config from ...utils.run_utils import config
from utils.text_utilities import post_process_transcription, summarize from ...utils.text_utils import post_process_transcription, summarize
from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud from ...utils.viz_utils import create_talk_diff_scatter_viz, create_wordcloud
nltk.download('punkt', quiet=True) nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True) nltk.download('stopwords', quiet=True)
@@ -30,8 +30,8 @@ nltk.download('stopwords', quiet=True)
WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"] WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]
NOW = datetime.now() NOW = datetime.now()
if not os.path.exists('./artefacts'): if not os.path.exists('../../artefacts'):
os.makedirs('./artefacts') os.makedirs('../../artefacts')
def init_argparse() -> argparse.ArgumentParser: def init_argparse() -> argparse.ArgumentParser:
@@ -91,7 +91,7 @@ def main():
# Download the audio # Download the audio
with youtube_dl.YoutubeDL(ydl_opts) as ydl: with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([args.location]) ydl.download([args.location])
media_file = "./artefacts/audio.mp3" media_file = "../artefacts/audio.mp3"
logger.info("Saved downloaded YouTube video to: " + media_file) logger.info("Saved downloaded YouTube video to: " + media_file)
else: else:

View File

@@ -10,11 +10,11 @@ from pynput import keyboard
from termcolor import colored from termcolor import colored
from whisper_jax import FlaxWhisperPipline from whisper_jax import FlaxWhisperPipline
from utils.file_utils import upload_files from ...utils.file_utils import upload_files
from utils.log_utils import logger from ...utils.log_utils import logger
from utils.run_utils import config from ...utils.run_utils import config
from utils.text_utilities import post_process_transcription, summarize from ...utils.text_utils import post_process_transcription, summarize
from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud from ...utils.viz_utils import create_talk_diff_scatter_viz, create_wordcloud
WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"] WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]

View File

@@ -1,5 +1,6 @@
[DEFAULT] [DEFAULT]
#SetexceptionruleforOpenMPerrortoallowduplicatelibinitialization #Set exception rule for OpenMP error
#to allow duplicate lib initialization
KMP_DUPLICATE_LIB_OK=TRUE KMP_DUPLICATE_LIB_OK=TRUE
#ExportOpenAIAPIKey #ExportOpenAIAPIKey
OPENAI_APIKEY= OPENAI_APIKEY=
@@ -7,8 +8,8 @@ OPENAI_APIKEY=
WHISPER_MODEL_SIZE=tiny WHISPER_MODEL_SIZE=tiny
WHISPER_REAL_TIME_MODEL_SIZE=tiny WHISPER_REAL_TIME_MODEL_SIZE=tiny
#AWSconfig #AWSconfig
AWS_ACCESS_KEY=***REMOVED*** AWS_ACCESS_KEY=
AWS_SECRET_KEY=***REMOVED*** AWS_SECRET_KEY=
BUCKET_NAME=reflector-bucket BUCKET_NAME=reflector-bucket
#Summarizerconfig #Summarizerconfig
SUMMARY_MODEL=facebook/bart-large-cnn SUMMARY_MODEL=facebook/bart-large-cnn
@@ -20,5 +21,6 @@ SUMMARIZE_USING_CHUNKS=YES
# Audiodevice # Audiodevice
BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME=aggregator BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME=aggregator
AV_FOUNDATION_DEVICE_ID=1 AV_FOUNDATION_DEVICE_ID=1
# LLM PATH # LLM configs
LLM_PATH= LLM_MACHINE_IP=
LLM_MACHINE_PORT=
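The new LLM_MACHINE_IP / LLM_MACHINE_PORT keys would presumably be read through the same configparser-backed config object used for the other settings; a minimal sketch (the URL shape is an assumption, not taken from this commit):
# Sketch only: reading the new LLM keys the same way the repo reads the rest
# of utils/config.ini via configparser.
import configparser

config = configparser.ConfigParser()
config.read("utils/config.ini")

llm_ip = config["DEFAULT"]["LLM_MACHINE_IP"]
llm_port = config["DEFAULT"]["LLM_MACHINE_PORT"]
llm_base_url = f"http://{llm_ip}:{llm_port}"  # endpoint path depends on the LLM server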

32
utils/format_output.py Normal file
View File

@@ -0,0 +1,32 @@
import json

# Load the topics produced by the summarizer
with open("../artefacts/meeting_titles_and_summaries.txt", "r") as f:
    outputs = json.loads(f.read())

# Split the combined output into separate transcript, summary and
# title/description files
with open("../artefacts/meeting_transcript.txt", "a") as transcript_file, \
        open("../artefacts/meeting_title_description.txt", "a") as title_desc_file, \
        open("../artefacts/meeting_summary.txt", "a") as summary_file:
    for item in outputs["topics"]:
        transcript_file.write(item["transcript"])
        summary_file.write(item["description"])
        title_desc_file.write("TITLE: \n")
        title_desc_file.write(item["title"])
        title_desc_file.write("\n")
        title_desc_file.write("DESCRIPTION: \n")
        title_desc_file.write(item["description"])
        title_desc_file.write("\n")
        title_desc_file.write("TRANSCRIPT: \n")
        title_desc_file.write(item["transcript"])
        title_desc_file.write("\n")
        title_desc_file.write("---------------------------------------- \n\n")
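For reference, the script assumes meeting_titles_and_summaries.txt contains JSON shaped roughly like the literal below (inferred from the keys the loop reads; values are placeholders only):
# Assumed input shape, inferred from the keys accessed above:
example_outputs = {
    "topics": [
        {
            "title": "Intro and agenda",
            "description": "Short summary of the segment",
            "transcript": "Raw transcript text for the segment",
        },
    ],
}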

View File

@@ -6,8 +6,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
from transformers import BartForConditionalGeneration, BartTokenizer from transformers import BartForConditionalGeneration, BartTokenizer
from utils.log_utils import logger from log_utils import logger
from utils.run_utils import config from run_utils import config
nltk.download('punkt', quiet=True) nltk.download('punkt', quiet=True)
@@ -154,7 +154,7 @@ def chunk_text(text,
def summarize(transcript_text, timestamp, def summarize(transcript_text, timestamp,
real_time=False, real_time=False,
summarize_using_chunks=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]): chunk_summarize=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summary_model = config["DEFAULT"]["SUMMARY_MODEL"] summary_model = config["DEFAULT"]["SUMMARY_MODEL"]
if not summary_model: if not summary_model:
@@ -166,27 +166,35 @@ def summarize(transcript_text, timestamp,
model = BartForConditionalGeneration.from_pretrained(summary_model) model = BartForConditionalGeneration.from_pretrained(summary_model)
model = model.to(device) model = model.to(device)
output_filename = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt" output_file = "summary_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
if real_time: if real_time:
output_filename = "real_time_" + output_filename output_file = "real_time_" + output_file
if summarize_using_chunks != "YES": if chunk_summarize != "YES":
max_length = int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"])
inputs = tokenizer. \ inputs = tokenizer. \
batch_encode_plus([transcript_text], truncation=True, batch_encode_plus([transcript_text], truncation=True,
padding='longest', padding='longest',
max_length=int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"]), max_length=max_length,
return_tensors='pt') return_tensors='pt')
inputs = inputs.to(device) inputs = inputs.to(device)
with torch.no_grad(): with torch.no_grad():
num_beams = int(config["DEFAULT"]["BEAM_SIZE"])
max_length = int(config["DEFAULT"]["MAX_LENGTH"])
summaries = model.generate(inputs['input_ids'], summaries = model.generate(inputs['input_ids'],
num_beams=int(config["DEFAULT"]["BEAM_SIZE"]), length_penalty=2.0, num_beams=num_beams,
max_length=int(config["DEFAULT"]["MAX_LENGTH"]), early_stopping=True) length_penalty=2.0,
max_length=max_length,
early_stopping=True)
decoded_summaries = [tokenizer.decode(summary, skip_special_tokens=True, clean_up_tokenization_spaces=False) decoded_summaries = \
[tokenizer.decode(summary,
skip_special_tokens=True,
clean_up_tokenization_spaces=False)
for summary in summaries] for summary in summaries]
summary = " ".join(decoded_summaries) summary = " ".join(decoded_summaries)
with open("./artefacts/" + output_filename, 'w') as f: with open("./artefacts/" + output_file, 'w') as f:
f.write(summary.strip() + "\n") f.write(summary.strip() + "\n")
else: else:
logger.info("Breaking transcript into smaller chunks") logger.info("Breaking transcript into smaller chunks")
@@ -195,8 +203,8 @@ def summarize(transcript_text, timestamp,
logger.info(f"Transcript broken into {len(chunks)} " logger.info(f"Transcript broken into {len(chunks)} "
f"chunks of at most 500 words") f"chunks of at most 500 words")
logger.info(f"Writing summary text to: {output_filename}") logger.info(f"Writing summary text to: {output_file}")
with open(output_filename, 'w') as f: with open(output_file, 'w') as f:
summaries = summarize_chunks(chunks, tokenizer, model) summaries = summarize_chunks(chunks, tokenizer, model)
for summary in summaries: for summary in summaries:
f.write(summary.strip() + " ") f.write(summary.strip() + " ")
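A minimal call into the updated summarize() might look like the sketch below (the transcript path and the chunking flag are illustrative; the function reads its remaining settings from config.ini):
# Illustrative usage of the updated summarize() signature; assumes a
# transcript file and the ./artefacts directory already exist.
from datetime import datetime

with open("./artefacts/transcript.txt") as f:
    transcript_text = f.read()

# chunk_summarize mirrors the SUMMARIZE_USING_CHUNKS "YES"/"NO" config value
summarize(transcript_text, datetime.now(), real_time=False, chunk_summarize="NO")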

View File

@@ -45,24 +45,24 @@ def create_wordcloud(timestamp, real_time=False):
plt.axis("off") plt.axis("off")
plt.tight_layout(pad=0) plt.tight_layout(pad=0)
wordcloud_name = "wordcloud" wordcloud = "wordcloud"
if real_time: if real_time:
wordcloud_name = "real_time_" + wordcloud_name + "_" +\ wordcloud = "real_time_" + wordcloud + "_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png" timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
else: else:
wordcloud_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png" wordcloud += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
plt.savefig("./artefacts/" + wordcloud_name) plt.savefig("./artefacts/" + wordcloud)
def create_talk_diff_scatter_viz(timestamp, real_time=False): def create_talk_diff_scatter_viz(timestamp, real_time=False):
""" """
Perform agenda vs transription diff to see covered topics. Perform agenda vs transcription diff to see covered topics.
Create a scatter plot of words in topics. Create a scatter plot of words in topics.
:return: None. Saved locally. :return: None. Saved locally.
""" """
spaCy_model = "en_core_web_md" spacy_model = "en_core_web_md"
nlp = spacy.load(spaCy_model) nlp = spacy.load(spacy_model)
nlp.add_pipe('sentencizer') nlp.add_pipe('sentencizer')
agenda_topics = [] agenda_topics = []
@@ -75,7 +75,6 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
agenda_topics.append(line.split(":")[0]) agenda_topics.append(line.split(":")[0])
# Load the transcription with timestamp # Load the transcription with timestamp
filename = ""
if real_time: if real_time:
filename = "./artefacts/real_time_transcript_with_timestamp_" + \ filename = "./artefacts/real_time_transcript_with_timestamp_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt" timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -142,7 +141,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
df = df.apply(create_new_columns, axis=1) df = df.apply(create_new_columns, axis=1)
# Count the number of items covered and calculatre the percentage # Count the number of items covered and calculate the percentage
num_covered_items = sum(covered_items.values()) num_covered_items = sum(covered_items.values())
percentage_covered = num_covered_items / len(agenda) * 100 percentage_covered = num_covered_items / len(agenda) * 100