Merge pull request #32 from Monadical-SAS/feat/gokul

Clean up and move notebook to notebooks folder
Authored by projects-g on 2023-07-18 22:45:39 +05:30; committed by GitHub.
5 changed files with 129 additions and 62 deletions

View File

@@ -57,3 +57,4 @@ stamina==23.1.0
 httpx==0.24.1
 sortedcontainers==2.4.0
 https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
+gpt4all==1.0.5
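
Note for reviewers: the server changes below rely on the gpt4all 1.0.x Python bindings pinned here, whose API is roughly this shape (the model name in this sketch is illustrative, not the checkpoint the PR hardcodes):

    from gpt4all import GPT4All

    llm = GPT4All("ggml-vicuna-13b-1.1-q4_2.bin")  # loads a local ggml checkpoint by name or path
    text = llm.generate("Summarize: ...")          # returns the completion as a plain string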

View File

@@ -10,6 +10,7 @@ from aiohttp import web
 from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaRelay
 from av import AudioFifo
+from gpt4all import GPT4All
 from loguru import logger
 from whisper_jax import FlaxWhisperPipline
@@ -26,6 +27,28 @@ CHANNELS = 2
 RATE = 48000
 audio_buffer = AudioFifo()
 executor = ThreadPoolExecutor()
+transcription_text = ""
+llm = GPT4All("/Users/gokulmohanarangan/Library/Application Support/nomic.ai/GPT4All/ggml-vicuna-13b-1.1-q4_2.bin")
+
+
+def get_title_and_summary():
+    global transcription_text
+    output = None
+    if len(transcription_text) > 1000:
+        print("Generating title and summary")
+        prompt = f"""
+        ### Human:
+        Create a JSON object having 2 fields: title and summary. For the title field generate a short title for the given
+        text and for the summary field, summarize the given text by creating 3 key points.
+        {transcription_text}
+        ### Assistant:
+        """
+        transcription_text = ""
+        output = llm.generate(prompt)
+        return str(output)
+    return output


 def channel_log(channel, t, message):
@@ -34,8 +57,8 @@ def channel_log(channel, t, message):
 def channel_send(channel, message):
     # channel_log(channel, ">", message)
-    if channel:
-        channel.send(message)
+    if channel and message:
+        channel.send(str(message))


 def get_transcription(frames):
@@ -50,9 +73,9 @@ def get_transcription(frames):
     wf.writeframes(b"".join(frame.to_ndarray()))
     wf.close()
     whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
-    with open("test_exec.txt", "a") as f:
-        f.write(whisper_result["text"])
-    whisper_result['start_time'] = [f.time for f in frames]
+    # whisper_result['start_time'] = [f.time for f in frames]
+    global transcription_text
+    transcription_text += whisper_result["text"]
     return whisper_result
@@ -75,9 +98,15 @@ class AudioStreamTrack(MediaStreamTrack):
             get_transcription, local_frames, executor=executor
         )
         whisper_result.add_done_callback(
-            lambda f: channel_send(data_channel,
-                                   str(whisper_result.result()))
-            if (f.result())
+            lambda f: channel_send(data_channel, whisper_result.result())
+            if f.result()
+            else None
+        )
+        llm_result = run_in_executor(get_title_and_summary,
+                                     executor=executor)
+        llm_result.add_done_callback(
+            lambda f: channel_send(data_channel, llm_result.result())
+            if f.result()
             else None
         )
         return frame
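
One gap worth flagging: get_title_and_summary() asks the model for a JSON object, but llm.generate() returns free-form text, and the string is sent over the data channel as-is. A receiving client would likely need a defensive parse along these lines (a hypothetical helper, not part of this PR):

    import json

    def parse_title_summary(message):
        # Strict parse first; models often wrap the JSON in extra prose.
        try:
            return json.loads(message)
        except json.JSONDecodeError:
            # Fall back to the first {...} span, if any.
            start, end = message.find("{"), message.rfind("}")
            if start != -1 and end > start:
                try:
                    return json.loads(message[start:end + 1])
                except json.JSONDecodeError:
                    pass
            return None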

View File

@@ -1,4 +1,3 @@
-import ast
 import asyncio
 import time
 import uuid
@@ -11,9 +10,7 @@ from aiortc import (RTCPeerConnection, RTCSessionDescription)
 from aiortc.contrib.media import (MediaPlayer, MediaRelay)
 from utils.log_utils import logger
-from utils.run_utils import config, Mutex
-
-file_lock = Mutex(open("test_sm_6.txt", "a"))
+from utils.run_utils import config


 class StreamClient:
@@ -146,10 +143,7 @@ class StreamClient:
     async def worker(self, name, queue):
         while True:
             msg = await self.queue.get()
-            msg = ast.literal_eval(msg)
-            with file_lock.lock() as file:
-                file.write(msg["text"])
-            yield msg["text"]
+            yield msg
             self.queue.task_done()

     async def start(self):
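
This simplification leans on the previous file's change: channel_send() now stringifies every payload, so messages arrive as plain text and the ast.literal_eval parse and file mutex are no longer needed. A minimal consumer under that assumption (names hypothetical):

    async def consume(client):
        # Drain transcription/summary strings as the worker yields them.
        async for text in client.worker("w0", client.queue):
            print(text)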

View File

@@ -1,55 +1,98 @@
-# Approach 1
-from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-
-model_name = 'EleutherAI/gpt-neo-1.3B'
-tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-model = GPTNeoForCausalLM.from_pretrained(model_name)
-
-conversation = """
-We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
-Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
-Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
-Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
-Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
-Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
-"""
-
-input_ids = tokenizer.encode(conversation, return_tensors='pt')
-
-output = model.generate(input_ids,
-                        max_length=30,
-                        num_return_sequences=1)
-
-caption = tokenizer.decode(output[0], skip_special_tokens=True)
-print("Caption:", caption[len(input_ids):])
-
-# Approach 2
-import torch
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
-
-model_name = "gpt2"
-tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-model = GPT2LMHeadModel.from_pretrained(model_name)
-
-model.eval()
-
-text = """
-You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
-"""
-
-tokenizer.pad_token = tokenizer.eos_token
-input_ids = tokenizer.encode(text,
-                             max_length=100,
-                             truncation=True,
-                             return_tensors="pt")
-attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
-output = model.generate(input_ids,
-                        max_new_tokens=20,
-                        num_return_sequences=1,
-                        num_beams=2,
-                        attention_mask=attention_mask)
-
-chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
-for i, title in enumerate(chapter_titles):
-    print("Caption: ", title)
+# # Approach 1
+# from transformers import GPTNeoForCausalLM, GPT2Tokenizer
+#
+# model_name = 'EleutherAI/gpt-neo-1.3B'
+# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+# model = GPTNeoForCausalLM.from_pretrained(model_name)
+#
+# conversation = """
+# We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
+# Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
+# Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
+# Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
+# Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
+# Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
+# """
+#
+# input_ids = tokenizer.encode(conversation, return_tensors='pt')
+#
+# output = model.generate(input_ids,
+#                         max_length=30,
+#                         num_return_sequences=1)
+#
+# caption = tokenizer.decode(output[0], skip_special_tokens=True)
+# print("Caption:", caption[len(input_ids):])
+#
+# # Approach 2
+# import torch
+# from transformers import GPT2LMHeadModel, GPT2Tokenizer
+#
+# model_name = "gpt2"
+# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+# model = GPT2LMHeadModel.from_pretrained(model_name)
+#
+# model.eval()
+#
+# text = """
+# You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
+# """
+#
+# tokenizer.pad_token = tokenizer.eos_token
+# input_ids = tokenizer.encode(text,
+#                              max_length=100,
+#                              truncation=True,
+#                              return_tensors="pt")
+# attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
+# output = model.generate(input_ids,
+#                         max_new_tokens=20,
+#                         num_return_sequences=1,
+#                         num_beams=2,
+#                         attention_mask=attention_mask)
+#
+# chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
+# for i, title in enumerate(chapter_titles):
+#     print("Caption: ", title)
+
+# Approach 3
+import torch
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
+
+
+def generate_response(conversation, max_length=100):
+    input_text = ""
+    for entry in conversation:
+        role = entry["role"]
+        content = entry["content"]
+        input_text += f"{role}: {content}\n"
+
+    # Tokenize the entire conversation
+    input_ids = tokenizer.encode(input_text, return_tensors="pt")
+
+    # Generate text based on the entire conversation
+    with torch.no_grad():
+        output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id)
+
+    # Decode the generated text and return it
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return response
+
+
+if __name__ == "__main__":
+    model_name = "gpt2"
+    model = GPT2LMHeadModel.from_pretrained(model_name)
+    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+
+    sample_chunks = [
+        "You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
+    ]
+    conversation = [
+        {"role": "system", "content": "Summarize this text" },
+        {"role": "user", "content": " text : " + sample_chunks[0]},
+    ]
+    response = generate_response(conversation)
+    print("Response:", response)