Mirror of https://github.com/Monadical-SAS/reflector.git
Commit: organize trials
trials/__init__.py (new file, empty)
Modified file (name not captured in this view):

@@ -1,11 +1,13 @@
 import requests
 import spacy
 
+# Enter the Machine where the LLM is hosted
+LLM_MACHINE_IP = ""
 # This is the URL of text-generation-webui
-URL = "http://216.153.52.83:5000/api/v1/generate"
+URL = f"http://{LLM_MACHINE_IP}:5000/api/v1/generate"
 
 headers = {
     "Content-Type": "application/json"
 }
 

@@ -23,7 +25,7 @@ def split_text_file(filename, token_count):
 
     while start_index < total_tokens:
         end_index = start_index + token_count
-        part_tokens = doc[start_index:end_index-5]
+        part_tokens = doc[start_index:end_index - 5]
         part = ' '.join(token.text for token in part_tokens)
         parts.append(part)
         start_index = end_index

@@ -33,7 +35,6 @@ def split_text_file(filename, token_count):
 
 final_summary = ""
 parts = split_text_file("transcript.txt", 1600)
-previous_summary = ""
 
 for part in parts:
     prompt = f"""

@@ -52,5 +53,5 @@ for part in parts:
     except Exception as e:
         print(str(e))
 
-with open("sum.txt", "w") as sum:
+with open("summary.txt", "w") as sum:
     sum.write(" ".join(final_summary))
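For context, a minimal sketch of how the pieces above are typically wired together. The endpoint and headers come from the diff; the request body follows text-generation-webui's legacy /api/v1/generate contract, and the prompt template and generation parameters here are illustrative assumptions, not part of the commit:

import requests

LLM_MACHINE_IP = ""  # as in the diff: filled in per deployment

def summarize_chunk(chunk):
    # The legacy blocking API accepts {"prompt": ..., "max_new_tokens": ...}
    # and replies with {"results": [{"text": ...}]}.
    url = f"http://{LLM_MACHINE_IP}:5000/api/v1/generate"
    payload = {
        "prompt": f"Summarize the following transcript chunk:\n{chunk}\nSummary:",
        "max_new_tokens": 200,  # assumed value
    }
    r = requests.post(url, json=payload,
                      headers={"Content-Type": "application/json"}, timeout=120)
    r.raise_for_status()
    return r.json()["results"][0]["text"]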
trials/gpt2.py (125 lines)
@@ -1,65 +1,66 @@
-# # Approach 1
-# from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-#
-# model_name = 'EleutherAI/gpt-neo-1.3B'
-# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-# model = GPTNeoForCausalLM.from_pretrained(model_name)
-#
-# conversation = """
-# Summarize the following conversation in 3 key sentences:
-#
-# We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
-# Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
-# Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
-# Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
-# Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
-# Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
-# """
-#
-# input_ids = tokenizer.encode(conversation, return_tensors='pt')
-#
-# output = model.generate(input_ids,
-# max_length=30,
-# num_return_sequences=1)
-#
-# caption = tokenizer.decode(output[0], skip_special_tokens=True)
-# print("Caption:", caption[len(input_ids):])
-
-#
-# # Approach 2
-# import torch
-# from transformers import GPT2LMHeadModel, GPT2Tokenizer
-#
-# model_name = "gpt2"
-# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-# model = GPT2LMHeadModel.from_pretrained(model_name)
-#
-# model.eval()
-#
-# text = """
-# You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
-# """
-#
-# tokenizer.pad_token = tokenizer.eos_token
-# input_ids = tokenizer.encode(text,
-# max_length=100,
-# truncation=True,
-# return_tensors="pt")
-# attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
-# output = model.generate(input_ids,
-# max_new_tokens=20,
-# num_return_sequences=1,
-# num_beams=2,
-# attention_mask=attention_mask)
-#
-# chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
-# for i, title in enumerate(chapter_titles):
-# print("Caption: ", title)
+# Approach 1
+from transformers import GPTNeoForCausalLM, GPT2Tokenizer
+
+model_name = 'EleutherAI/gpt-neo-1.3B'
+tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+model = GPTNeoForCausalLM.from_pretrained(model_name)
+
+conversation = """
+Summarize the following conversation in 3 key sentences:
+
+We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI .
+Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development .
+Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations .
+Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude .
+Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council .
+Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas .
+"""
+
+input_ids = tokenizer.encode(conversation, return_tensors='pt')
+
+output = model.generate(input_ids,
+                        max_length=30,
+                        num_return_sequences=1)
+
+caption = tokenizer.decode(output[0], skip_special_tokens=True)
+print("Caption:", caption[len(input_ids):])
+
+
+# Approach 2
+import torch
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+model_name = "gpt2"
+tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+model = GPT2LMHeadModel.from_pretrained(model_name)
+
+model.eval()
+
+text = """
+You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
+"""
+
+tokenizer.pad_token = tokenizer.eos_token
+input_ids = tokenizer.encode(text,
+                             max_length=100,
+                             truncation=True,
+                             return_tensors="pt")
+attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
+output = model.generate(input_ids,
+                        max_new_tokens=20,
+                        num_return_sequences=1,
+                        num_beams=2,
+                        attention_mask=attention_mask)
+
+chapter_titles = [tokenizer.decode(output[i], skip_special_tokens=True) for i in range(output.shape[0])]
+for i, title in enumerate(chapter_titles):
+    print("Caption: ", title)
 
 # Approach 3
 
 import torch
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
 
 
 def generate_response(conversation, max_length=100):
     input_text = ""

@@ -79,20 +80,22 @@ def generate_response(conversation, max_length=100):
     response = tokenizer.decode(output[0], skip_special_tokens=True)
     return response
 
 
 if __name__ == "__main__":
 
+    # Call appropriate approach from the main while experimenting
     model_name = "gpt2"
     model = GPT2LMHeadModel.from_pretrained(model_name)
     tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 
     sample_chunks = [
         "You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . "
     ]
 
     conversation = [
-        {"role": "system", "content": "Summarize this text" },
+        {"role": "system", "content": "Summarize this text"},
         {"role": "user", "content": " text : " + sample_chunks[0]},
     ]
 
     response = generate_response(conversation)
     print("Response:", response)
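A sharp edge worth noting in the Approach 1 block above: caption[len(input_ids):] slices the decoded string by a tensor dimension (for a batched tensor, len(input_ids) is 1), so the printed caption still contains essentially the whole prompt. The usual fix, and the pattern this same commit uses elsewhere (summary_ids = output[0, input_length:]), is to drop the prompt in token space before decoding; a minimal sketch:

# Slice off the prompt tokens, then decode only the newly generated ids.
input_length = input_ids.shape[1]          # number of prompt tokens
generated_ids = output[0, input_length:]   # ids produced after the prompt
caption = tokenizer.decode(generated_ids, skip_special_tokens=True)
print("Caption:", caption)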
Modified file (its usage comment references it as incsum.py):

@@ -1,9 +1,11 @@
+import spacy
+import sys
 
 
 # Observe the incremental summaries by performing summaries in chunks
 with open("transcript.txt") as f:
     transcription = f.read()
 
-import spacy
 
 
 def split_text_file(filename, token_count):
     nlp = spacy.load('en_core_web_md')

@@ -26,8 +28,9 @@ def split_text_file(filename, token_count):
 
     return parts
 
+
 # Set the chunk length here to split the transcript and test
-MAX_CHUNK_LENGTH=1000
+MAX_CHUNK_LENGTH = 1000
 
 chunks = split_text_file("transcript.txt", MAX_CHUNK_LENGTH)
 print("Number of chunks", len(chunks))

@@ -41,19 +44,17 @@ with open("chunks" + str(MAX_CHUNK_LENGTH) + ".txt", "a") as f:
 # ex. python incsum.py 1 => will run approach 1
 # If no input, will run all approaches
 
-import sys
 try:
     index = sys.argv[1]
 except:
     index = None
 
-
 # Approach 1 : facebook/bart-large-cnn
 if index == "1" or index is None:
-    SUMMARY_MODEL="facebook/bart-large-cnn"
-    MIN_LENGTH=5
-    MAX_LENGTH=10
-    BEAM_SIZE=2
+    SUMMARY_MODEL = "facebook/bart-large-cnn"
+    MIN_LENGTH = 5
+    MAX_LENGTH = 10
+    BEAM_SIZE = 2
 
     print("Performing chunk summary : " + SUMMARY_MODEL)

@@ -81,7 +82,6 @@ if index == "1" or index is None:
         for summary in summaries:
             f.write(summary + "\n\n")
 
-
 # Approach 2
 if index == "2" or index is None:
     print("Performing chunk summary : " + "gpt-neo-1.3B")

@@ -108,14 +108,14 @@ if index == "2" or index is None:
[indentation-only changes in this hunk; leading whitespace was not captured in this view]
                                 max_length=max_length,
                                 attention_mask=attention_mask,
                                 pad_token_id=model.config.eos_token_id,
                                 num_beams=4,
                                 length_penalty=2.0,
                                 early_stopping=True)
         summary_ids = output[0, input_length:]
         summary = tokenizer.decode(summary_ids, skip_special_tokens=True)
         summaries.append(summary)
         with open("gptneo1.3B-summaries.txt", "a") as f:
             f.write(summary + "\n\n")
 
 # Approach 3
 if index == "3" or index is None:

@@ -155,4 +155,3 @@ if index == "3" or index is None:
     with open("mpt-7b-summaries.txt", "a") as f:
         for summary in summaries:
             f.write(summary + "\n\n")
-
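The Approach 1 constants above (SUMMARY_MODEL, MIN_LENGTH, MAX_LENGTH, BEAM_SIZE) come from the diff, but the call that consumes them falls outside the visible hunks. A minimal sketch of how such constants typically drive a Hugging Face summarization pipeline, with the wiring assumed rather than taken from the commit:

from transformers import pipeline

SUMMARY_MODEL = "facebook/bart-large-cnn"
MIN_LENGTH = 5
MAX_LENGTH = 10
BEAM_SIZE = 2

# chunks as produced by split_text_file() above; extra generation kwargs
# such as num_beams are forwarded to model.generate().
summarizer = pipeline("summarization", model=SUMMARY_MODEL)
summaries = [
    summarizer(chunk,
               min_length=MIN_LENGTH,
               max_length=MAX_LENGTH,
               num_beams=BEAM_SIZE)[0]["summary_text"]
    for chunk in chunks
]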
Modified file (an OpenAI trial; name not captured in this view):

@@ -11,14 +11,15 @@ openai.api_key = ""
 # to incremental summarize, user prompt used : "summarize this conversation in a few sentences by taking key points"
 # max_tokens=300
 
-sample_chunks = ["You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
+sample_chunks = [
+    "You all just came off of your incredible Google Cloud next conference where you released a wide variety of functionality and features and new products across artisan television and also across the entire sort of cloud ecosystem . You want to just first by walking through , first start by walking through all the innovations that you sort of released and what you 're excited about when you come to Google Cloud ? Now our vision is super simple . If you look at what smartphones did for a consumer , you know they took a computer and internet browser , a communication device , and a camera , and made it so that it 's in everybody 's pocket , so it really brought computation to every person . We feel that , you know , our , what we 're trying to do is take all the technological innovation that Google 's doing , but make it super simple so that everyone can consume it . And so that includes our global data center footprint , all the new types of hardware and large-scale systems we work on , the software that we 're making available for people to do high-scale computation , tools for data processing , tools for cybersecurity , processing , tools for cyber security , tools for machine learning , but make it so simple that everyone can use it . And every step that we do to simplify things for people , we think adoption can grow . And so that 's a lot of what we 've done these last three , four years , and we made a number of announcements that next in machine learning and AI in particular , you know , we look at our work as four elements , how we take our large-scale compute systems that were building for AI and how we make that available to everybody . Second , what we 're doing with the software stacks and top of it , things like jacks and other things and how we 're making those available to everybody . Third is advances because different people have different levels of expertise . Some people say I need the hardware to build my own large language model or algorithm . Other people say , look , I really need to use a building block . You guys give me . So , 30s we 've done a lot with AutoML and we announce new capability for image , video , and translation to make it available to everybody . And then lastly , we 're also building completely packaged solutions for some areas and we announce some new stuff . ",
     " We 're joined next by Thomas Curian , CEO of Google Cloud , and Alexander Wang , CEO and founder of Scale AI . Thomas joined Google in November 2018 as the CEO of Google Cloud . Prior to Google , Thomas spent 22 years at Oracle , where most recently he was president of product development . Before that , Thomas worked at McKinsey as a business analyst and engagement manager . His nearly 30 years of experience have given him a deep knowledge of engineering enterprise relationships and leadership of large organizations . Thomas 's degrees include an MBA in administration and management from Stanford University , as an RJ Miller scholar and a BSEE in electrical engineering and computer science from Princeton University , where he graduated suma cum laude . Thomas serves as a member of the Stanford graduate School of Business Advisory Council and Princeton University School of Engineering Advisory Council . Please welcome to the stage , Thomas Curian and Alexander Wang . This is a super exciting conversation . Thanks for being here , Thomas ."]
 
 conversation = [
     {"role": "system",
      "content": sample_chunks[1]},
     {"role": "user",
      "content": "summarize this conversation in a few sentences by taking key points"}
 ]
 
 model = "gpt-3.5-turbo"
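For reference, a minimal sketch of how such a conversation is typically sent with the pre-1.0 openai client this file appears to use (note openai.api_key = "" in the hunk header). The call itself is outside the visible hunks, so the exact invocation is an assumption; max_tokens=300 matches the value mentioned in the comments above:

import openai

openai.api_key = ""  # set from the environment in practice

# Legacy (pre-1.0) openai-python chat call.
completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=conversation,
    max_tokens=300,
)
print(completion.choices[0].message["content"])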
trials/whisper-jax/__init__.py (new file, empty)
Modified file (a whisper-jax YouTube transcription script; name not captured in this view):

@@ -18,11 +18,11 @@ import nltk
 import yt_dlp as youtube_dl
 from whisper_jax import FlaxWhisperPipline
 
-from utils.file_utils import download_files, upload_files
-from utils.log_utils import logger
-from utils.run_utils import config
-from utils.text_utilities import post_process_transcription, summarize
-from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud
+from ...utils.file_utils import download_files, upload_files
+from ...utils.log_utils import logger
+from ...utils.run_utils import config
+from ...utils.text_utilities import post_process_transcription, summarize
+from ...utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud
 
 nltk.download('punkt', quiet=True)
 nltk.download('stopwords', quiet=True)

@@ -30,8 +30,8 @@ nltk.download('stopwords', quiet=True)
 WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]
 NOW = datetime.now()
 
-if not os.path.exists('./artefacts'):
-    os.makedirs('./artefacts')
+if not os.path.exists('../../artefacts'):
+    os.makedirs('../../artefacts')
 
 
 def init_argparse() -> argparse.ArgumentParser:

@@ -91,7 +91,7 @@ def main():
         # Download the audio
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
             ydl.download([args.location])
-        media_file = "./artefacts/audio.mp3"
+        media_file = "../artefacts/audio.mp3"
 
         logger.info("Saved downloaded YouTube video to: " + media_file)
     else:

Modified file (a whisper-jax live-transcription script using pynput; name not captured in this view):

@@ -10,11 +10,11 @@ from pynput import keyboard
 from termcolor import colored
 from whisper_jax import FlaxWhisperPipline
 
-from utils.file_utils import upload_files
-from utils.log_utils import logger
-from utils.run_utils import config
-from utils.text_utilities import post_process_transcription, summarize
-from utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud
+from ...utils.file_utils import upload_files
+from ...utils.log_utils import logger
+from ...utils.run_utils import config
+from ...utils.text_utilities import post_process_transcription, summarize
+from ...utils.viz_utilities import create_talk_diff_scatter_viz, create_wordcloud
 
 WHISPER_MODEL_SIZE = config['DEFAULT']["WHISPER_MODEL_SIZE"]
 
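Both scripts build on whisper_jax's FlaxWhisperPipline (spelled without the second "e", as in the imports above). A minimal sketch of the pipeline usage these files rely on; the checkpoint string built from WHISPER_MODEL_SIZE is an assumption about how the config value is consumed:

from whisper_jax import FlaxWhisperPipline

WHISPER_MODEL_SIZE = "large-v2"  # read from config in the diff

# JIT-compiles on the first call; subsequent calls reuse the compiled graph.
pipeline = FlaxWhisperPipline("openai/whisper-" + WHISPER_MODEL_SIZE)
outputs = pipeline("audio.mp3", task="transcribe", return_timestamps=True)
print(outputs["text"])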