flake8 warnings fix

commit d962ff1712 (parent 88af112131)
Author: Gokul Mohanarangan
Date: 2023-07-11 14:06:20 +05:30

10 changed files with 122 additions and 70 deletions

----------------------------------------

@@ -47,7 +47,7 @@ async def main():
     logger.info(f"Cancelling {len(tasks)} outstanding tasks")
     await asyncio.gather(*tasks, return_exceptions=True)
-    logger.info(f"Flushing metrics")
+    logger.info(f'{"Flushing metrics"}')
     loop.stop()

     signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
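
Note: the change above addresses flake8 F541 (f-string without any placeholders). A minimal sketch of the rule; the commit keeps the f-prefix by wrapping the literal in a placeholder, though dropping the prefix is the more common fix:

    logger.info(f"Flushing metrics")      # flagged: F541, no placeholder in the f-string
    logger.info("Flushing metrics")       # simplest fix: plain string literal
    logger.info(f'{"Flushing metrics"}')  # the commit's variant: constant expression as placeholder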

----------------------------------------

@@ -74,7 +74,8 @@ class AudioStreamTrack(MediaStreamTrack):
             get_transcription, local_frames, executor=executor
         )
         whisper_result.add_done_callback(
-            lambda f: channel_send(data_channel, str(whisper_result.result()))
+            lambda f: channel_send(data_channel,
+                                   str(whisper_result.result()))
             if (f.result())
             else None
         )
@@ -126,7 +127,8 @@ async def offer(request):
     return web.Response(
         content_type="application/json",
         text=json.dumps(
-            {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
+            {"sdp": pc.localDescription.sdp,
+             "type": pc.localDescription.type}
         ),
     )

----------------------------------------

@@ -37,8 +37,10 @@ class StreamClient:
         self.pcs = set()
         self.time_start = None
         self.queue = asyncio.Queue()
-        self.player = MediaPlayer(':' + str(config['DEFAULT']["AV_FOUNDATION_DEVICE_ID"]),
-                                  format='avfoundation', options={'channels': '2'})
+        self.player = MediaPlayer(
+            ':' + str(config['DEFAULT']["AV_FOUNDATION_DEVICE_ID"]),
+            format='avfoundation',
+            options={'channels': '2'})

     def stop(self):
         self.loop.run_until_complete(self.signaling.close())
@@ -115,7 +117,8 @@ class StreamClient:
             self.channel_log(channel, "<", message)

             if isinstance(message, str) and message.startswith("pong"):
-                elapsed_ms = (self.current_stamp() - int(message[5:])) / 1000
+                elapsed_ms = (self.current_stamp() - int(message[5:]))\
+                    / 1000
                 print(" RTT %.2f ms" % elapsed_ms)

         await pc.setLocalDescription(await pc.createOffer())
@@ -135,7 +138,7 @@ class StreamClient:
         answer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])
         await pc.setRemoteDescription(answer)
-        self.reader = self.worker(f"worker", self.queue)
+        self.reader = self.worker(f'{"worker"}', self.queue)

     def get_reader(self):
         return self.reader

----------------------------------------
utils/__init__.py (new file, empty; 0 additions)

----------------------------------------
@@ -1,4 +1,4 @@
-from loguru import logger
+import loguru


 class SingletonLogger:
@@ -11,7 +11,7 @@ class SingletonLogger:
         :return: SingletonLogger instance
         """
         if not SingletonLogger.__instance:
-            SingletonLogger.__instance = logger
+            SingletonLogger.__instance = loguru.logger
         return SingletonLogger.__instance
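
Note: the class behavior is unchanged; only the bare `logger` name becomes the module-qualified `loguru.logger`. A minimal usage sketch, with the import path and accessor name assumed (neither is visible in this hunk):

    from utils.logger import SingletonLogger  # import path assumed

    log = SingletonLogger.get_instance()  # accessor name hypothetical
    log.info("transcription started")     # standard loguru logger API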

----------------------------------------

@@ -31,7 +31,7 @@ def run_in_executor(func, *args, executor=None, **kwargs):
     """
     callback = partial(func, *args, **kwargs)
     loop = asyncio.get_event_loop()
-    return asyncio.get_event_loop().run_in_executor(executor, callback)
+    return loop.run_in_executor(executor, callback)


 # Genetic type template
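
Note: the fix reuses the `loop` variable that the previous line already binds instead of calling asyncio.get_event_loop() a second time, which is what left `loop` unused (flake8 F841). A short usage sketch of the helper; the blocking callable is invented for illustration:

    import asyncio

    def blocking_upper(text):  # hypothetical CPU-bound function
        return text.upper()

    async def demo():
        # run_in_executor hands the callable to the loop's default executor
        # and returns an awaitable Future
        result = await run_in_executor(blocking_upper, "audio.mp3")
        print(result)  # AUDIO.MP3

    asyncio.run(demo())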

----------------------------------------

@@ -15,7 +15,8 @@ nltk.download('punkt', quiet=True)
 def preprocess_sentence(sentence):
     stop_words = set(stopwords.words('english'))
     tokens = word_tokenize(sentence.lower())
-    tokens = [token for token in tokens if token.isalnum() and token not in stop_words]
+    tokens = [token for token in tokens
+              if token.isalnum() and token not in stop_words]
     return ' '.join(tokens)
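
Note: a worked example of what preprocess_sentence produces under the NLTK English stopword list; the expected output is my reading of the code, not output taken from the repo:

    # 'the', 'at' and 'am' are NLTK stopwords; the commas and '!' fail isalnum()
    preprocess_sentence("The meeting, at 10 AM, covered budgets!")
    # -> 'meeting 10 covered budgets'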
@@ -49,12 +50,14 @@ def remove_almost_alike_sentences(sentences, threshold=0.7):
             sentence1 = preprocess_sentence(sentences[i])
             sentence2 = preprocess_sentence(sentences[j])
             if len(sentence1) != 0 and len(sentence2) != 0:
-                similarity = compute_similarity(sentence1, sentence2)
+                similarity = compute_similarity(sentence1,
+                                                sentence2)
                 if similarity >= threshold:
                     removed_indices.add(max(i, j))

-    filtered_sentences = [sentences[i] for i in range(num_sentences) if i not in removed_indices]
+    filtered_sentences = [sentences[i] for i in range(num_sentences)
+                          if i not in removed_indices]
     return filtered_sentences
@@ -74,11 +77,13 @@ def remove_whisper_repetitive_hallucination(nonduplicate_sentences):
         words = nltk.word_tokenize(sent)
         n_gram_filter = 3
         for i in range(len(words)):
-            if str(words[i:i + n_gram_filter]) in seen and seen[str(words[i:i + n_gram_filter])] == words[
-                i + 1:i + n_gram_filter + 2]:
+            if str(words[i:i + n_gram_filter]) in seen and \
+                    seen[str(words[i:i + n_gram_filter])] == \
+                    words[i + 1:i + n_gram_filter + 2]:
                 pass
             else:
-                seen[str(words[i:i + n_gram_filter])] = words[i + 1:i + n_gram_filter + 2]
+                seen[str(words[i:i + n_gram_filter])] = \
+                    words[i + 1:i + n_gram_filter + 2]
             temp_result += words[i]
             temp_result += " "
         chunk_sentences.append(temp_result)
@@ -88,9 +93,12 @@ def remove_whisper_repetitive_hallucination(nonduplicate_sentences):
 def post_process_transcription(whisper_result):
     transcript_text = ""
     for chunk in whisper_result["chunks"]:
-        nonduplicate_sentences = remove_outright_duplicate_sentences_from_chunk(chunk)
-        chunk_sentences = remove_whisper_repetitive_hallucination(nonduplicate_sentences)
-        similarity_matched_sentences = remove_almost_alike_sentences(chunk_sentences)
+        nonduplicate_sentences = \
+            remove_outright_duplicate_sentences_from_chunk(chunk)
+        chunk_sentences = \
+            remove_whisper_repetitive_hallucination(nonduplicate_sentences)
+        similarity_matched_sentences = \
+            remove_almost_alike_sentences(chunk_sentences)
         chunk["text"] = " ".join(similarity_matched_sentences)
         transcript_text += chunk["text"]
     whisper_result["text"] = transcript_text
@@ -111,18 +119,23 @@ def summarize_chunks(chunks, tokenizer, model):
         input_ids = tokenizer.encode(c, return_tensors='pt')
         input_ids = input_ids.to(device)
         with torch.no_grad():
-            summary_ids = model.generate(input_ids,
-                                         num_beams=int(config["DEFAULT"]["BEAM_SIZE"]), length_penalty=2.0,
-                                         max_length=int(config["DEFAULT"]["MAX_LENGTH"]), early_stopping=True)
-        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+            summary_ids = \
+                model.generate(input_ids,
+                               num_beams=int(config["DEFAULT"]["BEAM_SIZE"]),
+                               length_penalty=2.0,
+                               max_length=int(config["DEFAULT"]["MAX_LENGTH"]),
+                               early_stopping=True)
+        summary = tokenizer.decode(summary_ids[0],
+                                   skip_special_tokens=True)
         summaries.append(summary)
     return summaries


-def chunk_text(text, max_chunk_length=int(config["DEFAULT"]["MAX_CHUNK_LENGTH"])):
+def chunk_text(text,
+               max_chunk_length=int(config["DEFAULT"]["MAX_CHUNK_LENGTH"])):
     """
     Split text into smaller chunks.
-    :param txt: Text to be chunked
+    :param text: Text to be chunked
     :param max_chunk_length: length of chunk
     :return: chunked texts
     """
@@ -140,7 +153,8 @@ def chunk_text(text, max_chunk_length=int(config["DEFAULT"]["MAX_CHUNK_LENGTH"])
 def summarize(transcript_text, timestamp,
-              real_time=False, summarize_using_chunks=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]):
+              real_time=False,
+              summarize_using_chunks=config["DEFAULT"]["SUMMARIZE_USING_CHUNKS"]):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     summary_model = config["DEFAULT"]["SUMMARY_MODEL"]
     if not summary_model:
@@ -157,9 +171,11 @@ def summarize(transcript_text, timestamp,
         output_filename = "real_time_" + output_filename

     if summarize_using_chunks != "YES":
-        inputs = tokenizer.batch_encode_plus([transcript_text], truncation=True, padding='longest',
-                                             max_length=int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"]),
-                                             return_tensors='pt')
+        inputs = tokenizer.\
+            batch_encode_plus([transcript_text], truncation=True,
+                              padding='longest',
+                              max_length=int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"]),
+                              return_tensors='pt')
         inputs = inputs.to(device)

         with torch.no_grad():
@@ -167,8 +183,8 @@ def summarize(transcript_text, timestamp,
                 num_beams=int(config["DEFAULT"]["BEAM_SIZE"]), length_penalty=2.0,
                 max_length=int(config["DEFAULT"]["MAX_LENGTH"]), early_stopping=True)
-        decoded_summaries = [tokenizer.decode(summary, skip_special_tokens=True, clean_up_tokenization_spaces=False) for
-                             summary in summaries]
+        decoded_summaries = [tokenizer.decode(summary, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+                             for summary in summaries]
         summary = " ".join(decoded_summaries)
         with open(output_filename, 'w') as f:
             f.write(summary.strip() + "\n")
@@ -176,7 +192,8 @@ def summarize(transcript_text, timestamp,
         logger.info("Breaking transcript into smaller chunks")
         chunks = chunk_text(transcript_text)
-        logger.info(f"Transcript broken into {len(chunks)} chunks of at most 500 words")  # TODO fix variable
+        logger.info(f"Transcript broken into {len(chunks)} "
+                    f"chunks of at most 500 words")
         logger.info(f"Writing summary text to: {output_filename}")
         with open(output_filename, 'w') as f:

----------------------------------------

@@ -2,7 +2,6 @@ import ast
 import collections
 import os
 import pickle
-from pathlib import Path

 import matplotlib.pyplot as plt
 import pandas as pd
@@ -14,7 +13,8 @@ from wordcloud import STOPWORDS, WordCloud
 en = spacy.load('en_core_web_md')
 spacy_stopwords = en.Defaults.stop_words
-STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))).union(set(spacy_stopwords))
+STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))).\
+    union(set(spacy_stopwords))


 def create_wordcloud(timestamp, real_time=False):
@@ -24,7 +24,8 @@ def create_wordcloud(timestamp, real_time=False):
     """
     filename = "transcript"
     if real_time:
-        filename = "real_time_" + filename + "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
+        filename = "real_time_" + filename + "_" +\
+            timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
     else:
         filename += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -46,7 +47,8 @@ def create_wordcloud(timestamp, real_time=False):
     wordcloud_name = "wordcloud"
     if real_time:
-        wordcloud_name = "real_time_" + wordcloud_name + "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
+        wordcloud_name = "real_time_" + wordcloud_name + "_" +\
+            timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
     else:
         wordcloud_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
@@ -66,7 +68,6 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
     agenda_topics = []
     agenda = []
     # Load the agenda
-    path = Path(__file__)
     with open(os.path.join(os.getcwd(), "agenda-headers.txt"), "r") as f:
         for line in f.readlines():
             if line.strip():
@@ -76,9 +77,11 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
     # Load the transcription with timestamp
     filename = ""
     if real_time:
-        filename = "real_time_transcript_with_timestamp_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
+        filename = "real_time_transcript_with_timestamp_" +\
+            timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
     else:
-        filename = "transcript_with_timestamp_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
+        filename = "transcript_with_timestamp_" +\
+            timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"

     with open(filename) as f:
         transcription_timestamp_text = f.read()
@@ -94,7 +97,8 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
     ts_to_topic_mapping_top_1 = {}
     ts_to_topic_mapping_top_2 = {}
-    # Also create a mapping of the different timestamps in which each topic was covered
+    # Also create a mapping of the different timestamps
+    # in which each topic was covered
     topic_to_ts_mapping_top_1 = collections.defaultdict(list)
     topic_to_ts_mapping_top_2 = collections.defaultdict(list)
@@ -105,7 +109,8 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
         topic_similarities = []
         for item in range(len(agenda)):
             item_doc = nlp(agenda[item])
-            # if not doc_transcription or not all(token.has_vector for token in doc_transcription):
+            # if not doc_transcription or not all
+            # (token.has_vector for token in doc_transcription):
             if not doc_transcription:
                 continue
             similarity = doc_transcription.similarity(item_doc)
@@ -129,8 +134,10 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
         :param record:
         :return:
         """
-        record["ts_to_topic_mapping_top_1"] = ts_to_topic_mapping_top_1[record["timestamp"]]
-        record["ts_to_topic_mapping_top_2"] = ts_to_topic_mapping_top_2[record["timestamp"]]
+        record["ts_to_topic_mapping_top_1"] = \
+            ts_to_topic_mapping_top_1[record["timestamp"]]
+        record["ts_to_topic_mapping_top_2"] = \
+            ts_to_topic_mapping_top_2[record["timestamp"]]
         return record

     df = df.apply(create_new_columns, axis=1)
@@ -151,7 +158,8 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
     # Save df, mappings for further experimentation
     df_name = "df"
     if real_time:
-        df_name = "real_time_" + df_name + "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
+        df_name = "real_time_" + df_name + "_" +\
+            timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
     else:
         df_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
     df.to_pickle(df_name)
@@ -161,7 +169,8 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
     mappings_name = "mappings"
     if real_time:
-        mappings_name = "real_time_" + mappings_name + "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
+        mappings_name = "real_time_" + mappings_name + "_" +\
+            timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
     else:
         mappings_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
     pickle.dump(my_mappings, open(mappings_name, "wb"))
@@ -197,6 +206,8 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
         transform=st.Scalers.dense_rank
     )
     if real_time:
-        open('./artefacts/real_time_scatter_' + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
+        open('./artefacts/real_time_scatter_' +
+             timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
     else:
-        open('./artefacts/scatter_' + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
+        open('./artefacts/scatter_' +
+             timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)

----------------------------------------

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3

-# summarize https://www.youtube.com/watch?v=imzTxoEDH_g --transcript=transcript.txt summary.txt
+# summarize https://www.youtube.com/watch?v=imzTxoEDH_g
 # summarize https://www.sprocket.org/video/cheesemaking.mp4 summary.txt
 # summarize podcast.mp3 summary.txt
@@ -14,7 +14,6 @@ from urllib.parse import urlparse
 import jax.numpy as jnp
 import moviepy.editor
-import moviepy.editor
 import nltk
 import yt_dlp as youtube_dl
 from whisper_jax import FlaxWhisperPipline
@@ -39,11 +38,16 @@ def init_argparse() -> argparse.ArgumentParser:
     """
     parser = argparse.ArgumentParser(
         usage="%(prog)s [OPTIONS] <LOCATION> <OUTPUT>",
-        description="Creates a transcript of a video or audio file, then summarizes it using ChatGPT."
+        description="Creates a transcript of a video or audio file, then"
+                    " summarizes it using ChatGPT."
     )
-    parser.add_argument("-l", "--language", help="Language that the summary should be written in", type=str,
-                        default="english", choices=['english', 'spanish', 'french', 'german', 'romanian'])
+    parser.add_argument("-l", "--language",
+                        help="Language that the summary should be written in",
+                        type=str,
+                        default="english",
+                        choices=['english', 'spanish', 'french', 'german',
+                                 'romanian'])
     parser.add_argument("location")
     return parser
@@ -61,10 +65,12 @@ def main():
     media_file = ""
     if url.scheme == 'http' or url.scheme == 'https':
-        # Check if we're being asked to retreive a YouTube URL, which is handled
-        # diffrently, as we'll use a secondary site to download the video first.
+        # Check if we're being asked to retreive a YouTube URL, which is
+        # handled differently, as we'll use a secondary site to download
+        # the video first.
         if re.search('youtube.com', url.netloc, re.IGNORECASE):
-            # Download the lowest resolution YouTube video (since we're just interested in the audio).
+            # Download the lowest resolution YouTube video
+            # (since we're just interested in the audio).
             # It will be saved to the current directory.
             logger.info("Downloading YouTube video at url: " + args.location)
@@ -76,7 +82,7 @@ def main():
             'preferredcodec': 'mp3',
             'preferredquality': '192',
         }],
-        'outtmpl': 'audio',  # Specify the output file path and name
+        'outtmpl': 'audio',  # Specify output file path and name
     }

     # Download the audio
@@ -86,7 +92,8 @@ def main():
             logger.info("Saved downloaded YouTube video to: " + media_file)
         else:
-            # XXX - Download file using urllib, check if file is audio/video using python-magic
+            # XXX - Download file using urllib, check if file is
+            # audio/video using python-magic
             logger.info(f"Downloading file at url: {args.location}")
             logger.info(" XXX - This method hasn't been implemented yet.")
     elif url.scheme == '':
@@ -97,7 +104,7 @@ def main():
         if media_file.endswith(".m4a"):
             subprocess.run(["ffmpeg", "-i", media_file, f"{media_file}.mp4"])
-            input_file = f"{media_file}.mp4"
+            media_file = f"{media_file}.mp4"
     else:
         print("Unsupported URL scheme: " + url.scheme)
         quit()
@@ -106,13 +113,15 @@ def main():
     if not media_file.endswith(".mp3"):
         try:
             video = moviepy.editor.VideoFileClip(media_file)
-            audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
+            audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3",
+                                                         delete=False).name
             video.audio.write_audiofile(audio_filename, logger=None)
             logger.info(f"Extracting audio to: {audio_filename}")
         # Handle audio only file
-        except:
+        except Exception:
             audio = moviepy.editor.AudioFileClip(media_file)
-            audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
+            audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3",
+                                                         delete=False).name
             audio.write_audiofile(audio_filename, logger=None)
     else:
         audio_filename = media_file
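
Note: the except: -> except Exception: change above is the flake8 E722 fix. A bare except also traps SystemExit and KeyboardInterrupt, so a Ctrl-C during audio extraction would have been swallowed; narrowing to Exception lets those propagate. A minimal illustration with invented function names:

    try:
        extract_audio()
    except:            # E722: also catches KeyboardInterrupt and SystemExit
        fall_back()

    try:
        extract_audio()
    except Exception:  # process-control exceptions still propagate
        fall_back()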
@@ -132,10 +141,12 @@ def main():
     for chunk in whisper_result["chunks"]:
         transcript_text += chunk["text"]

-    with open("./artefacts/transcript_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt", "w") as transcript_file:
+    with open("./artefacts/transcript_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") +
+              ".txt", "w") as transcript_file:
         transcript_file.write(transcript_text)

-    with open("./artefacts/transcript_with_timestamp_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt",
-              "w") as transcript_file_timestamps:
+    with open("./artefacts/transcript_with_timestamp_" +
+              NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt",
+              "w") as transcript_file_timestamps:
         transcript_file_timestamps.write(str(whisper_result))

----------------------------------------

@@ -30,7 +30,8 @@ def main():
     p = pyaudio.PyAudio()
     AUDIO_DEVICE_ID = -1
     for i in range(p.get_device_count()):
-        if p.get_device_info_by_index(i)["name"] == config["DEFAULT"]["BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME"]:
+        if p.get_device_info_by_index(i)["name"] == \
+                config["DEFAULT"]["BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME"]:
             AUDIO_DEVICE_ID = i
     audio_devices = p.get_device_info_by_index(AUDIO_DEVICE_ID)
     stream = p.open(
@@ -42,7 +43,8 @@ def main():
         input_device_index=int(audio_devices['index'])
     )

-    pipeline = FlaxWhisperPipline("openai/whisper-" + config["DEFAULT"]["WHISPER_REAL_TIME_MODEL_SIZE"],
+    pipeline = FlaxWhisperPipline("openai/whisper-" +
+                                  config["DEFAULT"]["WHISPER_REAL_TIME_MODEL_SIZE"],
                                   dtype=jnp.float16,
                                   batch_size=16)
@@ -69,7 +71,8 @@ def main():
             frames = []
             start_time = time.time()
             for i in range(0, int(RATE / FRAMES_PER_BUFFER * RECORD_SECONDS)):
-                data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
+                data = stream.read(FRAMES_PER_BUFFER,
+                                   exception_on_overflow=False)
                 frames.append(data)
             end_time = time.time()
@@ -87,7 +90,8 @@ def main():
             if end is None:
                 end = start + 15.0
             duration = end - start
-            item = {'timestamp': (last_transcribed_time, last_transcribed_time + duration),
+            item = {'timestamp': (last_transcribed_time,
+                                  last_transcribed_time + duration),
                     'text': whisper_result['text'],
                     'stats': (str(end_time - start_time), str(duration))
                     }
@@ -97,15 +101,19 @@ def main():
             print(colored("<START>", "yellow"))
             print(colored(whisper_result['text'], 'green'))
-            print(colored("<END> Recorded duration: " + str(end_time - start_time) + " | Transcribed duration: " +
+            print(colored("<END> Recorded duration: " +
+                          str(end_time - start_time) +
+                          " | Transcribed duration: " +
                           str(duration), "yellow"))
     except Exception as e:
         print(e)
     finally:
-        with open("real_time_transcript_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt", "w") as f:
+        with open("real_time_transcript_" +
+                  NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt", "w") as f:
             f.write(transcription)
-        with open("real_time_transcript_with_timestamp_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt", "w") as f:
+        with open("real_time_transcript_with_timestamp_" +
+                  NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt", "w") as f:
             transcript_with_timestamp["text"] = transcription
             f.write(str(transcript_with_timestamp))