flake8 warnings fix

This commit is contained in:
Gokul Mohanarangan
2023-07-11 14:06:20 +05:30
parent 88af112131
commit d962ff1712
10 changed files with 122 additions and 70 deletions

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# summarize https://www.youtube.com/watch?v=imzTxoEDH_g --transcript=transcript.txt summary.txt
# summarize https://www.youtube.com/watch?v=imzTxoEDH_g
# summarize https://www.sprocket.org/video/cheesemaking.mp4 summary.txt
# summarize podcast.mp3 summary.txt
@@ -14,7 +14,6 @@ from urllib.parse import urlparse
import jax.numpy as jnp
import moviepy.editor
import moviepy.editor
import nltk
import yt_dlp as youtube_dl
from whisper_jax import FlaxWhisperPipline
@@ -39,11 +38,16 @@ def init_argparse() -> argparse.ArgumentParser:
"""
parser = argparse.ArgumentParser(
usage="%(prog)s [OPTIONS] <LOCATION> <OUTPUT>",
description="Creates a transcript of a video or audio file, then summarizes it using ChatGPT."
description="Creates a transcript of a video or audio file, then"
" summarizes it using ChatGPT."
)
parser.add_argument("-l", "--language", help="Language that the summary should be written in", type=str,
default="english", choices=['english', 'spanish', 'french', 'german', 'romanian'])
parser.add_argument("-l", "--language",
help="Language that the summary should be written in",
type=str,
default="english",
choices=['english', 'spanish', 'french', 'german',
'romanian'])
parser.add_argument("location")
return parser
@@ -61,10 +65,12 @@ def main():
media_file = ""
if url.scheme == 'http' or url.scheme == 'https':
# Check if we're being asked to retreive a YouTube URL, which is handled
# diffrently, as we'll use a secondary site to download the video first.
# Check if we're being asked to retrieve a YouTube URL, which is
# handled differently, as we'll use a secondary site to download
# the video first.
if re.search('youtube.com', url.netloc, re.IGNORECASE):
# Download the lowest resolution YouTube video (since we're just interested in the audio).
# Download the lowest resolution YouTube video
# (since we're just interested in the audio).
# It will be saved to the current directory.
logger.info("Downloading YouTube video at url: " + args.location)
@@ -76,7 +82,7 @@ def main():
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'outtmpl': 'audio', # Specify the output file path and name
'outtmpl': 'audio', # Specify output file path and name
}
# Download the audio
@@ -86,7 +92,8 @@ def main():
logger.info("Saved downloaded YouTube video to: " + media_file)
else:
# XXX - Download file using urllib, check if file is audio/video using python-magic
# XXX - Download file using urllib, check if file is
# audio/video using python-magic
logger.info(f"Downloading file at url: {args.location}")
logger.info(" XXX - This method hasn't been implemented yet.")
elif url.scheme == '':
@@ -97,7 +104,7 @@ def main():
if media_file.endswith(".m4a"):
subprocess.run(["ffmpeg", "-i", media_file, f"{media_file}.mp4"])
input_file = f"{media_file}.mp4"
media_file = f"{media_file}.mp4"
else:
print("Unsupported URL scheme: " + url.scheme)
quit()
@@ -106,13 +113,15 @@ def main():
if not media_file.endswith(".mp3"):
try:
video = moviepy.editor.VideoFileClip(media_file)
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3",
delete=False).name
video.audio.write_audiofile(audio_filename, logger=None)
logger.info(f"Extracting audio to: {audio_filename}")
# Handle audio only file
except:
except Exception:
audio = moviepy.editor.AudioFileClip(media_file)
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3",
delete=False).name
audio.write_audiofile(audio_filename, logger=None)
else:
audio_filename = media_file
@@ -132,10 +141,12 @@ def main():
for chunk in whisper_result["chunks"]:
transcript_text += chunk["text"]
with open("./artefacts/transcript_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt", "w") as transcript_file:
with open("./artefacts/transcript_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") +
".txt", "w") as transcript_file:
transcript_file.write(transcript_text)
with open("./artefacts/transcript_with_timestamp_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt",
with open("./artefacts/transcript_with_timestamp_" +
NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt",
"w") as transcript_file_timestamps:
transcript_file_timestamps.write(str(whisper_result))