mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
flake8 warnings fix
This commit is contained in:
43
whisjax.py
43
whisjax.py
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# summarize https://www.youtube.com/watch?v=imzTxoEDH_g --transcript=transcript.txt summary.txt
|
||||
# summarize https://www.youtube.com/watch?v=imzTxoEDH_g
|
||||
# summarize https://www.sprocket.org/video/cheesemaking.mp4 summary.txt
|
||||
# summarize podcast.mp3 summary.txt
|
||||
|
||||
@@ -14,7 +14,6 @@ from urllib.parse import urlparse
|
||||
|
||||
import jax.numpy as jnp
|
||||
import moviepy.editor
|
||||
import moviepy.editor
|
||||
import nltk
|
||||
import yt_dlp as youtube_dl
|
||||
from whisper_jax import FlaxWhisperPipline
|
||||
@@ -39,11 +38,16 @@ def init_argparse() -> argparse.ArgumentParser:
|
||||
"""
|
||||
parser = argparse.ArgumentParser(
|
||||
usage="%(prog)s [OPTIONS] <LOCATION> <OUTPUT>",
|
||||
description="Creates a transcript of a video or audio file, then summarizes it using ChatGPT."
|
||||
description="Creates a transcript of a video or audio file, then"
|
||||
" summarizes it using ChatGPT."
|
||||
)
|
||||
|
||||
parser.add_argument("-l", "--language", help="Language that the summary should be written in", type=str,
|
||||
default="english", choices=['english', 'spanish', 'french', 'german', 'romanian'])
|
||||
parser.add_argument("-l", "--language",
|
||||
help="Language that the summary should be written in",
|
||||
type=str,
|
||||
default="english",
|
||||
choices=['english', 'spanish', 'french', 'german',
|
||||
'romanian'])
|
||||
parser.add_argument("location")
|
||||
return parser
|
||||
|
||||
@@ -61,10 +65,12 @@ def main():
|
||||
|
||||
media_file = ""
|
||||
if url.scheme == 'http' or url.scheme == 'https':
|
||||
# Check if we're being asked to retreive a YouTube URL, which is handled
|
||||
# diffrently, as we'll use a secondary site to download the video first.
|
||||
# Check if we're being asked to retrieve a YouTube URL, which is
|
||||
# handled differently, as we'll use a secondary site to download
|
||||
# the video first.
|
||||
if re.search('youtube.com', url.netloc, re.IGNORECASE):
|
||||
# Download the lowest resolution YouTube video (since we're just interested in the audio).
|
||||
# Download the lowest resolution YouTube video
|
||||
# (since we're just interested in the audio).
|
||||
# It will be saved to the current directory.
|
||||
logger.info("Downloading YouTube video at url: " + args.location)
|
||||
|
||||
@@ -76,7 +82,7 @@ def main():
|
||||
'preferredcodec': 'mp3',
|
||||
'preferredquality': '192',
|
||||
}],
|
||||
'outtmpl': 'audio', # Specify the output file path and name
|
||||
'outtmpl': 'audio', # Specify output file path and name
|
||||
}
|
||||
|
||||
# Download the audio
|
||||
@@ -86,7 +92,8 @@ def main():
|
||||
|
||||
logger.info("Saved downloaded YouTube video to: " + media_file)
|
||||
else:
|
||||
# XXX - Download file using urllib, check if file is audio/video using python-magic
|
||||
# XXX - Download file using urllib, check if file is
|
||||
# audio/video using python-magic
|
||||
logger.info(f"Downloading file at url: {args.location}")
|
||||
logger.info(" XXX - This method hasn't been implemented yet.")
|
||||
elif url.scheme == '':
|
||||
@@ -97,7 +104,7 @@ def main():
|
||||
|
||||
if media_file.endswith(".m4a"):
|
||||
subprocess.run(["ffmpeg", "-i", media_file, f"{media_file}.mp4"])
|
||||
input_file = f"{media_file}.mp4"
|
||||
media_file = f"{media_file}.mp4"
|
||||
else:
|
||||
print("Unsupported URL scheme: " + url.scheme)
|
||||
quit()
|
||||
@@ -106,13 +113,15 @@ def main():
|
||||
if not media_file.endswith(".mp3"):
|
||||
try:
|
||||
video = moviepy.editor.VideoFileClip(media_file)
|
||||
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
|
||||
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3",
|
||||
delete=False).name
|
||||
video.audio.write_audiofile(audio_filename, logger=None)
|
||||
logger.info(f"Extracting audio to: {audio_filename}")
|
||||
# Handle audio only file
|
||||
except:
|
||||
except Exception:
|
||||
audio = moviepy.editor.AudioFileClip(media_file)
|
||||
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
|
||||
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3",
|
||||
delete=False).name
|
||||
audio.write_audiofile(audio_filename, logger=None)
|
||||
else:
|
||||
audio_filename = media_file
|
||||
@@ -132,10 +141,12 @@ def main():
|
||||
for chunk in whisper_result["chunks"]:
|
||||
transcript_text += chunk["text"]
|
||||
|
||||
with open("./artefacts/transcript_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt", "w") as transcript_file:
|
||||
with open("./artefacts/transcript_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") +
|
||||
".txt", "w") as transcript_file:
|
||||
transcript_file.write(transcript_text)
|
||||
|
||||
with open("./artefacts/transcript_with_timestamp_" + NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt",
|
||||
with open("./artefacts/transcript_with_timestamp_" +
|
||||
NOW.strftime("%m-%d-%Y_%H:%M:%S") + ".txt",
|
||||
"w") as transcript_file_timestamps:
|
||||
transcript_file_timestamps.write(str(whisper_result))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user