Fix YouTube download bug

This commit is contained in:
Gokul Mohanarangan
2023-07-07 14:28:43 +05:30
parent 42329211c7
commit 4636dc030b
5 changed files with 32 additions and 17 deletions

View File

@@ -4,7 +4,7 @@ KMP_DUPLICATE_LIB_OK=TRUE
# Export OpenAI API Key # Export OpenAI API Key
OPENAI_APIKEY= OPENAI_APIKEY=
# Export Whisper Model Size # Export Whisper Model Size
WHISPER_MODEL_SIZE=medium WHISPER_MODEL_SIZE=tiny
WHISPER_REAL_TIME_MODEL_SIZE=tiny WHISPER_REAL_TIME_MODEL_SIZE=tiny
# AWS config # AWS config
AWS_ACCESS_KEY=***REMOVED*** AWS_ACCESS_KEY=***REMOVED***

View File

@@ -49,3 +49,4 @@ jupyter
seaborn seaborn
matplotlib matplotlib
termcolor termcolor
https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz

View File

@@ -20,7 +20,6 @@ def preprocess_sentence(sentence):
def compute_similarity(sent1, sent2): def compute_similarity(sent1, sent2):
tfidf_vectorizer = TfidfVectorizer() tfidf_vectorizer = TfidfVectorizer()
print("semt1", sent1, sent2)
if sent1 is not None and sent2 is not None: if sent1 is not None and sent2 is not None:
tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2]) tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2])
return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0] return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

View File

@@ -16,7 +16,7 @@ import subprocess
import re import re
import tempfile import tempfile
from loguru import logger from loguru import logger
from pytube import YouTube import yt_dlp as youtube_dl
from urllib.parse import urlparse from urllib.parse import urlparse
from whisper_jax import FlaxWhisperPipline from whisper_jax import FlaxWhisperPipline
@@ -73,9 +73,21 @@ def main():
# It will be saved to the current directory. # It will be saved to the current directory.
logger.info("Downloading YouTube video at url: " + args.location) logger.info("Downloading YouTube video at url: " + args.location)
youtube = YouTube(args.location) # Create options for the download
media_file = youtube.streams.filter(progressive=True, file_extension='mp4').order_by( ydl_opts = {
'resolution').asc().first().download() 'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'outtmpl': 'audio', # Specify the output file path and name
}
# Download the audio
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([args.location])
media_file = "audio.mp3"
logger.info("Saved downloaded YouTube video to: " + media_file) logger.info("Saved downloaded YouTube video to: " + media_file)
else: else:
@@ -96,6 +108,7 @@ def main():
quit() quit()
# Handle video # Handle video
if not media_file.endswith(".mp3"):
try: try:
video = moviepy.editor.VideoFileClip(media_file) video = moviepy.editor.VideoFileClip(media_file)
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
@@ -106,6 +119,8 @@ def main():
audio = moviepy.editor.AudioFileClip(media_file) audio = moviepy.editor.AudioFileClip(media_file)
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
audio.write_audiofile(audio_filename, logger=None) audio.write_audiofile(audio_filename, logger=None)
else:
audio_filename = media_file
logger.info("Finished extracting audio") logger.info("Finished extracting audio")

0
youtube.py Normal file
View File