mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
fix YouTube download bug
This commit is contained in:
@@ -4,7 +4,7 @@ KMP_DUPLICATE_LIB_OK=TRUE
|
|||||||
# Export OpenAI API Key
|
# Export OpenAI API Key
|
||||||
OPENAI_APIKEY=
|
OPENAI_APIKEY=
|
||||||
# Export Whisper Model Size
|
# Export Whisper Model Size
|
||||||
WHISPER_MODEL_SIZE=medium
|
WHISPER_MODEL_SIZE=tiny
|
||||||
WHISPER_REAL_TIME_MODEL_SIZE=tiny
|
WHISPER_REAL_TIME_MODEL_SIZE=tiny
|
||||||
# AWS config
|
# AWS config
|
||||||
AWS_ACCESS_KEY=***REMOVED***
|
AWS_ACCESS_KEY=***REMOVED***
|
||||||
|
|||||||
@@ -49,3 +49,4 @@ jupyter
|
|||||||
seaborn
|
seaborn
|
||||||
matplotlib
|
matplotlib
|
||||||
termcolor
|
termcolor
|
||||||
|
https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ def preprocess_sentence(sentence):
|
|||||||
|
|
||||||
def compute_similarity(sent1, sent2):
|
def compute_similarity(sent1, sent2):
|
||||||
tfidf_vectorizer = TfidfVectorizer()
|
tfidf_vectorizer = TfidfVectorizer()
|
||||||
print("semt1", sent1, sent2)
|
|
||||||
if sent1 is not None and sent2 is not None:
|
if sent1 is not None and sent2 is not None:
|
||||||
tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2])
|
tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2])
|
||||||
return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
|
return cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
|
||||||
|
|||||||
23
whisjax.py
23
whisjax.py
@@ -16,7 +16,7 @@ import subprocess
|
|||||||
import re
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from pytube import YouTube
|
import yt_dlp as youtube_dl
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from whisper_jax import FlaxWhisperPipline
|
from whisper_jax import FlaxWhisperPipline
|
||||||
@@ -73,9 +73,21 @@ def main():
|
|||||||
# It will be saved to the current directory.
|
# It will be saved to the current directory.
|
||||||
logger.info("Downloading YouTube video at url: " + args.location)
|
logger.info("Downloading YouTube video at url: " + args.location)
|
||||||
|
|
||||||
youtube = YouTube(args.location)
|
# Create options for the download
|
||||||
media_file = youtube.streams.filter(progressive=True, file_extension='mp4').order_by(
|
ydl_opts = {
|
||||||
'resolution').asc().first().download()
|
'format': 'bestaudio/best',
|
||||||
|
'postprocessors': [{
|
||||||
|
'key': 'FFmpegExtractAudio',
|
||||||
|
'preferredcodec': 'mp3',
|
||||||
|
'preferredquality': '192',
|
||||||
|
}],
|
||||||
|
'outtmpl': 'audio', # Specify the output file path and name
|
||||||
|
}
|
||||||
|
|
||||||
|
# Download the audio
|
||||||
|
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
ydl.download([args.location])
|
||||||
|
media_file = "audio.mp3"
|
||||||
|
|
||||||
logger.info("Saved downloaded YouTube video to: " + media_file)
|
logger.info("Saved downloaded YouTube video to: " + media_file)
|
||||||
else:
|
else:
|
||||||
@@ -96,6 +108,7 @@ def main():
|
|||||||
quit()
|
quit()
|
||||||
|
|
||||||
# Handle video
|
# Handle video
|
||||||
|
if not media_file.endswith(".mp3"):
|
||||||
try:
|
try:
|
||||||
video = moviepy.editor.VideoFileClip(media_file)
|
video = moviepy.editor.VideoFileClip(media_file)
|
||||||
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
|
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
|
||||||
@@ -106,6 +119,8 @@ def main():
|
|||||||
audio = moviepy.editor.AudioFileClip(media_file)
|
audio = moviepy.editor.AudioFileClip(media_file)
|
||||||
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
|
audio_filename = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
|
||||||
audio.write_audiofile(audio_filename, logger=None)
|
audio.write_audiofile(audio_filename, logger=None)
|
||||||
|
else:
|
||||||
|
audio_filename = media_file
|
||||||
|
|
||||||
logger.info("Finished extracting audio")
|
logger.info("Finished extracting audio")
|
||||||
|
|
||||||
|
|||||||
0
youtube.py
Normal file
0
youtube.py
Normal file
Reference in New Issue
Block a user