mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
fix bug in wordcloud
This commit is contained in:
@@ -27,10 +27,8 @@ To setup,
|
|||||||
|
|
||||||
```sh setup_dependencies.sh cuda12```
|
```sh setup_dependencies.sh cuda12```
|
||||||
|
|
||||||
4) ``` pip install -r requirements.txt```
|
|
||||||
|
|
||||||
|
4) Run the Whisper-JAX pipeline. Currently, the repo can take a YouTube video and transcribe/summarize it.
|
||||||
5) Run the Whisper-JAX pipeline. Currently, the repo can take a YouTube video and transcribe/summarize it.
|
|
||||||
|
|
||||||
``` python3 whisjax.py "https://www.youtube.com/watch?v=ihf0S97oxuQ"```
|
``` python3 whisjax.py "https://www.youtube.com/watch?v=ihf0S97oxuQ"```
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
|
|||||||
# Upgrade pip
|
# Upgrade pip
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
|
|
||||||
# Default to CPU Installation of JAX
|
# Default to CPU Installation of JAX
|
||||||
@@ -24,3 +24,10 @@ pip install git+https://github.com/sanchit-gandhi/whisper-jax.git
|
|||||||
# Update to latest version
|
# Update to latest version
|
||||||
pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git
|
pip install --upgrade --no-deps --force-reinstall git+https://github.com/sanchit-gandhi/whisper-jax.git
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# download spacy models
|
||||||
|
export KMP_DUPLICATE_LIB_OK=True
|
||||||
|
python -m spacy download en_core_web_sm
|
||||||
|
python -m spacy download en_core_web_md
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from wordcloud import WordCloud, STOPWORDS
|
from wordcloud import WordCloud, STOPWORDS
|
||||||
from nltk.corpus import stopwords as nltk_stopwords
|
from nltk.corpus import stopwords
|
||||||
import collections
|
import collections
|
||||||
import spacy
|
import spacy
|
||||||
import pickle
|
import pickle
|
||||||
@@ -15,7 +15,7 @@ config.read('config.ini')
|
|||||||
en = spacy.load('en_core_web_md')
|
en = spacy.load('en_core_web_md')
|
||||||
spacy_stopwords = en.Defaults.stop_words
|
spacy_stopwords = en.Defaults.stop_words
|
||||||
|
|
||||||
STOPWORDS = set(STOPWORDS).union(set(nltk_stopwords)).union(set(spacy_stopwords))
|
STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))).union(set(spacy_stopwords))
|
||||||
|
|
||||||
def create_wordcloud(timestamp, real_time=False):
|
def create_wordcloud(timestamp, real_time=False):
|
||||||
"""
|
"""
|
||||||
@@ -195,4 +195,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
|
|||||||
width_in_pixels=1000,
|
width_in_pixels=1000,
|
||||||
transform=st.Scalers.dense_rank
|
transform=st.Scalers.dense_rank
|
||||||
)
|
)
|
||||||
open('./scatter_' + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
|
if real_time:
|
||||||
|
open('./real_time_scatter_' + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
|
||||||
|
else:
|
||||||
|
open('./scatter_' + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + '.html', 'w').write(html)
|
||||||
@@ -26,8 +26,8 @@ from file_utilities import upload_files, download_files
|
|||||||
from viz_utilities import create_wordcloud, create_talk_diff_scatter_viz
|
from viz_utilities import create_wordcloud, create_talk_diff_scatter_viz
|
||||||
from text_utilities import summarize, post_process_transcription
|
from text_utilities import summarize, post_process_transcription
|
||||||
|
|
||||||
nltk.download('punkt')
|
nltk.download('punkt', quiet=True)
|
||||||
nltk.download('stopwords')
|
nltk.download('stopwords', quiet=True)
|
||||||
|
|
||||||
# Configurations can be found in config.ini. Set them properly before executing
|
# Configurations can be found in config.ini. Set them properly before executing
|
||||||
config = configparser.ConfigParser()
|
config = configparser.ConfigParser()
|
||||||
@@ -141,7 +141,8 @@ def main():
|
|||||||
"transcript_with_timestamp_" + suffix + ".txt",
|
"transcript_with_timestamp_" + suffix + ".txt",
|
||||||
"df_" + suffix + ".pkl",
|
"df_" + suffix + ".pkl",
|
||||||
"wordcloud_" + suffix + ".png",
|
"wordcloud_" + suffix + ".png",
|
||||||
"mappings_" + suffix + ".pkl"]
|
"mappings_" + suffix + ".pkl",
|
||||||
|
"scatter_" + suffix + ".html"]
|
||||||
upload_files(files_to_upload)
|
upload_files(files_to_upload)
|
||||||
|
|
||||||
summarize(transcript_text, NOW, False, False)
|
summarize(transcript_text, NOW, False, False)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from viz_utilities import create_wordcloud, create_talk_diff_scatter_viz
|
|||||||
from text_utilities import summarize, post_process_transcription
|
from text_utilities import summarize, post_process_transcription
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
import nltk
|
import nltk
|
||||||
nltk.download('stopwords')
|
nltk.download('stopwords', quiet=True)
|
||||||
|
|
||||||
config = configparser.ConfigParser()
|
config = configparser.ConfigParser()
|
||||||
config.read('config.ini')
|
config.read('config.ini')
|
||||||
@@ -118,7 +118,8 @@ def main():
|
|||||||
"real_time_transcript_with_timestamp" + suffix + ".txt",
|
"real_time_transcript_with_timestamp" + suffix + ".txt",
|
||||||
"real_time_df_" + suffix + ".pkl",
|
"real_time_df_" + suffix + ".pkl",
|
||||||
"real_time_wordcloud_" + suffix + ".png",
|
"real_time_wordcloud_" + suffix + ".png",
|
||||||
"real_time_mappings_" + suffix + ".pkl"]
|
"real_time_mappings_" + suffix + ".pkl",
|
||||||
|
"real_time_scatter_" + suffix + ".html"]
|
||||||
upload_files(files_to_upload)
|
upload_files(files_to_upload)
|
||||||
|
|
||||||
summarize(transcript_with_timestamp["text"], NOW, True, True)
|
summarize(transcript_with_timestamp["text"], NOW, True, True)
|
||||||
|
|||||||
Reference in New Issue
Block a user