mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
update
This commit is contained in:
@@ -7,5 +7,4 @@ OPENAI_APIKEY=
|
|||||||
WHISPER_MODEL_SIZE=tiny
|
WHISPER_MODEL_SIZE=tiny
|
||||||
AWS_ACCESS_KEY=***REMOVED***
|
AWS_ACCESS_KEY=***REMOVED***
|
||||||
AWS_SECRET_KEY=***REMOVED***
|
AWS_SECRET_KEY=***REMOVED***
|
||||||
BUCKET_NAME='reflector-bucket'
|
BUCKET_NAME='reflector-bucket'
|
||||||
|
|
||||||
54
whisjax.py
54
whisjax.py
@@ -223,32 +223,32 @@ def create_talk_diff_scatter_viz():
|
|||||||
# to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") )
|
# to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") )
|
||||||
|
|
||||||
# pick the 2 most matched topic to be used for plotting
|
# pick the 2 most matched topic to be used for plotting
|
||||||
topic_times = collections.defaultdict(int)
|
# topic_times = collections.defaultdict(int)
|
||||||
for key in ts_to_topic_mapping_top_1.keys():
|
# for key in ts_to_topic_mapping_top_1.keys():
|
||||||
duration = key[1] - key[0]
|
# duration = key[1] - key[0]
|
||||||
topic_times[ts_to_topic_mapping_top_1[key]] += duration
|
# topic_times[ts_to_topic_mapping_top_1[key]] += duration
|
||||||
|
#
|
||||||
topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
|
# topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
|
||||||
|
#
|
||||||
cat_1 = topic_times[0][0]
|
# cat_1 = topic_times[0][0]
|
||||||
cat_1_name = topic_times[0][0]
|
# cat_1_name = topic_times[0][0]
|
||||||
cat_2_name = topic_times[1][0]
|
# cat_2_name = topic_times[1][0]
|
||||||
|
#
|
||||||
# Scatter plot of topics
|
# # Scatter plot of topics
|
||||||
df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
|
# df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
|
||||||
corpus = st.CorpusFromParsedDocuments(
|
# corpus = st.CorpusFromParsedDocuments(
|
||||||
df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'
|
# df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'
|
||||||
).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
|
# ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
|
||||||
html = st.produce_scattertext_explorer(
|
# html = st.produce_scattertext_explorer(
|
||||||
corpus,
|
# corpus,
|
||||||
category=cat_1,
|
# category=cat_1,
|
||||||
category_name=cat_1_name,
|
# category_name=cat_1_name,
|
||||||
not_category_name=cat_2_name,
|
# not_category_name=cat_2_name,
|
||||||
minimum_term_frequency=0, pmi_threshold_coefficient=0,
|
# minimum_term_frequency=0, pmi_threshold_coefficient=0,
|
||||||
width_in_pixels=1000,
|
# width_in_pixels=1000,
|
||||||
transform=st.Scalers.dense_rank
|
# transform=st.Scalers.dense_rank
|
||||||
)
|
# )
|
||||||
open('./demo_compact.html', 'w').write(html)
|
# open('./demo_compact.html', 'w').write(html)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -333,7 +333,7 @@ def main():
|
|||||||
|
|
||||||
# S3 : Push artefacts to S3 bucket
|
# S3 : Push artefacts to S3 bucket
|
||||||
files_to_upload = ["transcript.txt", "transcript_timestamps.txt",
|
files_to_upload = ["transcript.txt", "transcript_timestamps.txt",
|
||||||
"demo_compact.html", "df.pkl",
|
"df.pkl",
|
||||||
"wordcloud.png", "mappings.pkl"]
|
"wordcloud.png", "mappings.pkl"]
|
||||||
upload_files(files_to_upload)
|
upload_files(files_to_upload)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user