diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 00000000..099db8af Binary files /dev/null and b/.DS_Store differ diff --git a/config.ini b/config.ini index 9c96931a..9d4a1d6a 100644 --- a/config.ini +++ b/config.ini @@ -7,5 +7,4 @@ OPENAI_APIKEY= WHISPER_MODEL_SIZE=tiny AWS_ACCESS_KEY=***REMOVED*** AWS_SECRET_KEY=***REMOVED*** -BUCKET_NAME='reflector-bucket' - +BUCKET_NAME='reflector-bucket' \ No newline at end of file diff --git a/whisjax.py b/whisjax.py index 38ddec9f..3ce31c67 100644 --- a/whisjax.py +++ b/whisjax.py @@ -223,32 +223,32 @@ def create_talk_diff_scatter_viz(): # to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") ) # pick the 2 most matched topic to be used for plotting - topic_times = collections.defaultdict(int) - for key in ts_to_topic_mapping_top_1.keys(): - duration = key[1] - key[0] - topic_times[ts_to_topic_mapping_top_1[key]] += duration - - topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True) - - cat_1 = topic_times[0][0] - cat_1_name = topic_times[0][0] - cat_2_name = topic_times[1][0] - - # Scatter plot of topics - df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)) - corpus = st.CorpusFromParsedDocuments( - df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse' - ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) - html = st.produce_scattertext_explorer( - corpus, - category=cat_1, - category_name=cat_1_name, - not_category_name=cat_2_name, - minimum_term_frequency=0, pmi_threshold_coefficient=0, - width_in_pixels=1000, - transform=st.Scalers.dense_rank - ) - open('./demo_compact.html', 'w').write(html) + # topic_times = collections.defaultdict(int) + # for key in ts_to_topic_mapping_top_1.keys(): + # duration = key[1] - key[0] + # topic_times[ts_to_topic_mapping_top_1[key]] += duration + # + # topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True) + # + # cat_1 = topic_times[0][0] + # cat_1_name = topic_times[0][0] + # cat_2_name = topic_times[1][0] + # + # # Scatter plot of topics + # df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)) + # corpus = st.CorpusFromParsedDocuments( + # df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse' + # ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) + # html = st.produce_scattertext_explorer( + # corpus, + # category=cat_1, + # category_name=cat_1_name, + # not_category_name=cat_2_name, + # minimum_term_frequency=0, pmi_threshold_coefficient=0, + # width_in_pixels=1000, + # transform=st.Scalers.dense_rank + # ) + # open('./demo_compact.html', 'w').write(html) def main(): @@ -333,7 +333,7 @@ def main(): # S3 : Push artefacts to S3 bucket files_to_upload = ["transcript.txt", "transcript_timestamps.txt", - "demo_compact.html", "df.pkl", + "df.pkl", "wordcloud.png", "mappings.pkl"] upload_files(files_to_upload)