From 9d2d6dd7e43677bfbdc6759d4e9a13c0c7018213 Mon Sep 17 00:00:00 2001 From: gokul Date: Wed, 14 Jun 2023 23:31:46 +0530 Subject: [PATCH] update --- .DS_Store | Bin 0 -> 6148 bytes config.ini | 3 +-- whisjax.py | 54 ++++++++++++++++++++++++++--------------------------- 3 files changed, 28 insertions(+), 29 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..099db8afcb5e121a888cbe6ba013b3decb1d208f GIT binary patch literal 6148 zcmeHK%}T>S5T0$TO{qc;3OxqA7VMAsgO^b23mDOZN=!)6V9b^#HHT8jSzpK}@p+ut z-HN65;z6X$!0b0WJCkL<4LiF4Ao`QA1JD2f3ze``!{!H}andCz84saQ-|!#>AD&?Z z@lrH9{v!jlcUzFb5O*$v@B4?IAWug@ECY;r4@Pm6X6^QeD3vQ4n^mi7)vY`CQRZ&u zXOpz=k8h}VDPg+L!@m;8zUL`Cy|GItB}k>gd3Ru8%ZcAtXVY z-V%hiLC0XB5l2vjPDRwI!aOmAPDj6O;v9p8Mx72q&5YkMGYj)V5o&hy+bSG{qmf%? zfEie1psc$Ms{iM!-~WqA++zlqfwf{lR0i&#k4rMOb!l-_Yi-nfR1%6SG=8O^p-M5v eQYmhtDnY+Z2BKrI(1;!sz6dB9xM2qVlz~q*xKEz| literal 0 HcmV?d00001 diff --git a/config.ini b/config.ini index 9c96931a..9d4a1d6a 100644 --- a/config.ini +++ b/config.ini @@ -7,5 +7,4 @@ OPENAI_APIKEY= WHISPER_MODEL_SIZE=tiny AWS_ACCESS_KEY=***REMOVED*** AWS_SECRET_KEY=***REMOVED*** -BUCKET_NAME='reflector-bucket' - +BUCKET_NAME='reflector-bucket' \ No newline at end of file diff --git a/whisjax.py b/whisjax.py index 38ddec9f..3ce31c67 100644 --- a/whisjax.py +++ b/whisjax.py @@ -223,32 +223,32 @@ def create_talk_diff_scatter_viz(): # to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") ) # pick the 2 most matched topic to be used for plotting - topic_times = collections.defaultdict(int) - for key in ts_to_topic_mapping_top_1.keys(): - duration = key[1] - key[0] - topic_times[ts_to_topic_mapping_top_1[key]] += duration - - topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True) - - cat_1 = topic_times[0][0] - cat_1_name = topic_times[0][0] - cat_2_name = topic_times[1][0] - - # Scatter plot of topics - df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)) - corpus = st.CorpusFromParsedDocuments( - df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse' - ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) - html = st.produce_scattertext_explorer( - corpus, - category=cat_1, - category_name=cat_1_name, - not_category_name=cat_2_name, - minimum_term_frequency=0, pmi_threshold_coefficient=0, - width_in_pixels=1000, - transform=st.Scalers.dense_rank - ) - open('./demo_compact.html', 'w').write(html) + # topic_times = collections.defaultdict(int) + # for key in ts_to_topic_mapping_top_1.keys(): + # duration = key[1] - key[0] + # topic_times[ts_to_topic_mapping_top_1[key]] += duration + # + # topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True) + # + # cat_1 = topic_times[0][0] + # cat_1_name = topic_times[0][0] + # cat_2_name = topic_times[1][0] + # + # # Scatter plot of topics + # df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)) + # corpus = st.CorpusFromParsedDocuments( + # df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse' + # ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) + # html = st.produce_scattertext_explorer( + # corpus, + # category=cat_1, + # category_name=cat_1_name, + # not_category_name=cat_2_name, + # minimum_term_frequency=0, pmi_threshold_coefficient=0, + # width_in_pixels=1000, + # transform=st.Scalers.dense_rank + # ) + # open('./demo_compact.html', 'w').write(html) def main(): @@ -333,7 +333,7 @@ def main(): # S3 : Push artefacts to S3 bucket files_to_upload = ["transcript.txt", "transcript_timestamps.txt", - "demo_compact.html", "df.pkl", + "df.pkl", "wordcloud.png", "mappings.pkl"] upload_files(files_to_upload)