Merge pull request #13 from Monadical-SAS/whisper-jax-gokul

update
projects-g
2023-06-14 23:33:18 +05:30
committed by GitHub
3 changed files with 28 additions and 29 deletions

.DS_Store (new binary file)

Binary file not shown.


@@ -7,5 +7,4 @@ OPENAI_APIKEY=
 WHISPER_MODEL_SIZE=tiny
 AWS_ACCESS_KEY=***REMOVED***
 AWS_SECRET_KEY=***REMOVED***
-BUCKET_NAME='reflector-bucket'
 BUCKET_NAME='reflector-bucket'

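For reference, the variables in this hunk could be consumed from Python roughly as follows — a minimal sketch assuming python-dotenv and os.environ, not necessarily how the project actually loads its configuration:

# Hypothetical sketch: load the .env values shown above.
# Assumes python-dotenv is installed; the project may load config differently.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

OPENAI_APIKEY = os.environ.get("OPENAI_APIKEY", "")
WHISPER_MODEL_SIZE = os.environ.get("WHISPER_MODEL_SIZE", "tiny")
AWS_ACCESS_KEY = os.environ["AWS_ACCESS_KEY"]
AWS_SECRET_KEY = os.environ["AWS_SECRET_KEY"]
BUCKET_NAME = os.environ.get("BUCKET_NAME", "reflector-bucket")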

@@ -223,32 +223,32 @@ def create_talk_diff_scatter_viz():
     # to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") )
     # pick the 2 most matched topic to be used for plotting
-    topic_times = collections.defaultdict(int)
-    for key in ts_to_topic_mapping_top_1.keys():
-        duration = key[1] - key[0]
-        topic_times[ts_to_topic_mapping_top_1[key]] += duration
-    topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
-    cat_1 = topic_times[0][0]
-    cat_1_name = topic_times[0][0]
-    cat_2_name = topic_times[1][0]
-    # Scatter plot of topics
-    df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
-    corpus = st.CorpusFromParsedDocuments(
-        df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'
-    ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
-    html = st.produce_scattertext_explorer(
-        corpus,
-        category=cat_1,
-        category_name=cat_1_name,
-        not_category_name=cat_2_name,
-        minimum_term_frequency=0, pmi_threshold_coefficient=0,
-        width_in_pixels=1000,
-        transform=st.Scalers.dense_rank
-    )
-    open('./demo_compact.html', 'w').write(html)
+    # topic_times = collections.defaultdict(int)
+    # for key in ts_to_topic_mapping_top_1.keys():
+    #     duration = key[1] - key[0]
+    #     topic_times[ts_to_topic_mapping_top_1[key]] += duration
+    #
+    # topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
+    #
+    # cat_1 = topic_times[0][0]
+    # cat_1_name = topic_times[0][0]
+    # cat_2_name = topic_times[1][0]
+    #
+    # # Scatter plot of topics
+    # df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
+    # corpus = st.CorpusFromParsedDocuments(
+    #     df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'
+    # ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
+    # html = st.produce_scattertext_explorer(
+    #     corpus,
+    #     category=cat_1,
+    #     category_name=cat_1_name,
+    #     not_category_name=cat_2_name,
+    #     minimum_term_frequency=0, pmi_threshold_coefficient=0,
+    #     width_in_pixels=1000,
+    #     transform=st.Scalers.dense_rank
+    # )
+    # open('./demo_compact.html', 'w').write(html)
 def main():
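The hunk above comments out the entire scattertext visualization, so demo_compact.html is no longer produced; the hunk below drops it from the S3 upload list accordingly. For reference, a self-contained toy version of the same scattertext flow — the DataFrame and topic labels here are hypothetical stand-ins, not the project's real transcript data:

# Toy, self-contained version of the commented-out visualization above.
# The data below is made up; the real pipeline derives `df` and the
# topic mapping from transcript timestamps.
import pandas as pd
import scattertext as st

df = pd.DataFrame({
    "text": [
        "we discussed the deployment pipeline and model latency",
        "the budget review and hiring plan went well",
        "deployment needs a fix before the model release",
        "hiring numbers and budget look good this quarter",
    ],
    "ts_to_topic_mapping_top_1": ["engineering", "planning", "engineering", "planning"],
})
df = df.assign(parse=lambda d: d.text.apply(st.whitespace_nlp_with_sentences))
corpus = (
    st.CorpusFromParsedDocuments(df, category_col="ts_to_topic_mapping_top_1", parsed_col="parse")
    .build()
    .get_unigram_corpus()
    .compact(st.AssociationCompactor(2000))
)
html = st.produce_scattertext_explorer(
    corpus,
    category="engineering",
    category_name="engineering",
    not_category_name="planning",
    minimum_term_frequency=0,
    pmi_threshold_coefficient=0,
    width_in_pixels=1000,
    transform=st.Scalers.dense_rank,
)
with open("demo_compact.html", "w") as f:
    f.write(html)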
@@ -333,7 +333,7 @@ def main():
     # S3 : Push artefacts to S3 bucket
     files_to_upload = ["transcript.txt", "transcript_timestamps.txt",
-                       "demo_compact.html", "df.pkl",
+                       "df.pkl",
                        "wordcloud.png", "mappings.pkl"]
     upload_files(files_to_upload)
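For context, a minimal sketch of what an upload_files helper consistent with the call above might look like, assuming boto3 and the AWS_* / BUCKET_NAME values from the .env hunk; the actual helper in the repository may differ:

# Hypothetical upload helper; assumes boto3 and the env vars shown earlier.
import os
import boto3

def upload_files(filenames):
    s3 = boto3.client(
        "s3",
        aws_access_key_id=os.environ["AWS_ACCESS_KEY"],
        aws_secret_access_key=os.environ["AWS_SECRET_KEY"],
    )
    bucket = os.environ.get("BUCKET_NAME", "reflector-bucket")
    for name in filenames:
        # Skip artefacts that were not generated on this run
        # (e.g. demo_compact.html once the visualization is disabled).
        if os.path.exists(name):
            s3.upload_file(name, bucket, os.path.basename(name))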