This commit is contained in:
gokul
2023-06-14 23:31:46 +05:30
parent 05dac39d4e
commit 9d2d6dd7e4
3 changed files with 28 additions and 29 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -7,5 +7,4 @@ OPENAI_APIKEY=
WHISPER_MODEL_SIZE=tiny WHISPER_MODEL_SIZE=tiny
AWS_ACCESS_KEY=***REMOVED*** AWS_ACCESS_KEY=***REMOVED***
AWS_SECRET_KEY=***REMOVED*** AWS_SECRET_KEY=***REMOVED***
BUCKET_NAME='reflector-bucket' BUCKET_NAME='reflector-bucket'

View File

@@ -223,32 +223,32 @@ def create_talk_diff_scatter_viz():
# to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") ) # to load, my_mappings = pickle.load( open ("mappings.pkl", "rb") )
# pick the 2 most matched topic to be used for plotting # pick the 2 most matched topic to be used for plotting
topic_times = collections.defaultdict(int) # topic_times = collections.defaultdict(int)
for key in ts_to_topic_mapping_top_1.keys(): # for key in ts_to_topic_mapping_top_1.keys():
duration = key[1] - key[0] # duration = key[1] - key[0]
topic_times[ts_to_topic_mapping_top_1[key]] += duration # topic_times[ts_to_topic_mapping_top_1[key]] += duration
#
topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True) # topic_times = sorted(topic_times.items(), key=lambda x: x[1], reverse=True)
#
cat_1 = topic_times[0][0] # cat_1 = topic_times[0][0]
cat_1_name = topic_times[0][0] # cat_1_name = topic_times[0][0]
cat_2_name = topic_times[1][0] # cat_2_name = topic_times[1][0]
#
# Scatter plot of topics # # Scatter plot of topics
df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)) # df = df.assign(parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))
corpus = st.CorpusFromParsedDocuments( # corpus = st.CorpusFromParsedDocuments(
df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse' # df, category_col='ts_to_topic_mapping_top_1', parsed_col='parse'
).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) # ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
html = st.produce_scattertext_explorer( # html = st.produce_scattertext_explorer(
corpus, # corpus,
category=cat_1, # category=cat_1,
category_name=cat_1_name, # category_name=cat_1_name,
not_category_name=cat_2_name, # not_category_name=cat_2_name,
minimum_term_frequency=0, pmi_threshold_coefficient=0, # minimum_term_frequency=0, pmi_threshold_coefficient=0,
width_in_pixels=1000, # width_in_pixels=1000,
transform=st.Scalers.dense_rank # transform=st.Scalers.dense_rank
) # )
open('./demo_compact.html', 'w').write(html) # open('./demo_compact.html', 'w').write(html)
def main(): def main():
@@ -333,7 +333,7 @@ def main():
# S3 : Push artefacts to S3 bucket # S3 : Push artefacts to S3 bucket
files_to_upload = ["transcript.txt", "transcript_timestamps.txt", files_to_upload = ["transcript.txt", "transcript_timestamps.txt",
"demo_compact.html", "df.pkl", "df.pkl",
"wordcloud.png", "mappings.pkl"] "wordcloud.png", "mappings.pkl"]
upload_files(files_to_upload) upload_files(files_to_upload)