organize imports

2025-12-21 20:59:05 +00:00 · 2023-07-25 10:02:25 +05:30
parent ab42858ec8
commit 25f34bf9e5
8 changed files with 79 additions and 80 deletions
--- a/utils/config.ini
+++ b/utils/config.ini
@@ -1,24 +1,24 @@
 [DEFAULT]
 #SetexceptionruleforOpenMPerrortoallowduplicatelibinitialization
-KMP_DUPLICATE_LIB_OK=TRUE
+KMP_DUPLICATE_LIB_OK = TRUE
 #ExportOpenAIAPIKey
-OPENAI_APIKEY=
+OPENAI_APIKEY =
 #ExportWhisperModelSize
-WHISPER_MODEL_SIZE=tiny
-WHISPER_REAL_TIME_MODEL_SIZE=tiny
+WHISPER_MODEL_SIZE = tiny
+WHISPER_REAL_TIME_MODEL_SIZE = tiny
 #AWSconfig
-AWS_ACCESS_KEY=***REMOVED***
-AWS_SECRET_KEY=***REMOVED***
-BUCKET_NAME=reflector-bucket
+AWS_ACCESS_KEY = ***REMOVED***
+AWS_SECRET_KEY = ***REMOVED***
+BUCKET_NAME = reflector-bucket
 #Summarizerconfig
-SUMMARY_MODEL=facebook/bart-large-cnn
-INPUT_ENCODING_MAX_LENGTH=1024
-MAX_LENGTH=2048
-BEAM_SIZE=6
-MAX_CHUNK_LENGTH=1024
-SUMMARIZE_USING_CHUNKS=YES
+SUMMARY_MODEL = facebook/bart-large-cnn
+INPUT_ENCODING_MAX_LENGTH = 1024
+MAX_LENGTH = 2048
+BEAM_SIZE = 6
+MAX_CHUNK_LENGTH = 1024
+SUMMARIZE_USING_CHUNKS = YES
 #Audiodevice
-BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME=aggregator
-AV_FOUNDATION_DEVICE_ID=1
+BLACKHOLE_INPUT_AGGREGATOR_DEVICE_NAME = aggregator
+AV_FOUNDATION_DEVICE_ID = 1
 # LLM PATH
-LLM_PATH=
+LLM_PATH =
--- a/utils/format_output.py
+++ b/utils/format_output.py
@@ -0,0 +1,32 @@
+"""Split the meeting topics JSON artefact into transcript, summary and title files."""
+
+import json
+
+# Paths are relative to the working directory, so run this from a folder that
+# sits beside artefacts/ (e.g. utils/).
+with open("../artefacts/meeting_titles_and_summaries.txt", "r") as f:
+    outputs = json.load(f)
+
+# Append mode keeps whatever earlier runs wrote. The with-statement closes all
+# three files even if a topic is missing a key and the loop raises part-way.
+with open("../artefacts/meeting_transcript.txt", "a") as transcript_file, \
+        open("../artefacts/meeting_title_description.txt", "a") as title_desc_file, \
+        open("../artefacts/meeting_summary.txt", "a") as summary_file:
+    for item in outputs["topics"]:
+        # Raw transcript and description are written back-to-back, no separator.
+        transcript_file.write(item["transcript"])
+        summary_file.write(item["description"])
+
+        title_desc_file.write("TITLE: \n")
+        title_desc_file.write(item["title"])
+        title_desc_file.write("\n")
+
+        title_desc_file.write("DESCRIPTION: \n")
+        title_desc_file.write(item["description"])
+        title_desc_file.write("\n")
+
+        title_desc_file.write("TRANSCRIPT: \n")
+        title_desc_file.write(item["transcript"])
+        title_desc_file.write("\n")
+
+        title_desc_file.write("---------------------------------------- \n\n")
--- a/utils/text_utilities.py
+++ b/utils/text_utilities.py
@@ -6,8 +6,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 from transformers import BartForConditionalGeneration, BartTokenizer

-from utils.log_utils import logger
-from utils.run_utils import config
+from log_utils import logger
+from run_utils import config

 nltk.download('punkt', quiet=True)

@@ -171,7 +171,7 @@ def summarize(transcript_text, timestamp,
        output_filename = "real_time_" + output_filename

    if summarize_using_chunks != "YES":
-        inputs = tokenizer.\
+        inputs = tokenizer. \
            batch_encode_plus([transcript_text], truncation=True,
                              padding='longest',
                              max_length=int(config["DEFAULT"]["INPUT_ENCODING_MAX_LENGTH"]),
--- a/utils/viz_utilities.py
+++ b/utils/viz_utilities.py
@@ -13,7 +13,7 @@ from wordcloud import STOPWORDS, WordCloud
 en = spacy.load('en_core_web_md')
 spacy_stopwords = en.Defaults.stop_words

-STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))).\
+STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))). \
    union(set(spacy_stopwords))


@@ -24,7 +24,7 @@ def create_wordcloud(timestamp, real_time=False):
    """
    filename = "transcript"
    if real_time:
-        filename = "real_time_" + filename + "_" +\
+        filename = "real_time_" + filename + "_" + \
                   timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
    else:
        filename += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -47,7 +47,7 @@ def create_wordcloud(timestamp, real_time=False):

    wordcloud_name = "wordcloud"
    if real_time:
-        wordcloud_name = "real_time_" + wordcloud_name + "_" +\
+        wordcloud_name = "real_time_" + wordcloud_name + "_" + \
                         timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
    else:
        wordcloud_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".png"
@@ -57,12 +57,12 @@ def create_wordcloud(timestamp, real_time=False):

 def create_talk_diff_scatter_viz(timestamp, real_time=False):
    """
-    Perform agenda vs transription diff to see covered topics.
+    Perform agenda vs transcription diff to see covered topics.
    Create a scatter plot of words in topics.
    :return: None. Saved locally.
    """
-    spaCy_model = "en_core_web_md"
-    nlp = spacy.load(spaCy_model)
+    spacy_model = "en_core_web_md"
+    nlp = spacy.load(spacy_model)
    nlp.add_pipe('sentencizer')

    agenda_topics = []
@@ -75,12 +75,11 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
                agenda_topics.append(line.split(":")[0])

    # Load the transcription with timestamp
-    filename = ""
    if real_time:
-        filename = "./artefacts/real_time_transcript_with_timestamp_" +\
+        filename = "./artefacts/real_time_transcript_with_timestamp_" + \
                   timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
    else:
-        filename = "./artefacts/transcript_with_timestamp_" +\
+        filename = "./artefacts/transcript_with_timestamp_" + \
                   timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
    with open(filename) as f:
        transcription_timestamp_text = f.read()
@@ -142,7 +141,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):

    df = df.apply(create_new_columns, axis=1)

-    # Count the number of items covered and calculatre the percentage
+    # Count the number of items covered and calculate the percentage
    num_covered_items = sum(covered_items.values())
    percentage_covered = num_covered_items / len(agenda) * 100

@@ -158,7 +157,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
    # Save df, mappings for further experimentation
    df_name = "df"
    if real_time:
-        df_name = "real_time_" + df_name + "_" +\
+        df_name = "real_time_" + df_name + "_" + \
                  timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
    else:
        df_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
@@ -169,7 +168,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):

    mappings_name = "mappings"
    if real_time:
-        mappings_name = "real_time_" + mappings_name + "_" +\
+        mappings_name = "real_time_" + mappings_name + "_" + \
                        timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"
    else:
        mappings_name += "_" + timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".pkl"