organize imports

2026-02-04 09:56:47 +00:00 · 2023-07-25 10:02:25 +05:30
parent ab42858ec8
commit 25f34bf9e5
8 changed files with 79 additions and 80 deletions
--- a/init.py
+++ b/init.py
--- a/format_output.py
+++ b/format_output.py
@@ -1,30 +0,0 @@
-import json
-
-with open("meeting_titles_and_summaries.txt", "r") as f:
-    outputs = f.read()
-
-outputs = json.loads(outputs)
-
-transcript_file = open("meeting_transcript.txt", "a")
-title_description_file = open("meeting_title_description.txt", "a")
-
-for item in outputs["topics"]:
-    transcript_file.write(item["transcript"])
-
-    title_description_file.write("TITLE: \n")
-    title_description_file.write(item["title"])
-    title_description_file.write("\n")
-
-    title_description_file.write("DESCRIPTION: \n")
-    title_description_file.write(item["description"])
-    title_description_file.write("\n")
-
-    title_description_file.write("TRANSCRIPT: \n")
-    title_description_file.write(item["transcript"])
-    title_description_file.write("\n")
-
-    title_description_file.write("---------------------------------------- \n\n")
-
-
-
-
--- a/server.py
+++ b/server.py
@@ -1,25 +1,23 @@
 import asyncio
 import datetime
-import os
-import io
-import numpy as np
 import json
+import os
 import uuid
 import wave
 from concurrent.futures import ThreadPoolExecutor
-from faster_whisper import WhisperModel
+
 import aiohttp_cors
-import jax.numpy as jnp
 import requests
 from aiohttp import web
 from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
 from aiortc.contrib.media import MediaRelay
 from av import AudioFifo
+from faster_whisper import WhisperModel
 from loguru import logger
-from whisper_jax import FlaxWhisperPipline
-from utils.run_utils import run_in_executor
 from sortedcontainers import SortedDict

+from utils.run_utils import run_in_executor
+
 pcs = set()
 relay = MediaRelay()
 data_channel = None
@@ -45,7 +43,7 @@ blacklisted_messages = [" Thank you.", " See you next time!",


 def get_title_and_summary(llm_input_text, last_timestamp):
-    print("Generating title and summary")
+    logger.info("Generating title and summary")
    # output = llm.generate(prompt)

    # Use monadical-ml to fire this query to an LLM and get result
@@ -69,7 +67,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
            "prompt": prompt
    }

-    # To-do: Handle unexpected output formats from the model
+    # TODO: Handle unexpected output formats from the model
    try:
        response = requests.post(LLM_URL, headers=headers, json=data)
        output = json.loads(response.json()["results"][0]["text"])
@@ -84,13 +82,13 @@ def get_title_and_summary(llm_input_text, last_timestamp):
        }

    except Exception as e:
-        print("Exception" + str(e))
+        logger.info("Exception" + str(e))
        result = None
    return result


 def channel_log(channel, t, message):
-    print("channel(%s) %s %s" % (channel.label, t, message))
+    logger.info("channel(%s) %s %s" % (channel.label, t, message))


 def channel_send(channel, message):
@@ -120,17 +118,18 @@ def channel_send_transcript(channel):
                if len(sorted_transcripts) >= 3:
                    del sorted_transcripts[least_time]
        except Exception as e:
-            print("Exception", str(e))
+            logger.info("Exception {}", str(e))
            pass


 def get_transcription(frames):
-    print("Transcribing..")
+    logger.info("Transcribing..")
    sorted_transcripts[frames[0].time] = None

+    # TODO:
    # Passing IO objects instead of temporary files throws an error
    # Passing ndarrays (typecasted with float) does not give any
-    # transcription. Refer issue
+    # transcription. Refer to the issue:
    # https://github.com/guillaumekln/faster-whisper/issues/369
    audiofilename = "test" + str(datetime.datetime.now())
    wf = wave.open(audiofilename, "wb")
@@ -170,7 +169,7 @@ def get_transcription(frames):
        transcription_text += result_text

    except Exception as e:
-        print("Exception" + str(e))
+        logger.info("Exception" + str(e))
        pass

    result = {
@@ -195,7 +194,7 @@ def get_final_summary_response():
            "summary": final_summary
    }

-    with open("meeting_titles_and_summaries.txt", "a") as f:
+    with open("./artefacts/meeting_titles_and_summaries.txt", "a") as f:
        f.write(json.dumps(incremental_responses))
    return response

@@ -275,7 +274,6 @@ async def offer(request):
            if isinstance(message, str) and message.startswith("ping"):
                channel_send(channel, "pong" + message[4:])

-
    @pc.on("connectionstatechange")
    async def on_connectionstatechange():
        log_info("Connection state is " + pc.connectionState)
--- a/utils/format_output.py
+++ b/utils/format_output.py
@@ -0,0 +1,32 @@
+import json
+
+with open("../artefacts/meeting_titles_and_summaries.txt", "r") as f:
+    outputs = f.read()
+
+outputs = json.loads(outputs)
+
+transcript_file = open("../artefacts/meeting_transcript.txt", "a")
+title_desc_file = open("../artefacts/meeting_title_description.txt", "a")
+summary_file = open("../artefacts/meeting_summary.txt", "a")
+
+for item in outputs["topics"]:
+    transcript_file.write(item["transcript"])
+    summary_file.write(item["description"])
+
+    title_desc_file.write("TITLE: \n")
+    title_desc_file.write(item["title"])
+    title_desc_file.write("\n")
+
+    title_desc_file.write("DESCRIPTION: \n")
+    title_desc_file.write(item["description"])
+    title_desc_file.write("\n")
+
+    title_desc_file.write("TRANSCRIPT: \n")
+    title_desc_file.write(item["transcript"])
+    title_desc_file.write("\n")
+
+    title_desc_file.write("---------------------------------------- \n\n")
+
+transcript_file.close()
+title_desc_file.close()
+summary_file.close()
--- a/utils/text_utilities.py
+++ b/utils/text_utilities.py
@@ -6,8 +6,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 from transformers import BartForConditionalGeneration, BartTokenizer

-from utils.log_utils import logger
-from utils.run_utils import config
+from log_utils import logger
+from run_utils import config

 nltk.download('punkt', quiet=True)

--- a/utils/viz_utilities.py
+++ b/utils/viz_utilities.py
@@ -57,12 +57,12 @@ def create_wordcloud(timestamp, real_time=False):

 def create_talk_diff_scatter_viz(timestamp, real_time=False):
    """
-    Perform agenda vs transription diff to see covered topics.
+    Perform agenda vs transcription diff to see covered topics.
    Create a scatter plot of words in topics.
    :return: None. Saved locally.
    """
-    spaCy_model = "en_core_web_md"
-    nlp = spacy.load(spaCy_model)
+    spacy_model = "en_core_web_md"
+    nlp = spacy.load(spacy_model)
    nlp.add_pipe('sentencizer')

    agenda_topics = []
@@ -75,7 +75,6 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
                agenda_topics.append(line.split(":")[0])

    # Load the transcription with timestamp
-    filename = ""
    if real_time:
        filename = "./artefacts/real_time_transcript_with_timestamp_" + \
                   timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -142,7 +141,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):

    df = df.apply(create_new_columns, axis=1)

-    # Count the number of items covered and calculatre the percentage
+    # Count the number of items covered and calculate the percentage
    num_covered_items = sum(covered_items.values())
    percentage_covered = num_covered_items / len(agenda) * 100