organize imports

This commit is contained in:
Gokul Mohanarangan
2023-07-25 10:02:25 +05:30
parent ab42858ec8
commit 25f34bf9e5
8 changed files with 79 additions and 80 deletions

0
__init__.py Normal file
View File

View File

@@ -1,30 +0,0 @@
import json
# Split the combined JSON output of a meeting into two plain-text artefacts:
#   * meeting_transcript.txt        - the transcripts of all topics, concatenated
#   * meeting_title_description.txt - one TITLE / DESCRIPTION / TRANSCRIPT
#                                     section per topic
# The input must be a single JSON object with a "topics" list whose items
# carry the "title", "description" and "transcript" keys.
with open("meeting_titles_and_summaries.txt", "r") as f:
    outputs = json.loads(f.read())

# Both outputs are opened in append mode, so re-running the script adds to
# any existing content. The context manager guarantees the files are flushed
# and closed even when a topic is missing one of the expected keys.
with open("meeting_transcript.txt", "a") as transcript_file, \
        open("meeting_title_description.txt", "a") as title_description_file:
    for item in outputs["topics"]:
        transcript_file.write(item["transcript"])

        # One human-readable section per topic, terminated by a separator.
        title_description_file.writelines((
            "TITLE: \n",
            item["title"],
            "\n",
            "DESCRIPTION: \n",
            item["description"],
            "\n",
            "TRANSCRIPT: \n",
            item["transcript"],
            "\n",
            "---------------------------------------- \n\n",
        ))

View File

@@ -1,25 +1,23 @@
import asyncio import asyncio
import datetime import datetime
import os
import io
import numpy as np
import json import json
import os
import uuid import uuid
import wave import wave
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from faster_whisper import WhisperModel
import aiohttp_cors import aiohttp_cors
import jax.numpy as jnp
import requests import requests
from aiohttp import web from aiohttp import web
from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription from aiortc import MediaStreamTrack, RTCPeerConnection, RTCSessionDescription
from aiortc.contrib.media import MediaRelay from aiortc.contrib.media import MediaRelay
from av import AudioFifo from av import AudioFifo
from faster_whisper import WhisperModel
from loguru import logger from loguru import logger
from whisper_jax import FlaxWhisperPipline
from utils.run_utils import run_in_executor
from sortedcontainers import SortedDict from sortedcontainers import SortedDict
from utils.run_utils import run_in_executor
pcs = set() pcs = set()
relay = MediaRelay() relay = MediaRelay()
data_channel = None data_channel = None
@@ -45,7 +43,7 @@ blacklisted_messages = [" Thank you.", " See you next time!",
def get_title_and_summary(llm_input_text, last_timestamp): def get_title_and_summary(llm_input_text, last_timestamp):
print("Generating title and summary") ("Generating title and summary")
# output = llm.generate(prompt) # output = llm.generate(prompt)
# Use monadical-ml to fire this query to an LLM and get result # Use monadical-ml to fire this query to an LLM and get result
@@ -69,7 +67,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
"prompt": prompt "prompt": prompt
} }
# To-do: Handle unexpected output formats from the model # TODO : Handle unexpected output formats from the model
try: try:
response = requests.post(LLM_URL, headers=headers, json=data) response = requests.post(LLM_URL, headers=headers, json=data)
output = json.loads(response.json()["results"][0]["text"]) output = json.loads(response.json()["results"][0]["text"])
@@ -84,13 +82,13 @@ def get_title_and_summary(llm_input_text, last_timestamp):
} }
except Exception as e: except Exception as e:
print("Exception" + str(e)) logger.info("Exception" + str(e))
result = None result = None
return result return result
def channel_log(channel, t, message): def channel_log(channel, t, message):
print("channel(%s) %s %s" % (channel.label, t, message)) logger.info("channel(%s) %s %s" % (channel.label, t, message))
def channel_send(channel, message): def channel_send(channel, message):
@@ -120,17 +118,18 @@ def channel_send_transcript(channel):
if len(sorted_transcripts) >= 3: if len(sorted_transcripts) >= 3:
del sorted_transcripts[least_time] del sorted_transcripts[least_time]
except Exception as e: except Exception as e:
print("Exception", str(e)) logger.info("Exception", str(e))
pass pass
def get_transcription(frames): def get_transcription(frames):
print("Transcribing..") logger.info("Transcribing..")
sorted_transcripts[frames[0].time] = None sorted_transcripts[frames[0].time] = None
# TODO:
# Passing IO objects instead of temporary files throws an error # Passing IO objects instead of temporary files throws an error
# Passing ndarrays (typecasted with float) does not give any # Passing ndarrays (typecasted with float) does not give any
# transcription. Refer issue # transcription. Refer issue,
# https://github.com/guillaumekln/faster-whisper/issues/369 # https://github.com/guillaumekln/faster-whisper/issues/369
audiofilename = "test" + str(datetime.datetime.now()) audiofilename = "test" + str(datetime.datetime.now())
wf = wave.open(audiofilename, "wb") wf = wave.open(audiofilename, "wb")
@@ -170,7 +169,7 @@ def get_transcription(frames):
transcription_text += result_text transcription_text += result_text
except Exception as e: except Exception as e:
print("Exception" + str(e)) logger.info("Exception" + str(e))
pass pass
result = { result = {
@@ -195,7 +194,7 @@ def get_final_summary_response():
"summary": final_summary "summary": final_summary
} }
with open("meeting_titles_and_summaries.txt", "a") as f: with open("./artefacts/meeting_titles_and_summaries.txt", "a") as f:
f.write(json.dumps(incremental_responses)) f.write(json.dumps(incremental_responses))
return response return response
@@ -275,7 +274,6 @@ async def offer(request):
if isinstance(message, str) and message.startswith("ping"): if isinstance(message, str) and message.startswith("ping"):
channel_send(channel, "pong" + message[4:]) channel_send(channel, "pong" + message[4:])
@pc.on("connectionstatechange") @pc.on("connectionstatechange")
async def on_connectionstatechange(): async def on_connectionstatechange():
log_info("Connection state is " + pc.connectionState) log_info("Connection state is " + pc.connectionState)

32
utils/format_output.py Normal file
View File

@@ -0,0 +1,32 @@
import json
# Split the combined JSON output of a meeting into three plain-text artefacts:
#   * meeting_transcript.txt        - the transcripts of all topics, concatenated
#   * meeting_summary.txt           - the descriptions of all topics, concatenated
#   * meeting_title_description.txt - one TITLE / DESCRIPTION / TRANSCRIPT
#                                     section per topic
# The input must be a single JSON object with a "topics" list whose items
# carry the "title", "description" and "transcript" keys.
# NOTE(review): the "../artefacts/" paths are relative to the current working
# directory, so this presumably has to be run from inside utils/ - confirm.
with open("../artefacts/meeting_titles_and_summaries.txt", "r") as f:
    outputs = json.loads(f.read())

# All outputs are opened in append mode, so re-running the script adds to any
# existing content. The context manager guarantees every file is flushed and
# closed even when a topic is missing one of the expected keys, which the
# previous trailing close() calls did not.
with open("../artefacts/meeting_transcript.txt", "a") as transcript_file, \
        open("../artefacts/meeting_title_description.txt", "a") as title_desc_file, \
        open("../artefacts/meeting_summary.txt", "a") as summary_file:
    for item in outputs["topics"]:
        transcript_file.write(item["transcript"])
        summary_file.write(item["description"])

        # One human-readable section per topic, terminated by a separator.
        title_desc_file.writelines((
            "TITLE: \n",
            item["title"],
            "\n",
            "DESCRIPTION: \n",
            item["description"],
            "\n",
            "TRANSCRIPT: \n",
            item["transcript"],
            "\n",
            "---------------------------------------- \n\n",
        ))

View File

@@ -6,8 +6,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
from transformers import BartForConditionalGeneration, BartTokenizer from transformers import BartForConditionalGeneration, BartTokenizer
from utils.log_utils import logger from log_utils import logger
from utils.run_utils import config from run_utils import config
nltk.download('punkt', quiet=True) nltk.download('punkt', quiet=True)

View File

@@ -57,12 +57,12 @@ def create_wordcloud(timestamp, real_time=False):
def create_talk_diff_scatter_viz(timestamp, real_time=False): def create_talk_diff_scatter_viz(timestamp, real_time=False):
""" """
Perform agenda vs transription diff to see covered topics. Perform agenda vs transcription diff to see covered topics.
Create a scatter plot of words in topics. Create a scatter plot of words in topics.
:return: None. Saved locally. :return: None. Saved locally.
""" """
spaCy_model = "en_core_web_md" spacy_model = "en_core_web_md"
nlp = spacy.load(spaCy_model) nlp = spacy.load(spacy_model)
nlp.add_pipe('sentencizer') nlp.add_pipe('sentencizer')
agenda_topics = [] agenda_topics = []
@@ -75,7 +75,6 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
agenda_topics.append(line.split(":")[0]) agenda_topics.append(line.split(":")[0])
# Load the transcription with timestamp # Load the transcription with timestamp
filename = ""
if real_time: if real_time:
filename = "./artefacts/real_time_transcript_with_timestamp_" + \ filename = "./artefacts/real_time_transcript_with_timestamp_" + \
timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt" timestamp.strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
@@ -142,7 +141,7 @@ def create_talk_diff_scatter_viz(timestamp, real_time=False):
df = df.apply(create_new_columns, axis=1) df = df.apply(create_new_columns, axis=1)
# Count the number of items covered and calculatre the percentage # Count the number of items covered and calculate the percentage
num_covered_items = sum(covered_items.values()) num_covered_items = sum(covered_items.values())
percentage_covered = num_covered_items / len(agenda) * 100 percentage_covered = num_covered_items / len(agenda) * 100