From 02c928a7cfd6ab155539b536fae8b1b49af857b7 Mon Sep 17 00:00:00 2001
From: Gokul Mohanarangan
Date: Mon, 24 Jul 2023 13:19:24 +0530
Subject: [PATCH] use faster-whisper pipeline

---
Reviewer notes (below the three-dash line, so not part of the commit message):

* The line structure of this patch was rebuilt after its newlines and
  indentation had been collapsed. Hunk line counts were checked against the
  hunk headers and the diffstat, but leading whitespace (including inside the
  prompt f-string in trials/api.py) is inferred from Python syntax. If context
  lines fail to match, try "git apply --ignore-space-change".
* format_output.py opens meeting_transcript.txt and
  meeting_title_description.txt in append mode and never closes them.
* trials/api.py never assigns final_summary after initialising it to "", so
  sum.txt is always written empty; doc[start_index:end_index-5] also skips
  five tokens between consecutive parts.
* server.py get_transcription() now writes the frames to a WAV file named
  "test" + str(datetime.datetime.now()) in the working directory and removes
  it after transcription; the added print(type(...)) calls look like leftover
  debugging output.

 format_output.py                        | 30 ++++++++++
 server_executor_cleaned.py => server.py | 77 ++++++++++++++++++-------
 trials/api.py                           | 56 ++++++++++++++++++
 3 files changed, 142 insertions(+), 21 deletions(-)
 create mode 100644 format_output.py
 rename server_executor_cleaned.py => server.py (82%)

diff --git a/format_output.py b/format_output.py
new file mode 100644
index 00000000..6cc3006c
--- /dev/null
+++ b/format_output.py
@@ -0,0 +1,30 @@
+import json
+
+with open("meeting_titles_and_summaries.txt", "r") as f:
+    outputs = f.read()
+
+outputs = json.loads(outputs)
+
+transcript_file = open("meeting_transcript.txt", "a")
+title_description_file = open("meeting_title_description.txt", "a")
+
+for item in outputs["topics"]:
+    transcript_file.write(item["transcript"])
+
+    title_description_file.write("TITLE: \n")
+    title_description_file.write(item["title"])
+    title_description_file.write("\n")
+
+    title_description_file.write("DESCRIPTION: \n")
+    title_description_file.write(item["description"])
+    title_description_file.write("\n")
+
+    title_description_file.write("TRANSCRIPT: \n")
+    title_description_file.write(item["transcript"])
+    title_description_file.write("\n")
+
+    title_description_file.write("---------------------------------------- \n\n")
+
+
+
+
diff --git a/server_executor_cleaned.py b/server.py
similarity index 82%
rename from server_executor_cleaned.py
rename to server.py
index 2d8f3747..52f98136 100644
--- a/server_executor_cleaned.py
+++ b/server.py
@@ -1,11 +1,13 @@
 import asyncio
 import datetime
+import os
 import io
+import numpy as np
 import json
 import uuid
 import wave
 from concurrent.futures import ThreadPoolExecutor
-
+from faster_whisper import WhisperModel
 import aiohttp_cors
 import jax.numpy as jnp
 import requests
@@ -21,9 +23,9 @@ from sortedcontainers import SortedDict
 pcs = set()
 relay = MediaRelay()
 data_channel = None
-pipeline = FlaxWhisperPipline("openai/whisper-tiny",
-                              dtype=jnp.float16,
-                              batch_size=16)
+model = WhisperModel("tiny", device="cpu",
+                     compute_type="float32",
+                     num_workers=12)
 
 CHANNELS = 2
 RATE = 48000
@@ -80,6 +82,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
             "cmd": "UPDATE_TOPICS",
             "topics": incremental_responses,
         }
+
     except Exception as e:
         print("Exception" + str(e))
         result = None
@@ -113,18 +116,21 @@ def channel_send_transcript(channel):
         # Due to exceptions if one of the earlier batches can't return
         # a transcript, we don't want to be stuck waiting for the result
         # With the threshold size of 3, we pop the first(lost) element
-        elif len(sorted_transcripts) >= 3:
-            del sorted_transcripts[least_time]
+        else:
+            if len(sorted_transcripts) >= 3:
+                del sorted_transcripts[least_time]
     except Exception as e:
         print("Exception", str(e))
         pass
 
 
 def get_transcription(frames):
+    print(type(frames))
+    print(type(frames[0]))
     print("Transcribing..")
     sorted_transcripts[frames[0].time] = None
-    out_file = io.BytesIO()
-    wf = wave.open(out_file, "wb")
+    audiofilename = "test" + str(datetime.datetime.now())
+    wf = wave.open(audiofilename, "wb")
     wf.setnchannels(CHANNELS)
     wf.setframerate(RATE)
     wf.setsampwidth(2)
@@ -133,22 +139,48 @@ def get_transcription(frames):
         wf.writeframes(b"".join(frame.to_ndarray()))
     wf.close()
 
-    # To-Do: Look into WhisperTimeStampLogitsProcessor exception
-    try:
-        whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
-    except Exception as e:
-        return
+    result_text = ""
 
-    global transcription_text, last_transcribed_time
-    transcription_text += whisper_result["text"]
-    duration = whisper_result["chunks"][0]["timestamp"][1]
-    if not duration:
-        duration = 5.0
-    last_transcribed_time += duration
+    try:
+        segments, _ = model.transcribe(audiofilename,
+                                       language="en",
+                                       beam_size=5,
+                                       vad_filter=True,
+                                       vad_parameters=dict(min_silence_duration_ms=500)
+                                       )
+        segments = list(segments)
+        result_text = ""
+        duration = 0.0
+        for segment in segments:
+            result_text += segment.text
+            start_time = segment.start
+            end_time = segment.end
+            if not segment.start:
+                start_time = 0.0
+            if not segment.end:
+                end_time = 5.5
+            duration += (end_time - start_time)
+
+        global last_transcribed_time
+        last_transcribed_time += duration
+
+    except Exception as e:
+        print("Exception" + str(e))
+        pass
+
+    #
+    try:
+        os.remove(audiofilename)
+    except Exception as e:
+        print("Exception :", str(e))
+        pass
+
+    global transcription_text
+    transcription_text += result_text
 
     result = {
         "cmd": "SHOW_TRANSCRIPTION",
-        "text": whisper_result["text"]
+        "text": result_text
     }
     sorted_transcripts[frames[0].time] = result
     return result
@@ -167,6 +199,9 @@ def get_final_summary_response():
             seconds=round(last_transcribed_time))),
         "summary": final_summary
     }
+
+    with open("meeting_titles_and_summaries.txt", "a") as f:
+        f.write(json.dumps(incremental_responses))
     return response
 
 
@@ -196,7 +231,7 @@ class AudioStreamTrack(MediaStreamTrack):
                 else None
             )
 
-            if len(transcription_text) > 500:
+            if len(transcription_text) > 750:
                 llm_input_text = transcription_text
                 transcription_text = ""
                 llm_result = run_in_executor(get_title_and_summary,
diff --git a/trials/api.py b/trials/api.py
index e69de29b..5e25f4d1 100644
--- a/trials/api.py
+++ b/trials/api.py
@@ -0,0 +1,56 @@
+import requests
+import spacy
+
+# This is the URL of text-generation-webui
+URL = "http://216.153.52.83:5000/api/v1/generate"
+
+headers = {
+    "Content-Type": "application/json"
+}
+
+
+def split_text_file(filename, token_count):
+    nlp = spacy.load('en_core_web_md')
+
+    with open(filename, 'r') as file:
+        text = file.read()
+
+    doc = nlp(text)
+    total_tokens = len(doc)
+
+    parts = []
+    start_index = 0
+
+    while start_index < total_tokens:
+        end_index = start_index + token_count
+        part_tokens = doc[start_index:end_index-5]
+        part = ' '.join(token.text for token in part_tokens)
+        parts.append(part)
+        start_index = end_index
+
+    return parts
+
+
+final_summary = ""
+parts = split_text_file("transcript.txt", 1600)
+previous_summary = ""
+
+for part in parts:
+    prompt = f"""
+    ### Human:
+    Given the following text, distill the most important information
+    into a short summary: {part}
+
+    ### Assistant:
+    """
+    data = {
+        "prompt": prompt
+    }
+    try:
+        response = requests.post(URL, headers=headers, json=data)
+        print(response.json())
+    except Exception as e:
+        print(str(e))
+
+with open("sum.txt", "w") as sum:
+    sum.write(" ".join(final_summary))
\ No newline at end of file