From 02c928a7cfd6ab155539b536fae8b1b49af857b7 Mon Sep 17 00:00:00 2001
From: Gokul Mohanarangan <gokul@monadical.com>
Date: Mon, 24 Jul 2023 13:19:24 +0530
Subject: [PATCH] use faster-whisper pipeline

---
 format_output.py                        | 30 ++++++++++
 server_executor_cleaned.py => server.py | 77 ++++++++++++++++++-------
 trials/api.py                           | 56 ++++++++++++++++++
 3 files changed, 142 insertions(+), 21 deletions(-)
 create mode 100644 format_output.py
 rename server_executor_cleaned.py => server.py (82%)

diff --git a/format_output.py b/format_output.py
new file mode 100644
index 00000000..6cc3006c
--- /dev/null
+++ b/format_output.py
@@ -0,0 +1,30 @@
+"""Write the topics in meeting_titles_and_summaries.txt out as text files."""
+import json
+
+# The input must hold one JSON object with a "topics" list; every topic
+# needs "title", "description" and "transcript" string fields.
+with open("meeting_titles_and_summaries.txt", "r") as f:
+    outputs = json.load(f)
+
+# Both outputs are opened in append mode, so re-running adds to them.
+# The with-block closes (and flushes) them even if a topic is malformed.
+with open("meeting_transcript.txt", "a") as transcript_file, \
+        open("meeting_title_description.txt", "a") as title_description_file:
+    for item in outputs["topics"]:
+        transcript_file.write(item["transcript"])
+
+        title_description_file.write("TITLE: \n")
+        title_description_file.write(item["title"])
+        title_description_file.write("\n")
+
+        title_description_file.write("DESCRIPTION: \n")
+        title_description_file.write(item["description"])
+        title_description_file.write("\n")
+
+        title_description_file.write("TRANSCRIPT: \n")
+        title_description_file.write(item["transcript"])
+        title_description_file.write("\n")
+
+        title_description_file.write(
+            "---------------------------------------- \n\n"
+        )
diff --git a/server_executor_cleaned.py b/server.py
similarity index 82%
rename from server_executor_cleaned.py
rename to server.py
index 2d8f3747..52f98136 100644
--- a/server_executor_cleaned.py
+++ b/server.py
@@ -1,11 +1,13 @@
 import asyncio
 import datetime
+import os
 import io
+import numpy as np
 import json
 import uuid
 import wave
 from concurrent.futures import ThreadPoolExecutor
-
+from faster_whisper import WhisperModel
 import aiohttp_cors
 import jax.numpy as jnp
 import requests
@@ -21,9 +23,9 @@ from sortedcontainers import SortedDict
 pcs = set()
 relay = MediaRelay()
 data_channel = None
-pipeline = FlaxWhisperPipline("openai/whisper-tiny",
-                              dtype=jnp.float16,
-                              batch_size=16)
+model = WhisperModel("tiny", device="cpu",
+                     compute_type="float32",
+                     num_workers=12)
 
 CHANNELS = 2
 RATE = 48000
@@ -80,6 +82,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
                 "cmd": "UPDATE_TOPICS",
                 "topics": incremental_responses,
         }
+
     except Exception as e:
         print("Exception" + str(e))
         result = None
@@ -113,18 +116,21 @@ def channel_send_transcript(channel):
             # Due to exceptions if one of the earlier batches can't return
             # a transcript, we don't want to be stuck waiting for the result
             # With the threshold size of 3, we pop the first(lost) element
-            elif len(sorted_transcripts) >= 3:
-                del sorted_transcripts[least_time]
+            else:
+                if len(sorted_transcripts) >= 3:
+                    del sorted_transcripts[least_time]
         except Exception as e:
             print("Exception", str(e))
             pass
 
 
 def get_transcription(frames):
+    print(type(frames))
+    print(type(frames[0]))
     print("Transcribing..")
     sorted_transcripts[frames[0].time] = None
-    out_file = io.BytesIO()
-    wf = wave.open(out_file, "wb")
+    audiofilename = "test" + str(datetime.datetime.now())
+    wf = wave.open(audiofilename, "wb")
     wf.setnchannels(CHANNELS)
     wf.setframerate(RATE)
     wf.setsampwidth(2)
@@ -133,22 +139,48 @@ def get_transcription(frames):
         wf.writeframes(b"".join(frame.to_ndarray()))
     wf.close()
 
-    # To-Do: Look into WhisperTimeStampLogitsProcessor exception
-    try:
-        whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
-    except Exception as e:
-        return
+    result_text = ""
 
-    global transcription_text, last_transcribed_time
-    transcription_text += whisper_result["text"]
-    duration = whisper_result["chunks"][0]["timestamp"][1]
-    if not duration:
-        duration = 5.0
-    last_transcribed_time += duration
+    try:
+        segments, _ = model.transcribe(audiofilename,
+                                       language="en",
+                                       beam_size=5,
+                                       vad_filter=True,
+                                       vad_parameters=dict(min_silence_duration_ms=500)
+                                       )
+        segments = list(segments)
+        result_text = ""
+        duration = 0.0
+        for segment in segments:
+            result_text += segment.text
+            start_time = segment.start
+            end_time = segment.end
+            if not segment.start:
+                start_time = 0.0
+            if not segment.end:
+                end_time = 5.5
+            duration += (end_time - start_time)
+
+        global last_transcribed_time
+        last_transcribed_time += duration
+
+    except Exception as e:
+        print("Exception" + str(e))
+        pass
+
+    # Best-effort cleanup of the temporary WAV file written above.
+    try:
+        os.remove(audiofilename)
+    except Exception as e:
+        print("Exception :", str(e))
+        pass
+
+    global transcription_text
+    transcription_text += result_text
 
     result = {
             "cmd": "SHOW_TRANSCRIPTION",
-            "text": whisper_result["text"]
+            "text": result_text
     }
     sorted_transcripts[frames[0].time] = result
     return result
@@ -167,6 +199,9 @@ def get_final_summary_response():
                     seconds=round(last_transcribed_time))),
             "summary": final_summary
     }
+
+    with open("meeting_titles_and_summaries.txt", "a") as f:
+        f.write(json.dumps(incremental_responses))
     return response
 
 
@@ -196,7 +231,7 @@ class AudioStreamTrack(MediaStreamTrack):
                     else None
             )
 
-        if len(transcription_text) > 500:
+        if len(transcription_text) > 750:
             llm_input_text = transcription_text
             transcription_text = ""
             llm_result = run_in_executor(get_title_and_summary,
diff --git a/trials/api.py b/trials/api.py
index e69de29b..5e25f4d1 100644
--- a/trials/api.py
+++ b/trials/api.py
@@ -0,0 +1,56 @@
+import requests
+import spacy
+
+# This is the URL of text-generation-webui
+URL = "http://216.153.52.83:5000/api/v1/generate"
+
+headers = {
+    "Content-Type": "application/json"
+}
+
+
+def split_text_file(filename, token_count):
+    """Split a text file into space-joined parts of token_count - 5 tokens.
+
+    The 5-token margin is presumably prompt headroom (TODO confirm). Tokens
+    come from spaCy's en_core_web_md pipeline; none are skipped between parts.
+    Raises ValueError if token_count is 5 or less (no room for any token).
+    """
+    part_size = token_count - 5
+    if part_size <= 0:
+        raise ValueError("token_count must be greater than 5")
+
+    nlp = spacy.load('en_core_web_md')
+    with open(filename, 'r') as file:
+        doc = nlp(file.read())
+
+    # Step by the part size itself so no tokens are dropped between parts.
+    return [
+        ' '.join(token.text for token in doc[start:start + part_size])
+        for start in range(0, len(doc), part_size)
+    ]
+
+
+parts = split_text_file("transcript.txt", 1600)
+summaries = []
+
+for part in parts:
+    prompt = f"""
+              ### Human:
+             Given the following text, distill the most important information 
+             into a short summary:  {part}
+
+              ### Assistant:
+              """
+    try:
+        response = requests.post(URL, headers=headers, json={"prompt": prompt})
+        result = response.json()
+        print(result)
+        # text-generation-webui replies with {"results": [{"text": ...}]}.
+        summaries.append(result["results"][0]["text"])
+    except Exception as e:
+        print(str(e))
+
+final_summary = " ".join(summaries)
+with open("sum.txt", "w") as summary_file:
+    summary_file.write(final_summary)
\ No newline at end of file