From 1cd2c20a7be141d2cd1d518178de53e70246d8b5 Mon Sep 17 00:00:00 2001
From: gokul <gokul@monadical.com>
Date: Fri, 23 Jun 2023 19:58:41 +0530
Subject: [PATCH] Add START/END markers and duration stats to realtime transcription output

---
 requirements.txt    |  3 ++-
 whisjax_realtime.py | 17 +++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 2e918b28..490ee05d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -47,4 +47,5 @@ scattertext
 pandas
 jupyter
 seaborn
-matplotlib
\ No newline at end of file
+matplotlib
+termcolor
\ No newline at end of file
diff --git a/whisjax_realtime.py b/whisjax_realtime.py
index b0503b17..6bbe0f19 100644
--- a/whisjax_realtime.py
+++ b/whisjax_realtime.py
@@ -12,6 +12,9 @@ from viz_utilities import create_wordcloud, create_talk_diff_scatter_viz
 from text_utilities import summarize, post_process_transcription
 from loguru import logger
 import nltk
+import time
+from termcolor import colored
+
 nltk.download('stopwords', quiet=True)
 
 config = configparser.ConfigParser()
@@ -68,9 +71,11 @@ def main():
     try:
         while proceed:
             frames = []
+            start_time = time.time()
             for i in range(0, int(RATE / FRAMES_PER_BUFFER * RECORD_SECONDS)):
                 data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                 frames.append(data)
+            end_time = time.time()
 
             wf = wave.open(TEMP_AUDIO_FILE, 'wb')
             wf.setnchannels(CHANNELS)
@@ -80,8 +85,6 @@ def main():
             wf.close()
 
             whisper_result = pipeline(TEMP_AUDIO_FILE, return_timestamps=True)
-            print(whisper_result['text'])
-
             timestamp = whisper_result["chunks"][0]["timestamp"]
             start = timestamp[0]
             end = timestamp[1]
@@ -89,12 +92,18 @@ def main():
                 end = start + 15.0
             duration = end - start
             item = {'timestamp': (last_transcribed_time, last_transcribed_time + duration),
-                    'text': whisper_result['text']}
+                    'text': whisper_result['text'],
+                    'stats': (str(end_time - start_time), str(duration))
+                    }
             last_transcribed_time = last_transcribed_time + duration
             transcript_with_timestamp["chunks"].append(item)
-
             transcription += whisper_result['text']
 
+            print(colored("<START>", "yellow"))
+            print(colored(whisper_result['text'], 'green'))
+            print(colored("<END> Recorded duration: " + str(end_time - start_time) + " | Transcribed duration: " +
+                          str(duration), "yellow"))
+
     except Exception as e:
         print(e)
     finally: