Add start/end tags and duration stats to transcription output

This commit is contained in:
gokul
2023-06-23 19:58:41 +05:30
parent 2fe02bb70b
commit 1cd2c20a7b
2 changed files with 15 additions and 5 deletions

View File

@@ -47,4 +47,5 @@ scattertext
pandas
jupyter
seaborn
matplotlib
matplotlib
termcolor

View File

@@ -12,6 +12,9 @@ from viz_utilities import create_wordcloud, create_talk_diff_scatter_viz
from text_utilities import summarize, post_process_transcription
from loguru import logger
import nltk
import time
from termcolor import colored
nltk.download('stopwords', quiet=True)
config = configparser.ConfigParser()
@@ -68,9 +71,11 @@ def main():
try:
while proceed:
frames = []
start_time = time.time()
for i in range(0, int(RATE / FRAMES_PER_BUFFER * RECORD_SECONDS)):
data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
frames.append(data)
end_time = time.time()
wf = wave.open(TEMP_AUDIO_FILE, 'wb')
wf.setnchannels(CHANNELS)
@@ -80,8 +85,6 @@ def main():
wf.close()
whisper_result = pipeline(TEMP_AUDIO_FILE, return_timestamps=True)
print(whisper_result['text'])
timestamp = whisper_result["chunks"][0]["timestamp"]
start = timestamp[0]
end = timestamp[1]
@@ -89,12 +92,18 @@ def main():
end = start + 15.0
duration = end - start
item = {'timestamp': (last_transcribed_time, last_transcribed_time + duration),
'text': whisper_result['text']}
'text': whisper_result['text'],
'stats': (str(end_time - start_time), str(duration))
}
last_transcribed_time = last_transcribed_time + duration
transcript_with_timestamp["chunks"].append(item)
transcription += whisper_result['text']
print(colored("<START>", "yellow"))
print(colored(whisper_result['text'], 'green'))
print(colored("<END> Recorded duration: " + str(end_time - start_time) + " | Transcribed duration: " +
str(duration), "yellow"))
except Exception as e:
print(e)
finally: