use faster-whisper pipeline

This commit is contained in:
Gokul Mohanarangan
2023-07-24 13:19:24 +05:30
parent d2454b6d2d
commit 02c928a7cf
3 changed files with 142 additions and 21 deletions

30
format_output.py Normal file
View File

@@ -0,0 +1,30 @@
import json


def format_output(
    source_path="meeting_titles_and_summaries.txt",
    transcript_path="meeting_transcript.txt",
    title_description_path="meeting_title_description.txt",
):
    """Expand the saved topics JSON into two plain-text report files.

    Reads a JSON document from ``source_path`` and, for every entry under its
    ``"topics"`` key, appends:

    * the entry's transcript to ``transcript_path``;
    * a TITLE / DESCRIPTION / TRANSCRIPT section followed by a dashed
      separator to ``title_description_path``.

    Both output files are opened in append mode, so running this more than
    once adds to whatever they already contain.

    Args:
        source_path: JSON file holding an object with a ``"topics"`` list.
        transcript_path: File that receives the concatenated transcripts.
        title_description_path: File that receives the per-topic sections.

    Raises:
        OSError: If any of the files cannot be opened.
        json.JSONDecodeError: If ``source_path`` does not contain valid JSON.
        KeyError: If ``"topics"`` or a per-topic field is missing.
    """
    with open(source_path, "r") as f:
        outputs = json.loads(f.read())

    # Context managers guarantee both outputs are flushed and closed even when
    # a topic is malformed half-way through the loop.
    with open(transcript_path, "a") as transcript_file, \
            open(title_description_path, "a") as title_description_file:
        for item in outputs["topics"]:
            transcript_file.write(item["transcript"])
            title_description_file.writelines((
                "TITLE: \n",
                item["title"],
                "\n",
                "DESCRIPTION: \n",
                item["description"],
                "\n",
                "TRANSCRIPT: \n",
                item["transcript"],
                "\n",
                "---------------------------------------- \n\n",
            ))


if __name__ == "__main__":
    format_output()

View File

@@ -1,11 +1,13 @@
import asyncio import asyncio
import datetime import datetime
import os
import io import io
import numpy as np
import json import json
import uuid import uuid
import wave import wave
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from faster_whisper import WhisperModel
import aiohttp_cors import aiohttp_cors
import jax.numpy as jnp import jax.numpy as jnp
import requests import requests
@@ -21,9 +23,9 @@ from sortedcontainers import SortedDict
pcs = set() pcs = set()
relay = MediaRelay() relay = MediaRelay()
data_channel = None data_channel = None
pipeline = FlaxWhisperPipline("openai/whisper-tiny", model = WhisperModel("tiny", device="cpu",
dtype=jnp.float16, compute_type="float32",
batch_size=16) num_workers=12)
CHANNELS = 2 CHANNELS = 2
RATE = 48000 RATE = 48000
@@ -80,6 +82,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
"cmd": "UPDATE_TOPICS", "cmd": "UPDATE_TOPICS",
"topics": incremental_responses, "topics": incremental_responses,
} }
except Exception as e: except Exception as e:
print("Exception" + str(e)) print("Exception" + str(e))
result = None result = None
@@ -113,18 +116,21 @@ def channel_send_transcript(channel):
# Due to exceptions if one of the earlier batches can't return # Due to exceptions if one of the earlier batches can't return
# a transcript, we don't want to be stuck waiting for the result # a transcript, we don't want to be stuck waiting for the result
# With the threshold size of 3, we pop the first(lost) element # With the threshold size of 3, we pop the first(lost) element
elif len(sorted_transcripts) >= 3: else:
del sorted_transcripts[least_time] if len(sorted_transcripts) >= 3:
del sorted_transcripts[least_time]
except Exception as e: except Exception as e:
print("Exception", str(e)) print("Exception", str(e))
pass pass
def get_transcription(frames): def get_transcription(frames):
print(type(frames))
print(type(frames[0]))
print("Transcribing..") print("Transcribing..")
sorted_transcripts[frames[0].time] = None sorted_transcripts[frames[0].time] = None
out_file = io.BytesIO() audiofilename = "test" + str(datetime.datetime.now())
wf = wave.open(out_file, "wb") wf = wave.open(audiofilename, "wb")
wf.setnchannels(CHANNELS) wf.setnchannels(CHANNELS)
wf.setframerate(RATE) wf.setframerate(RATE)
wf.setsampwidth(2) wf.setsampwidth(2)
@@ -133,22 +139,48 @@ def get_transcription(frames):
wf.writeframes(b"".join(frame.to_ndarray())) wf.writeframes(b"".join(frame.to_ndarray()))
wf.close() wf.close()
# To-Do: Look into WhisperTimeStampLogitsProcessor exception result_text = ""
try:
whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
except Exception as e:
return
global transcription_text, last_transcribed_time try:
transcription_text += whisper_result["text"] segments, _ = model.transcribe(audiofilename,
duration = whisper_result["chunks"][0]["timestamp"][1] language="en",
if not duration: beam_size=5,
duration = 5.0 vad_filter=True,
last_transcribed_time += duration vad_parameters=dict(min_silence_duration_ms=500)
)
segments = list(segments)
result_text = ""
duration = 0.0
for segment in segments:
result_text += segment.text
start_time = segment.start
end_time = segment.end
if not segment.start:
start_time = 0.0
if not segment.end:
end_time = 5.5
duration += (end_time - start_time)
global last_transcribed_time
last_transcribed_time += duration
except Exception as e:
print("Exception" + str(e))
pass
#
try:
os.remove(audiofilename)
except Exception as e:
print("Exception :", str(e))
pass
global transcription_text
transcription_text += result_text
result = { result = {
"cmd": "SHOW_TRANSCRIPTION", "cmd": "SHOW_TRANSCRIPTION",
"text": whisper_result["text"] "text": result_text
} }
sorted_transcripts[frames[0].time] = result sorted_transcripts[frames[0].time] = result
return result return result
@@ -167,6 +199,9 @@ def get_final_summary_response():
seconds=round(last_transcribed_time))), seconds=round(last_transcribed_time))),
"summary": final_summary "summary": final_summary
} }
with open("meeting_titles_and_summaries.txt", "a") as f:
f.write(json.dumps(incremental_responses))
return response return response
@@ -196,7 +231,7 @@ class AudioStreamTrack(MediaStreamTrack):
else None else None
) )
if len(transcription_text) > 500: if len(transcription_text) > 750:
llm_input_text = transcription_text llm_input_text = transcription_text
transcription_text = "" transcription_text = ""
llm_result = run_in_executor(get_title_and_summary, llm_result = run_in_executor(get_title_and_summary,

View File

@@ -0,0 +1,56 @@
import requests
import spacy
# Generation endpoint of the text-generation-webui server.
URL = "http://216.153.52.83:5000/api/v1/generate"

# HTTP headers declaring a JSON request body.
headers = {"Content-Type": "application/json"}
def split_text_file(filename, token_count):
    """Split a text file into space-joined chunks of spaCy tokens.

    The file is tokenised with the ``en_core_web_md`` spaCy model and walked
    in windows of ``token_count`` tokens. Each window contributes one string
    made of its token texts joined by single spaces.

    Args:
        filename: Path of the text file to read.
        token_count: Window size in tokens; must be positive.

    Returns:
        A list of strings, one per window, in document order. An empty file
        yields an empty list.

    Raises:
        ValueError: If ``token_count`` is not positive (the window would
            never advance).
        OSError: If ``filename`` cannot be opened.
    """
    if token_count <= 0:
        raise ValueError("token_count must be a positive integer")

    nlp = spacy.load('en_core_web_md')
    with open(filename, 'r') as file:
        text = file.read()

    doc = nlp(text)
    total_tokens = len(doc)

    parts = []
    for start_index in range(0, total_tokens, token_count):
        end_index = start_index + token_count
        # NOTE(review): the last 5 tokens of every window are left out and the
        # next window starts at end_index, so they appear in no chunk. This is
        # kept as-is; confirm whether it is a deliberate margin.
        # Clamp so a window smaller than 5 tokens yields an empty chunk rather
        # than a negative slice end that would wrap around from the document end.
        stop_index = max(start_index, end_index - 5)
        part_tokens = doc[start_index:stop_index]
        parts.append(' '.join(token.text for token in part_tokens))

    return parts
# Summarise "transcript.txt" chunk by chunk and store the combined result in
# "sum.txt". Each chunk is sent to the generation endpoint on its own; the
# returned texts are joined with single spaces.
final_summary = ""
parts = split_text_file("transcript.txt", 1600)
# Not read anywhere in the visible code; kept so the module's names stay the same.
previous_summary = ""

chunk_summaries = []
for part in parts:
    prompt = f"""
### Human:
Given the following text, distill the most important information
into a short summary: {part}
### Assistant:
"""
    data = {
        "prompt": prompt
    }
    try:
        response = requests.post(URL, headers=headers, json=data)
        payload = response.json()
        print(payload)
        # text-generation-webui's blocking API answers with
        # {"results": [{"text": "<generated text>"}]} (see its API example).
        chunk_summaries.append(payload["results"][0]["text"])
    except Exception as e:
        # Best effort: a failed or malformed response skips this chunk only.
        print(str(e))

# Join the collected summaries once; joining a plain string would instead
# insert a space between every character.
final_summary = " ".join(chunk_summaries)

with open("sum.txt", "w") as summary_file:
    summary_file.write(final_summary)