mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
use faster-whisper pipeline
This commit is contained in:
30
format_output.py
Normal file
30
format_output.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import json
|
||||
|
||||
with open("meeting_titles_and_summaries.txt", "r") as f:
|
||||
outputs = f.read()
|
||||
|
||||
outputs = json.loads(outputs)
|
||||
|
||||
transcript_file = open("meeting_transcript.txt", "a")
|
||||
title_description_file = open("meeting_title_description.txt", "a")
|
||||
|
||||
for item in outputs["topics"]:
|
||||
transcript_file.write(item["transcript"])
|
||||
|
||||
title_description_file.write("TITLE: \n")
|
||||
title_description_file.write(item["title"])
|
||||
title_description_file.write("\n")
|
||||
|
||||
title_description_file.write("DESCRIPTION: \n")
|
||||
title_description_file.write(item["description"])
|
||||
title_description_file.write("\n")
|
||||
|
||||
title_description_file.write("TRANSCRIPT: \n")
|
||||
title_description_file.write(item["transcript"])
|
||||
title_description_file.write("\n")
|
||||
|
||||
title_description_file.write("---------------------------------------- \n\n")
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
import os
|
||||
import io
|
||||
import numpy as np
|
||||
import json
|
||||
import uuid
|
||||
import wave
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
import aiohttp_cors
|
||||
import jax.numpy as jnp
|
||||
import requests
|
||||
@@ -21,9 +23,9 @@ from sortedcontainers import SortedDict
|
||||
pcs = set()
|
||||
relay = MediaRelay()
|
||||
data_channel = None
|
||||
pipeline = FlaxWhisperPipline("openai/whisper-tiny",
|
||||
dtype=jnp.float16,
|
||||
batch_size=16)
|
||||
model = WhisperModel("tiny", device="cpu",
|
||||
compute_type="float32",
|
||||
num_workers=12)
|
||||
|
||||
CHANNELS = 2
|
||||
RATE = 48000
|
||||
@@ -80,6 +82,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
|
||||
"cmd": "UPDATE_TOPICS",
|
||||
"topics": incremental_responses,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print("Exception" + str(e))
|
||||
result = None
|
||||
@@ -113,18 +116,21 @@ def channel_send_transcript(channel):
|
||||
# Due to exceptions if one of the earlier batches can't return
|
||||
# a transcript, we don't want to be stuck waiting for the result
|
||||
# With the threshold size of 3, we pop the first(lost) element
|
||||
elif len(sorted_transcripts) >= 3:
|
||||
del sorted_transcripts[least_time]
|
||||
else:
|
||||
if len(sorted_transcripts) >= 3:
|
||||
del sorted_transcripts[least_time]
|
||||
except Exception as e:
|
||||
print("Exception", str(e))
|
||||
pass
|
||||
|
||||
|
||||
def get_transcription(frames):
|
||||
print(type(frames))
|
||||
print(type(frames[0]))
|
||||
print("Transcribing..")
|
||||
sorted_transcripts[frames[0].time] = None
|
||||
out_file = io.BytesIO()
|
||||
wf = wave.open(out_file, "wb")
|
||||
audiofilename = "test" + str(datetime.datetime.now())
|
||||
wf = wave.open(audiofilename, "wb")
|
||||
wf.setnchannels(CHANNELS)
|
||||
wf.setframerate(RATE)
|
||||
wf.setsampwidth(2)
|
||||
@@ -133,22 +139,48 @@ def get_transcription(frames):
|
||||
wf.writeframes(b"".join(frame.to_ndarray()))
|
||||
wf.close()
|
||||
|
||||
# To-Do: Look into WhisperTimeStampLogitsProcessor exception
|
||||
try:
|
||||
whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
|
||||
except Exception as e:
|
||||
return
|
||||
result_text = ""
|
||||
|
||||
global transcription_text, last_transcribed_time
|
||||
transcription_text += whisper_result["text"]
|
||||
duration = whisper_result["chunks"][0]["timestamp"][1]
|
||||
if not duration:
|
||||
duration = 5.0
|
||||
last_transcribed_time += duration
|
||||
try:
|
||||
segments, _ = model.transcribe(audiofilename,
|
||||
language="en",
|
||||
beam_size=5,
|
||||
vad_filter=True,
|
||||
vad_parameters=dict(min_silence_duration_ms=500)
|
||||
)
|
||||
segments = list(segments)
|
||||
result_text = ""
|
||||
duration = 0.0
|
||||
for segment in segments:
|
||||
result_text += segment.text
|
||||
start_time = segment.start
|
||||
end_time = segment.end
|
||||
if not segment.start:
|
||||
start_time = 0.0
|
||||
if not segment.end:
|
||||
end_time = 5.5
|
||||
duration += (end_time - start_time)
|
||||
|
||||
global last_transcribed_time
|
||||
last_transcribed_time += duration
|
||||
|
||||
except Exception as e:
|
||||
print("Exception" + str(e))
|
||||
pass
|
||||
|
||||
#
|
||||
try:
|
||||
os.remove(audiofilename)
|
||||
except Exception as e:
|
||||
print("Exception :", str(e))
|
||||
pass
|
||||
|
||||
global transcription_text
|
||||
transcription_text += result_text
|
||||
|
||||
result = {
|
||||
"cmd": "SHOW_TRANSCRIPTION",
|
||||
"text": whisper_result["text"]
|
||||
"text": result_text
|
||||
}
|
||||
sorted_transcripts[frames[0].time] = result
|
||||
return result
|
||||
@@ -167,6 +199,9 @@ def get_final_summary_response():
|
||||
seconds=round(last_transcribed_time))),
|
||||
"summary": final_summary
|
||||
}
|
||||
|
||||
with open("meeting_titles_and_summaries.txt", "a") as f:
|
||||
f.write(json.dumps(incremental_responses))
|
||||
return response
|
||||
|
||||
|
||||
@@ -196,7 +231,7 @@ class AudioStreamTrack(MediaStreamTrack):
|
||||
else None
|
||||
)
|
||||
|
||||
if len(transcription_text) > 500:
|
||||
if len(transcription_text) > 750:
|
||||
llm_input_text = transcription_text
|
||||
transcription_text = ""
|
||||
llm_result = run_in_executor(get_title_and_summary,
|
||||
@@ -0,0 +1,56 @@
|
||||
import requests
|
||||
import spacy
|
||||
|
||||
# This is the URL of text-generation-webui
|
||||
URL = "http://216.153.52.83:5000/api/v1/generate"
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
|
||||
def split_text_file(filename, token_count):
|
||||
nlp = spacy.load('en_core_web_md')
|
||||
|
||||
with open(filename, 'r') as file:
|
||||
text = file.read()
|
||||
|
||||
doc = nlp(text)
|
||||
total_tokens = len(doc)
|
||||
|
||||
parts = []
|
||||
start_index = 0
|
||||
|
||||
while start_index < total_tokens:
|
||||
end_index = start_index + token_count
|
||||
part_tokens = doc[start_index:end_index-5]
|
||||
part = ' '.join(token.text for token in part_tokens)
|
||||
parts.append(part)
|
||||
start_index = end_index
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
final_summary = ""
|
||||
parts = split_text_file("transcript.txt", 1600)
|
||||
previous_summary = ""
|
||||
|
||||
for part in parts:
|
||||
prompt = f"""
|
||||
### Human:
|
||||
Given the following text, distill the most important information
|
||||
into a short summary: {part}
|
||||
|
||||
### Assistant:
|
||||
"""
|
||||
data = {
|
||||
"prompt": prompt
|
||||
}
|
||||
try:
|
||||
response = requests.post(URL, headers=headers, json=data)
|
||||
print(response.json())
|
||||
except Exception as e:
|
||||
print(str(e))
|
||||
|
||||
with open("sum.txt", "w") as sum:
|
||||
sum.write(" ".join(final_summary))
|
||||
Reference in New Issue
Block a user