mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
use faster-whisper pipeline
This commit is contained in:
30
format_output.py
Normal file
30
format_output.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
"""Reformat the saved meeting topics into plain-text files.

Reads the JSON document stored in ``meeting_titles_and_summaries.txt`` and,
for every entry of its ``"topics"`` list, appends

* the entry's transcript to ``meeting_transcript.txt``, and
* a TITLE / DESCRIPTION / TRANSCRIPT record to
  ``meeting_title_description.txt``.

Each topic entry must provide the ``"title"``, ``"description"`` and
``"transcript"`` keys; a missing key raises ``KeyError``.
"""
import json

with open("meeting_titles_and_summaries.txt", "r") as f:
    outputs = f.read()

outputs = json.loads(outputs)

# Both outputs are opened in append mode, so repeated runs accumulate records.
# The ``with`` statement guarantees the handles are flushed and closed even
# when a topic entry is malformed and the loop raises part-way through.
with open("meeting_transcript.txt", "a") as transcript_file, \
        open("meeting_title_description.txt", "a") as title_description_file:
    for item in outputs["topics"]:
        # Transcript-only file: topics are written back to back, with no
        # separator between them.
        transcript_file.write(item["transcript"])

        title_description_file.write("TITLE: \n")
        title_description_file.write(item["title"])
        title_description_file.write("\n")

        title_description_file.write("DESCRIPTION: \n")
        title_description_file.write(item["description"])
        title_description_file.write("\n")

        title_description_file.write("TRANSCRIPT: \n")
        title_description_file.write(item["transcript"])
        title_description_file.write("\n")

        # Visual divider between consecutive topic records.
        title_description_file.write("---------------------------------------- \n\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,11 +1,13 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
|
import os
|
||||||
import io
|
import io
|
||||||
|
import numpy as np
|
||||||
import json
|
import json
|
||||||
import uuid
|
import uuid
|
||||||
import wave
|
import wave
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
import aiohttp_cors
|
import aiohttp_cors
|
||||||
import jax.numpy as jnp
|
import jax.numpy as jnp
|
||||||
import requests
|
import requests
|
||||||
@@ -21,9 +23,9 @@ from sortedcontainers import SortedDict
|
|||||||
pcs = set()
|
pcs = set()
|
||||||
relay = MediaRelay()
|
relay = MediaRelay()
|
||||||
data_channel = None
|
data_channel = None
|
||||||
pipeline = FlaxWhisperPipline("openai/whisper-tiny",
|
model = WhisperModel("tiny", device="cpu",
|
||||||
dtype=jnp.float16,
|
compute_type="float32",
|
||||||
batch_size=16)
|
num_workers=12)
|
||||||
|
|
||||||
CHANNELS = 2
|
CHANNELS = 2
|
||||||
RATE = 48000
|
RATE = 48000
|
||||||
@@ -80,6 +82,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
|
|||||||
"cmd": "UPDATE_TOPICS",
|
"cmd": "UPDATE_TOPICS",
|
||||||
"topics": incremental_responses,
|
"topics": incremental_responses,
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Exception" + str(e))
|
print("Exception" + str(e))
|
||||||
result = None
|
result = None
|
||||||
@@ -113,7 +116,8 @@ def channel_send_transcript(channel):
|
|||||||
# Due to exceptions if one of the earlier batches can't return
|
# Due to exceptions if one of the earlier batches can't return
|
||||||
# a transcript, we don't want to be stuck waiting for the result
|
# a transcript, we don't want to be stuck waiting for the result
|
||||||
# With the threshold size of 3, we pop the first(lost) element
|
# With the threshold size of 3, we pop the first(lost) element
|
||||||
elif len(sorted_transcripts) >= 3:
|
else:
|
||||||
|
if len(sorted_transcripts) >= 3:
|
||||||
del sorted_transcripts[least_time]
|
del sorted_transcripts[least_time]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Exception", str(e))
|
print("Exception", str(e))
|
||||||
@@ -121,10 +125,12 @@ def channel_send_transcript(channel):
|
|||||||
|
|
||||||
|
|
||||||
def get_transcription(frames):
|
def get_transcription(frames):
|
||||||
|
print(type(frames))
|
||||||
|
print(type(frames[0]))
|
||||||
print("Transcribing..")
|
print("Transcribing..")
|
||||||
sorted_transcripts[frames[0].time] = None
|
sorted_transcripts[frames[0].time] = None
|
||||||
out_file = io.BytesIO()
|
audiofilename = "test" + str(datetime.datetime.now())
|
||||||
wf = wave.open(out_file, "wb")
|
wf = wave.open(audiofilename, "wb")
|
||||||
wf.setnchannels(CHANNELS)
|
wf.setnchannels(CHANNELS)
|
||||||
wf.setframerate(RATE)
|
wf.setframerate(RATE)
|
||||||
wf.setsampwidth(2)
|
wf.setsampwidth(2)
|
||||||
@@ -133,22 +139,48 @@ def get_transcription(frames):
|
|||||||
wf.writeframes(b"".join(frame.to_ndarray()))
|
wf.writeframes(b"".join(frame.to_ndarray()))
|
||||||
wf.close()
|
wf.close()
|
||||||
|
|
||||||
# To-Do: Look into WhisperTimeStampLogitsProcessor exception
|
result_text = ""
|
||||||
try:
|
|
||||||
whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
|
|
||||||
except Exception as e:
|
|
||||||
return
|
|
||||||
|
|
||||||
global transcription_text, last_transcribed_time
|
try:
|
||||||
transcription_text += whisper_result["text"]
|
segments, _ = model.transcribe(audiofilename,
|
||||||
duration = whisper_result["chunks"][0]["timestamp"][1]
|
language="en",
|
||||||
if not duration:
|
beam_size=5,
|
||||||
duration = 5.0
|
vad_filter=True,
|
||||||
|
vad_parameters=dict(min_silence_duration_ms=500)
|
||||||
|
)
|
||||||
|
segments = list(segments)
|
||||||
|
result_text = ""
|
||||||
|
duration = 0.0
|
||||||
|
for segment in segments:
|
||||||
|
result_text += segment.text
|
||||||
|
start_time = segment.start
|
||||||
|
end_time = segment.end
|
||||||
|
if not segment.start:
|
||||||
|
start_time = 0.0
|
||||||
|
if not segment.end:
|
||||||
|
end_time = 5.5
|
||||||
|
duration += (end_time - start_time)
|
||||||
|
|
||||||
|
global last_transcribed_time
|
||||||
last_transcribed_time += duration
|
last_transcribed_time += duration
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print("Exception" + str(e))
|
||||||
|
pass
|
||||||
|
|
||||||
|
#
|
||||||
|
try:
|
||||||
|
os.remove(audiofilename)
|
||||||
|
except Exception as e:
|
||||||
|
print("Exception :", str(e))
|
||||||
|
pass
|
||||||
|
|
||||||
|
global transcription_text
|
||||||
|
transcription_text += result_text
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"cmd": "SHOW_TRANSCRIPTION",
|
"cmd": "SHOW_TRANSCRIPTION",
|
||||||
"text": whisper_result["text"]
|
"text": result_text
|
||||||
}
|
}
|
||||||
sorted_transcripts[frames[0].time] = result
|
sorted_transcripts[frames[0].time] = result
|
||||||
return result
|
return result
|
||||||
@@ -167,6 +199,9 @@ def get_final_summary_response():
|
|||||||
seconds=round(last_transcribed_time))),
|
seconds=round(last_transcribed_time))),
|
||||||
"summary": final_summary
|
"summary": final_summary
|
||||||
}
|
}
|
||||||
|
|
||||||
|
with open("meeting_titles_and_summaries.txt", "a") as f:
|
||||||
|
f.write(json.dumps(incremental_responses))
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
@@ -196,7 +231,7 @@ class AudioStreamTrack(MediaStreamTrack):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(transcription_text) > 500:
|
if len(transcription_text) > 750:
|
||||||
llm_input_text = transcription_text
|
llm_input_text = transcription_text
|
||||||
transcription_text = ""
|
transcription_text = ""
|
||||||
llm_result = run_in_executor(get_title_and_summary,
|
llm_result = run_in_executor(get_title_and_summary,
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
import requests
|
||||||
|
import spacy
|
||||||
|
|
||||||
|
# This is the URL of text-generation-webui
|
||||||
|
URL = "http://216.153.52.83:5000/api/v1/generate"
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"Content-Type": "application/json"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def split_text_file(filename, token_count):
    """Split a text file into consecutive chunks of spaCy tokens.

    The file is tokenized with the ``en_core_web_md`` spaCy model and cut
    into back-to-back windows of ``token_count`` tokens; the last window may
    be shorter. Every token of the document ends up in exactly one chunk.

    Args:
        filename: Path of the text file to read.
        token_count: Maximum number of tokens per chunk; must be positive.

    Returns:
        A list of strings, one per chunk, each built by joining the chunk's
        token texts with single spaces (original whitespace is not
        preserved). An empty file yields an empty list.

    Raises:
        ValueError: If ``token_count`` is not a positive number.
    """
    # A non-positive window would never advance through the document.
    if token_count <= 0:
        raise ValueError("token_count must be a positive integer")

    nlp = spacy.load('en_core_web_md')

    with open(filename, 'r') as file:
        text = file.read()

    doc = nlp(text)
    total_tokens = len(doc)

    parts = []
    for start_index in range(0, total_tokens, token_count):
        # Slice the full window. Trimming the end of the slice while still
        # advancing by ``token_count`` would silently drop tokens between
        # chunks (and produce a negative end index for small windows).
        part_tokens = doc[start_index:start_index + token_count]
        parts.append(' '.join(token.text for token in part_tokens))

    return parts
|
||||||
|
|
||||||
|
|
||||||
|
# Summarize transcript.txt chunk by chunk with the text-generation-webui
# endpoint at URL, then store the combined summary in sum.txt.
parts = split_text_file("transcript.txt", 1600)

# Summaries of the individual chunks, in document order.
chunk_summaries = []

for part in parts:
    prompt = f"""
### Human:
Given the following text, distill the most important information
into a short summary: {part}

### Assistant:
"""
    data = {
        "prompt": prompt
    }
    try:
        response = requests.post(URL, headers=headers, json=data)
        response.raise_for_status()
        payload = response.json()
        print(payload)
        # NOTE(review): assumes the legacy text-generation-webui response
        # shape {"results": [{"text": ...}]} -- confirm against the server.
        chunk_summaries.append(payload["results"][0]["text"].strip())
    except Exception as e:
        # Best effort: a failed or malformed response only skips this chunk,
        # the remaining chunks are still summarized.
        print(str(e))

# Join the per-chunk summaries into the final text (empty if every request
# failed or there was nothing to summarize).
final_summary = " ".join(chunk_summaries)

with open("sum.txt", "w") as summary_file:
    summary_file.write(final_summary)
|
||||||
Reference in New Issue
Block a user