use faster-whisper pipeline

2026-02-04 18:06:48 +00:00 · 2023-07-24 13:19:24 +05:30
parent d2454b6d2d
commit 02c928a7cf
3 changed files with 142 additions and 21 deletions
--- a/format_output.py
+++ b/format_output.py
@@ -0,0 +1,30 @@
 import json
 # Split the JSON dump of topics into two append-only text reports:
 #   meeting_transcript.txt         - the raw transcript of every topic
 #   meeting_title_description.txt  - title, description and transcript per
 #                                    topic, separated by a dashed rule
 # NOTE(review): json.load expects exactly one JSON document with a "topics"
 # key in the input file - confirm the producer never appends a second one.
 with open("meeting_titles_and_summaries.txt", "r") as f:
    outputs = json.load(f)
 # Context managers guarantee both reports are flushed and closed even when
 # a topic is missing one of the expected keys.
 with open("meeting_transcript.txt", "a") as transcript_file, \
        open("meeting_title_description.txt", "a") as title_description_file:
    for item in outputs["topics"]:
        transcript_file.write(item["transcript"])
        title_description_file.writelines((
            "TITLE: \n",
            item["title"],
            "\n",
            "DESCRIPTION: \n",
            item["description"],
            "\n",
            "TRANSCRIPT: \n",
            item["transcript"],
            "\n",
            "---------------------------------------- \n\n",
        ))
--- a/server_executor_cleaned.py
+++ b/server_executor_cleaned.py
@@ -1,11 +1,13 @@
 import asyncio
 import datetime
 import os
 import io
 import numpy as np
 import json
 import uuid
 import wave
 from concurrent.futures import ThreadPoolExecutor
-
+from faster_whisper import WhisperModel
 import aiohttp_cors
 import jax.numpy as jnp
 import requests
@@ -21,9 +23,9 @@ from sortedcontainers import SortedDict
 pcs = set()
 relay = MediaRelay()
 data_channel = None
-pipeline = FlaxWhisperPipline("openai/whisper-tiny",
+model = WhisperModel("tiny", device="cpu",
-                              dtype=jnp.float16,
+                     compute_type="float32",
-                              batch_size=16)
+                     num_workers=12)
 CHANNELS = 2
 RATE = 48000
@@ -80,6 +82,7 @@ def get_title_and_summary(llm_input_text, last_timestamp):
                "cmd": "UPDATE_TOPICS",
                "topics": incremental_responses,
        }
    except Exception as e:
        print("Exception" + str(e))
        result = None
@@ -113,7 +116,8 @@ def channel_send_transcript(channel):
            # Due to exceptions if one of the earlier batches can't return
            # a transcript, we don't want to be stuck waiting for the result
            # With the threshold size of 3, we pop the first(lost) element
-            elif len(sorted_transcripts) >= 3:
+            else:
                if len(sorted_transcripts) >= 3:
                    del sorted_transcripts[least_time]
        except Exception as e:
            print("Exception", str(e))
@@ -121,10 +125,12 @@ def channel_send_transcript(channel):
 def get_transcription(frames):
    print(type(frames))
    print(type(frames[0]))
    print("Transcribing..")
    sorted_transcripts[frames[0].time] = None
-    out_file = io.BytesIO()
+    audiofilename = "test" + str(datetime.datetime.now())
-    wf = wave.open(out_file, "wb")
+    wf = wave.open(audiofilename, "wb")
    wf.setnchannels(CHANNELS)
    wf.setframerate(RATE)
    wf.setsampwidth(2)
@@ -133,22 +139,48 @@ def get_transcription(frames):
        wf.writeframes(b"".join(frame.to_ndarray()))
    wf.close()
-    # To-Do: Look into WhisperTimeStampLogitsProcessor exception
+    result_text = ""
    try:
        whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
    except Exception as e:
        return
-    global transcription_text, last_transcribed_time
+    try:
-    transcription_text += whisper_result["text"]
+        segments, _ = model.transcribe(audiofilename,
-    duration = whisper_result["chunks"][0]["timestamp"][1]
+                                       language="en",
-    if not duration:
+                                       beam_size=5,
-        duration = 5.0
+                                       vad_filter=True,
                                       vad_parameters=dict(min_silence_duration_ms=500)
                                       )
        segments = list(segments)
        result_text = ""
        duration = 0.0
        for segment in segments:
            result_text += segment.text
            start_time = segment.start
            end_time = segment.end
            if not segment.start:
                start_time = 0.0
            if not segment.end:
                end_time = 5.5
            duration += (end_time - start_time)
        global last_transcribed_time
        last_transcribed_time += duration
    except Exception as e:
        print("Exception" + str(e))
        pass
    #
    try:
        os.remove(audiofilename)
    except Exception as e:
        print("Exception :", str(e))
        pass
    global transcription_text
    transcription_text += result_text
    result = {
            "cmd": "SHOW_TRANSCRIPTION",
-            "text": whisper_result["text"]
+            "text": result_text
    }
    sorted_transcripts[frames[0].time] = result
    return result
@@ -167,6 +199,9 @@ def get_final_summary_response():
                    seconds=round(last_transcribed_time))),
            "summary": final_summary
    }
    with open("meeting_titles_and_summaries.txt", "a") as f:
        f.write(json.dumps(incremental_responses))
    return response
@@ -196,7 +231,7 @@ class AudioStreamTrack(MediaStreamTrack):
                    else None
            )
-        if len(transcription_text) > 500:
+        if len(transcription_text) > 750:
            llm_input_text = transcription_text
            transcription_text = ""
            llm_result = run_in_executor(get_title_and_summary,
--- a/trials/api.py
+++ b/trials/api.py
@@ -0,0 +1,56 @@
 import requests
 import spacy
 # Generate endpoint of the text-generation-webui server that receives
 # the prompts built below.
 URL = "http://216.153.52.83:5000/api/v1/generate"
 # Sent with every request; the body is posted as JSON.
 headers = {"Content-Type": "application/json"}
 def split_text_file(filename, token_count):
    """Split a text file into chunks of space-joined spaCy tokens.

    The file is tokenized with the ``en_core_web_md`` pipeline and walked
    in windows of ``token_count`` tokens; each window becomes one string
    whose tokens are joined with single spaces.

    Args:
        filename: Path of the text file to read.
        token_count: Window size in tokens; must be greater than zero.

    Returns:
        A list of chunk strings, in document order.

    Raises:
        ValueError: If ``token_count`` is not positive.
    """
    # Guard before any expensive work: with a non-positive window the start
    # index never moves past the end of the document and the loop below
    # would never terminate.
    if token_count <= 0:
        raise ValueError("token_count must be a positive integer")
    nlp = spacy.load('en_core_web_md')
    with open(filename, 'r') as file:
        text = file.read()
    doc = nlp(text)
    total_tokens = len(doc)
    parts = []
    start_index = 0
    while start_index < total_tokens:
        end_index = start_index + token_count
        # NOTE(review): the slice stops 5 tokens before end_index while the
        # next window starts AT end_index, so those 5 tokens appear in no
        # chunk. Kept as-is; confirm whether this is intended headroom.
        part_tokens = doc[start_index:end_index - 5]
        parts.append(' '.join(token.text for token in part_tokens))
        start_index = end_index
    return parts
 # Summarize transcript.txt chunk by chunk and write the combined result
 # to sum.txt.
 parts = split_text_file("transcript.txt", 1600)
 # One entry per chunk whose request succeeded, in document order.
 chunk_summaries = []
 for part in parts:
    prompt = f"""
              ### Human:
             Given the following text, distill the most important information 
             into a short summary:  {part}
              ### Assistant:
              """
    data = {"prompt": prompt}
    try:
        response = requests.post(URL, headers=headers, json=data)
        payload = response.json()
        print(payload)
        # NOTE(review): assumes the legacy text-generation-webui response
        # shape {"results": [{"text": ...}]} - confirm against the server.
        chunk_summaries.append(payload["results"][0]["text"])
    except Exception as e:
        # Best effort: a failed or malformed response skips this chunk
        # instead of aborting the whole run.
        print(str(e))
 # Previously final_summary was never assigned, so sum.txt was always
 # written empty; it now holds the collected chunk summaries.
 final_summary = " ".join(chunk_summaries)
 with open("sum.txt", "w") as summary_file:
    summary_file.write(final_summary)