diff --git a/requirements.txt b/requirements.txt
index 21fdd61a..b134b0d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-pyaudio==0.2.13
 keyboard==0.13.5
 pynput==1.7.6
 wave==0.0.2
@@ -57,3 +56,4 @@ stamina==23.1.0
 httpx==0.24.1
 sortedcontainers==2.4.0
 https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz
+gpt4all==1.0.5
diff --git a/server_executor_cleaned.py b/server_executor_cleaned.py
index 83015983..fc4dd1ee 100644
--- a/server_executor_cleaned.py
+++ b/server_executor_cleaned.py
@@ -1,6 +1,7 @@
 import asyncio
 import io
 import json
+import time
 import uuid
 import wave
 from concurrent.futures import ThreadPoolExecutor
@@ -12,7 +13,7 @@ from aiortc.contrib.media import MediaRelay
 from av import AudioFifo
 from loguru import logger
 from whisper_jax import FlaxWhisperPipline
-
+from gpt4all import GPT4All
 from utils.run_utils import run_in_executor
 
 pcs = set()
@@ -26,6 +27,28 @@ CHANNELS = 2
 RATE = 48000
 audio_buffer = AudioFifo()
 executor = ThreadPoolExecutor()
+transcription_text = ""
+llm = GPT4All("/Users/gokulmohanarangan/Library/Application Support/nomic.ai/GPT4All/ggml-vicuna-13b-1.1-q4_2.bin")
+
+
+def get_title_and_summary():
+    global transcription_text
+    output = None
+    if len(transcription_text) > 1000:
+        print("Generating title and summary")
+        prompt = f"""
+        ### Human:
+        Create a JSON object having 2 fields: title and summary. For the title field generate a short title for the given
+        text and for the summary field, summarize the given text by creating 3 key points.
+
+        {transcription_text}
+
+        ### Assistant:
+        """
+        transcription_text = ""
+        output = llm.generate(prompt)
+        return str(output)
+    return output
 
 
 def channel_log(channel, t, message):
@@ -34,8 +57,8 @@ def channel_log(channel, t, message):
 
 def channel_send(channel, message):
     # channel_log(channel, ">", message)
-    if channel:
-        channel.send(message)
+    if channel and message:
+        channel.send(str(message))
 
 
 def get_transcription(frames):
@@ -50,9 +73,9 @@ def get_transcription(frames):
     wf.writeframes(b"".join(frame.to_ndarray()))
     wf.close()
     whisper_result = pipeline(out_file.getvalue(), return_timestamps=True)
-    with open("test_exec.txt", "a") as f:
-        f.write(whisper_result["text"])
-    whisper_result['start_time'] = [f.time for f in frames]
+    # whisper_result['start_time'] = [f.time for f in frames]
+    global transcription_text
+    transcription_text += whisper_result["text"]
     return whisper_result
 
 
@@ -75,9 +98,15 @@ class AudioStreamTrack(MediaStreamTrack):
                 get_transcription, local_frames, executor=executor
             )
             whisper_result.add_done_callback(
-                lambda f: channel_send(data_channel,
-                                       str(whisper_result.result()))
-                if (f.result())
+                lambda f: channel_send(data_channel, whisper_result.result())
+                if f.result()
                 else None
             )
+            llm_result = run_in_executor(get_title_and_summary,
+                                         executor=executor)
+            llm_result.add_done_callback(
+                lambda f: channel_send(data_channel, llm_result.result())
+                if f.result()
+                else None
+            )
             return frame
diff --git a/stream_client.py b/stream_client.py
index 628ee69e..87a7340c 100644
--- a/stream_client.py
+++ b/stream_client.py
@@ -11,10 +11,7 @@ from aiortc import (RTCPeerConnection, RTCSessionDescription)
 from aiortc.contrib.media import (MediaPlayer, MediaRelay)
 
 from utils.log_utils import logger
-from utils.run_utils import config, Mutex
-
-file_lock = Mutex(open("test_sm_6.txt", "a"))
-
+from utils.run_utils import config
 
 class StreamClient:
     def __init__(
@@ -146,10 +143,7 @@ class StreamClient:
     async def worker(self, name, queue):
         while True:
             msg = await self.queue.get()
-            msg = ast.literal_eval(msg)
-            with file_lock.lock() as file:
-                file.write(msg["text"])
-            yield msg["text"]
+            yield msg
             self.queue.task_done()
 
     async def start(self):
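
Note: the run_in_executor helper imported from utils.run_utils is not part of this diff. Judging from its call sites (run_in_executor(fn, *args, executor=...) returning an object that supports add_done_callback() and result()), it appears to be a thin wrapper around ThreadPoolExecutor.submit. A minimal sketch under that assumption; the real helper may differ:

# Hypothetical reconstruction of utils/run_utils.run_in_executor, inferred
# from the call sites in server_executor_cleaned.py above. It submits the
# callable to the pool and returns a concurrent.futures.Future, which is
# what lets callers chain add_done_callback() and result().
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Any, Callable


def run_in_executor(fn: Callable[..., Any], *args: Any,
                    executor: ThreadPoolExecutor) -> Future:
    """Run fn(*args) on the given pool without blocking the caller."""
    return executor.submit(fn, *args)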
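
get_title_and_summary() returns None until transcription_text exceeds 1000 characters, so most invocations produce no output; that is why channel_send now checks the message and why both callbacks test f.result() before sending. A self-contained illustration of that gating (DummyChannel and the inline lambda values are hypothetical stand-ins, not project code):

# Standalone sketch of the None-gating this diff adds: futures may resolve
# to None (e.g. a transcript buffer still under 1000 characters), and
# channel_send drops falsy messages so nothing empty crosses the channel.
from concurrent.futures import ThreadPoolExecutor


class DummyChannel:
    def send(self, message: str) -> None:
        print(f"sent: {message}")


def channel_send(channel, message):
    # Mirrors the patched helper: require an open channel and a non-empty
    # message before sending.
    if channel and message:
        channel.send(str(message))


channel = DummyChannel()
with ThreadPoolExecutor() as executor:
    for value in (None, '{"title": "...", "summary": "..."}'):
        future = executor.submit(lambda v=value: v)
        future.add_done_callback(
            lambda f: channel_send(channel, f.result()) if f.result() else None
        )
# Only the non-None result is sent; the None result is silently dropped.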