diff --git a/.gitignore b/.gitignore index 6d4b0ab1..5c2931cb 100644 --- a/.gitignore +++ b/.gitignore @@ -167,7 +167,7 @@ transcript_timestamps.txt *.pkl transcript_*.txt test_*.txt -*.png +wordcloud*.png *.ini test_samples/ *.wav diff --git a/scripts/clear_artefacts.sh b/scripts/clear_artefacts.sh index b179154c..c06c4c2c 100755 --- a/scripts/clear_artefacts.sh +++ b/scripts/clear_artefacts.sh @@ -7,7 +7,7 @@ directory="." text_file_pattern="transcript_*.txt" pickle_file_pattern="*.pkl" html_file_pattern="*.html" -png_file_pattern="*.png" +png_file_pattern="wordcloud*.png" find "$directory" -type f -name "$text_file_pattern" -delete find "$directory" -type f -name "$pickle_file_pattern" -delete diff --git a/stream_client.py b/stream_client.py index f6f12e8c..508f3092 100644 --- a/stream_client.py +++ b/stream_client.py @@ -11,9 +11,10 @@ import ast import stamina from aiortc import (RTCPeerConnection, RTCSessionDescription) from aiortc.contrib.media import (MediaPlayer, MediaRelay) +from utils.server_utils import Mutex logger = logging.getLogger("pc") -file_lock = threading.Lock() +file_lock = Mutex(open("test_sm_6.txt", "a")) config = configparser.ConfigParser() config.read('config.ini') @@ -24,8 +25,7 @@ class StreamClient: signaling, url="http://127.0.0.1:1250", play_from=None, - ping_pong=False, - audio_stream=None + ping_pong=False ): self.signaling = signaling self.server_url = url @@ -36,7 +36,6 @@ class StreamClient: self.pc = RTCPeerConnection() self.loop = asyncio.get_event_loop() - # self.loop = asyncio.new_event_loop() self.relay = None self.pcs = set() self.time_start = None @@ -68,7 +67,6 @@ class StreamClient: channel.send(message) def current_stamp(self): - if self.time_start is None: self.time_start = time.time() return 0 @@ -94,9 +92,7 @@ class StreamClient: @pc.on("track") def on_track(track): print("Sending %s" % track.kind) - # Trials self.pc.addTrack(track) - # self.pc.addTrack(self.microphone) @track.on("ended") async def on_ended(): @@ -104,7 +100,6 @@ class StreamClient: self.pc.addTrack(audio) - # DataChannel channel = pc.createDataChannel("data-channel") self.channel_log(channel, "-", "created by local party") @@ -155,14 +150,12 @@ class StreamClient: while True: msg = await self.queue.get() msg = ast.literal_eval(msg) - with file_lock: - with open("test_sm_6.txt", "a") as f: - f.write(msg["text"]) + with file_lock.lock() as file: + file.write(msg["text"]) yield msg["text"] self.queue.task_done() async def start(self): - print("Starting stream client") coro = self.run_offer(self.pc, self.signaling) task = asyncio.create_task(coro) await task diff --git a/utils/server_utils.py b/utils/server_utils.py index b15f992b..2e46e094 100644 --- a/utils/server_utils.py +++ b/utils/server_utils.py @@ -1,7 +1,25 @@ import asyncio from functools import partial +import contextlib +from threading import Lock +from typing import ContextManager, Generic, TypeVar def run_in_executor(func, *args, executor=None, **kwargs): callback = partial(func, *args, **kwargs) loop = asyncio.get_event_loop() return asyncio.get_event_loop().run_in_executor(executor, callback) + + +T = TypeVar("T") +class Mutex(Generic[T]): + def __init__(self, value: T): + self.__value = value + self.__lock = Lock() + + @contextlib.contextmanager + def lock(self) -> ContextManager[T]: + self.__lock.acquire() + try: + yield self.__value + finally: + self.__lock.release() \ No newline at end of file diff --git a/utils/text_utilities.py b/utils/text_utilities.py index 0d523dd3..dbd6c6cc 100644 --- a/utils/text_utilities.py +++ b/utils/text_utilities.py @@ -19,6 +19,9 @@ def preprocess_sentence(sentence): return ' '.join(tokens) def compute_similarity(sent1, sent2): + """ + Compute the similarity + """ tfidf_vectorizer = TfidfVectorizer() if sent1 is not None and sent2 is not None: tfidf_matrix = tfidf_vectorizer.fit_transform([sent1, sent2]) diff --git a/utils/viz_utilities.py b/utils/viz_utilities.py index 94e774c6..f6a2baf7 100644 --- a/utils/viz_utilities.py +++ b/utils/viz_utilities.py @@ -19,6 +19,7 @@ spacy_stopwords = en.Defaults.stop_words STOPWORDS = set(STOPWORDS).union(set(stopwords.words("english"))).union(set(spacy_stopwords)) + def create_wordcloud(timestamp, real_time=False): """ Create a basic word cloud visualization of transcribed text