Big file upload (#349)

This commit is contained in:
projects-g
2024-01-30 20:51:58 +05:30
committed by GitHub
parent 529984c198
commit 1522d60cbc
11 changed files with 64 additions and 45 deletions

View File

@@ -19,7 +19,7 @@ repos:
- id: detect-private-key - id: detect-private-key
- repo: https://github.com/psf/black - repo: https://github.com/psf/black
rev: 23.1.0 rev: 24.1.1
hooks: hooks:
- id: black - id: black
files: ^server/(reflector|tests)/ files: ^server/(reflector|tests)/

52
server/poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]] [[package]]
name = "aioboto3" name = "aioboto3"
@@ -488,33 +488,33 @@ files = [
[[package]] [[package]]
name = "black" name = "black"
version = "23.9.1" version = "24.1.1"
description = "The uncompromising code formatter." description = "The uncompromising code formatter."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, {file = "black-24.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2588021038bd5ada078de606f2a804cadd0a3cc6a79cb3e9bb3a8bf581325a4c"},
{file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, {file = "black-24.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a95915c98d6e32ca43809d46d932e2abc5f1f7d582ffbe65a5b4d1588af7445"},
{file = "black-23.9.1-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:75a2dc41b183d4872d3a500d2b9c9016e67ed95738a3624f4751a0cb4818fe71"}, {file = "black-24.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fa6a0e965779c8f2afb286f9ef798df770ba2b6cee063c650b96adec22c056a"},
{file = "black-23.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13a2e4a93bb8ca74a749b6974925c27219bb3df4d42fc45e948a5d9feb5122b7"}, {file = "black-24.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5242ecd9e990aeb995b6d03dc3b2d112d4a78f2083e5a8e86d566340ae80fec4"},
{file = "black-23.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:adc3e4442eef57f99b5590b245a328aad19c99552e0bdc7f0b04db6656debd80"}, {file = "black-24.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fc1ec9aa6f4d98d022101e015261c056ddebe3da6a8ccfc2c792cbe0349d48b7"},
{file = "black-23.9.1-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:8431445bf62d2a914b541da7ab3e2b4f3bc052d2ccbf157ebad18ea126efb91f"}, {file = "black-24.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0269dfdea12442022e88043d2910429bed717b2d04523867a85dacce535916b8"},
{file = "black-23.9.1-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:8fc1ddcf83f996247505db6b715294eba56ea9372e107fd54963c7553f2b6dfe"}, {file = "black-24.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3d64db762eae4a5ce04b6e3dd745dcca0fb9560eb931a5be97472e38652a161"},
{file = "black-23.9.1-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:7d30ec46de88091e4316b17ae58bbbfc12b2de05e069030f6b747dfc649ad186"}, {file = "black-24.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5d7b06ea8816cbd4becfe5f70accae953c53c0e53aa98730ceccb0395520ee5d"},
{file = "black-23.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f"}, {file = "black-24.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e2c8dfa14677f90d976f68e0c923947ae68fa3961d61ee30976c388adc0b02c8"},
{file = "black-23.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:538efb451cd50f43aba394e9ec7ad55a37598faae3348d723b59ea8e91616300"}, {file = "black-24.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a21725862d0e855ae05da1dd25e3825ed712eaaccef6b03017fe0853a01aa45e"},
{file = "black-23.9.1-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:638619a559280de0c2aa4d76f504891c9860bb8fa214267358f0a20f27c12948"}, {file = "black-24.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07204d078e25327aad9ed2c64790d681238686bce254c910de640c7cc4fc3aa6"},
{file = "black-23.9.1-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:a732b82747235e0542c03bf352c126052c0fbc458d8a239a94701175b17d4855"}, {file = "black-24.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:a83fe522d9698d8f9a101b860b1ee154c1d25f8a82ceb807d319f085b2627c5b"},
{file = "black-23.9.1-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:cf3a4d00e4cdb6734b64bf23cd4341421e8953615cba6b3670453737a72ec204"}, {file = "black-24.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08b34e85170d368c37ca7bf81cf67ac863c9d1963b2c1780c39102187ec8dd62"},
{file = "black-23.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf99f3de8b3273a8317681d8194ea222f10e0133a24a7548c73ce44ea1679377"}, {file = "black-24.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7258c27115c1e3b5de9ac6c4f9957e3ee2c02c0b39222a24dc7aa03ba0e986f5"},
{file = "black-23.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:14f04c990259576acd093871e7e9b14918eb28f1866f91968ff5524293f9c573"}, {file = "black-24.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40657e1b78212d582a0edecafef133cf1dd02e6677f539b669db4746150d38f6"},
{file = "black-23.9.1-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:c619f063c2d68f19b2d7270f4cf3192cb81c9ec5bc5ba02df91471d0b88c4c5c"}, {file = "black-24.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e298d588744efda02379521a19639ebcd314fba7a49be22136204d7ed1782717"},
{file = "black-23.9.1-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:6a3b50e4b93f43b34a9d3ef00d9b6728b4a722c997c99ab09102fd5efdb88325"}, {file = "black-24.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34afe9da5056aa123b8bfda1664bfe6fb4e9c6f311d8e4a6eb089da9a9173bf9"},
{file = "black-23.9.1-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c46767e8df1b7beefb0899c4a95fb43058fa8500b6db144f4ff3ca38eb2f6393"}, {file = "black-24.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:854c06fb86fd854140f37fb24dbf10621f5dab9e3b0c29a690ba595e3d543024"},
{file = "black-23.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50254ebfa56aa46a9fdd5d651f9637485068a1adf42270148cd101cdf56e0ad9"}, {file = "black-24.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3897ae5a21ca132efa219c029cce5e6bfc9c3d34ed7e892113d199c0b1b444a2"},
{file = "black-23.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:403397c033adbc45c2bd41747da1f7fc7eaa44efbee256b53842470d4ac5a70f"}, {file = "black-24.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:ecba2a15dfb2d97105be74bbfe5128bc5e9fa8477d8c46766505c1dda5883aac"},
{file = "black-23.9.1-py3-none-any.whl", hash = "sha256:6ccd59584cc834b6d127628713e4b6b968e5f79572da66284532525a042549f9"}, {file = "black-24.1.1-py3-none-any.whl", hash = "sha256:5cdc2e2195212208fbcae579b931407c1fa9997584f0a415421748aeafff1168"},
{file = "black-23.9.1.tar.gz", hash = "sha256:24b6b3ff5c6d9ea08a8888f6977eae858e1f340d7260cf56d70a49823236b62d"}, {file = "black-24.1.1.tar.gz", hash = "sha256:48b5760dcbfe5cf97fd4fba23946681f3a81514c6ab8a45b50da67ac8fbc6c7b"},
] ]
[package.dependencies] [package.dependencies]
@@ -526,7 +526,7 @@ platformdirs = ">=2"
[package.extras] [package.extras]
colorama = ["colorama (>=0.4.3)"] colorama = ["colorama (>=0.4.3)"]
d = ["aiohttp (>=3.7.4)"] d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
uvloop = ["uvloop (>=0.15.2)"] uvloop = ["uvloop (>=0.15.2)"]
@@ -4296,4 +4296,4 @@ multidict = ">=4.0"
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "a0d9362ca5fc6e5c310bba39ce9bac720880d4dbb884a9f6625e51c952c54ffa" content-hash = "8b64f7a5e8282cedf8f508c9f85ed233222045bdccc49d7c4ea96cf4bf8f902b"

View File

@@ -37,10 +37,11 @@ python-jose = {extras = ["cryptography"], version = "^3.3.0"}
python-multipart = "^0.0.6" python-multipart = "^0.0.6"
faster-whisper = "^0.10.0" faster-whisper = "^0.10.0"
transformers = "^4.36.2" transformers = "^4.36.2"
black = "24.1.1"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
black = "^23.7.0" black = "^24.1.1"
stamina = "^23.1.0" stamina = "^23.1.0"
pyinstrument = "^4.6.1" pyinstrument = "^4.6.1"

View File

@@ -384,8 +384,8 @@ class PipelineMainDiarization(PipelineMainBase):
# to let the start just do one job. # to let the start just do one job.
pipeline.logger.bind(transcript_id=transcript.id) pipeline.logger.bind(transcript_id=transcript.id)
pipeline.logger.info("Diarization pipeline created") pipeline.logger.info("Diarization pipeline created")
self.push(audio_diarization_input) await self.push(audio_diarization_input)
self.flush() await self.flush()
return pipeline return pipeline
@@ -414,9 +414,9 @@ class PipelineMainFromTopics(PipelineMainBase):
# push topics # push topics
topics = self.get_transcript_topics(transcript) topics = self.get_transcript_topics(transcript)
for topic in topics: for topic in topics:
self.push(topic) await self.push(topic)
self.flush() await self.flush()
return pipeline return pipeline
@@ -653,10 +653,10 @@ async def pipeline_upload(transcript: Transcript, logger: Logger):
try: try:
logger.info("Start pushing audio into the pipeline") logger.info("Start pushing audio into the pipeline")
for frame in container.decode(audio=0): for frame in container.decode(audio=0):
pipeline.push(frame) await pipeline.push(frame)
finally: finally:
logger.info("Flushing the pipeline") logger.info("Flushing the pipeline")
pipeline.flush() await pipeline.flush()
logger.info("Waiting for the pipeline to end") logger.info("Waiting for the pipeline to end")
await pipeline.join() await pipeline.join()

View File

@@ -66,17 +66,17 @@ class PipelineRunner(BaseModel):
coro = self.run() coro = self.run()
asyncio.run(coro) asyncio.run(coro)
def push(self, data): async def push(self, data):
""" """
Push data to the pipeline Push data to the pipeline
""" """
self._add_cmd("PUSH", data) await self._add_cmd("PUSH", data)
def flush(self): async def flush(self):
""" """
Flush the pipeline Flush the pipeline
""" """
self._add_cmd("FLUSH", None) await self._add_cmd("FLUSH", None)
async def on_status(self, status): async def on_status(self, status):
""" """
@@ -90,12 +90,26 @@ class PipelineRunner(BaseModel):
""" """
pass pass
def _add_cmd(self, cmd: str, data): async def _add_cmd(
self, cmd: str, data, max_retries: int = 3, retry_time_limit: int = 3
):
""" """
Enqueue a command to be executed in the runner. Enqueue a command to be executed in the runner.
Currently supported commands: PUSH, FLUSH Currently supported commands: PUSH, FLUSH
""" """
for _ in range(max_retries):
try:
self._q_cmd.put_nowait([cmd, data]) self._q_cmd.put_nowait([cmd, data])
break # Break if put succeeds
except asyncio.queues.QueueFull:
# Handle only the QueueFull exception, retry after a small delay
self._logger.debug(
f"Encountered a full queue, while trying to add [{cmd, data}]. "
f"Retrying in {retry_time_limit} seconds"
)
await asyncio.sleep(retry_time_limit)
else:
print(f"Failed to add [{cmd, data}] after {max_retries} attempts.")
async def _set_status(self, status): async def _set_status(self, status):
self._logger.debug("Runner status updated", status=status) self._logger.debug("Runner status updated", status=status)

View File

@@ -1,20 +1,20 @@
""" """
Utility file for all text processing related functionalities Utility file for all text processing related functionalities
""" """
import datetime import datetime
from typing import List from typing import List
import nltk import nltk
import torch import torch
from log_utils import LOGGER
from nltk.corpus import stopwords from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize from nltk.tokenize import word_tokenize
from run_utils import CONFIG
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
from transformers import BartForConditionalGeneration, BartTokenizer from transformers import BartForConditionalGeneration, BartTokenizer
from log_utils import LOGGER
from run_utils import CONFIG
nltk.download("punkt", quiet=True) nltk.download("punkt", quiet=True)

View File

@@ -40,7 +40,7 @@ class AudioStreamTrack(MediaStreamTrack):
ctx = self.ctx ctx = self.ctx
frame = await self.track.recv() frame = await self.track.recv()
try: try:
ctx.pipeline_runner.push(frame) await ctx.pipeline_runner.push(frame)
except Exception as e: except Exception as e:
ctx.logger.error("Pipeline error", error=e) ctx.logger.error("Pipeline error", error=e)
return frame return frame
@@ -76,7 +76,7 @@ async def rtc_offer_base(
# - when we receive the close event, we do nothing. # - when we receive the close event, we do nothing.
# 2. or the client close the connection # 2. or the client close the connection
# and there is nothing to do because it is already closed # and there is nothing to do because it is already closed
ctx.pipeline_runner.flush() await ctx.pipeline_runner.flush()
if close: if close:
ctx.logger.debug("Closing peer connection") ctx.logger.debug("Closing peer connection")
await pc.close() await pc.close()

View File

@@ -3,6 +3,7 @@ Transcripts audio related endpoints
=================================== ===================================
""" """
from typing import Annotated, Optional from typing import Annotated, Optional
import httpx import httpx

View File

@@ -3,6 +3,7 @@ Transcript participants API endpoints
===================================== =====================================
""" """
from typing import Annotated, Optional from typing import Annotated, Optional
import reflector.auth as auth import reflector.auth as auth

View File

@@ -3,6 +3,7 @@ Reassign speakers in a transcript
================================= =================================
""" """
from typing import Annotated, Optional from typing import Annotated, Optional
import reflector.auth as auth import reflector.auth as auth

View File

@@ -3,6 +3,7 @@ Transcripts websocket API
========================= =========================
""" """
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from reflector.db.transcripts import transcripts_controller from reflector.db.transcripts import transcripts_controller
from reflector.ws_manager import get_ws_manager from reflector.ws_manager import get_ws_manager