https://github.com/Monadical-SAS/reflector.git
Big file upload (#349)
@@ -384,8 +384,8 @@ class PipelineMainDiarization(PipelineMainBase):
         # to let the start just do one job.
         pipeline.logger.bind(transcript_id=transcript.id)
         pipeline.logger.info("Diarization pipeline created")
-        self.push(audio_diarization_input)
-        self.flush()
+        await self.push(audio_diarization_input)
+        await self.flush()

         return pipeline
@@ -414,9 +414,9 @@ class PipelineMainFromTopics(PipelineMainBase):
         # push topics
         topics = self.get_transcript_topics(transcript)
         for topic in topics:
-            self.push(topic)
+            await self.push(topic)

-        self.flush()
+        await self.flush()

         return pipeline
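In both pipeline builders above, push() and flush() go from fire-and-forget calls to awaited ones, because the runner's push/flush become coroutines (see the PipelineRunner hunks below). A minimal sketch of the pattern with a toy runner, not the project's actual classes: each call only enqueues a command, and a bounded queue can suspend a fast producer.

import asyncio


class ToyRunner:
    # Stand-in for the real runner: push()/flush() only enqueue commands,
    # and a worker task drains them.
    def __init__(self):
        self._q: asyncio.Queue = asyncio.Queue(maxsize=2)

    async def push(self, data):
        # Awaitable: with a bounded queue, a fast producer is suspended here.
        await self._q.put(("PUSH", data))

    async def flush(self):
        await self._q.put(("FLUSH", None))

    async def run(self):
        while True:
            cmd, data = await self._q.get()
            print(cmd, data)
            if cmd == "FLUSH":
                break


async def main():
    runner = ToyRunner()
    worker = asyncio.create_task(runner.run())
    for topic in ["intro", "roadmap", "questions"]:
        await runner.push(topic)  # forgetting await would enqueue nothing
    await runner.flush()
    await worker


asyncio.run(main())

Calling runner.push(topic) without await would merely create a coroutine object and enqueue nothing; Python reports it as a "coroutine was never awaited" RuntimeWarning.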
@@ -653,10 +653,10 @@ async def pipeline_upload(transcript: Transcript, logger: Logger):
     try:
         logger.info("Start pushing audio into the pipeline")
         for frame in container.decode(audio=0):
-            pipeline.push(frame)
+            await pipeline.push(frame)
     finally:
         logger.info("Flushing the pipeline")
-        pipeline.flush()
+        await pipeline.flush()

     logger.info("Waiting for the pipeline to end")
     await pipeline.join()
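pipeline_upload feeds the uploaded file's decoded audio frames into the same async pipeline. A standalone sketch of that loop, assuming PyAV (av) for decoding and a pipeline exposing awaitable push/flush/join as in this commit; feed_audio_file and its arguments are illustrative names:

import av  # PyAV


async def feed_audio_file(path, pipeline):
    # Decode the first audio stream of the file and feed every frame
    # into the pipeline; awaiting push() lets a bounded command queue
    # apply back-pressure to the decode loop.
    container = av.open(path)
    try:
        for frame in container.decode(audio=0):
            await pipeline.push(frame)
    finally:
        await pipeline.flush()
        container.close()
    # Wait until the pipeline has fully processed what was pushed.
    await pipeline.join()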
@@ -66,17 +66,17 @@ class PipelineRunner(BaseModel):
         coro = self.run()
         asyncio.run(coro)

-    def push(self, data):
+    async def push(self, data):
         """
         Push data to the pipeline
         """
-        self._add_cmd("PUSH", data)
+        await self._add_cmd("PUSH", data)

-    def flush(self):
+    async def flush(self):
         """
         Flush the pipeline
         """
-        self._add_cmd("FLUSH", None)
+        await self._add_cmd("FLUSH", None)

     async def on_status(self, status):
         """
@@ -90,12 +90,26 @@ class PipelineRunner(BaseModel):
         """
         pass

-    def _add_cmd(self, cmd: str, data):
+    async def _add_cmd(
+        self, cmd: str, data, max_retries: int = 3, retry_time_limit: int = 3
+    ):
         """
         Enqueue a command to be executed in the runner.
         Currently supported commands: PUSH, FLUSH
         """
-        self._q_cmd.put_nowait([cmd, data])
+        for _ in range(max_retries):
+            try:
+                self._q_cmd.put_nowait([cmd, data])
+                break  # Break if put succeeds
+            except asyncio.queues.QueueFull:
+                # Handle only the QueueFull exception, retry after a small delay
+                self._logger.debug(
+                    f"Encountered a full queue, while trying to add [{cmd, data}]. "
+                    f"Retrying in {retry_time_limit} seconds"
+                )
+                await asyncio.sleep(retry_time_limit)
+        else:
+            print(f"Failed to add [{cmd, data}] after {max_retries} attempts.")

     async def _set_status(self, status):
         self._logger.debug("Runner status updated", status=status)
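_add_cmd now retries a bounded number of times when the command queue is full, instead of letting the first put_nowait raise; with the for/else, the final message is printed only if every attempt raised QueueFull and the loop never reached break. Note that asyncio.queues.QueueFull is the same exception importable as asyncio.QueueFull. The same idea as a standalone helper, a minimal sketch rather than the project's code:

import asyncio


async def put_with_retry(queue: asyncio.Queue, item,
                         max_retries: int = 3, retry_delay: float = 3.0) -> bool:
    # Try a non-blocking put; when the queue is full, back off and retry,
    # giving up (and returning False) after max_retries attempts.
    for _ in range(max_retries):
        try:
            queue.put_nowait(item)
            return True
        except asyncio.QueueFull:
            await asyncio.sleep(retry_delay)
    return False

An alternative is await asyncio.wait_for(queue.put(item), timeout=...), which waits for free space to appear instead of polling on a fixed delay.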
@@ -1,20 +1,20 @@
 """
 Utility file for all text processing related functionalities
 """

 import datetime
 from typing import List

 import nltk
 import torch
-from log_utils import LOGGER
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
-from run_utils import CONFIG
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 from transformers import BartForConditionalGeneration, BartTokenizer

+from log_utils import LOGGER
+from run_utils import CONFIG

 nltk.download("punkt", quiet=True)
@@ -40,7 +40,7 @@ class AudioStreamTrack(MediaStreamTrack):
         ctx = self.ctx
         frame = await self.track.recv()
         try:
-            ctx.pipeline_runner.push(frame)
+            await ctx.pipeline_runner.push(frame)
         except Exception as e:
             ctx.logger.error("Pipeline error", error=e)
         return frame
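The track's recv() is already a coroutine, so awaiting the push fits there directly. A hedged sketch of such a relay track built on aiortc's MediaStreamTrack; RelayAudioTrack, source, and runner are illustrative names, not the project's context object:

import logging

from aiortc import MediaStreamTrack

log = logging.getLogger(__name__)


class RelayAudioTrack(MediaStreamTrack):
    # Wraps a source track: every received frame is returned unchanged,
    # and also pushed into an async pipeline runner.
    kind = "audio"

    def __init__(self, source, runner):
        super().__init__()
        self.source = source
        self.runner = runner

    async def recv(self):
        frame = await self.source.recv()
        try:
            await self.runner.push(frame)
        except Exception:
            # A pipeline failure must not break the media flow back to the peer.
            log.exception("Pipeline error")
        return frame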
@@ -76,7 +76,7 @@ async def rtc_offer_base(
         # - when we receive the close event, we do nothing.
         # 2. or the client close the connection
         #    and there is nothing to do because it is already closed
-        ctx.pipeline_runner.flush()
+        await ctx.pipeline_runner.flush()
         if close:
             ctx.logger.debug("Closing peer connection")
             await pc.close()
@@ -3,6 +3,7 @@ Transcripts audio related endpoints
 ===================================
+
 """

 from typing import Annotated, Optional

 import httpx
@@ -3,6 +3,7 @@ Transcript participants API endpoints
 =====================================
+
 """

 from typing import Annotated, Optional

 import reflector.auth as auth
@@ -3,6 +3,7 @@ Reassign speakers in a transcript
 =================================
+
 """

 from typing import Annotated, Optional

 import reflector.auth as auth
@@ -3,6 +3,7 @@ Transcripts websocket API
 =========================
+
 """

 from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from reflector.db.transcripts import transcripts_controller
 from reflector.ws_manager import get_ws_manager