mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-05 21:36:48 +00:00
feat: mixdown modal services + processor pattern (#936)
* allow memory flags and per service config * feat: mixdown modal services + processor pattern
This commit is contained in:
committed by
GitHub
parent
12bf0c2d77
commit
d164e486cc
@@ -84,7 +84,7 @@ from reflector.hatchet.workflows.topic_chunk_processing import (
|
||||
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines import topic_processing
|
||||
from reflector.processors import AudioFileWriterProcessor
|
||||
from reflector.processors.audio_mixdown_auto import AudioMixdownAutoProcessor
|
||||
from reflector.processors.summary.models import ActionItemsResponse
|
||||
from reflector.processors.summary.prompts import (
|
||||
RECAP_PROMPT,
|
||||
@@ -99,10 +99,6 @@ from reflector.utils.audio_constants import (
|
||||
PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||
WAVEFORM_SEGMENTS,
|
||||
)
|
||||
from reflector.utils.audio_mixdown import (
|
||||
detect_sample_rate_from_tracks,
|
||||
mixdown_tracks_pyav,
|
||||
)
|
||||
from reflector.utils.audio_waveform import get_audio_waveform
|
||||
from reflector.utils.daily import (
|
||||
filter_cam_audio_tracks,
|
||||
@@ -539,7 +535,7 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
|
||||
)
|
||||
@with_error_handling(TaskName.MIXDOWN_TRACKS)
|
||||
async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
|
||||
"""Mix all padded tracks into single audio file using PyAV (same as Celery)."""
|
||||
"""Mix all padded tracks into single audio file via configured backend."""
|
||||
ctx.log("mixdown_tracks: mixing padded tracks into single audio file")
|
||||
|
||||
track_result = ctx.task_output(process_tracks)
|
||||
@@ -579,37 +575,33 @@ async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
|
||||
if not valid_urls:
|
||||
raise ValueError("No valid padded tracks to mixdown")
|
||||
|
||||
target_sample_rate = detect_sample_rate_from_tracks(valid_urls, logger=logger)
|
||||
if not target_sample_rate:
|
||||
logger.error("Mixdown failed - no decodable audio frames found")
|
||||
raise ValueError("No decodable audio frames in any track")
|
||||
|
||||
output_path = tempfile.mktemp(suffix=".mp3")
|
||||
duration_ms_callback_capture_container = [0.0]
|
||||
|
||||
async def capture_duration(d):
|
||||
duration_ms_callback_capture_container[0] = d
|
||||
|
||||
writer = AudioFileWriterProcessor(path=output_path, on_duration=capture_duration)
|
||||
|
||||
await mixdown_tracks_pyav(
|
||||
valid_urls,
|
||||
writer,
|
||||
target_sample_rate,
|
||||
offsets_seconds=None,
|
||||
logger=logger,
|
||||
progress_callback=make_audio_progress_logger(ctx, TaskName.MIXDOWN_TRACKS),
|
||||
expected_duration_sec=recording_duration if recording_duration > 0 else None,
|
||||
)
|
||||
await writer.flush()
|
||||
|
||||
file_size = Path(output_path).stat().st_size
|
||||
storage_path = f"{input.transcript_id}/audio.mp3"
|
||||
|
||||
with open(output_path, "rb") as mixed_file:
|
||||
await storage.put_file(storage_path, mixed_file)
|
||||
# Generate presigned PUT URL for the output (used by modal backend;
|
||||
# pyav backend ignores it and writes locally instead)
|
||||
output_url = await storage.get_file_url(
|
||||
storage_path,
|
||||
operation="put_object",
|
||||
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||
)
|
||||
|
||||
Path(output_path).unlink(missing_ok=True)
|
||||
processor = AudioMixdownAutoProcessor()
|
||||
result = await processor.mixdown_tracks(
|
||||
valid_urls, output_url, offsets_seconds=None
|
||||
)
|
||||
|
||||
if result.output_path:
|
||||
# Pyav backend wrote locally — upload to storage ourselves
|
||||
output_file = Path(result.output_path)
|
||||
with open(output_file, "rb") as mixed_file:
|
||||
await storage.put_file(storage_path, mixed_file)
|
||||
output_file.unlink(missing_ok=True)
|
||||
# Clean up the temp directory the pyav processor created
|
||||
try:
|
||||
output_file.parent.rmdir()
|
||||
except OSError:
|
||||
pass
|
||||
# else: modal backend already uploaded to output_url
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
@@ -620,11 +612,11 @@ async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
|
||||
transcript, {"audio_location": "storage"}
|
||||
)
|
||||
|
||||
ctx.log(f"mixdown_tracks complete: uploaded {file_size} bytes to {storage_path}")
|
||||
ctx.log(f"mixdown_tracks complete: {result.size} bytes to {storage_path}")
|
||||
|
||||
return MixdownResult(
|
||||
audio_key=storage_path,
|
||||
duration=duration_ms_callback_capture_container[0],
|
||||
duration=result.duration_ms,
|
||||
tracks_mixed=len(valid_urls),
|
||||
)
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ from .audio_diarization_auto import AudioDiarizationAutoProcessor # noqa: F401
|
||||
from .audio_downscale import AudioDownscaleProcessor # noqa: F401
|
||||
from .audio_file_writer import AudioFileWriterProcessor # noqa: F401
|
||||
from .audio_merge import AudioMergeProcessor # noqa: F401
|
||||
from .audio_mixdown import AudioMixdownProcessor # noqa: F401
|
||||
from .audio_mixdown_auto import AudioMixdownAutoProcessor # noqa: F401
|
||||
from .audio_padding import AudioPaddingProcessor # noqa: F401
|
||||
from .audio_padding_auto import AudioPaddingAutoProcessor # noqa: F401
|
||||
from .audio_transcript import AudioTranscriptProcessor # noqa: F401
|
||||
|
||||
27
server/reflector/processors/audio_mixdown.py
Normal file
27
server/reflector/processors/audio_mixdown.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""
|
||||
Base class for audio mixdown processors.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class MixdownResponse(BaseModel):
|
||||
size: int
|
||||
duration_ms: float = 0.0
|
||||
cancelled: bool = False
|
||||
output_path: str | None = (
|
||||
None # Local file path (pyav sets this; modal leaves None)
|
||||
)
|
||||
|
||||
|
||||
class AudioMixdownProcessor:
|
||||
"""Base class for audio mixdown processors."""
|
||||
|
||||
async def mixdown_tracks(
|
||||
self,
|
||||
track_urls: list[str],
|
||||
output_url: str,
|
||||
target_sample_rate: int | None = None,
|
||||
offsets_seconds: list[float] | None = None,
|
||||
) -> MixdownResponse:
|
||||
raise NotImplementedError
|
||||
32
server/reflector/processors/audio_mixdown_auto.py
Normal file
32
server/reflector/processors/audio_mixdown_auto.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import importlib
|
||||
|
||||
from reflector.processors.audio_mixdown import AudioMixdownProcessor
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
class AudioMixdownAutoProcessor(AudioMixdownProcessor):
|
||||
_registry = {}
|
||||
|
||||
@classmethod
|
||||
def register(cls, name, kclass):
|
||||
cls._registry[name] = kclass
|
||||
|
||||
def __new__(cls, name: str | None = None, **kwargs):
|
||||
if name is None:
|
||||
name = settings.MIXDOWN_BACKEND
|
||||
if name not in cls._registry:
|
||||
module_name = f"reflector.processors.audio_mixdown_{name}"
|
||||
importlib.import_module(module_name)
|
||||
|
||||
# gather specific configuration for the processor
|
||||
# search `MIXDOWN_XXX_YYY`, push to constructor as `xxx_yyy`
|
||||
config = {}
|
||||
name_upper = name.upper()
|
||||
settings_prefix = "MIXDOWN_"
|
||||
config_prefix = f"{settings_prefix}{name_upper}_"
|
||||
for key, value in settings:
|
||||
if key.startswith(config_prefix):
|
||||
config_name = key[len(settings_prefix) :].lower()
|
||||
config[config_name] = value
|
||||
|
||||
return cls._registry[name](**config | kwargs)
|
||||
110
server/reflector/processors/audio_mixdown_modal.py
Normal file
110
server/reflector/processors/audio_mixdown_modal.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
Modal.com backend for audio mixdown.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
from reflector.hatchet.constants import TIMEOUT_HEAVY_HTTP
|
||||
from reflector.logger import logger
|
||||
from reflector.processors.audio_mixdown import AudioMixdownProcessor, MixdownResponse
|
||||
from reflector.processors.audio_mixdown_auto import AudioMixdownAutoProcessor
|
||||
|
||||
|
||||
class AudioMixdownModalProcessor(AudioMixdownProcessor):
|
||||
"""Audio mixdown processor using Modal.com/self-hosted backend via HTTP."""
|
||||
|
||||
def __init__(
|
||||
self, mixdown_url: str | None = None, modal_api_key: str | None = None
|
||||
):
|
||||
self.mixdown_url = mixdown_url or os.getenv("MIXDOWN_URL")
|
||||
if not self.mixdown_url:
|
||||
raise ValueError(
|
||||
"MIXDOWN_URL required to use AudioMixdownModalProcessor. "
|
||||
"Set MIXDOWN_URL environment variable or pass mixdown_url parameter."
|
||||
)
|
||||
|
||||
self.modal_api_key = modal_api_key or os.getenv("MODAL_API_KEY")
|
||||
|
||||
async def mixdown_tracks(
|
||||
self,
|
||||
track_urls: list[str],
|
||||
output_url: str,
|
||||
target_sample_rate: int | None = None,
|
||||
offsets_seconds: list[float] | None = None,
|
||||
) -> MixdownResponse:
|
||||
"""Mix audio tracks via remote Modal/self-hosted backend.
|
||||
|
||||
Args:
|
||||
track_urls: Presigned GET URLs for source audio tracks
|
||||
output_url: Presigned PUT URL for output MP3
|
||||
target_sample_rate: Sample rate for output (Hz), auto-detected if None
|
||||
offsets_seconds: Optional per-track delays in seconds for alignment
|
||||
"""
|
||||
valid_count = len([u for u in track_urls if u])
|
||||
log = logger.bind(track_count=valid_count)
|
||||
log.info("Sending Modal mixdown HTTP request")
|
||||
|
||||
url = f"{self.mixdown_url}/mixdown"
|
||||
|
||||
headers = {}
|
||||
if self.modal_api_key:
|
||||
headers["Authorization"] = f"Bearer {self.modal_api_key}"
|
||||
|
||||
# Scale timeout with track count: base TIMEOUT_HEAVY_HTTP + 60s per track beyond 2
|
||||
extra_timeout = max(0, (valid_count - 2)) * 60
|
||||
timeout = TIMEOUT_HEAVY_HTTP + extra_timeout
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
response = await client.post(
|
||||
url,
|
||||
headers=headers,
|
||||
json={
|
||||
"track_urls": track_urls,
|
||||
"output_url": output_url,
|
||||
"target_sample_rate": target_sample_rate,
|
||||
"offsets_seconds": offsets_seconds,
|
||||
},
|
||||
follow_redirects=True,
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
error_body = response.text
|
||||
log.error(
|
||||
"Modal mixdown API error",
|
||||
status_code=response.status_code,
|
||||
error_body=error_body,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
# Check if work was cancelled
|
||||
if result.get("cancelled"):
|
||||
log.warning("Modal mixdown was cancelled by disconnect detection")
|
||||
raise asyncio.CancelledError(
|
||||
"Mixdown cancelled due to client disconnect"
|
||||
)
|
||||
|
||||
log.info("Modal mixdown complete", size=result["size"])
|
||||
return MixdownResponse(**result)
|
||||
except asyncio.CancelledError:
|
||||
log.warning(
|
||||
"Modal mixdown cancelled (Hatchet timeout, disconnect detected on Modal side)"
|
||||
)
|
||||
raise
|
||||
except httpx.TimeoutException as e:
|
||||
log.error("Modal mixdown timeout", error=str(e), exc_info=True)
|
||||
raise Exception(f"Modal mixdown timeout: {e}") from e
|
||||
except httpx.HTTPStatusError as e:
|
||||
log.error("Modal mixdown HTTP error", error=str(e), exc_info=True)
|
||||
raise Exception(f"Modal mixdown HTTP error: {e}") from e
|
||||
except Exception as e:
|
||||
log.error("Modal mixdown unexpected error", error=str(e), exc_info=True)
|
||||
raise
|
||||
|
||||
|
||||
AudioMixdownAutoProcessor.register("modal", AudioMixdownModalProcessor)
|
||||
101
server/reflector/processors/audio_mixdown_pyav.py
Normal file
101
server/reflector/processors/audio_mixdown_pyav.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
PyAV audio mixdown processor.
|
||||
|
||||
Mixes N tracks in-process using the existing utility from reflector.utils.audio_mixdown.
|
||||
Writes to a local temp file (does NOT upload to S3 — the pipeline handles upload).
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from reflector.logger import logger
|
||||
from reflector.processors.audio_file_writer import AudioFileWriterProcessor
|
||||
from reflector.processors.audio_mixdown import AudioMixdownProcessor, MixdownResponse
|
||||
from reflector.processors.audio_mixdown_auto import AudioMixdownAutoProcessor
|
||||
from reflector.utils.audio_mixdown import (
|
||||
detect_sample_rate_from_tracks,
|
||||
mixdown_tracks_pyav,
|
||||
)
|
||||
|
||||
|
||||
class AudioMixdownPyavProcessor(AudioMixdownProcessor):
|
||||
"""Audio mixdown processor using PyAV (no HTTP backend).
|
||||
|
||||
Writes the mixed output to a local temp file and returns its path
|
||||
in MixdownResponse.output_path. The caller is responsible for
|
||||
uploading the file and cleaning it up.
|
||||
"""
|
||||
|
||||
async def mixdown_tracks(
|
||||
self,
|
||||
track_urls: list[str],
|
||||
output_url: str,
|
||||
target_sample_rate: int | None = None,
|
||||
offsets_seconds: list[float] | None = None,
|
||||
) -> MixdownResponse:
|
||||
log = logger.bind(track_count=len(track_urls))
|
||||
log.info("Starting local PyAV mixdown")
|
||||
|
||||
valid_urls = [url for url in track_urls if url]
|
||||
if not valid_urls:
|
||||
raise ValueError("No valid track URLs provided")
|
||||
|
||||
# Auto-detect sample rate if not provided
|
||||
if target_sample_rate is None:
|
||||
target_sample_rate = detect_sample_rate_from_tracks(
|
||||
valid_urls, logger=logger
|
||||
)
|
||||
if not target_sample_rate:
|
||||
raise ValueError("No decodable audio frames in any track")
|
||||
|
||||
# Write to temp MP3 file
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
output_path = os.path.join(temp_dir, "mixed.mp3")
|
||||
duration_ms_container = [0.0]
|
||||
|
||||
async def capture_duration(d):
|
||||
duration_ms_container[0] = d
|
||||
|
||||
writer = AudioFileWriterProcessor(
|
||||
path=output_path, on_duration=capture_duration
|
||||
)
|
||||
|
||||
try:
|
||||
await mixdown_tracks_pyav(
|
||||
valid_urls,
|
||||
writer,
|
||||
target_sample_rate,
|
||||
offsets_seconds=offsets_seconds,
|
||||
logger=logger,
|
||||
)
|
||||
await writer.flush()
|
||||
|
||||
file_size = os.path.getsize(output_path)
|
||||
log.info(
|
||||
"Local mixdown complete",
|
||||
size=file_size,
|
||||
duration_ms=duration_ms_container[0],
|
||||
)
|
||||
|
||||
return MixdownResponse(
|
||||
size=file_size,
|
||||
duration_ms=duration_ms_container[0],
|
||||
output_path=output_path,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Cleanup on failure
|
||||
if os.path.exists(output_path):
|
||||
try:
|
||||
os.unlink(output_path)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
os.rmdir(temp_dir)
|
||||
except Exception:
|
||||
pass
|
||||
log.error("Local mixdown failed", error=str(e), exc_info=True)
|
||||
raise
|
||||
|
||||
|
||||
AudioMixdownAutoProcessor.register("pyav", AudioMixdownPyavProcessor)
|
||||
@@ -127,6 +127,14 @@ class Settings(BaseSettings):
|
||||
PADDING_URL: str | None = None
|
||||
PADDING_MODAL_API_KEY: str | None = None
|
||||
|
||||
# Audio Mixdown
|
||||
# backends:
|
||||
# - pyav: in-process PyAV mixdown (no HTTP, runs in same process)
|
||||
# - modal: HTTP API client (works with Modal.com OR self-hosted gpu/self_hosted/)
|
||||
MIXDOWN_BACKEND: str = "pyav"
|
||||
MIXDOWN_URL: str | None = None
|
||||
MIXDOWN_MODAL_API_KEY: str | None = None
|
||||
|
||||
# Sentry
|
||||
SENTRY_DSN: str | None = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user