From b51b7aa9176c1a53ba57ad99f5e976c804a1e80c Mon Sep 17 00:00:00 2001 From: Igor Monadical Date: Mon, 1 Dec 2025 23:35:12 -0500 Subject: [PATCH] fix: Skip mixdown for multitrack (#760) * multitrack mixdown optimisation * skip mixdown for multitrack * skip mixdown for multitrack --------- Co-authored-by: Igor Loskutov --- .../pipelines/main_multitrack_pipeline.py | 79 +++++++++++-------- server/reflector/settings.py | 8 ++ .../(app)/transcripts/[transcriptId]/page.tsx | 16 ++-- 3 files changed, 60 insertions(+), 43 deletions(-) diff --git a/server/reflector/pipelines/main_multitrack_pipeline.py b/server/reflector/pipelines/main_multitrack_pipeline.py index d202206c..2b23c7b6 100644 --- a/server/reflector/pipelines/main_multitrack_pipeline.py +++ b/server/reflector/pipelines/main_multitrack_pipeline.py @@ -31,6 +31,7 @@ from reflector.processors import AudioFileWriterProcessor from reflector.processors.audio_waveform_processor import AudioWaveformProcessor from reflector.processors.types import TitleSummary from reflector.processors.types import Transcript as TranscriptType +from reflector.settings import settings from reflector.storage import Storage, get_transcripts_storage from reflector.utils.daily import ( filter_cam_audio_tracks, @@ -631,43 +632,55 @@ class PipelineMainMultitrack(PipelineMainBase): transcript.data_path.mkdir(parents=True, exist_ok=True) - mp3_writer = AudioFileWriterProcessor( - path=str(transcript.audio_mp3_filename), - on_duration=self.on_duration, - ) - await self.mixdown_tracks(padded_track_urls, mp3_writer, offsets_seconds=None) - await mp3_writer.flush() + if settings.SKIP_MIXDOWN: + self.logger.warning( + "SKIP_MIXDOWN enabled: Skipping mixdown and waveform generation. " + "UI will have no audio playback or waveform.", + num_tracks=len(padded_track_urls), + transcript_id=transcript.id, + ) + else: + mp3_writer = AudioFileWriterProcessor( + path=str(transcript.audio_mp3_filename), + on_duration=self.on_duration, + ) + await self.mixdown_tracks( + padded_track_urls, mp3_writer, offsets_seconds=None + ) + await mp3_writer.flush() - if not transcript.audio_mp3_filename.exists(): - raise Exception( - "Mixdown failed - no MP3 file generated. Cannot proceed without playable audio." + if not transcript.audio_mp3_filename.exists(): + raise Exception( + "Mixdown failed - no MP3 file generated. Cannot proceed without playable audio." + ) + + storage_path = f"{transcript.id}/audio.mp3" + # Use file handle streaming to avoid loading entire MP3 into memory + mp3_size = transcript.audio_mp3_filename.stat().st_size + with open(transcript.audio_mp3_filename, "rb") as mp3_file: + await transcript_storage.put_file(storage_path, mp3_file) + mp3_url = await transcript_storage.get_file_url(storage_path) + + await transcripts_controller.update( + transcript, {"audio_location": "storage"} ) - storage_path = f"{transcript.id}/audio.mp3" - # Use file handle streaming to avoid loading entire MP3 into memory - mp3_size = transcript.audio_mp3_filename.stat().st_size - with open(transcript.audio_mp3_filename, "rb") as mp3_file: - await transcript_storage.put_file(storage_path, mp3_file) - mp3_url = await transcript_storage.get_file_url(storage_path) + self.logger.info( + f"Uploaded mixed audio to storage", + storage_path=storage_path, + size=mp3_size, + url=mp3_url, + ) - await transcripts_controller.update(transcript, {"audio_location": "storage"}) - - self.logger.info( - f"Uploaded mixed audio to storage", - storage_path=storage_path, - size=mp3_size, - url=mp3_url, - ) - - self.logger.info("Generating waveform from mixed audio") - waveform_processor = AudioWaveformProcessor( - audio_path=transcript.audio_mp3_filename, - waveform_path=transcript.audio_waveform_filename, - on_waveform=self.on_waveform, - ) - waveform_processor.set_pipeline(self.empty_pipeline) - await waveform_processor.flush() - self.logger.info("Waveform generated successfully") + self.logger.info("Generating waveform from mixed audio") + waveform_processor = AudioWaveformProcessor( + audio_path=transcript.audio_mp3_filename, + waveform_path=transcript.audio_waveform_filename, + on_waveform=self.on_waveform, + ) + waveform_processor.set_pipeline(self.empty_pipeline) + await waveform_processor.flush() + self.logger.info("Waveform generated successfully") speaker_transcripts: list[TranscriptType] = [] for idx, padded_url in enumerate(padded_track_urls): diff --git a/server/reflector/settings.py b/server/reflector/settings.py index 1ec46d94..338e1da9 100644 --- a/server/reflector/settings.py +++ b/server/reflector/settings.py @@ -138,6 +138,14 @@ class Settings(BaseSettings): DAILY_WEBHOOK_UUID: str | None = ( None # Webhook UUID for this environment. Not used by production code ) + + # Multitrack processing + # SKIP_MIXDOWN: When True, skips audio mixdown and waveform generation. + # Transcription still works using individual tracks. Useful for: + # - Diagnosing OOM issues in mixdown + # - Fast processing when audio playback is not needed + # Note: UI will have no audio playback or waveform when enabled. + SKIP_MIXDOWN: bool = True # Platform Configuration DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM diff --git a/www/app/(app)/transcripts/[transcriptId]/page.tsx b/www/app/(app)/transcripts/[transcriptId]/page.tsx index 1e020f1c..ead2d259 100644 --- a/www/app/(app)/transcripts/[transcriptId]/page.tsx +++ b/www/app/(app)/transcripts/[transcriptId]/page.tsx @@ -117,15 +117,6 @@ export default function TranscriptDetails(details: TranscriptDetails) { return ; } - if (mp3.error) { - return ( - - ); - } - return ( <> ) : !mp3.loading && (waveform.error || mp3.error) ? ( - Error loading this recording + + Error loading{" "} + {[waveform.error && "waveform", mp3.error && "mp3"] + .filter(Boolean) + .join(" and ")} + ) : (