Compare commits

...

2 Commits

Author SHA1 Message Date
af921ce927 chore(main): release 0.22.4 (#765) 2025-12-02 17:11:48 -05:00
Igor Monadical
bd5df1ce2e fix: Multitrack mixdown optimisation 2 (#764)
* Revert "fix: Skip mixdown for multitrack (#760)"

This reverts commit b51b7aa917.

* multitrack mixdown optimisation

* return the "good" ui part of "skip mixdown"

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2025-12-02 17:10:06 -05:00
3 changed files with 51 additions and 57 deletions

View File

@@ -1,5 +1,12 @@
# Changelog # Changelog
## [0.22.4](https://github.com/Monadical-SAS/reflector/compare/v0.22.3...v0.22.4) (2025-12-02)
### Bug Fixes
* Multitrack mixdown optimisation 2 ([#764](https://github.com/Monadical-SAS/reflector/issues/764)) ([bd5df1c](https://github.com/Monadical-SAS/reflector/commit/bd5df1ce2ebf35d7f3413b295e56937a9a28ef7b))
## [0.22.3](https://github.com/Monadical-SAS/reflector/compare/v0.22.2...v0.22.3) (2025-12-02) ## [0.22.3](https://github.com/Monadical-SAS/reflector/compare/v0.22.2...v0.22.3) (2025-12-02)

View File

@@ -31,7 +31,6 @@ from reflector.processors import AudioFileWriterProcessor
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
from reflector.processors.types import TitleSummary from reflector.processors.types import TitleSummary
from reflector.processors.types import Transcript as TranscriptType from reflector.processors.types import Transcript as TranscriptType
from reflector.settings import settings
from reflector.storage import Storage, get_transcripts_storage from reflector.storage import Storage, get_transcripts_storage
from reflector.utils.daily import ( from reflector.utils.daily import (
filter_cam_audio_tracks, filter_cam_audio_tracks,
@@ -423,7 +422,15 @@ class PipelineMainMultitrack(PipelineMainBase):
# Open all containers with cleanup guaranteed # Open all containers with cleanup guaranteed
for i, url in enumerate(valid_track_urls): for i, url in enumerate(valid_track_urls):
try: try:
c = av.open(url) c = av.open(
url,
options={
# av.open() streams the track from S3 by default rather than downloading it first, so let FFmpeg reconnect if the connection drops
"reconnect": "1",
"reconnect_streamed": "1",
"reconnect_delay_max": "5",
},
)
containers.append(c) containers.append(c)
except Exception as e: except Exception as e:
self.logger.warning( self.logger.warning(
@@ -452,6 +459,8 @@ class PipelineMainMultitrack(PipelineMainBase):
frame = next(dec) frame = next(dec)
except StopIteration: except StopIteration:
active[i] = False active[i] = False
# push end-of-stream for this finished track right away so the stream moves on instead of holding frames in memory
inputs[i].push(None)
continue continue
if frame.sample_rate != target_sample_rate: if frame.sample_rate != target_sample_rate:
@@ -471,8 +480,6 @@ class PipelineMainMultitrack(PipelineMainBase):
mixed.time_base = Fraction(1, target_sample_rate) mixed.time_base = Fraction(1, target_sample_rate)
await writer.push(mixed) await writer.push(mixed)
for in_ctx in inputs:
in_ctx.push(None)
while True: while True:
try: try:
mixed = sink.pull() mixed = sink.pull()
@@ -632,55 +639,43 @@ class PipelineMainMultitrack(PipelineMainBase):
transcript.data_path.mkdir(parents=True, exist_ok=True) transcript.data_path.mkdir(parents=True, exist_ok=True)
if settings.SKIP_MIXDOWN: mp3_writer = AudioFileWriterProcessor(
self.logger.warning( path=str(transcript.audio_mp3_filename),
"SKIP_MIXDOWN enabled: Skipping mixdown and waveform generation. " on_duration=self.on_duration,
"UI will have no audio playback or waveform.", )
num_tracks=len(padded_track_urls), await self.mixdown_tracks(padded_track_urls, mp3_writer, offsets_seconds=None)
transcript_id=transcript.id, await mp3_writer.flush()
)
else:
mp3_writer = AudioFileWriterProcessor(
path=str(transcript.audio_mp3_filename),
on_duration=self.on_duration,
)
await self.mixdown_tracks(
padded_track_urls, mp3_writer, offsets_seconds=None
)
await mp3_writer.flush()
if not transcript.audio_mp3_filename.exists(): if not transcript.audio_mp3_filename.exists():
raise Exception( raise Exception(
"Mixdown failed - no MP3 file generated. Cannot proceed without playable audio." "Mixdown failed - no MP3 file generated. Cannot proceed without playable audio."
)
storage_path = f"{transcript.id}/audio.mp3"
# Use file handle streaming to avoid loading entire MP3 into memory
mp3_size = transcript.audio_mp3_filename.stat().st_size
with open(transcript.audio_mp3_filename, "rb") as mp3_file:
await transcript_storage.put_file(storage_path, mp3_file)
mp3_url = await transcript_storage.get_file_url(storage_path)
await transcripts_controller.update(
transcript, {"audio_location": "storage"}
) )
self.logger.info( storage_path = f"{transcript.id}/audio.mp3"
f"Uploaded mixed audio to storage", # Use file handle streaming to avoid loading entire MP3 into memory
storage_path=storage_path, mp3_size = transcript.audio_mp3_filename.stat().st_size
size=mp3_size, with open(transcript.audio_mp3_filename, "rb") as mp3_file:
url=mp3_url, await transcript_storage.put_file(storage_path, mp3_file)
) mp3_url = await transcript_storage.get_file_url(storage_path)
self.logger.info("Generating waveform from mixed audio") await transcripts_controller.update(transcript, {"audio_location": "storage"})
waveform_processor = AudioWaveformProcessor(
audio_path=transcript.audio_mp3_filename, self.logger.info(
waveform_path=transcript.audio_waveform_filename, f"Uploaded mixed audio to storage",
on_waveform=self.on_waveform, storage_path=storage_path,
) size=mp3_size,
waveform_processor.set_pipeline(self.empty_pipeline) url=mp3_url,
await waveform_processor.flush() )
self.logger.info("Waveform generated successfully")
self.logger.info("Generating waveform from mixed audio")
waveform_processor = AudioWaveformProcessor(
audio_path=transcript.audio_mp3_filename,
waveform_path=transcript.audio_waveform_filename,
on_waveform=self.on_waveform,
)
waveform_processor.set_pipeline(self.empty_pipeline)
await waveform_processor.flush()
self.logger.info("Waveform generated successfully")
speaker_transcripts: list[TranscriptType] = [] speaker_transcripts: list[TranscriptType] = []
for idx, padded_url in enumerate(padded_track_urls): for idx, padded_url in enumerate(padded_track_urls):

View File

@@ -138,14 +138,6 @@ class Settings(BaseSettings):
DAILY_WEBHOOK_UUID: str | None = ( DAILY_WEBHOOK_UUID: str | None = (
None # Webhook UUID for this environment. Not used by production code None # Webhook UUID for this environment. Not used by production code
) )
# Multitrack processing
# SKIP_MIXDOWN: When True, skips audio mixdown and waveform generation.
# Transcription still works using individual tracks. Useful for:
# - Diagnosing OOM issues in mixdown
# - Fast processing when audio playback is not needed
# Note: UI will have no audio playback or waveform when enabled.
SKIP_MIXDOWN: bool = True
# Platform Configuration # Platform Configuration
DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM