diff --git a/server/reflector/views/livekit.py b/server/reflector/views/livekit.py index a89e8f17..2f031229 100644 --- a/server/reflector/views/livekit.py +++ b/server/reflector/views/livekit.py @@ -151,7 +151,12 @@ async def _handle_egress_started(event): async def _handle_egress_ended(event): - """Log Track Egress completion. Files are on S3 already; pipeline uses S3 listing.""" + """Handle Track Egress completion. Delete video files immediately to save storage. + + AutoTrackEgress records ALL tracks (audio + video). Audio is kept for the + transcription pipeline. Video files are unused and deleted on completion. + This saves ~50x storage (video is 98% of egress output for HD cameras). + """ egress = event.egress_info if not egress: logger.warning("egress_ended: no egress info in payload") @@ -177,6 +182,30 @@ async def _handle_egress_ended(event): filenames=[f.filename for f in file_results] if file_results else [], ) + # Delete video files (.webm) immediately — only audio (.ogg) is needed for transcription. + # Video tracks are 50-90x larger than audio and unused by the pipeline. + # JSON manifests are kept (lightweight metadata, ~430 bytes each). + for file_result in file_results: + filename = file_result.filename + if filename and filename.endswith(".webm"): + try: + from reflector.storage import get_source_storage # noqa: PLC0415 + + storage = get_source_storage("livekit") + await storage.delete_file(filename) + logger.info( + "Deleted video egress file", + filename=filename, + room_name=egress.room_name, + ) + except Exception as e: + # Non-critical — pipeline filters these out anyway + logger.warning( + "Failed to delete video egress file", + filename=filename, + error=str(e), + ) + async def _handle_room_finished(event): """Fast-path: trigger multitrack processing when room closes. diff --git a/server/tests/test_livekit_backend.py b/server/tests/test_livekit_backend.py index be3ecaed..29efd19f 100644 --- a/server/tests/test_livekit_backend.py +++ b/server/tests/test_livekit_backend.py @@ -300,6 +300,83 @@ class TestParticipantIdentityMapping: pytest.skip("Redis not available") +# ── Egress video cleanup safety ──────────────────────────────── + + +class TestEgressVideoCleanup: + """Ensure video cleanup logic NEVER deletes audio files.""" + + AUDIO_FILES = [ + "livekit/room-20260401/juan-abc123-2026-04-01T100000-TR_AMR3SWs74Divho.ogg", + "livekit/room-20260401/alice-def456-2026-04-01T100030-TR_AMirKjdAvLteAZ.ogg", + "livekit/room-20260401/bob-789abc-2026-04-01T100100-TR_AMyoSbM7tAQbYj.ogg", + ] + + VIDEO_FILES = [ + "livekit/room-20260401/juan-abc123-2026-04-01T100000-TR_VC679dgMQBdfhT.webm", + "livekit/room-20260401/alice-def456-2026-04-01T100030-TR_VCLsuRuxLp4eik.webm", + ] + + MANIFEST_FILES = [ + "livekit/room-20260401/EG_K5sipvfB5fTM.json", + "livekit/room-20260401/EG_nzwBsH9xzgoj.json", + ] + + def _should_delete(self, filename: str) -> bool: + """Replicate the deletion logic from _handle_egress_ended.""" + return filename.endswith(".webm") + + def test_audio_files_never_deleted(self): + """CRITICAL: Audio files must NEVER be marked for deletion.""" + for f in self.AUDIO_FILES: + assert not self._should_delete(f), f"Audio file would be deleted: {f}" + + def test_video_files_are_deleted(self): + for f in self.VIDEO_FILES: + assert self._should_delete(f), f"Video file NOT marked for deletion: {f}" + + def test_manifests_are_kept(self): + for f in self.MANIFEST_FILES: + assert not self._should_delete(f), f"Manifest would be deleted: {f}" + + def test_ogg_extension_never_matches_delete(self): + """Double-check: no .ogg file ever matches the deletion condition.""" + test_names = [ + "anything.ogg", + "livekit/room/track.ogg", + "video.ogg", # Even if someone names it "video.ogg" + ".ogg", + "TR_VC_fake_video.ogg", # Video-like track ID but .ogg extension + ] + for f in test_names: + assert not self._should_delete(f), f".ogg file would be deleted: {f}" + + def test_webm_always_matches_delete(self): + test_names = [ + "anything.webm", + "livekit/room/track.webm", + "audio.webm", # Even if someone names it "audio.webm" + ".webm", + ] + for f in test_names: + assert self._should_delete(f), f".webm file NOT marked for deletion: {f}" + + def test_unknown_extensions_are_kept(self): + """Unknown file types should NOT be deleted (safe by default).""" + test_names = [ + "file.mp4", + "file.wav", + "file.mp3", + "file.txt", + "file", + "", + ] + for f in test_names: + assert not self._should_delete( + f + ), f"Unknown file type would be deleted: {f}" + + # ── Platform detection ────────────────────────────────────────