mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
dailico track merge vibe
This commit is contained in:
162
server/test_multitrack_ffmpeg_local.py
Normal file
162
server/test_multitrack_ffmpeg_local.py
Normal file
@@ -0,0 +1,162 @@
#!/usr/bin/env python
"""
Test script to trigger multitrack recording processing with ffmpeg padding fix

This version loads tracks from local filesystem instead of S3
"""

import asyncio
import os

from reflector.pipelines.main_multitrack_pipeline import PipelineMainMultitrack

async def test_processing():
    """Manually trigger multitrack processing for the test recording.

    Creates a fresh transcript row, loads two local ``.webm`` track files,
    mocks boto3's S3 client so the pipeline reads those local bytes, runs the
    multitrack pipeline, and prints a summary of the resulting transcript so
    the ffmpeg padding fix can be eyeballed.

    Returns:
        str: the ID of the transcript that was created and processed.

    Raises:
        Exception: re-raised from the pipeline after marking the transcript
            as "error".
    """
    # Initialize database connection
    from reflector.db import get_database

    db = get_database()
    await db.connect()

    try:
        # Create a new transcript ID
        import uuid

        transcript_id = str(uuid.uuid4())

        # Create transcript directly with SQL
        from reflector.db.transcripts import (
            Transcript,
            transcripts,
            transcripts_controller,
        )

        pipeline = PipelineMainMultitrack(transcript_id=transcript_id)

        # Create transcript model
        transcript = Transcript(
            id=transcript_id,
            name="FFMPEG Test - Daily Multitrack Recording",
            source_kind="file",
            source_language="en",
            target_language="en",
            status="idle",
            events=[],
            title="",
        )
        # Insert into database
        query = transcripts.insert().values(**transcript.model_dump())
        await db.execute(query)
        print(f"Created transcript: {transcript_id}")

        # Read track files from local filesystem (in the container they'll be at /app/)
        tracks_dir = "/app"
        track_files = [
            "1760988935484-52f7f48b-fbab-431f-9a50-87b9abfc8255-cam-audio-1760988935922.webm",
            "1760988935484-a37c35e3-6f8e-4274-a482-e9d0f102a732-cam-audio-1760988943823.webm",
        ]

        # Read track data; a missing file contributes empty bytes so the
        # track index positions stay aligned with track_keys below.
        track_datas = []
        for track_file in track_files:
            file_path = os.path.join(tracks_dir, track_file)
            if os.path.exists(file_path):
                with open(file_path, "rb") as f:
                    track_datas.append(f.read())
                print(f"Loaded track: {track_file} ({len(track_datas[-1])} bytes)")
            else:
                print(f"Track file not found: {file_path}")
                track_datas.append(b"")

        # Process the tracks using the pipeline
        print("\nProcessing multitrack recording with ffmpeg padding...")
        print("Track 0: ...935922.webm (expected to start at ~2s)")
        print("Track 1: ...943823.webm (expected to start at ~51s)")

        # Call the process method directly with track data
        # We'll need to mock S3 operations and directly work with the data

        # Save tracks to temporary files and process them

        try:
            await pipeline.set_status(transcript_id, "processing")

            # Create a mock bucket and keys setup
            bucket_name = "test-bucket"
            track_keys = ["track0.webm", "track1.webm"]

            # Mock S3 client to return our local data
            from unittest.mock import MagicMock, patch

            mock_s3 = MagicMock()

            def mock_get_object(Bucket, Key):
                # Map each requested key back to the matching local track bytes
                idx = 0 if "track0" in Key else 1
                return {"Body": MagicMock(read=lambda: track_datas[idx])}

            mock_s3.get_object = mock_get_object

            # Patch boto3.client to return our mock
            with patch("boto3.client", return_value=mock_s3):
                await pipeline.process(bucket_name, track_keys)

            print("Processing complete!")
        except Exception as e:
            # Mark the transcript failed, dump the traceback, then re-raise so
            # the caller sees the failure.
            await pipeline.set_status(transcript_id, "error")
            print(f"Error during processing: {e}")
            import traceback

            traceback.print_exc()
            raise

        # Check the results
        final_transcript = await transcripts_controller.get(transcript_id)
        print(f"\nTranscript status: {final_transcript.status}")
        print(f"Transcript title: {final_transcript.title}")

        # Extract timeline from events
        if final_transcript.events:
            for event in final_transcript.events:
                if event.get("event") == "TRANSCRIPT":
                    text = event.get("data", {}).get("text", "")
                    # Show first 500 chars to check if speakers are properly separated
                    print("\nTranscript text (first 500 chars):")
                    print(text[:500])

                    # Show last 500 chars too to see if second speaker is at the end
                    print("\nTranscript text (last 500 chars):")
                    print(text[-500:])

                    # Count words per speaker
                    words = text.split()
                    print(f"\nTotal words in transcript: {len(words)}")

                    # Check if text has proper speaker separation
                    # Expected: First ~45% from speaker 0, then ~35% from speaker 1, then ~20% from speaker 0
                    first_third = " ".join(words[: len(words) // 3])
                    middle_third = " ".join(
                        words[len(words) // 3 : 2 * len(words) // 3]
                    )
                    last_third = " ".join(words[2 * len(words) // 3 :])

                    print(f"\nFirst third preview: {first_third[:100]}...")
                    print(f"Middle third preview: {middle_third[:100]}...")
                    print(f"Last third preview: {last_third[:100]}...")
                    break

        return transcript_id
    finally:
        await db.disconnect()


if __name__ == "__main__":
|
||||
transcript_id = asyncio.run(test_processing())
|
||||
print(f"\n✅ Test complete! Transcript ID: {transcript_id}")
|
||||
print(f"\nExpected timeline:")
|
||||
print(f" Speaker 0: ~2s to ~49s (first participant speaks)")
|
||||
print(f" Speaker 1: ~51s to ~70s (second participant speaks)")
|
||||
print(f" Speaker 0: ~73s to end (first participant speaks again)")
|
||||
print(
|
||||
f"\nIf the text shows proper chronological order (not interleaved), the fix worked!"
|
||||
)
|
||||
Reference in New Issue
Block a user