mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-08 23:06:47 +00:00
feat: Livekit - Selfhost video room solution (#946)
* feat: Livekit bare no recording nor pipeline * feat: full livekit pipeline * fix: caddy hatchet with livekit * fix: caddy livekit * fix: hatchet tls * fix: agg to webm for no padding * fix: reflector user id on participants and duration fix * fix: better docs and internal review fixes * fix: remove video files livekit
This commit is contained in:
committed by
GitHub
parent
b570d202dc
commit
bc8338fa4f
408
server/tests/test_livekit_backend.py
Normal file
408
server/tests/test_livekit_backend.py
Normal file
@@ -0,0 +1,408 @@
|
||||
"""
|
||||
Tests for LiveKit backend: webhook verification, token generation,
|
||||
display_name sanitization, and platform client behavior.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from reflector.livekit_api.webhooks import create_webhook_receiver, verify_webhook
|
||||
|
||||
# ── Webhook verification ──────────────────────────────────────
|
||||
|
||||
|
||||
class TestWebhookVerification:
    """Webhook signature checks: malformed or unverifiable input yields None."""

    def _make_receiver(self):
        """Build a receiver wired with the test API credentials."""
        return create_webhook_receiver(
            api_key="test_key",
            api_secret="test_secret_that_is_long_enough_for_hmac",
        )

    def test_rejects_empty_auth_header(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, b'{"event":"test"}', "") is None

    def test_rejects_garbage_auth_header(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, b'{"event":"test"}', "not-a-jwt") is None

    def test_rejects_empty_body(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, b"", "Bearer some.jwt.token") is None

    def test_handles_bytes_body(self):
        # A bytes payload must be handled without raising.
        rx = self._make_receiver()
        assert verify_webhook(rx, b'{"event":"test"}', "invalid") is None

    def test_handles_string_body(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, '{"event":"test"}', "invalid") is None

    def test_rejects_wrong_secret(self):
        """A token signed with a different secret must not verify."""
        rx = self._make_receiver()
        # JWT-shaped string whose signature does not match our secret.
        forged = "eyJhbGciOiJIUzI1NiJ9.eyJ0ZXN0IjoxfQ.wrong_signature"
        assert verify_webhook(rx, b"{}", forged) is None
|
||||
|
||||
|
||||
# ── Token generation ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestTokenGeneration:
|
||||
"""Test token generation using the LiveKit SDK directly (no client instantiation)."""
|
||||
|
||||
def _generate_token(
|
||||
self, room_name="room", identity="user", name=None, admin=False, ttl=86400
|
||||
):
|
||||
"""Generate a token using the SDK directly, avoiding LiveKitAPI client session."""
|
||||
from datetime import timedelta
|
||||
|
||||
from livekit.api import AccessToken, VideoGrants
|
||||
|
||||
token = AccessToken(
|
||||
api_key="test_key", api_secret="test_secret_that_is_long_enough_for_hmac"
|
||||
)
|
||||
token.identity = identity
|
||||
token.name = name or identity
|
||||
token.ttl = timedelta(seconds=ttl)
|
||||
token.with_grants(
|
||||
VideoGrants(
|
||||
room_join=True,
|
||||
room=room_name,
|
||||
can_publish=True,
|
||||
can_subscribe=True,
|
||||
room_admin=admin,
|
||||
)
|
||||
)
|
||||
return token.to_jwt()
|
||||
|
||||
def _decode_claims(self, token):
|
||||
import base64
|
||||
import json
|
||||
|
||||
payload = token.split(".")[1]
|
||||
payload += "=" * (4 - len(payload) % 4)
|
||||
return json.loads(base64.b64decode(payload))
|
||||
|
||||
def test_creates_valid_jwt(self):
|
||||
token = self._generate_token(
|
||||
room_name="test-room", identity="user123", name="Test User"
|
||||
)
|
||||
assert isinstance(token, str)
|
||||
assert len(token.split(".")) == 3
|
||||
|
||||
def test_token_includes_room_name(self):
|
||||
token = self._generate_token(room_name="my-room-20260401", identity="alice")
|
||||
claims = self._decode_claims(token)
|
||||
assert claims.get("video", {}).get("room") == "my-room-20260401"
|
||||
assert claims.get("sub") == "alice"
|
||||
|
||||
def test_token_respects_admin_flag(self):
|
||||
token = self._generate_token(identity="admin", admin=True)
|
||||
claims = self._decode_claims(token)
|
||||
assert claims["video"]["roomAdmin"] is True
|
||||
|
||||
def test_token_non_admin_by_default(self):
|
||||
token = self._generate_token(identity="user")
|
||||
claims = self._decode_claims(token)
|
||||
assert claims.get("video", {}).get("roomAdmin") in (None, False)
|
||||
|
||||
def test_ttl_is_timedelta(self):
|
||||
"""Verify ttl as timedelta works (previous bug: int caused TypeError)."""
|
||||
token = self._generate_token(ttl=3600)
|
||||
assert isinstance(token, str)
|
||||
|
||||
|
||||
# ── Display name sanitization ─────────────────────────────────
|
||||
|
||||
|
||||
class TestDisplayNameSanitization:
    """Mirror of the display-name sanitization in the rooms.py join endpoint."""

    def _sanitize(self, display_name: str) -> str:
        """Strip, replace disallowed characters with '_', cap at 40 chars."""
        return re.sub(r"[^a-zA-Z0-9_-]", "_", display_name.strip())[:40]

    def test_normal_name(self):
        assert self._sanitize("Alice") == "Alice"

    def test_name_with_spaces(self):
        assert self._sanitize("John Doe") == "John_Doe"

    def test_name_with_special_chars(self):
        assert self._sanitize("user@email.com") == "user_email_com"

    def test_name_with_unicode(self):
        cleaned = self._sanitize("José García")
        assert cleaned == "Jos__Garc_a"
        allowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"
        assert all(ch in allowed for ch in cleaned)

    def test_name_with_emoji(self):
        cleaned = self._sanitize("👋 Hello")
        assert "_" in cleaned  # the emoji collapses to an underscore
        assert "Hello" in cleaned

    def test_very_long_name(self):
        assert len(self._sanitize("A" * 100)) == 40

    def test_empty_name(self):
        assert self._sanitize("") == ""

    def test_only_special_chars(self):
        assert self._sanitize("!!!") == "___"

    def test_whitespace_stripped(self):
        assert self._sanitize(" Alice ") == "Alice"

    def test_hyphens_preserved(self):
        assert self._sanitize("first-last") == "first-last"

    def test_underscores_preserved(self):
        assert self._sanitize("first_last") == "first_last"

    def test_html_injection(self):
        cleaned = self._sanitize("<script>alert('xss')</script>")
        # No markup or quote characters may survive sanitization.
        assert "<" not in cleaned
        assert ">" not in cleaned
        assert "'" not in cleaned
|
||||
|
||||
|
||||
# ── S3 egress configuration ───────────────────────────────────
|
||||
|
||||
|
||||
class TestS3EgressConfig:
|
||||
"""Test S3Upload construction using the SDK directly."""
|
||||
|
||||
def test_build_s3_upload_requires_all_fields(self):
|
||||
# Missing fields should raise or produce invalid config
|
||||
# The validation happens in our client wrapper, not the SDK
|
||||
# Test the validation logic directly
|
||||
s3_bucket = None
|
||||
s3_access_key = "AKID"
|
||||
s3_secret_key = "secret"
|
||||
assert not all([s3_bucket, s3_access_key, s3_secret_key])
|
||||
|
||||
def test_s3_upload_with_credentials(self):
|
||||
from livekit.api import S3Upload
|
||||
|
||||
upload = S3Upload(
|
||||
access_key="AKID",
|
||||
secret="secret123",
|
||||
bucket="test-bucket",
|
||||
region="us-east-1",
|
||||
force_path_style=True,
|
||||
)
|
||||
assert upload.bucket == "test-bucket"
|
||||
assert upload.force_path_style is True
|
||||
|
||||
def test_s3_upload_with_endpoint(self):
|
||||
from livekit.api import S3Upload
|
||||
|
||||
upload = S3Upload(
|
||||
access_key="AKID",
|
||||
secret="secret",
|
||||
bucket="bucket",
|
||||
region="us-east-1",
|
||||
force_path_style=True,
|
||||
endpoint="http://garage:3900",
|
||||
)
|
||||
assert upload.endpoint == "http://garage:3900"
|
||||
|
||||
|
||||
# ── Platform detection ────────────────────────────────────────
|
||||
|
||||
|
||||
# ── Redis participant mapping ──────────────────────────────
|
||||
|
||||
|
||||
class TestParticipantIdentityMapping:
    """Test the identity → user_id Redis mapping pattern."""

    def test_mapping_key_format(self):
        room_name = "myroom-20260401172036"
        assert (
            f"livekit:participant_map:{room_name}"
            == "livekit:participant_map:myroom-20260401172036"
        )

    def test_identity_with_uuid_suffix_is_unique(self):
        import uuid

        base = "Juan"
        first = f"{base}-{uuid.uuid4().hex[:6]}"
        second = f"{base}-{uuid.uuid4().hex[:6]}"
        # Random suffixes keep identities distinct while preserving the name.
        assert first != second
        assert first.startswith("Juan-")
        assert second.startswith("Juan-")

    def test_strip_uuid_suffix_for_display(self):
        """The pipeline drops the trailing UUID chunk to recover the name."""
        identity = "Juan-2bcea0"
        shown = identity.rsplit("-", 1)[0] if "-" in identity else identity
        assert shown == "Juan"

    def test_strip_uuid_preserves_hyphenated_names(self):
        # rsplit with maxsplit=1 only removes the final segment.
        identity = "Mary-Jane-abc123"
        shown = identity.rsplit("-", 1)[0] if "-" in identity else identity
        assert shown == "Mary-Jane"

    def test_anon_identity_no_user_id(self):
        """Anonymous participants are never looked up for a user_id."""
        assert "anon-abc123".startswith("anon-")

    @pytest.mark.asyncio
    async def test_redis_hset_hgetall_roundtrip(self):
        """Round-trip the hset/hgetall pattern used for participant mapping."""
        try:
            from reflector.redis_cache import get_async_redis_client

            client = await get_async_redis_client()
            key = "livekit:participant_map:__test_room__"

            # Write two participant entries.
            await client.hset(key, "Juan-abc123", "user-id-1")
            await client.hset(key, "Alice-def456", "user-id-2")

            # Read everything back; the client may return bytes or str.
            raw = await client.hgetall(key)

            def _text(value):
                return value.decode() if isinstance(value, bytes) else value

            decoded = {_text(k): _text(v) for k, v in raw.items()}

            assert decoded["Juan-abc123"] == "user-id-1"
            assert decoded["Alice-def456"] == "user-id-2"

            # Cleanup so reruns start fresh.
            await client.delete(key)
        except Exception:
            pytest.skip("Redis not available")
|
||||
|
||||
|
||||
# ── Egress video cleanup safety ────────────────────────────────
|
||||
|
||||
|
||||
class TestEgressVideoCleanup:
    """Ensure video cleanup logic NEVER deletes audio files."""

    # Per-participant audio tracks (.ogg) — must always survive cleanup.
    AUDIO_FILES = [
        "livekit/room-20260401/juan-abc123-2026-04-01T100000-TR_AMR3SWs74Divho.ogg",
        "livekit/room-20260401/alice-def456-2026-04-01T100030-TR_AMirKjdAvLteAZ.ogg",
        "livekit/room-20260401/bob-789abc-2026-04-01T100100-TR_AMyoSbM7tAQbYj.ogg",
    ]

    # Per-participant video tracks (.webm) — the only deletion candidates.
    VIDEO_FILES = [
        "livekit/room-20260401/juan-abc123-2026-04-01T100000-TR_VC679dgMQBdfhT.webm",
        "livekit/room-20260401/alice-def456-2026-04-01T100030-TR_VCLsuRuxLp4eik.webm",
    ]

    # Egress manifests (.json) — retained alongside the audio.
    MANIFEST_FILES = [
        "livekit/room-20260401/EG_K5sipvfB5fTM.json",
        "livekit/room-20260401/EG_nzwBsH9xzgoj.json",
    ]

    def _should_delete(self, filename: str) -> bool:
        """Replicate the deletion predicate from _handle_egress_ended."""
        return filename.endswith(".webm")

    def test_audio_files_never_deleted(self):
        """CRITICAL: no audio track may ever be marked for deletion."""
        for path in self.AUDIO_FILES:
            assert not self._should_delete(path), f"Audio file would be deleted: {path}"

    def test_video_files_are_deleted(self):
        for path in self.VIDEO_FILES:
            assert self._should_delete(path), f"Video file NOT marked for deletion: {path}"

    def test_manifests_are_kept(self):
        for path in self.MANIFEST_FILES:
            assert not self._should_delete(path), f"Manifest would be deleted: {path}"

    def test_ogg_extension_never_matches_delete(self):
        """Double-check: no .ogg file may ever satisfy the predicate."""
        candidates = [
            "anything.ogg",
            "livekit/room/track.ogg",
            "video.ogg",  # misleading basename, still an audio file
            ".ogg",
            "TR_VC_fake_video.ogg",  # video-looking track ID, audio extension
        ]
        for path in candidates:
            assert not self._should_delete(path), f".ogg file would be deleted: {path}"

    def test_webm_always_matches_delete(self):
        candidates = [
            "anything.webm",
            "livekit/room/track.webm",
            "audio.webm",  # misleading basename, still a video container
            ".webm",
        ]
        for path in candidates:
            assert self._should_delete(path), f".webm file NOT marked for deletion: {path}"

    def test_unknown_extensions_are_kept(self):
        """Unknown file types should NOT be deleted (safe by default)."""
        candidates = [
            "file.mp4",
            "file.wav",
            "file.mp3",
            "file.txt",
            "file",
            "",
        ]
        for path in candidates:
            assert not self._should_delete(path), f"Unknown file type would be deleted: {path}"
|
||||
|
||||
|
||||
# ── Platform detection ────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSourcePlatformDetection:
    """Test the recording ID prefix-based platform detection from transcript_process.py."""

    def _detect(self, recording_id):
        """Replicate the detection: an 'lk-' prefix means livekit, else daily."""
        if recording_id and recording_id.startswith("lk-"):
            return "livekit"
        return "daily"

    def test_livekit_prefix(self):
        assert self._detect("lk-livekit-20260401234423") == "livekit"

    def test_daily_no_prefix(self):
        # Daily recording IDs are plain UUIDs with no prefix.
        assert self._detect("08fa0b24-9220-44c5-846c-3f116cf8e738") == "daily"

    def test_none_recording_id(self):
        assert self._detect(None) == "daily"

    def test_empty_recording_id(self):
        assert self._detect("") == "daily"
|
||||
393
server/tests/test_livekit_track_processing.py
Normal file
393
server/tests/test_livekit_track_processing.py
Normal file
@@ -0,0 +1,393 @@
|
||||
"""
|
||||
Tests for LiveKit track processing: filepath parsing, offset calculation,
|
||||
and pad_track padding_seconds behavior.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from fractions import Fraction
|
||||
|
||||
import av
|
||||
import pytest
|
||||
|
||||
from reflector.utils.livekit import (
|
||||
LiveKitTrackFile,
|
||||
calculate_track_offsets,
|
||||
extract_livekit_base_room_name,
|
||||
filter_audio_tracks,
|
||||
parse_livekit_track_filepath,
|
||||
)
|
||||
|
||||
# ── Filepath parsing ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestParseLiveKitTrackFilepath:
    """Parsing LiveKit egress object keys into structured track metadata."""

    def test_parses_ogg_audio_track(self):
        parsed = parse_livekit_track_filepath(
            "livekit/myroom-20260401172036/juan-4b82ed-2026-04-01T195758-TR_AMR3SWs74Divho.ogg"
        )
        assert parsed.room_name == "myroom-20260401172036"
        assert parsed.participant_identity == "juan-4b82ed"
        assert parsed.track_id == "TR_AMR3SWs74Divho"
        assert parsed.timestamp == datetime(2026, 4, 1, 19, 57, 58, tzinfo=timezone.utc)

    def test_parses_different_identities(self):
        first = parse_livekit_track_filepath(
            "livekit/room-20260401/alice-a1b2c3-2026-04-01T100000-TR_abc123.ogg"
        )
        second = parse_livekit_track_filepath(
            "livekit/room-20260401/bob_smith-d4e5f6-2026-04-01T100030-TR_def456.ogg"
        )
        assert first.participant_identity == "alice-a1b2c3"
        assert second.participant_identity == "bob_smith-d4e5f6"

    def test_rejects_json_manifest(self):
        with pytest.raises(ValueError, match="doesn't match expected format"):
            parse_livekit_track_filepath("livekit/myroom-20260401/EG_K5sipvfB5fTM.json")

    def test_rejects_webm_video(self):
        # The TR_ pattern matches video tracks too, so a .webm key parses
        # successfully; audio-only filtering happens in filter_audio_tracks.
        parsed = parse_livekit_track_filepath(
            "livekit/myroom-20260401/juan-4b82ed-2026-04-01T195727-TR_VC679dgMQBdfhT.webm"
        )
        assert parsed.track_id == "TR_VC679dgMQBdfhT"

    def test_rejects_invalid_path(self):
        with pytest.raises(ValueError):
            parse_livekit_track_filepath("not/a/valid/path.ogg")

    def test_rejects_missing_track_id(self):
        with pytest.raises(ValueError):
            parse_livekit_track_filepath("livekit/room/user-2026-04-01T100000.ogg")

    def test_parses_timestamp_correctly(self):
        parsed = parse_livekit_track_filepath(
            "livekit/room-20260401/user-abc123-2026-12-25T235959-TR_test.ogg"
        )
        assert parsed.timestamp == datetime(
            2026, 12, 25, 23, 59, 59, tzinfo=timezone.utc
        )
|
||||
|
||||
|
||||
# ── Audio track filtering ─────────────────────────────────────
|
||||
|
||||
|
||||
class TestFilterAudioTracks:
    """filter_audio_tracks keeps only .ogg keys, dropping manifests and video."""

    def test_filters_to_ogg_only(self):
        mixed_keys = [
            "livekit/room/EG_abc.json",
            "livekit/room/user-abc-2026-04-01T100000-TR_audio.ogg",
            "livekit/room/user-abc-2026-04-01T100000-TR_video.webm",
            "livekit/room/EG_def.json",
            "livekit/room/user2-def-2026-04-01T100030-TR_audio2.ogg",
        ]
        kept = filter_audio_tracks(mixed_keys)
        assert len(kept) == 2
        assert all(key.endswith(".ogg") for key in kept)

    def test_empty_input(self):
        assert filter_audio_tracks([]) == []

    def test_no_audio_tracks(self):
        only_non_audio = ["livekit/room/EG_abc.json", "livekit/room/user-TR_v.webm"]
        assert filter_audio_tracks(only_non_audio) == []
|
||||
|
||||
|
||||
# ── Offset calculation ─────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCalculateTrackOffsets:
    """Offsets are seconds relative to the earliest track's start timestamp."""

    def _track(self, s3_key, identity, timestamp, track_id):
        """Build a LiveKitTrackFile in room 'r' for offset tests."""
        return LiveKitTrackFile(
            s3_key=s3_key,
            room_name="r",
            participant_identity=identity,
            timestamp=timestamp,
            track_id=track_id,
        )

    def test_single_track_zero_offset(self):
        only = self._track(
            "k1", "alice", datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc), "TR_1"
        )
        offsets = calculate_track_offsets([only])
        assert len(offsets) == 1
        assert offsets[0][1] == 0.0

    def test_two_tracks_correct_offset(self):
        offsets = calculate_track_offsets(
            [
                self._track(
                    "k1",
                    "alice",
                    datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc),
                    "TR_1",
                ),
                self._track(
                    "k2",
                    "bob",
                    datetime(2026, 4, 1, 10, 1, 10, tzinfo=timezone.utc),
                    "TR_2",
                ),
            ]
        )
        assert offsets[0][1] == 0.0  # alice (earliest)
        assert offsets[1][1] == 70.0  # bob (70 seconds later)

    def test_three_tracks_earliest_is_zero(self):
        # Deliberately out of chronological order: bob, alice, charlie.
        offsets = calculate_track_offsets(
            [
                self._track(
                    "k2",
                    "bob",
                    datetime(2026, 4, 1, 10, 0, 30, tzinfo=timezone.utc),
                    "TR_2",
                ),
                self._track(
                    "k1",
                    "alice",
                    datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc),
                    "TR_1",
                ),
                self._track(
                    "k3",
                    "charlie",
                    datetime(2026, 4, 1, 10, 1, 0, tzinfo=timezone.utc),
                    "TR_3",
                ),
            ]
        )
        by_identity = {track.participant_identity: offset for track, offset in offsets}
        assert by_identity["alice"] == 0.0
        assert by_identity["bob"] == 30.0
        assert by_identity["charlie"] == 60.0

    def test_empty_tracks(self):
        assert calculate_track_offsets([]) == []

    def test_simultaneous_tracks_zero_offsets(self):
        shared_ts = datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc)
        offsets = calculate_track_offsets(
            [
                self._track("k1", "a", shared_ts, "TR_1"),
                self._track("k2", "b", shared_ts, "TR_2"),
            ]
        )
        assert all(offset == 0.0 for _, offset in offsets)
|
||||
|
||||
|
||||
# ── Room name extraction ───────────────────────────────────────
|
||||
|
||||
|
||||
class TestExtractLiveKitBaseRoomName:
    """Stripping the '-<timestamp>' suffix LiveKit appends to room names."""

    def test_strips_timestamp_suffix(self):
        assert extract_livekit_base_room_name("myroom-20260401172036") == "myroom"

    def test_preserves_hyphenated_name(self):
        # Only the final timestamp segment is removed; inner hyphens stay.
        base = extract_livekit_base_room_name("my-room-name-20260401172036")
        assert base == "my-room-name"

    def test_single_segment(self):
        assert extract_livekit_base_room_name("room-20260401") == "room"
|
||||
|
||||
|
||||
# ── pad_track padding_seconds behavior ─────────────────────────
|
||||
|
||||
|
||||
class TestPadTrackPaddingSeconds:
    """pad_track must honour a pre-calculated padding_seconds for LiveKit
    (skipping container metadata) and only fall back to extracting the
    start time from the container when padding_seconds is None (Daily).
    """

    def _make_test_ogg(self, path: str, duration_seconds: float = 5.0):
        """Write a silent OGG/Opus file of roughly *duration_seconds*."""
        sample_rate = 48000
        frame_samples = 960  # standard Opus frame size at 48kHz
        with av.open(path, "w", format="ogg") as out:
            stream = out.add_stream("libopus", rate=sample_rate)
            stream.bit_rate = 64000
            total_samples = int(duration_seconds * sample_rate)
            cursor = 0
            while cursor < total_samples:
                frame = av.AudioFrame(
                    format="s16", layout="stereo", samples=frame_samples
                )
                # Silence: s16 (2 bytes) * stereo (2 channels) per sample.
                frame.planes[0].update(bytes(frame_samples * 2 * 2))
                frame.sample_rate = sample_rate
                frame.pts = cursor
                frame.time_base = Fraction(1, sample_rate)
                for packet in stream.encode(frame):
                    out.mux(packet)
                cursor += frame_samples
            # Flush any buffered packets out of the encoder.
            for packet in stream.encode(None):
                out.mux(packet)

    def test_ogg_has_zero_start_time(self, tmp_path):
        """OGG files (as LiveKit egress produces) carry no usable start_time,
        which is exactly why padding must be pre-calculated."""
        ogg_path = str(tmp_path / "test.ogg")
        self._make_test_ogg(ogg_path)

        with av.open(ogg_path) as container:
            from reflector.utils.audio_padding import (
                extract_stream_start_time_from_container,
            )

            start_time = extract_stream_start_time_from_container(container, 0)

        assert start_time <= 0.0, (
            "OGG files should have start_time<=0 (no usable offset), confirming "
            f"LiveKit tracks need pre-calculated padding_seconds. Got: {start_time}"
        )

    def test_precalculated_padding_skips_metadata_extraction(self, tmp_path):
        """With padding_seconds set, pad_track takes it verbatim and never
        calls extract_stream_start_time_from_container."""
        from reflector.hatchet.workflows.track_processing import TrackInput

        livekit_input = TrackInput(
            track_index=0,
            s3_key="livekit/room/user-abc-2026-04-01T100000-TR_audio.ogg",
            bucket_name="test-bucket",
            transcript_id="test-transcript",
            source_platform="livekit",
            padding_seconds=70.0,
        )

        assert livekit_input.padding_seconds == 70.0
        # pad_track branches on `padding_seconds is not None`, so container
        # metadata extraction is skipped entirely for LiveKit tracks.

    def test_none_padding_falls_back_to_metadata(self, tmp_path):
        """With padding_seconds None (Daily), pad_track derives the start
        time from container metadata instead."""
        from reflector.hatchet.workflows.track_processing import TrackInput

        daily_input = TrackInput(
            track_index=0,
            s3_key="daily/room/track.webm",
            bucket_name="test-bucket",
            transcript_id="test-transcript",
            source_platform="daily",
            padding_seconds=None,
        )

        assert daily_input.padding_seconds is None
        # pad_track will call extract_stream_start_time_from_container here.

    def test_zero_padding_returns_original_key(self):
        """padding_seconds=0.0 marks the earliest track: pad_track hits the
        'no padding needed' branch and returns the original S3 key."""
        from reflector.hatchet.workflows.track_processing import TrackInput

        earliest_input = TrackInput(
            track_index=0,
            s3_key="livekit/room/earliest-track.ogg",
            bucket_name="test-bucket",
            transcript_id="test-transcript",
            source_platform="livekit",
            padding_seconds=0.0,
        )

        # 0.0 flows through as start_time_seconds=0.0 → no padding applied.
        assert earliest_input.padding_seconds == 0.0
|
||||
|
||||
|
||||
# ── Pipeline offset calculation (process_tracks logic) ─────────
|
||||
|
||||
|
||||
class TestProcessTracksOffsetCalculation:
    """Offset-calculation logic used by process_tracks for LiveKit rooms."""

    def test_livekit_offsets_from_timestamps(self):
        """Simulate the offset calculation done in process_tracks."""
        tracks = [
            {
                "s3_key": "track1.ogg",
                "participant_identity": "admin-0129c3",
                "timestamp": "2026-04-01T23:44:50+00:00",
            },
            {
                "s3_key": "track2.ogg",
                "participant_identity": "juan-5a5b41",
                "timestamp": "2026-04-01T23:46:00+00:00",
            },
        ]

        # Same algorithm as process_tracks: parse, find earliest, diff.
        parsed = [
            (idx, datetime.fromisoformat(track["timestamp"]))
            for idx, track in enumerate(tracks)
            if track.get("timestamp")
        ]
        earliest = min(ts for _, ts in parsed)
        track_padding = {idx: (ts - earliest).total_seconds() for idx, ts in parsed}

        assert track_padding[0] == 0.0  # admin started first
        assert track_padding[1] == 70.0  # juan joined 70 seconds later

    def test_daily_tracks_get_no_precalculated_padding(self):
        """Daily tracks should NOT get padding_seconds (use container metadata)."""
        tracks = [
            {"s3_key": "daily-track1.webm"},
            {"s3_key": "daily-track2.webm"},
        ]

        track_padding = {}
        source_platform = "daily"

        if source_platform == "livekit":
            # Never taken for daily; the livekit-only work would live here.
            pass

        # With no "timestamp" fields, no padding entries are ever created.
        assert track_padding == {}
        assert all(track_padding.get(idx) is None for idx, _ in enumerate(tracks))

    def test_livekit_missing_timestamp_graceful(self):
        """If a LiveKit track is missing its timestamp, it is skipped."""
        tracks = [
            {
                "s3_key": "track1.ogg",
                "participant_identity": "alice",
                "timestamp": "2026-04-01T10:00:00+00:00",
            },
            {"s3_key": "track2.ogg", "participant_identity": "bob"},  # no timestamp
        ]

        def _parse(raw):
            # None for absent or unparseable timestamps.
            if not raw:
                return None
            try:
                return datetime.fromisoformat(raw)
            except (ValueError, TypeError):
                return None

        stamped = [(idx, _parse(t.get("timestamp"))) for idx, t in enumerate(tracks)]
        usable = [(idx, ts) for idx, ts in stamped if ts is not None]
        assert len(usable) == 1  # only alice carries a timestamp
        assert usable[0][0] == 0  # and she is track index 0
|
||||
Reference in New Issue
Block a user