mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-08 23:06:47 +00:00
feat: Livekit - Selfhost video room solution (#946)
* feat: Livekit bare no recording nor pipeline * feat: full livekit pipeline * fix: caddy hatchet with livekit * fix: caddy livekit * fix: hatchet tls * fix: agg to webm for no padding * fix: reflector user id on participants and duration fix * fix: better docs and internal review fixes * fix: remove video files livekit
This commit is contained in:
committed by
GitHub
parent
b570d202dc
commit
bc8338fa4f
408
server/tests/test_livekit_backend.py
Normal file
408
server/tests/test_livekit_backend.py
Normal file
@@ -0,0 +1,408 @@
|
||||
"""
|
||||
Tests for LiveKit backend: webhook verification, token generation,
|
||||
display_name sanitization, and platform client behavior.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from reflector.livekit_api.webhooks import create_webhook_receiver, verify_webhook
|
||||
|
||||
# ── Webhook verification ──────────────────────────────────────
|
||||
|
||||
|
||||
class TestWebhookVerification:
    """Webhook signature checks: malformed or unverifiable input yields None."""

    def _make_receiver(self):
        """Build a receiver wired with the test API credentials."""
        return create_webhook_receiver(
            api_key="test_key",
            api_secret="test_secret_that_is_long_enough_for_hmac",
        )

    def test_rejects_empty_auth_header(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, b'{"event":"test"}', "") is None

    def test_rejects_garbage_auth_header(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, b'{"event":"test"}', "not-a-jwt") is None

    def test_rejects_empty_body(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, b"", "Bearer some.jwt.token") is None

    def test_handles_bytes_body(self):
        # A bytes payload must be handled without raising.
        rx = self._make_receiver()
        assert verify_webhook(rx, b'{"event":"test"}', "invalid") is None

    def test_handles_string_body(self):
        rx = self._make_receiver()
        assert verify_webhook(rx, '{"event":"test"}', "invalid") is None

    def test_rejects_wrong_secret(self):
        """A token signed with a different secret must not verify."""
        rx = self._make_receiver()
        # JWT-shaped string whose signature does not match our secret.
        forged = "eyJhbGciOiJIUzI1NiJ9.eyJ0ZXN0IjoxfQ.wrong_signature"
        assert verify_webhook(rx, b"{}", forged) is None
|
||||
|
||||
|
||||
# ── Token generation ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestTokenGeneration:
|
||||
"""Test token generation using the LiveKit SDK directly (no client instantiation)."""
|
||||
|
||||
def _generate_token(
|
||||
self, room_name="room", identity="user", name=None, admin=False, ttl=86400
|
||||
):
|
||||
"""Generate a token using the SDK directly, avoiding LiveKitAPI client session."""
|
||||
from datetime import timedelta
|
||||
|
||||
from livekit.api import AccessToken, VideoGrants
|
||||
|
||||
token = AccessToken(
|
||||
api_key="test_key", api_secret="test_secret_that_is_long_enough_for_hmac"
|
||||
)
|
||||
token.identity = identity
|
||||
token.name = name or identity
|
||||
token.ttl = timedelta(seconds=ttl)
|
||||
token.with_grants(
|
||||
VideoGrants(
|
||||
room_join=True,
|
||||
room=room_name,
|
||||
can_publish=True,
|
||||
can_subscribe=True,
|
||||
room_admin=admin,
|
||||
)
|
||||
)
|
||||
return token.to_jwt()
|
||||
|
||||
def _decode_claims(self, token):
|
||||
import base64
|
||||
import json
|
||||
|
||||
payload = token.split(".")[1]
|
||||
payload += "=" * (4 - len(payload) % 4)
|
||||
return json.loads(base64.b64decode(payload))
|
||||
|
||||
def test_creates_valid_jwt(self):
|
||||
token = self._generate_token(
|
||||
room_name="test-room", identity="user123", name="Test User"
|
||||
)
|
||||
assert isinstance(token, str)
|
||||
assert len(token.split(".")) == 3
|
||||
|
||||
def test_token_includes_room_name(self):
|
||||
token = self._generate_token(room_name="my-room-20260401", identity="alice")
|
||||
claims = self._decode_claims(token)
|
||||
assert claims.get("video", {}).get("room") == "my-room-20260401"
|
||||
assert claims.get("sub") == "alice"
|
||||
|
||||
def test_token_respects_admin_flag(self):
|
||||
token = self._generate_token(identity="admin", admin=True)
|
||||
claims = self._decode_claims(token)
|
||||
assert claims["video"]["roomAdmin"] is True
|
||||
|
||||
def test_token_non_admin_by_default(self):
|
||||
token = self._generate_token(identity="user")
|
||||
claims = self._decode_claims(token)
|
||||
assert claims.get("video", {}).get("roomAdmin") in (None, False)
|
||||
|
||||
def test_ttl_is_timedelta(self):
|
||||
"""Verify ttl as timedelta works (previous bug: int caused TypeError)."""
|
||||
token = self._generate_token(ttl=3600)
|
||||
assert isinstance(token, str)
|
||||
|
||||
|
||||
# ── Display name sanitization ─────────────────────────────────
|
||||
|
||||
|
||||
class TestDisplayNameSanitization:
    """Mirror of the display-name sanitization in the rooms.py join endpoint."""

    def _sanitize(self, display_name: str) -> str:
        """Strip, replace disallowed characters with '_', cap at 40 chars."""
        return re.sub(r"[^a-zA-Z0-9_-]", "_", display_name.strip())[:40]

    def test_normal_name(self):
        assert self._sanitize("Alice") == "Alice"

    def test_name_with_spaces(self):
        assert self._sanitize("John Doe") == "John_Doe"

    def test_name_with_special_chars(self):
        assert self._sanitize("user@email.com") == "user_email_com"

    def test_name_with_unicode(self):
        cleaned = self._sanitize("José García")
        assert cleaned == "Jos__Garc_a"
        allowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"
        assert all(ch in allowed for ch in cleaned)

    def test_name_with_emoji(self):
        cleaned = self._sanitize("👋 Hello")
        assert "_" in cleaned  # the emoji collapses to an underscore
        assert "Hello" in cleaned

    def test_very_long_name(self):
        assert len(self._sanitize("A" * 100)) == 40

    def test_empty_name(self):
        assert self._sanitize("") == ""

    def test_only_special_chars(self):
        assert self._sanitize("!!!") == "___"

    def test_whitespace_stripped(self):
        assert self._sanitize(" Alice ") == "Alice"

    def test_hyphens_preserved(self):
        assert self._sanitize("first-last") == "first-last"

    def test_underscores_preserved(self):
        assert self._sanitize("first_last") == "first_last"

    def test_html_injection(self):
        cleaned = self._sanitize("<script>alert('xss')</script>")
        # No markup or quote characters may survive sanitization.
        assert "<" not in cleaned
        assert ">" not in cleaned
        assert "'" not in cleaned
|
||||
|
||||
|
||||
# ── S3 egress configuration ───────────────────────────────────
|
||||
|
||||
|
||||
class TestS3EgressConfig:
|
||||
"""Test S3Upload construction using the SDK directly."""
|
||||
|
||||
def test_build_s3_upload_requires_all_fields(self):
|
||||
# Missing fields should raise or produce invalid config
|
||||
# The validation happens in our client wrapper, not the SDK
|
||||
# Test the validation logic directly
|
||||
s3_bucket = None
|
||||
s3_access_key = "AKID"
|
||||
s3_secret_key = "secret"
|
||||
assert not all([s3_bucket, s3_access_key, s3_secret_key])
|
||||
|
||||
def test_s3_upload_with_credentials(self):
|
||||
from livekit.api import S3Upload
|
||||
|
||||
upload = S3Upload(
|
||||
access_key="AKID",
|
||||
secret="secret123",
|
||||
bucket="test-bucket",
|
||||
region="us-east-1",
|
||||
force_path_style=True,
|
||||
)
|
||||
assert upload.bucket == "test-bucket"
|
||||
assert upload.force_path_style is True
|
||||
|
||||
def test_s3_upload_with_endpoint(self):
|
||||
from livekit.api import S3Upload
|
||||
|
||||
upload = S3Upload(
|
||||
access_key="AKID",
|
||||
secret="secret",
|
||||
bucket="bucket",
|
||||
region="us-east-1",
|
||||
force_path_style=True,
|
||||
endpoint="http://garage:3900",
|
||||
)
|
||||
assert upload.endpoint == "http://garage:3900"
|
||||
|
||||
|
||||
# ── Platform detection ────────────────────────────────────────
|
||||
|
||||
|
||||
# ── Redis participant mapping ──────────────────────────────
|
||||
|
||||
|
||||
class TestParticipantIdentityMapping:
    """Test the identity → user_id Redis mapping pattern."""

    def test_mapping_key_format(self):
        room_name = "myroom-20260401172036"
        assert (
            f"livekit:participant_map:{room_name}"
            == "livekit:participant_map:myroom-20260401172036"
        )

    def test_identity_with_uuid_suffix_is_unique(self):
        import uuid

        base = "Juan"
        first = f"{base}-{uuid.uuid4().hex[:6]}"
        second = f"{base}-{uuid.uuid4().hex[:6]}"
        # Random suffixes keep identities distinct while preserving the name.
        assert first != second
        assert first.startswith("Juan-")
        assert second.startswith("Juan-")

    def test_strip_uuid_suffix_for_display(self):
        """The pipeline drops the trailing UUID chunk to recover the name."""
        identity = "Juan-2bcea0"
        shown = identity.rsplit("-", 1)[0] if "-" in identity else identity
        assert shown == "Juan"

    def test_strip_uuid_preserves_hyphenated_names(self):
        # rsplit with maxsplit=1 only removes the final segment.
        identity = "Mary-Jane-abc123"
        shown = identity.rsplit("-", 1)[0] if "-" in identity else identity
        assert shown == "Mary-Jane"

    def test_anon_identity_no_user_id(self):
        """Anonymous participants are never looked up for a user_id."""
        assert "anon-abc123".startswith("anon-")

    @pytest.mark.asyncio
    async def test_redis_hset_hgetall_roundtrip(self):
        """Round-trip the hset/hgetall pattern used for participant mapping."""
        try:
            from reflector.redis_cache import get_async_redis_client

            client = await get_async_redis_client()
            key = "livekit:participant_map:__test_room__"

            # Write two participant entries.
            await client.hset(key, "Juan-abc123", "user-id-1")
            await client.hset(key, "Alice-def456", "user-id-2")

            # Read everything back; the client may return bytes or str.
            raw = await client.hgetall(key)

            def _text(value):
                return value.decode() if isinstance(value, bytes) else value

            decoded = {_text(k): _text(v) for k, v in raw.items()}

            assert decoded["Juan-abc123"] == "user-id-1"
            assert decoded["Alice-def456"] == "user-id-2"

            # Cleanup so reruns start fresh.
            await client.delete(key)
        except Exception:
            pytest.skip("Redis not available")
|
||||
|
||||
|
||||
# ── Egress video cleanup safety ────────────────────────────────
|
||||
|
||||
|
||||
class TestEgressVideoCleanup:
    """Ensure video cleanup logic NEVER deletes audio files."""

    # Per-participant audio tracks (.ogg) — must always survive cleanup.
    AUDIO_FILES = [
        "livekit/room-20260401/juan-abc123-2026-04-01T100000-TR_AMR3SWs74Divho.ogg",
        "livekit/room-20260401/alice-def456-2026-04-01T100030-TR_AMirKjdAvLteAZ.ogg",
        "livekit/room-20260401/bob-789abc-2026-04-01T100100-TR_AMyoSbM7tAQbYj.ogg",
    ]

    # Per-participant video tracks (.webm) — the only deletion candidates.
    VIDEO_FILES = [
        "livekit/room-20260401/juan-abc123-2026-04-01T100000-TR_VC679dgMQBdfhT.webm",
        "livekit/room-20260401/alice-def456-2026-04-01T100030-TR_VCLsuRuxLp4eik.webm",
    ]

    # Egress manifests (.json) — retained alongside the audio.
    MANIFEST_FILES = [
        "livekit/room-20260401/EG_K5sipvfB5fTM.json",
        "livekit/room-20260401/EG_nzwBsH9xzgoj.json",
    ]

    def _should_delete(self, filename: str) -> bool:
        """Replicate the deletion predicate from _handle_egress_ended."""
        return filename.endswith(".webm")

    def test_audio_files_never_deleted(self):
        """CRITICAL: no audio track may ever be marked for deletion."""
        for path in self.AUDIO_FILES:
            assert not self._should_delete(path), f"Audio file would be deleted: {path}"

    def test_video_files_are_deleted(self):
        for path in self.VIDEO_FILES:
            assert self._should_delete(path), f"Video file NOT marked for deletion: {path}"

    def test_manifests_are_kept(self):
        for path in self.MANIFEST_FILES:
            assert not self._should_delete(path), f"Manifest would be deleted: {path}"

    def test_ogg_extension_never_matches_delete(self):
        """Double-check: no .ogg file may ever satisfy the predicate."""
        candidates = [
            "anything.ogg",
            "livekit/room/track.ogg",
            "video.ogg",  # misleading basename, still an audio file
            ".ogg",
            "TR_VC_fake_video.ogg",  # video-looking track ID, audio extension
        ]
        for path in candidates:
            assert not self._should_delete(path), f".ogg file would be deleted: {path}"

    def test_webm_always_matches_delete(self):
        candidates = [
            "anything.webm",
            "livekit/room/track.webm",
            "audio.webm",  # misleading basename, still a video container
            ".webm",
        ]
        for path in candidates:
            assert self._should_delete(path), f".webm file NOT marked for deletion: {path}"

    def test_unknown_extensions_are_kept(self):
        """Unknown file types should NOT be deleted (safe by default)."""
        candidates = [
            "file.mp4",
            "file.wav",
            "file.mp3",
            "file.txt",
            "file",
            "",
        ]
        for path in candidates:
            assert not self._should_delete(path), f"Unknown file type would be deleted: {path}"
|
||||
|
||||
|
||||
# ── Platform detection ────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSourcePlatformDetection:
    """Test the recording ID prefix-based platform detection from transcript_process.py."""

    def _detect(self, recording_id):
        """Replicate the detection: an 'lk-' prefix means livekit, else daily."""
        if recording_id and recording_id.startswith("lk-"):
            return "livekit"
        return "daily"

    def test_livekit_prefix(self):
        assert self._detect("lk-livekit-20260401234423") == "livekit"

    def test_daily_no_prefix(self):
        # Daily recording IDs are plain UUIDs with no prefix.
        assert self._detect("08fa0b24-9220-44c5-846c-3f116cf8e738") == "daily"

    def test_none_recording_id(self):
        assert self._detect(None) == "daily"

    def test_empty_recording_id(self):
        assert self._detect("") == "daily"
|
||||
393
server/tests/test_livekit_track_processing.py
Normal file
393
server/tests/test_livekit_track_processing.py
Normal file
@@ -0,0 +1,393 @@
|
||||
"""
|
||||
Tests for LiveKit track processing: filepath parsing, offset calculation,
|
||||
and pad_track padding_seconds behavior.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from fractions import Fraction
|
||||
|
||||
import av
|
||||
import pytest
|
||||
|
||||
from reflector.utils.livekit import (
|
||||
LiveKitTrackFile,
|
||||
calculate_track_offsets,
|
||||
extract_livekit_base_room_name,
|
||||
filter_audio_tracks,
|
||||
parse_livekit_track_filepath,
|
||||
)
|
||||
|
||||
# ── Filepath parsing ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestParseLiveKitTrackFilepath:
    """Parsing LiveKit egress object keys into structured track metadata."""

    def test_parses_ogg_audio_track(self):
        parsed = parse_livekit_track_filepath(
            "livekit/myroom-20260401172036/juan-4b82ed-2026-04-01T195758-TR_AMR3SWs74Divho.ogg"
        )
        assert parsed.room_name == "myroom-20260401172036"
        assert parsed.participant_identity == "juan-4b82ed"
        assert parsed.track_id == "TR_AMR3SWs74Divho"
        assert parsed.timestamp == datetime(2026, 4, 1, 19, 57, 58, tzinfo=timezone.utc)

    def test_parses_different_identities(self):
        first = parse_livekit_track_filepath(
            "livekit/room-20260401/alice-a1b2c3-2026-04-01T100000-TR_abc123.ogg"
        )
        second = parse_livekit_track_filepath(
            "livekit/room-20260401/bob_smith-d4e5f6-2026-04-01T100030-TR_def456.ogg"
        )
        assert first.participant_identity == "alice-a1b2c3"
        assert second.participant_identity == "bob_smith-d4e5f6"

    def test_rejects_json_manifest(self):
        with pytest.raises(ValueError, match="doesn't match expected format"):
            parse_livekit_track_filepath("livekit/myroom-20260401/EG_K5sipvfB5fTM.json")

    def test_rejects_webm_video(self):
        # The TR_ pattern matches video tracks too, so a .webm key parses
        # successfully; audio-only filtering happens in filter_audio_tracks.
        parsed = parse_livekit_track_filepath(
            "livekit/myroom-20260401/juan-4b82ed-2026-04-01T195727-TR_VC679dgMQBdfhT.webm"
        )
        assert parsed.track_id == "TR_VC679dgMQBdfhT"

    def test_rejects_invalid_path(self):
        with pytest.raises(ValueError):
            parse_livekit_track_filepath("not/a/valid/path.ogg")

    def test_rejects_missing_track_id(self):
        with pytest.raises(ValueError):
            parse_livekit_track_filepath("livekit/room/user-2026-04-01T100000.ogg")

    def test_parses_timestamp_correctly(self):
        parsed = parse_livekit_track_filepath(
            "livekit/room-20260401/user-abc123-2026-12-25T235959-TR_test.ogg"
        )
        assert parsed.timestamp == datetime(
            2026, 12, 25, 23, 59, 59, tzinfo=timezone.utc
        )
|
||||
|
||||
|
||||
# ── Audio track filtering ─────────────────────────────────────
|
||||
|
||||
|
||||
class TestFilterAudioTracks:
    """filter_audio_tracks keeps only .ogg keys, dropping manifests and video."""

    def test_filters_to_ogg_only(self):
        mixed_keys = [
            "livekit/room/EG_abc.json",
            "livekit/room/user-abc-2026-04-01T100000-TR_audio.ogg",
            "livekit/room/user-abc-2026-04-01T100000-TR_video.webm",
            "livekit/room/EG_def.json",
            "livekit/room/user2-def-2026-04-01T100030-TR_audio2.ogg",
        ]
        kept = filter_audio_tracks(mixed_keys)
        assert len(kept) == 2
        assert all(key.endswith(".ogg") for key in kept)

    def test_empty_input(self):
        assert filter_audio_tracks([]) == []

    def test_no_audio_tracks(self):
        only_non_audio = ["livekit/room/EG_abc.json", "livekit/room/user-TR_v.webm"]
        assert filter_audio_tracks(only_non_audio) == []
|
||||
|
||||
|
||||
# ── Offset calculation ─────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCalculateTrackOffsets:
    """Offsets are seconds relative to the earliest track's start timestamp."""

    def _track(self, s3_key, identity, timestamp, track_id):
        """Build a LiveKitTrackFile in room 'r' for offset tests."""
        return LiveKitTrackFile(
            s3_key=s3_key,
            room_name="r",
            participant_identity=identity,
            timestamp=timestamp,
            track_id=track_id,
        )

    def test_single_track_zero_offset(self):
        only = self._track(
            "k1", "alice", datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc), "TR_1"
        )
        offsets = calculate_track_offsets([only])
        assert len(offsets) == 1
        assert offsets[0][1] == 0.0

    def test_two_tracks_correct_offset(self):
        offsets = calculate_track_offsets(
            [
                self._track(
                    "k1",
                    "alice",
                    datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc),
                    "TR_1",
                ),
                self._track(
                    "k2",
                    "bob",
                    datetime(2026, 4, 1, 10, 1, 10, tzinfo=timezone.utc),
                    "TR_2",
                ),
            ]
        )
        assert offsets[0][1] == 0.0  # alice (earliest)
        assert offsets[1][1] == 70.0  # bob (70 seconds later)

    def test_three_tracks_earliest_is_zero(self):
        # Deliberately out of chronological order: bob, alice, charlie.
        offsets = calculate_track_offsets(
            [
                self._track(
                    "k2",
                    "bob",
                    datetime(2026, 4, 1, 10, 0, 30, tzinfo=timezone.utc),
                    "TR_2",
                ),
                self._track(
                    "k1",
                    "alice",
                    datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc),
                    "TR_1",
                ),
                self._track(
                    "k3",
                    "charlie",
                    datetime(2026, 4, 1, 10, 1, 0, tzinfo=timezone.utc),
                    "TR_3",
                ),
            ]
        )
        by_identity = {track.participant_identity: offset for track, offset in offsets}
        assert by_identity["alice"] == 0.0
        assert by_identity["bob"] == 30.0
        assert by_identity["charlie"] == 60.0

    def test_empty_tracks(self):
        assert calculate_track_offsets([]) == []

    def test_simultaneous_tracks_zero_offsets(self):
        shared_ts = datetime(2026, 4, 1, 10, 0, 0, tzinfo=timezone.utc)
        offsets = calculate_track_offsets(
            [
                self._track("k1", "a", shared_ts, "TR_1"),
                self._track("k2", "b", shared_ts, "TR_2"),
            ]
        )
        assert all(offset == 0.0 for _, offset in offsets)
|
||||
|
||||
|
||||
# ── Room name extraction ───────────────────────────────────────
|
||||
|
||||
|
||||
class TestExtractLiveKitBaseRoomName:
    """Stripping the '-<timestamp>' suffix LiveKit appends to room names."""

    def test_strips_timestamp_suffix(self):
        assert extract_livekit_base_room_name("myroom-20260401172036") == "myroom"

    def test_preserves_hyphenated_name(self):
        # Only the final timestamp segment is removed; inner hyphens stay.
        base = extract_livekit_base_room_name("my-room-name-20260401172036")
        assert base == "my-room-name"

    def test_single_segment(self):
        assert extract_livekit_base_room_name("room-20260401") == "room"
|
||||
|
||||
|
||||
# ── pad_track padding_seconds behavior ─────────────────────────
|
||||
|
||||
|
||||
class TestPadTrackPaddingSeconds:
    """pad_track must honour a pre-calculated padding_seconds for LiveKit
    (skipping container metadata) and only fall back to extracting the
    start time from the container when padding_seconds is None (Daily).
    """

    def _make_test_ogg(self, path: str, duration_seconds: float = 5.0):
        """Write a silent OGG/Opus file of roughly *duration_seconds*."""
        sample_rate = 48000
        frame_samples = 960  # standard Opus frame size at 48kHz
        with av.open(path, "w", format="ogg") as out:
            stream = out.add_stream("libopus", rate=sample_rate)
            stream.bit_rate = 64000
            total_samples = int(duration_seconds * sample_rate)
            cursor = 0
            while cursor < total_samples:
                frame = av.AudioFrame(
                    format="s16", layout="stereo", samples=frame_samples
                )
                # Silence: s16 (2 bytes) * stereo (2 channels) per sample.
                frame.planes[0].update(bytes(frame_samples * 2 * 2))
                frame.sample_rate = sample_rate
                frame.pts = cursor
                frame.time_base = Fraction(1, sample_rate)
                for packet in stream.encode(frame):
                    out.mux(packet)
                cursor += frame_samples
            # Flush any buffered packets out of the encoder.
            for packet in stream.encode(None):
                out.mux(packet)

    def test_ogg_has_zero_start_time(self, tmp_path):
        """OGG files (as LiveKit egress produces) carry no usable start_time,
        which is exactly why padding must be pre-calculated."""
        ogg_path = str(tmp_path / "test.ogg")
        self._make_test_ogg(ogg_path)

        with av.open(ogg_path) as container:
            from reflector.utils.audio_padding import (
                extract_stream_start_time_from_container,
            )

            start_time = extract_stream_start_time_from_container(container, 0)

        assert start_time <= 0.0, (
            "OGG files should have start_time<=0 (no usable offset), confirming "
            f"LiveKit tracks need pre-calculated padding_seconds. Got: {start_time}"
        )

    def test_precalculated_padding_skips_metadata_extraction(self, tmp_path):
        """With padding_seconds set, pad_track takes it verbatim and never
        calls extract_stream_start_time_from_container."""
        from reflector.hatchet.workflows.track_processing import TrackInput

        livekit_input = TrackInput(
            track_index=0,
            s3_key="livekit/room/user-abc-2026-04-01T100000-TR_audio.ogg",
            bucket_name="test-bucket",
            transcript_id="test-transcript",
            source_platform="livekit",
            padding_seconds=70.0,
        )

        assert livekit_input.padding_seconds == 70.0
        # pad_track branches on `padding_seconds is not None`, so container
        # metadata extraction is skipped entirely for LiveKit tracks.

    def test_none_padding_falls_back_to_metadata(self, tmp_path):
        """With padding_seconds None (Daily), pad_track derives the start
        time from container metadata instead."""
        from reflector.hatchet.workflows.track_processing import TrackInput

        daily_input = TrackInput(
            track_index=0,
            s3_key="daily/room/track.webm",
            bucket_name="test-bucket",
            transcript_id="test-transcript",
            source_platform="daily",
            padding_seconds=None,
        )

        assert daily_input.padding_seconds is None
        # pad_track will call extract_stream_start_time_from_container here.

    def test_zero_padding_returns_original_key(self):
        """padding_seconds=0.0 marks the earliest track: pad_track hits the
        'no padding needed' branch and returns the original S3 key."""
        from reflector.hatchet.workflows.track_processing import TrackInput

        earliest_input = TrackInput(
            track_index=0,
            s3_key="livekit/room/earliest-track.ogg",
            bucket_name="test-bucket",
            transcript_id="test-transcript",
            source_platform="livekit",
            padding_seconds=0.0,
        )

        # 0.0 flows through as start_time_seconds=0.0 → no padding applied.
        assert earliest_input.padding_seconds == 0.0
|
||||
|
||||
|
||||
# ── Pipeline offset calculation (process_tracks logic) ─────────
|
||||
|
||||
|
||||
class TestProcessTracksOffsetCalculation:
    """Offset-calculation logic used by process_tracks for LiveKit rooms."""

    def test_livekit_offsets_from_timestamps(self):
        """Simulate the offset calculation done in process_tracks."""
        tracks = [
            {
                "s3_key": "track1.ogg",
                "participant_identity": "admin-0129c3",
                "timestamp": "2026-04-01T23:44:50+00:00",
            },
            {
                "s3_key": "track2.ogg",
                "participant_identity": "juan-5a5b41",
                "timestamp": "2026-04-01T23:46:00+00:00",
            },
        ]

        # Same algorithm as process_tracks: parse, find earliest, diff.
        parsed = [
            (idx, datetime.fromisoformat(track["timestamp"]))
            for idx, track in enumerate(tracks)
            if track.get("timestamp")
        ]
        earliest = min(ts for _, ts in parsed)
        track_padding = {idx: (ts - earliest).total_seconds() for idx, ts in parsed}

        assert track_padding[0] == 0.0  # admin started first
        assert track_padding[1] == 70.0  # juan joined 70 seconds later

    def test_daily_tracks_get_no_precalculated_padding(self):
        """Daily tracks should NOT get padding_seconds (use container metadata)."""
        tracks = [
            {"s3_key": "daily-track1.webm"},
            {"s3_key": "daily-track2.webm"},
        ]

        track_padding = {}
        source_platform = "daily"

        if source_platform == "livekit":
            # Never taken for daily; the livekit-only work would live here.
            pass

        # With no "timestamp" fields, no padding entries are ever created.
        assert track_padding == {}
        assert all(track_padding.get(idx) is None for idx, _ in enumerate(tracks))

    def test_livekit_missing_timestamp_graceful(self):
        """If a LiveKit track is missing its timestamp, it is skipped."""
        tracks = [
            {
                "s3_key": "track1.ogg",
                "participant_identity": "alice",
                "timestamp": "2026-04-01T10:00:00+00:00",
            },
            {"s3_key": "track2.ogg", "participant_identity": "bob"},  # no timestamp
        ]

        def _parse(raw):
            # None for absent or unparseable timestamps.
            if not raw:
                return None
            try:
                return datetime.fromisoformat(raw)
            except (ValueError, TypeError):
                return None

        stamped = [(idx, _parse(t.get("timestamp"))) for idx, t in enumerate(tracks)]
        usable = [(idx, ts) for idx, ts in stamped if ts is not None]
        assert len(usable) == 1  # only alice carries a timestamp
        assert usable[0][0] == 0  # and she is track index 0
|
||||
Reference in New Issue
Block a user