mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-08 06:46:49 +00:00
fix: better docs and internal review fixes
This commit is contained in:
@@ -399,7 +399,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
||||
from reflector.db.meetings import (
|
||||
meetings_controller as mc, # noqa: PLC0415
|
||||
)
|
||||
from reflector.redis_cache import get_redis # noqa: PLC0415
|
||||
from reflector.redis_cache import (
|
||||
get_async_redis_client, # noqa: PLC0415
|
||||
)
|
||||
|
||||
meeting = (
|
||||
await mc.get_by_id(transcript.meeting_id)
|
||||
@@ -407,9 +409,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
||||
else None
|
||||
)
|
||||
if meeting:
|
||||
redis = await get_redis()
|
||||
redis_client = await get_async_redis_client()
|
||||
mapping_key = f"livekit:participant_map:{meeting.room_name}"
|
||||
raw_map = await redis.hgetall(mapping_key)
|
||||
raw_map = await redis_client.hgetall(mapping_key)
|
||||
identity_to_user_id = {
|
||||
k.decode() if isinstance(k, bytes) else k: v.decode()
|
||||
if isinstance(v, bytes)
|
||||
@@ -572,13 +574,18 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
|
||||
valid_timestamps = [(i, ts) for i, ts in timestamps if ts is not None]
|
||||
if valid_timestamps:
|
||||
earliest = min(ts for _, ts in valid_timestamps)
|
||||
# LiveKit Track Egress outputs OGG/Opus files, but the transcription
|
||||
# service only accepts WebM. The padding step converts OGG→WebM as a
|
||||
# side effect of applying the adelay filter. For the earliest track
|
||||
# (offset=0), we use a minimal padding to force this conversion.
|
||||
LIVEKIT_MIN_PADDING_SECONDS = (
|
||||
0.001 # 1ms — inaudible, forces OGG→WebM conversion
|
||||
)
|
||||
|
||||
for i, ts in valid_timestamps:
|
||||
offset = (ts - earliest).total_seconds()
|
||||
# LiveKit tracks are OGG format; even the earliest track (offset=0)
|
||||
# must go through padding step to convert OGG→WebM for the
|
||||
# transcription service. Use a tiny padding to force conversion.
|
||||
if offset == 0.0:
|
||||
offset = 0.001
|
||||
offset = LIVEKIT_MIN_PADDING_SECONDS
|
||||
track_padding[i] = offset
|
||||
ctx.log(
|
||||
f"process_tracks: track {i} padding={offset}s (from filename timestamp)"
|
||||
|
||||
@@ -25,11 +25,28 @@ def verify_webhook(
|
||||
"""Verify and parse a LiveKit webhook event.
|
||||
|
||||
Returns the parsed WebhookEvent if valid, None if verification fails.
|
||||
Logs at different levels depending on failure type:
|
||||
- WARNING: invalid signature, expired token, malformed JWT (expected rejections)
|
||||
- ERROR: unexpected exceptions (potential bugs or attacks)
|
||||
"""
|
||||
if isinstance(body, bytes):
|
||||
body = body.decode("utf-8")
|
||||
try:
|
||||
return receiver.receive(body, auth_header)
|
||||
except Exception as e:
|
||||
logger.warning("LiveKit webhook verification failed", error=str(e))
|
||||
except (ValueError, KeyError) as e:
|
||||
# Expected verification failures (bad JWT, wrong key, expired, malformed)
|
||||
logger.warning(
|
||||
"LiveKit webhook verification failed",
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
# Unexpected errors — log at ERROR for visibility (potential attack or SDK bug)
|
||||
logger.error(
|
||||
"Unexpected error during LiveKit webhook verification",
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -625,12 +625,12 @@ async def rooms_join_meeting(
|
||||
# Store identity → Reflector user_id mapping for the pipeline
|
||||
# (so TranscriptParticipant.user_id can be set correctly)
|
||||
if user_id:
|
||||
from reflector.redis_cache import get_redis # noqa: PLC0415
|
||||
from reflector.redis_cache import get_async_redis_client # noqa: PLC0415
|
||||
|
||||
redis = await get_redis()
|
||||
redis_client = await get_async_redis_client()
|
||||
mapping_key = f"livekit:participant_map:{meeting.room_name}"
|
||||
await redis.hset(mapping_key, participant_identity, user_id)
|
||||
await redis.expire(mapping_key, 7 * 86400) # 7 day TTL
|
||||
await redis_client.hset(mapping_key, participant_identity, user_id)
|
||||
await redis_client.expire(mapping_key, 7 * 86400) # 7 day TTL
|
||||
|
||||
token = client.create_access_token(
|
||||
room_name=meeting.room_name,
|
||||
@@ -638,6 +638,10 @@ async def rooms_join_meeting(
|
||||
participant_name=participant_name,
|
||||
is_admin=user_id == room.user_id if user_id else False,
|
||||
)
|
||||
# Close the platform client to release aiohttp session
|
||||
if hasattr(client, "close"):
|
||||
await client.close()
|
||||
|
||||
meeting = meeting.model_copy()
|
||||
# For LiveKit, room_url is the WS URL; token goes as a query param
|
||||
meeting.room_url = add_query_param(meeting.room_url, "token", token)
|
||||
|
||||
@@ -1239,6 +1239,11 @@ async def _process_livekit_multitrack_inner(
|
||||
meeting_id: str,
|
||||
):
|
||||
"""Inner processing logic for LiveKit multitrack recording."""
|
||||
# 1. Discover tracks by listing S3 prefix.
|
||||
# Wait briefly for egress files to finish flushing to S3 — the room_finished
|
||||
# webhook fires after empty_timeout, but egress finalization may still be in progress.
|
||||
import asyncio as _asyncio # noqa: PLC0415
|
||||
|
||||
from reflector.storage import get_source_storage # noqa: PLC0415
|
||||
from reflector.utils.livekit import ( # noqa: PLC0415
|
||||
extract_livekit_base_room_name,
|
||||
@@ -1246,32 +1251,60 @@ async def _process_livekit_multitrack_inner(
|
||||
parse_livekit_track_filepath,
|
||||
)
|
||||
|
||||
# 1. Discover tracks by listing S3 prefix
|
||||
EGRESS_FLUSH_DELAY = 10 # seconds — egress typically flushes within a few seconds
|
||||
EGRESS_RETRY_DELAY = 30 # seconds — retry if first listing finds nothing
|
||||
|
||||
await _asyncio.sleep(EGRESS_FLUSH_DELAY)
|
||||
|
||||
storage = get_source_storage("livekit")
|
||||
s3_prefix = f"livekit/{room_name}/"
|
||||
all_keys = await storage.list_objects(prefix=s3_prefix)
|
||||
|
||||
if not all_keys:
|
||||
logger.warning(
|
||||
"No track files found on S3 for LiveKit room",
|
||||
room_name=room_name,
|
||||
s3_prefix=s3_prefix,
|
||||
)
|
||||
return
|
||||
|
||||
# Filter to audio tracks only (.ogg) — skip .json manifests and .webm video
|
||||
audio_keys = filter_audio_tracks(all_keys)
|
||||
audio_keys = filter_audio_tracks(all_keys) if all_keys else []
|
||||
|
||||
if not audio_keys:
|
||||
# Retry once after a longer delay — egress may still be flushing
|
||||
logger.info(
|
||||
"No audio tracks found yet, retrying after delay",
|
||||
room_name=room_name,
|
||||
retry_delay=EGRESS_RETRY_DELAY,
|
||||
)
|
||||
await _asyncio.sleep(EGRESS_RETRY_DELAY)
|
||||
all_keys = await storage.list_objects(prefix=s3_prefix)
|
||||
audio_keys = filter_audio_tracks(all_keys) if all_keys else []
|
||||
|
||||
# Sanity check: compare audio tracks against egress manifests.
|
||||
# Each Track Egress (audio or video) produces a .json manifest.
|
||||
# Video tracks produce .webm files. So expected audio count ≈ manifests - video files.
|
||||
if all_keys:
|
||||
manifest_count = sum(1 for k in all_keys if k.endswith(".json"))
|
||||
video_count = sum(1 for k in all_keys if k.endswith(".webm"))
|
||||
expected_audio = manifest_count - video_count
|
||||
if expected_audio > len(audio_keys) and expected_audio > 0:
|
||||
# Some audio tracks may still be flushing — wait and retry
|
||||
logger.info(
|
||||
"Expected more audio tracks based on manifests, waiting for late flushes",
|
||||
room_name=room_name,
|
||||
expected=expected_audio,
|
||||
found=len(audio_keys),
|
||||
)
|
||||
await _asyncio.sleep(EGRESS_RETRY_DELAY)
|
||||
all_keys = await storage.list_objects(prefix=s3_prefix)
|
||||
audio_keys = filter_audio_tracks(all_keys) if all_keys else []
|
||||
|
||||
logger.info(
|
||||
"Found track files on S3",
|
||||
"S3 track discovery complete",
|
||||
room_name=room_name,
|
||||
total_files=len(all_keys),
|
||||
total_files=len(all_keys) if all_keys else 0,
|
||||
audio_files=len(audio_keys),
|
||||
)
|
||||
|
||||
if not audio_keys:
|
||||
logger.warning(
|
||||
"No audio track files found (only manifests/video)",
|
||||
"No audio track files found on S3 after retries",
|
||||
room_name=room_name,
|
||||
s3_prefix=s3_prefix,
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
331
server/tests/test_livekit_backend.py
Normal file
331
server/tests/test_livekit_backend.py
Normal file
@@ -0,0 +1,331 @@
|
||||
"""
|
||||
Tests for LiveKit backend: webhook verification, token generation,
|
||||
display_name sanitization, and platform client behavior.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from reflector.livekit_api.webhooks import create_webhook_receiver, verify_webhook
|
||||
|
||||
# ── Webhook verification ──────────────────────────────────────
|
||||
|
||||
|
||||
class TestWebhookVerification:
    """Webhook signature verification: every invalid input must yield None."""

    def _make_receiver(self):
        """Create a receiver with test credentials."""
        return create_webhook_receiver(
            api_key="test_key",
            api_secret="test_secret_that_is_long_enough_for_hmac",
        )

    def test_rejects_empty_auth_header(self):
        # An empty Authorization header carries no JWT at all.
        assert verify_webhook(self._make_receiver(), b'{"event":"test"}', "") is None

    def test_rejects_garbage_auth_header(self):
        # A header that is not even JWT-shaped must be rejected.
        assert (
            verify_webhook(self._make_receiver(), b'{"event":"test"}', "not-a-jwt")
            is None
        )

    def test_rejects_empty_body(self):
        # No payload to verify the signature against.
        assert (
            verify_webhook(self._make_receiver(), b"", "Bearer some.jwt.token") is None
        )

    def test_handles_bytes_body(self):
        # Should not crash on bytes input
        assert (
            verify_webhook(self._make_receiver(), b'{"event":"test"}', "invalid")
            is None
        )

    def test_handles_string_body(self):
        # str bodies are accepted as well as bytes.
        assert (
            verify_webhook(self._make_receiver(), '{"event":"test"}', "invalid")
            is None
        )

    def test_rejects_wrong_secret(self):
        """Webhook signed with different secret should be rejected."""
        # A JWT signed with a different secret
        forged = "eyJhbGciOiJIUzI1NiJ9.eyJ0ZXN0IjoxfQ.wrong_signature"
        assert verify_webhook(self._make_receiver(), b"{}", forged) is None
|
||||
|
||||
|
||||
# ── Token generation ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestTokenGeneration:
|
||||
"""Test token generation using the LiveKit SDK directly (no client instantiation)."""
|
||||
|
||||
def _generate_token(
|
||||
self, room_name="room", identity="user", name=None, admin=False, ttl=86400
|
||||
):
|
||||
"""Generate a token using the SDK directly, avoiding LiveKitAPI client session."""
|
||||
from datetime import timedelta
|
||||
|
||||
from livekit.api import AccessToken, VideoGrants
|
||||
|
||||
token = AccessToken(
|
||||
api_key="test_key", api_secret="test_secret_that_is_long_enough_for_hmac"
|
||||
)
|
||||
token.identity = identity
|
||||
token.name = name or identity
|
||||
token.ttl = timedelta(seconds=ttl)
|
||||
token.with_grants(
|
||||
VideoGrants(
|
||||
room_join=True,
|
||||
room=room_name,
|
||||
can_publish=True,
|
||||
can_subscribe=True,
|
||||
room_admin=admin,
|
||||
)
|
||||
)
|
||||
return token.to_jwt()
|
||||
|
||||
def _decode_claims(self, token):
|
||||
import base64
|
||||
import json
|
||||
|
||||
payload = token.split(".")[1]
|
||||
payload += "=" * (4 - len(payload) % 4)
|
||||
return json.loads(base64.b64decode(payload))
|
||||
|
||||
def test_creates_valid_jwt(self):
|
||||
token = self._generate_token(
|
||||
room_name="test-room", identity="user123", name="Test User"
|
||||
)
|
||||
assert isinstance(token, str)
|
||||
assert len(token.split(".")) == 3
|
||||
|
||||
def test_token_includes_room_name(self):
|
||||
token = self._generate_token(room_name="my-room-20260401", identity="alice")
|
||||
claims = self._decode_claims(token)
|
||||
assert claims.get("video", {}).get("room") == "my-room-20260401"
|
||||
assert claims.get("sub") == "alice"
|
||||
|
||||
def test_token_respects_admin_flag(self):
|
||||
token = self._generate_token(identity="admin", admin=True)
|
||||
claims = self._decode_claims(token)
|
||||
assert claims["video"]["roomAdmin"] is True
|
||||
|
||||
def test_token_non_admin_by_default(self):
|
||||
token = self._generate_token(identity="user")
|
||||
claims = self._decode_claims(token)
|
||||
assert claims.get("video", {}).get("roomAdmin") in (None, False)
|
||||
|
||||
def test_ttl_is_timedelta(self):
|
||||
"""Verify ttl as timedelta works (previous bug: int caused TypeError)."""
|
||||
token = self._generate_token(ttl=3600)
|
||||
assert isinstance(token, str)
|
||||
|
||||
|
||||
# ── Display name sanitization ─────────────────────────────────
|
||||
|
||||
|
||||
class TestDisplayNameSanitization:
    """Test the sanitization logic from rooms.py join endpoint."""

    # The full output alphabet the sanitizer may emit.
    _ALLOWED = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"

    def _sanitize(self, display_name: str) -> str:
        """Replicate the sanitization from rooms_join_meeting."""
        stripped = display_name.strip()
        return re.sub(r"[^a-zA-Z0-9_-]", "_", stripped)[:40]

    def test_normal_name(self):
        assert self._sanitize("Alice") == "Alice"

    def test_name_with_spaces(self):
        assert self._sanitize("John Doe") == "John_Doe"

    def test_name_with_special_chars(self):
        assert self._sanitize("user@email.com") == "user_email_com"

    def test_name_with_unicode(self):
        sanitized = self._sanitize("José García")
        assert sanitized == "Jos__Garc_a"
        assert all(ch in self._ALLOWED for ch in sanitized)

    def test_name_with_emoji(self):
        sanitized = self._sanitize("👋 Hello")
        assert "_" in sanitized  # Emoji replaced with underscore
        assert "Hello" in sanitized

    def test_very_long_name(self):
        # Output is truncated to 40 characters.
        assert len(self._sanitize("A" * 100)) == 40

    def test_empty_name(self):
        assert self._sanitize("") == ""

    def test_only_special_chars(self):
        assert self._sanitize("!!!") == "___"

    def test_whitespace_stripped(self):
        assert self._sanitize("  Alice  ") == "Alice"

    def test_hyphens_preserved(self):
        assert self._sanitize("first-last") == "first-last"

    def test_underscores_preserved(self):
        assert self._sanitize("first_last") == "first_last"

    def test_html_injection(self):
        sanitized = self._sanitize("<script>alert('xss')</script>")
        assert "<" not in sanitized
        assert ">" not in sanitized
        assert "'" not in sanitized
|
||||
|
||||
|
||||
# ── S3 egress configuration ───────────────────────────────────
|
||||
|
||||
|
||||
class TestS3EgressConfig:
|
||||
"""Test S3Upload construction using the SDK directly."""
|
||||
|
||||
def test_build_s3_upload_requires_all_fields(self):
|
||||
# Missing fields should raise or produce invalid config
|
||||
# The validation happens in our client wrapper, not the SDK
|
||||
# Test the validation logic directly
|
||||
s3_bucket = None
|
||||
s3_access_key = "AKID"
|
||||
s3_secret_key = "secret"
|
||||
assert not all([s3_bucket, s3_access_key, s3_secret_key])
|
||||
|
||||
def test_s3_upload_with_credentials(self):
|
||||
from livekit.api import S3Upload
|
||||
|
||||
upload = S3Upload(
|
||||
access_key="AKID",
|
||||
secret="secret123",
|
||||
bucket="test-bucket",
|
||||
region="us-east-1",
|
||||
force_path_style=True,
|
||||
)
|
||||
assert upload.bucket == "test-bucket"
|
||||
assert upload.force_path_style is True
|
||||
|
||||
def test_s3_upload_with_endpoint(self):
|
||||
from livekit.api import S3Upload
|
||||
|
||||
upload = S3Upload(
|
||||
access_key="AKID",
|
||||
secret="secret",
|
||||
bucket="bucket",
|
||||
region="us-east-1",
|
||||
force_path_style=True,
|
||||
endpoint="http://garage:3900",
|
||||
)
|
||||
assert upload.endpoint == "http://garage:3900"
|
||||
|
||||
|
||||
# ── Platform detection ────────────────────────────────────────
|
||||
|
||||
|
||||
# ── Redis participant mapping ──────────────────────────────
|
||||
|
||||
|
||||
class TestParticipantIdentityMapping:
    """Test the identity → user_id Redis mapping pattern."""

    def test_mapping_key_format(self):
        """Mapping keys are namespaced under livekit:participant_map:<room>."""
        room_name = "myroom-20260401172036"
        mapping_key = f"livekit:participant_map:{room_name}"
        assert mapping_key == "livekit:participant_map:myroom-20260401172036"

    def test_identity_with_uuid_suffix_is_unique(self):
        """Two participants with the same display name get distinct identities."""
        import uuid

        name = "Juan"
        id1 = f"{name}-{uuid.uuid4().hex[:6]}"
        id2 = f"{name}-{uuid.uuid4().hex[:6]}"
        assert id1 != id2
        assert id1.startswith("Juan-")
        assert id2.startswith("Juan-")

    def test_strip_uuid_suffix_for_display(self):
        """Pipeline strips UUID suffix for display name."""
        identity = "Juan-2bcea0"
        display_name = identity.rsplit("-", 1)[0] if "-" in identity else identity
        assert display_name == "Juan"

    def test_strip_uuid_preserves_hyphenated_names(self):
        """rsplit with maxsplit=1 drops only the final -suffix segment."""
        identity = "Mary-Jane-abc123"
        display_name = identity.rsplit("-", 1)[0] if "-" in identity else identity
        assert display_name == "Mary-Jane"

    def test_anon_identity_no_user_id(self):
        """Anonymous participants should not have a user_id mapping."""
        identity = "anon-abc123"
        # In the pipeline, anon identities don't get looked up
        assert identity.startswith("anon-")

    @pytest.mark.asyncio
    async def test_redis_hset_hgetall_roundtrip(self):
        """Test the actual Redis operations used for participant mapping."""
        try:
            from reflector.redis_cache import get_async_redis_client

            redis_client = await get_async_redis_client()
            test_key = "livekit:participant_map:__test_room__"

            # Write
            await redis_client.hset(test_key, "Juan-abc123", "user-id-1")
            await redis_client.hset(test_key, "Alice-def456", "user-id-2")

            # Read back, normalizing bytes → str (the client may return
            # either depending on decode_responses configuration)
            raw_map = await redis_client.hgetall(test_key)
            decoded = {
                k.decode() if isinstance(k, bytes) else k: v.decode()
                if isinstance(v, bytes)
                else v
                for k, v in raw_map.items()
            }

            assert decoded["Juan-abc123"] == "user-id-1"
            assert decoded["Alice-def456"] == "user-id-2"

            # Cleanup
            await redis_client.delete(test_key)
        except AssertionError:
            # A genuine data mismatch must fail the test — without this
            # re-raise, the broad handler below would report it as a skip.
            raise
        except Exception:
            pytest.skip("Redis not available")
|
||||
|
||||
|
||||
# ── Platform detection ────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSourcePlatformDetection:
    """Test the recording ID prefix-based platform detection from transcript_process.py."""

    def test_livekit_prefix(self):
        # "lk-" prefix marks recordings produced by the LiveKit backend.
        recording_id = "lk-livekit-20260401234423"
        if recording_id.startswith("lk-"):
            platform = "livekit"
        else:
            platform = "daily"
        assert platform == "livekit"

    def test_daily_no_prefix(self):
        # Daily recordings use bare UUIDs, so detection falls through to "daily".
        recording_id = "08fa0b24-9220-44c5-846c-3f116cf8e738"
        if recording_id.startswith("lk-"):
            platform = "livekit"
        else:
            platform = "daily"
        assert platform == "daily"

    def test_none_recording_id(self):
        # None must not blow up on .startswith — the truthiness guard
        # short-circuits first.
        recording_id = None
        platform = (
            "livekit" if recording_id and recording_id.startswith("lk-") else "daily"
        )
        assert platform == "daily"

    def test_empty_recording_id(self):
        # Empty string is falsy, so detection falls back to "daily".
        recording_id = ""
        platform = (
            "livekit" if recording_id and recording_id.startswith("lk-") else "daily"
        )
        assert platform == "daily"
|
||||
Reference in New Issue
Block a user