feat: enable daily co in selfhosted + only schedule tasks when necessary (#883)

* feat: enable daily co in selfhosted + only schedule tasks when necessary

* feat: refactor aws storage to be platform agnostic + add local pad tracking with selfhosted support
This commit is contained in:
Juan Diego García
2026-03-02 11:08:20 -05:00
committed by GitHub
parent f6cc03286b
commit 045eae8ff2
23 changed files with 1442 additions and 165 deletions

View File

@@ -0,0 +1,247 @@
"""Tests for conditional Celery beat schedule registration.
Verifies that beat tasks are only registered when their corresponding
services are configured (WHEREBY_API_KEY, DAILY_API_KEY, etc.).
"""
import pytest
from reflector.worker.app import build_beat_schedule
# Override autouse fixtures from conftest — these tests don't need database or websockets
@pytest.fixture(autouse=True)
def setup_database():
    """No-op stand-in for the conftest fixture of the same name.

    These tests do not need a database, so the fixture simply yields.
    """
    yield
@pytest.fixture(autouse=True)
def ws_manager_in_memory():
    """No-op stand-in for the conftest fixture of the same name.

    These tests do not need websockets, so the fixture simply yields.
    """
    yield
@pytest.fixture(autouse=True)
def reset_hatchet_client():
    """No-op stand-in for the conftest fixture of the same name.

    Nothing is set up or torn down; the fixture simply yields.
    """
    yield
# Beat-schedule entry names, grouped the way the assertions below check them.

# Entries expected when Whereby is configured.
WHEREBY_TASKS = {
    "reprocess_failed_recordings",
    "process_messages",
}

# Entries expected when Daily.co is configured.
DAILY_TASKS = {
    "reprocess_failed_daily_recordings",
    "trigger_daily_reconciliation",
    "poll_daily_recordings",
}

# Entries expected as soon as either video platform is configured.
PLATFORM_TASKS = {
    "create_upcoming_meetings",
    "sync_all_ics_calendars",
    "process_meetings",
}
class TestNoPlatformConfigured:
    """Schedules built while no video platform is configured."""

    def test_no_platform_tasks(self):
        """A default schedule contains no Whereby, Daily or platform entry."""
        registered = set(build_beat_schedule())
        for group in (WHEREBY_TASKS, DAILY_TASKS, PLATFORM_TASKS):
            assert registered.isdisjoint(group)

    def test_only_healthcheck_disabled_warning(self):
        """With no config at all, schedule should be empty (healthcheck needs URL)."""
        beat = build_beat_schedule()
        assert len(beat) == 0

    def test_healthcheck_only(self):
        """A healthcheck URL alone registers only the healthcheck ping."""
        beat = build_beat_schedule(healthcheck_url="https://hc.example.com/ping")
        assert set(beat) == {"healthcheck_ping"}

    def test_public_mode_only(self):
        """Public mode alone registers only the public-data cleanup."""
        beat = build_beat_schedule(public_mode=True)
        assert set(beat) == {"cleanup_old_public_data"}
class TestWherebyOnly:
    """Schedules built with only Whereby-related settings supplied."""

    def test_whereby_api_key(self):
        """A Whereby API key yields Whereby and platform entries, no Daily ones."""
        registered = set(build_beat_schedule(whereby_api_key="test-key"))
        assert WHEREBY_TASKS.issubset(registered)
        assert PLATFORM_TASKS.issubset(registered)
        assert registered.isdisjoint(DAILY_TASKS)

    def test_whereby_sqs_url(self):
        """The recording queue URL alone yields the same groups as the API key."""
        queue_url = "https://sqs.us-east-1.amazonaws.com/123/queue"
        registered = set(
            build_beat_schedule(aws_process_recording_queue_url=queue_url)
        )
        assert WHEREBY_TASKS.issubset(registered)
        assert PLATFORM_TASKS.issubset(registered)
        assert registered.isdisjoint(DAILY_TASKS)

    def test_whereby_task_count(self):
        """Exactly five entries are registered for a Whereby-only setup."""
        beat = build_beat_schedule(whereby_api_key="test-key")
        # Whereby (2) + Platform (3) = 5
        assert len(beat) == 5
class TestDailyOnly:
    """Schedules built with only a Daily.co API key supplied."""

    def test_daily_api_key(self):
        """A Daily API key yields Daily and platform entries, no Whereby ones."""
        registered = set(build_beat_schedule(daily_api_key="test-daily-key"))
        assert DAILY_TASKS.issubset(registered)
        assert PLATFORM_TASKS.issubset(registered)
        assert registered.isdisjoint(WHEREBY_TASKS)

    def test_daily_task_count(self):
        """Exactly six entries are registered for a Daily-only setup."""
        beat = build_beat_schedule(daily_api_key="test-daily-key")
        # Daily (3) + Platform (3) = 6
        assert len(beat) == 6
class TestBothPlatforms:
    """Schedules built with both Whereby and Daily.co API keys supplied."""

    def test_all_tasks_registered(self):
        """Every Whereby, Daily and platform entry is present."""
        beat = build_beat_schedule(
            whereby_api_key="test-key",
            daily_api_key="test-daily-key",
        )
        registered = set(beat)
        for group in (WHEREBY_TASKS, DAILY_TASKS, PLATFORM_TASKS):
            assert group.issubset(registered)

    def test_combined_task_count(self):
        """Exactly eight entries are registered when both platforms are set."""
        beat = build_beat_schedule(
            whereby_api_key="test-key",
            daily_api_key="test-daily-key",
        )
        # Whereby (2) + Daily (3) + Platform (3) = 8
        assert len(beat) == 8
class TestConditionalFlags:
    """Check that public mode and the healthcheck URL combine with platform tasks."""

    def test_all_flags_enabled(self):
        """Everything switched on yields all ten expected entries."""
        beat = build_beat_schedule(
            whereby_api_key="test-key",
            daily_api_key="test-daily-key",
            public_mode=True,
            healthcheck_url="https://hc.example.com/ping",
        )
        registered = set(beat)
        assert "cleanup_old_public_data" in registered
        assert "healthcheck_ping" in registered
        for group in (WHEREBY_TASKS, DAILY_TASKS, PLATFORM_TASKS):
            assert group.issubset(registered)
        # Whereby (2) + Daily (3) + Platform (3) + cleanup (1) + healthcheck (1) = 10
        assert len(beat) == 10

    def test_public_mode_with_whereby(self):
        """Public mode adds the cleanup entry alongside the Whereby groups."""
        beat = build_beat_schedule(
            whereby_api_key="test-key",
            public_mode=True,
        )
        registered = set(beat)
        assert "cleanup_old_public_data" in registered
        assert WHEREBY_TASKS.issubset(registered)
        assert PLATFORM_TASKS.issubset(registered)

    def test_healthcheck_with_daily(self):
        """A healthcheck URL adds the ping entry alongside the Daily groups."""
        beat = build_beat_schedule(
            daily_api_key="test-daily-key",
            healthcheck_url="https://hc.example.com/ping",
        )
        registered = set(beat)
        assert "healthcheck_ping" in registered
        assert DAILY_TASKS.issubset(registered)
        assert PLATFORM_TASKS.issubset(registered)
class TestTaskDefinitions:
    """Check the shape of individual schedule entries."""

    def test_whereby_task_paths(self):
        """Whereby entries point at the expected dotted task paths."""
        beat = build_beat_schedule(whereby_api_key="test-key")
        expected_paths = {
            "process_messages": "reflector.worker.process.process_messages",
            "reprocess_failed_recordings": (
                "reflector.worker.process.reprocess_failed_recordings"
            ),
        }
        for entry_name, dotted_path in expected_paths.items():
            assert beat[entry_name]["task"] == dotted_path

    def test_daily_task_paths(self):
        """Daily entries point at the expected dotted task paths."""
        beat = build_beat_schedule(daily_api_key="test-daily-key")
        expected_paths = {
            "poll_daily_recordings": (
                "reflector.worker.process.poll_daily_recordings"
            ),
            "trigger_daily_reconciliation": (
                "reflector.worker.process.trigger_daily_reconciliation"
            ),
            "reprocess_failed_daily_recordings": (
                "reflector.worker.process.reprocess_failed_daily_recordings"
            ),
        }
        for entry_name, dotted_path in expected_paths.items():
            assert beat[entry_name]["task"] == dotted_path

    def test_platform_task_paths(self):
        """Platform entries point at the expected dotted task paths."""
        beat = build_beat_schedule(daily_api_key="test-daily-key")
        expected_paths = {
            "process_meetings": "reflector.worker.process.process_meetings",
            "sync_all_ics_calendars": (
                "reflector.worker.ics_sync.sync_all_ics_calendars"
            ),
            "create_upcoming_meetings": (
                "reflector.worker.ics_sync.create_upcoming_meetings"
            ),
        }
        for entry_name, dotted_path in expected_paths.items():
            assert beat[entry_name]["task"] == dotted_path

    def test_all_tasks_have_schedule(self):
        """Every registered entry must carry both a 'schedule' and a 'task' key."""
        beat = build_beat_schedule(
            whereby_api_key="test-key",
            daily_api_key="test-daily-key",
            public_mode=True,
            healthcheck_url="https://hc.example.com/ping",
        )
        for name, entry in beat.items():
            assert "schedule" in entry, f"Task '{name}' missing 'schedule' key"
            assert "task" in entry, f"Task '{name}' missing 'task' key"
class TestEmptyStringValues:
    """Empty strings and None must count as 'not configured'."""

    def test_empty_whereby_key(self):
        """An empty Whereby key registers no Whereby entry."""
        registered = set(build_beat_schedule(whereby_api_key=""))
        assert registered.isdisjoint(WHEREBY_TASKS)

    def test_empty_daily_key(self):
        """An empty Daily key registers no Daily entry."""
        registered = set(build_beat_schedule(daily_api_key=""))
        assert registered.isdisjoint(DAILY_TASKS)

    def test_empty_sqs_url(self):
        """An empty recording queue URL registers no Whereby entry."""
        registered = set(build_beat_schedule(aws_process_recording_queue_url=""))
        assert registered.isdisjoint(WHEREBY_TASKS)

    def test_none_values(self):
        """Explicit None for every platform setting gives an empty schedule."""
        beat = build_beat_schedule(
            whereby_api_key=None,
            daily_api_key=None,
            aws_process_recording_queue_url=None,
        )
        assert len(beat) == 0

View File

@@ -367,3 +367,126 @@ async def test_aws_storage_none_endpoint_url():
assert storage.base_url == "https://reflector-bucket.s3.amazonaws.com/"
# No s3 addressing_style override — boto_config should only have retries
assert not hasattr(storage.boto_config, "s3") or storage.boto_config.s3 is None
# --- Tests for get_source_storage() ---
def test_get_source_storage_daily_with_credentials():
    """With Daily access keys set, "daily" resolves to an AwsStorage built from them."""
    with patch("reflector.storage.settings") as mock_settings:
        mock_settings.configure_mock(
            DAILYCO_STORAGE_AWS_ACCESS_KEY_ID="daily-key",
            DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY="daily-secret",
            DAILYCO_STORAGE_AWS_BUCKET_NAME="daily-bucket",
            DAILYCO_STORAGE_AWS_REGION="us-west-2",
        )
        from reflector.storage import get_source_storage

        source = get_source_storage("daily")

        assert isinstance(source, AwsStorage)
        expected_attrs = {
            "_bucket_name": "daily-bucket",
            "_region": "us-west-2",
            "_access_key_id": "daily-key",
            "_secret_access_key": "daily-secret",
        }
        for attr, value in expected_attrs.items():
            assert getattr(source, attr) == value
        assert source._endpoint_url is None
def test_get_source_storage_daily_falls_back_without_credentials():
    """Without Daily access keys, "daily" resolves to the transcript storage."""
    with patch("reflector.storage.settings") as mock_settings:
        mock_settings.configure_mock(
            DAILYCO_STORAGE_AWS_ACCESS_KEY_ID=None,
            DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY=None,
            DAILYCO_STORAGE_AWS_BUCKET_NAME="daily-bucket",
            TRANSCRIPT_STORAGE_BACKEND="aws",
            TRANSCRIPT_STORAGE_AWS_BUCKET_NAME="transcript-bucket",
            TRANSCRIPT_STORAGE_AWS_REGION="us-east-1",
            TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID="transcript-key",
            TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY="transcript-secret",
            TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL=None,
        )
        from reflector.storage import get_source_storage

        with patch(
            "reflector.storage.get_transcripts_storage"
        ) as mock_get_transcripts:
            # The object the patched transcript-storage factory hands back.
            transcript_storage = AwsStorage(
                aws_bucket_name="transcript-bucket",
                aws_region="us-east-1",
                aws_access_key_id="transcript-key",
                aws_secret_access_key="transcript-secret",
            )
            mock_get_transcripts.return_value = transcript_storage

            source = get_source_storage("daily")

            mock_get_transcripts.assert_called_once()
            assert source is transcript_storage
def test_get_source_storage_whereby_with_credentials():
    """With Whereby access keys set, "whereby" resolves to an AwsStorage built from them."""
    with patch("reflector.storage.settings") as mock_settings:
        mock_settings.configure_mock(
            WHEREBY_STORAGE_AWS_ACCESS_KEY_ID="whereby-key",
            WHEREBY_STORAGE_AWS_SECRET_ACCESS_KEY="whereby-secret",
            WHEREBY_STORAGE_AWS_BUCKET_NAME="whereby-bucket",
            WHEREBY_STORAGE_AWS_REGION="eu-west-1",
        )
        from reflector.storage import get_source_storage

        source = get_source_storage("whereby")

        assert isinstance(source, AwsStorage)
        expected_attrs = {
            "_bucket_name": "whereby-bucket",
            "_region": "eu-west-1",
            "_access_key_id": "whereby-key",
            "_secret_access_key": "whereby-secret",
        }
        for attr, value in expected_attrs.items():
            assert getattr(source, attr) == value
def test_get_source_storage_unknown_platform_falls_back():
    """An unrecognised platform name resolves to the transcript storage."""
    with patch("reflector.storage.settings"):
        from reflector.storage import get_source_storage

        with patch(
            "reflector.storage.get_transcripts_storage"
        ) as mock_get_transcripts:
            transcript_storage = MagicMock()
            mock_get_transcripts.return_value = transcript_storage

            source = get_source_storage("unknown-platform")

            mock_get_transcripts.assert_called_once()
            assert source is transcript_storage
@pytest.mark.asyncio
async def test_source_storage_presigns_for_correct_bucket():
    """A presign request on the Daily source storage targets the bucket passed as override."""
    with patch("reflector.storage.settings") as mock_settings:
        mock_settings.configure_mock(
            DAILYCO_STORAGE_AWS_ACCESS_KEY_ID="daily-key",
            DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY="daily-secret",
            DAILYCO_STORAGE_AWS_BUCKET_NAME="daily-bucket",
            DAILYCO_STORAGE_AWS_REGION="us-west-2",
        )
        from reflector.storage import get_source_storage

        source = get_source_storage("daily")

        # Fake client that acts as its own async context manager.
        s3_client = AsyncMock()
        s3_client.generate_presigned_url = AsyncMock(
            return_value="https://daily-bucket.s3.amazonaws.com/track.webm?signed"
        )
        s3_client.__aenter__ = AsyncMock(return_value=s3_client)
        s3_client.__aexit__ = AsyncMock(return_value=None)

        with patch.object(source.session, "client", return_value=s3_client):
            url = await source.get_file_url(
                "track.webm",
                operation="get_object",
                expires_in=3600,
                bucket="override-bucket",
            )

        assert "track.webm" in url

        presign = s3_client.generate_presigned_url
        presign.assert_called_once()
        recorded = presign.call_args
        # Params may have been passed by keyword or as the second positional.
        params = recorded.kwargs.get("Params") or recorded.args[1]
        assert params["Bucket"] == "override-bucket"
        assert params["Key"] == "track.webm"