Compare commits

..

1 Commit

Author SHA1 Message Date
15ab2e306e feat: Daily+hatchet default (#846)
* feat: set Daily as default video platform

Daily.co has been battle-tested and is ready to be the default.
Whereby remains available for rooms that explicitly set it.

* feat: enforce Hatchet for all multitrack processing

Remove use_celery option from rooms - multitrack (Daily) recordings
now always use Hatchet workflows. Celery remains for single-track
(Whereby) file processing only.

- Remove use_celery column from room table
- Simplify dispatch logic to always use Hatchet for multitracks
- Update tests to mock Hatchet instead of Celery

* fix: update whereby test to patch Hatchet instead of removed Celery import

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-02-05 18:38:08 -05:00
12 changed files with 204 additions and 563 deletions

View File

@@ -0,0 +1,35 @@
"""drop_use_celery_column
Revision ID: 3aa20b96d963
Revises: e69f08ead8ea
Create Date: 2026-02-05 10:12:44.065279
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration
# it is applied on top of; both match the "Revision ID" / "Revises" lines in
# the module docstring.
revision: str = "3aa20b96d963"
down_revision: Union[str, None] = "e69f08ead8ea"
# No branch labels and no dependencies on other migrations are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Drop the ``use_celery`` column from the ``room`` table."""
    room_batch = op.batch_alter_table("room", schema=None)
    with room_batch as room_ops:
        room_ops.drop_column("use_celery")
def downgrade() -> None:
    """Add ``room.use_celery`` back as a NOT NULL boolean with server default ``false``."""
    # Build the column definition first so the batch block reads as one step.
    use_celery_column = sa.Column(
        "use_celery",
        sa.Boolean(),
        server_default=sa.text("false"),
        nullable=False,
    )
    with op.batch_alter_table("room", schema=None) as room_ops:
        room_ops.add_column(use_celery_column)

View File

@@ -346,27 +346,6 @@ class MeetingController:
return None
return Meeting(**result)
async def get_by_room_and_time_window(
    self, room: Room, start_date: datetime, end_date: datetime
) -> Meeting | None:
    """Return an active meeting of ``room`` with exactly this start and end, or ``None``.

    At most one row is fetched; both dates are compared for equality.
    """
    same_window = sa.and_(
        meetings.c.room_id == room.id,
        meetings.c.start_date == start_date,
        meetings.c.end_date == end_date,
        meetings.c.is_active,
    )
    row = await get_database().fetch_one(
        meetings.select().where(same_window).limit(1)
    )
    return Meeting(**row) if row else None
async def update_meeting(self, meeting_id: str, **kwargs):
query = meetings.update().where(meetings.c.id == meeting_id).values(**kwargs)
await get_database().execute(query)

View File

@@ -57,12 +57,6 @@ rooms = sqlalchemy.Table(
sqlalchemy.String,
nullable=False,
),
sqlalchemy.Column(
"use_celery",
sqlalchemy.Boolean,
nullable=False,
server_default=false(),
),
sqlalchemy.Column(
"skip_consent",
sqlalchemy.Boolean,
@@ -97,7 +91,6 @@ class Room(BaseModel):
ics_last_sync: datetime | None = None
ics_last_etag: str | None = None
platform: Platform = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)
use_celery: bool = False
skip_consent: bool = False

View File

@@ -15,14 +15,10 @@ from hatchet_sdk.clients.rest.exceptions import ApiException, NotFoundException
from hatchet_sdk.clients.rest.models import V1TaskStatus
from reflector.db.recordings import recordings_controller
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import Transcript, transcripts_controller
from reflector.hatchet.client import HatchetClientManager
from reflector.logger import logger
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
from reflector.pipelines.main_multitrack_pipeline import (
task_pipeline_multitrack_process,
)
from reflector.utils.string import NonEmptyString
@@ -181,124 +177,98 @@ async def dispatch_transcript_processing(
Returns AsyncResult for Celery tasks, None for Hatchet workflows.
"""
if isinstance(config, MultitrackProcessingConfig):
use_celery = False
if config.room_id:
room = await rooms_controller.get_by_id(config.room_id)
use_celery = room.use_celery if room else False
use_hatchet = not use_celery
if use_celery:
logger.info(
"Room uses legacy Celery processing",
room_id=config.room_id,
transcript_id=config.transcript_id,
# Multitrack processing always uses Hatchet (no Celery fallback)
# First check if we can replay (outside transaction since it's read-only)
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id and not force:
can_replay = await HatchetClientManager.can_replay(
transcript.workflow_run_id
)
if use_hatchet:
# First check if we can replay (outside transaction since it's read-only)
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id and not force:
can_replay = await HatchetClientManager.can_replay(
transcript.workflow_run_id
if can_replay:
await HatchetClientManager.replay_workflow(transcript.workflow_run_id)
logger.info(
"Replaying Hatchet workflow",
workflow_id=transcript.workflow_run_id,
)
if can_replay:
await HatchetClientManager.replay_workflow(
transcript.workflow_run_id
)
logger.info(
"Replaying Hatchet workflow",
workflow_id=transcript.workflow_run_id,
)
return None
else:
# Workflow can't replay (CANCELLED, COMPLETED, or 404 deleted)
# Log and proceed to start new workflow
try:
status = await HatchetClientManager.get_workflow_run_status(
transcript.workflow_run_id
)
logger.info(
"Old workflow not replayable, starting new",
old_workflow_id=transcript.workflow_run_id,
old_status=status.value,
)
except NotFoundException:
# Workflow deleted from Hatchet but ID still in DB
logger.info(
"Old workflow not found in Hatchet, starting new",
old_workflow_id=transcript.workflow_run_id,
)
# Force: cancel old workflow if exists
if force and transcript and transcript.workflow_run_id:
try:
await HatchetClientManager.cancel_workflow(
transcript.workflow_run_id
)
logger.info(
"Cancelled old workflow (--force)",
workflow_id=transcript.workflow_run_id,
)
except NotFoundException:
logger.info(
"Old workflow already deleted (--force)",
workflow_id=transcript.workflow_run_id,
)
await transcripts_controller.update(
transcript, {"workflow_run_id": None}
)
# Re-fetch and check for concurrent dispatch (optimistic approach).
# No database lock - worst case is duplicate dispatch, but Hatchet
# workflows are idempotent so this is acceptable.
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id:
# Another process started a workflow between validation and now
return None
else:
# Workflow can't replay (CANCELLED, COMPLETED, or 404 deleted)
# Log and proceed to start new workflow
try:
status = await HatchetClientManager.get_workflow_run_status(
transcript.workflow_run_id
)
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
logger.info(
"Concurrent workflow detected, skipping dispatch",
workflow_id=transcript.workflow_run_id,
)
return None
except ApiException:
# Workflow might be gone (404) or API issue - proceed with new workflow
pass
logger.info(
"Old workflow not replayable, starting new",
old_workflow_id=transcript.workflow_run_id,
old_status=status.value,
)
except NotFoundException:
# Workflow deleted from Hatchet but ID still in DB
logger.info(
"Old workflow not found in Hatchet, starting new",
old_workflow_id=transcript.workflow_run_id,
)
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": config.recording_id,
"tracks": [{"s3_key": k} for k in config.track_keys],
"bucket_name": config.bucket_name,
"transcript_id": config.transcript_id,
"room_id": config.room_id,
},
additional_metadata={
"transcript_id": config.transcript_id,
"recording_id": config.recording_id,
"daily_recording_id": config.recording_id,
},
# Force: cancel old workflow if exists
if force and transcript and transcript.workflow_run_id:
try:
await HatchetClientManager.cancel_workflow(transcript.workflow_run_id)
logger.info(
"Cancelled old workflow (--force)",
workflow_id=transcript.workflow_run_id,
)
except NotFoundException:
logger.info(
"Old workflow already deleted (--force)",
workflow_id=transcript.workflow_run_id,
)
await transcripts_controller.update(transcript, {"workflow_run_id": None})
# Re-fetch and check for concurrent dispatch (optimistic approach).
# No database lock - worst case is duplicate dispatch, but Hatchet
# workflows are idempotent so this is acceptable.
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id:
# Another process started a workflow between validation and now
try:
status = await HatchetClientManager.get_workflow_run_status(
transcript.workflow_run_id
)
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
logger.info(
"Concurrent workflow detected, skipping dispatch",
workflow_id=transcript.workflow_run_id,
)
return None
except ApiException:
# Workflow might be gone (404) or API issue - proceed with new workflow
pass
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": config.recording_id,
"tracks": [{"s3_key": k} for k in config.track_keys],
"bucket_name": config.bucket_name,
"transcript_id": config.transcript_id,
"room_id": config.room_id,
},
additional_metadata={
"transcript_id": config.transcript_id,
"recording_id": config.recording_id,
"daily_recording_id": config.recording_id,
},
)
if transcript:
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
if transcript:
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
return None
logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
return None
# Celery pipeline (durable workflows disabled)
return task_pipeline_multitrack_process.delay(
transcript_id=config.transcript_id,
bucket_name=config.bucket_name,
track_keys=config.track_keys,
)
elif isinstance(config, FileProcessingConfig):
return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
else:

View File

@@ -1,7 +1,7 @@
from pydantic.types import PositiveInt
from pydantic_settings import BaseSettings, SettingsConfigDict
from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
from reflector.schemas.platform import DAILY_PLATFORM, Platform
from reflector.utils.string import NonEmptyString
@@ -155,7 +155,7 @@ class Settings(BaseSettings):
None # Webhook UUID for this environment. Not used by production code
)
# Platform Configuration
DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM
DEFAULT_VIDEO_PLATFORM: Platform = DAILY_PLATFORM
# Zulip integration
ZULIP_REALM: str | None = None

View File

@@ -5,7 +5,7 @@ from celery import shared_task
from celery.utils.log import get_task_logger
from reflector.asynctask import asynctask
from reflector.db.calendar_events import CalendarEvent, calendar_events_controller
from reflector.db.calendar_events import calendar_events_controller
from reflector.db.meetings import meetings_controller
from reflector.db.rooms import Room, rooms_controller
from reflector.redis_cache import RedisAsyncLock
@@ -83,9 +83,10 @@ def _should_sync(room) -> bool:
return time_since_sync.total_seconds() >= room.ics_fetch_interval
async def create_upcoming_meetings_for_event(
event: CalendarEvent, create_window: datetime, room: Room
):
MEETING_DEFAULT_DURATION = timedelta(hours=1)
async def create_upcoming_meetings_for_event(event, create_window, room: Room):
if event.start_time <= create_window:
return
existing_meeting = await meetings_controller.get_by_calendar_event(event.id, room)
@@ -93,21 +94,6 @@ async def create_upcoming_meetings_for_event(
if existing_meeting:
return
# Prevent duplicate meetings from aggregated calendar feeds
# (e.g. same event appears with different UIDs from Cal.com and Google Calendar)
end_date = event.end_time
existing_by_time = await meetings_controller.get_by_room_and_time_window(
room, event.start_time, end_date
)
if existing_by_time:
logger.info(
"Skipping duplicate calendar event - meeting already exists for this time window",
room_id=room.id,
event_id=event.id,
existing_meeting_id=existing_by_time.id,
)
return
logger.info(
"Pre-creating meeting for calendar event",
room_id=room.id,
@@ -116,6 +102,8 @@ async def create_upcoming_meetings_for_event(
)
try:
end_date = event.end_time or (event.start_time + MEETING_DEFAULT_DURATION)
client = create_platform_client(room.platform)
meeting_data = await client.create_meeting(

View File

@@ -27,9 +27,6 @@ from reflector.db.transcripts import (
from reflector.hatchet.client import HatchetClientManager
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
from reflector.pipelines.main_live_pipeline import asynctask
from reflector.pipelines.main_multitrack_pipeline import (
task_pipeline_multitrack_process,
)
from reflector.pipelines.topic_processing import EmptyPipeline
from reflector.processors import AudioFileWriterProcessor
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
@@ -351,49 +348,29 @@ async def _process_multitrack_recording_inner(
room_id=room.id,
)
use_celery = room and room.use_celery
use_hatchet = not use_celery
if use_celery:
logger.info(
"Room uses legacy Celery processing",
room_id=room.id,
transcript_id=transcript.id,
)
if use_hatchet:
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording_id,
"tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording_id,
"daily_recording_id": recording_id,
},
)
logger.info(
"Started Hatchet workflow",
workflow_id=workflow_id,
transcript_id=transcript.id,
)
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
return
# Celery pipeline (runs when durable workflows disabled)
task_pipeline_multitrack_process.delay(
transcript_id=transcript.id,
bucket_name=bucket_name,
track_keys=filter_cam_audio_tracks(track_keys),
# Multitrack processing always uses Hatchet (no Celery fallback)
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording_id,
"tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording_id,
"daily_recording_id": recording_id,
},
)
logger.info(
"Started Hatchet workflow",
workflow_id=workflow_id,
transcript_id=transcript.id,
)
await transcripts_controller.update(transcript, {"workflow_run_id": workflow_id})
@shared_task
@@ -1072,66 +1049,43 @@ async def reprocess_failed_daily_recordings():
)
continue
use_celery = room and room.use_celery
use_hatchet = not use_celery
if use_hatchet:
if not transcript:
logger.warning(
"No transcript for Hatchet reprocessing, skipping",
recording_id=recording.id,
)
continue
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording.id,
"tracks": [
{"s3_key": k}
for k in filter_cam_audio_tracks(recording.track_keys)
],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id if room else None,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording.id,
"reprocess": True,
},
)
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
logger.info(
"Queued Daily recording for Hatchet reprocessing",
# Multitrack reprocessing always uses Hatchet (no Celery fallback)
if not transcript:
logger.warning(
"No transcript for Hatchet reprocessing, skipping",
recording_id=recording.id,
workflow_id=workflow_id,
room_name=meeting.room_name,
track_count=len(recording.track_keys),
)
else:
logger.info(
"Queueing Daily recording for Celery reprocessing",
recording_id=recording.id,
room_name=meeting.room_name,
track_count=len(recording.track_keys),
transcript_status=transcript.status if transcript else None,
)
continue
# For reprocessing, pass actual recording time (though it's ignored - see _process_multitrack_recording_inner)
# Reprocessing uses recording.meeting_id directly instead of time-based matching
recording_start_ts = int(recording.recorded_at.timestamp())
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording.id,
"tracks": [
{"s3_key": k}
for k in filter_cam_audio_tracks(recording.track_keys)
],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id if room else None,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording.id,
"reprocess": True,
},
)
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
process_multitrack_recording.delay(
bucket_name=bucket_name,
daily_room_name=meeting.room_name,
recording_id=recording.id,
track_keys=recording.track_keys,
recording_start_ts=recording_start_ts,
)
logger.info(
"Queued Daily recording for Hatchet reprocessing",
recording_id=recording.id,
workflow_id=workflow_id,
room_name=meeting.room_name,
track_count=len(recording.track_keys),
)
reprocessed_count += 1

View File

@@ -4,7 +4,7 @@ from unittest.mock import patch
import pytest
from reflector.schemas.platform import WHEREBY_PLATFORM
from reflector.schemas.platform import DAILY_PLATFORM, WHEREBY_PLATFORM
@pytest.fixture(scope="session", autouse=True)
@@ -14,6 +14,7 @@ def register_mock_platform():
from reflector.video_platforms.registry import register_platform
register_platform(WHEREBY_PLATFORM, MockPlatformClient)
register_platform(DAILY_PLATFORM, MockPlatformClient)
yield

View File

@@ -1,190 +0,0 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, patch
import pytest
from reflector.db import get_database
from reflector.db.calendar_events import CalendarEvent, calendar_events_controller
from reflector.db.meetings import meetings
from reflector.db.rooms import rooms_controller
from reflector.worker.ics_sync import create_upcoming_meetings_for_event
@pytest.mark.asyncio
async def test_duplicate_calendar_event_does_not_create_duplicate_meeting():
    """Two events that differ only by ICS UID must produce a single meeting.

    An aggregated ICS feed can list the same event twice under different UIDs
    (e.g. a Cal.com UID and a Google Calendar UUID). The second occurrence has
    to be skipped without calling the video platform.
    """
    platform_factory_path = "reflector.worker.ics_sync.create_platform_client"

    room = await rooms_controller.add(
        name="dedup-test-room",
        user_id="test-user",
        zulip_auto_post=False,
        zulip_stream="",
        zulip_topic="",
        is_locked=False,
        room_mode="normal",
        recording_type="cloud",
        recording_trigger="automatic-2nd-participant",
        is_shared=False,
        ics_url="https://calendar.example.com/dedup.ics",
        ics_enabled=True,
    )

    now = datetime.now(timezone.utc)
    # Everything the two events have in common; only the ICS UID differs.
    shared_fields = {
        "room_id": room.id,
        "title": "Team Standup",
        "start_time": now + timedelta(hours=1),
        "end_time": now + timedelta(hours=2),
    }
    # create_window must be before start_time for the function to proceed
    create_window = now - timedelta(minutes=6)

    async def fetch_room_meetings():
        room_meetings = meetings.select().where(meetings.c.room_id == room.id)
        return await get_database().fetch_all(room_meetings)

    # First occurrence (Cal.com UID): a meeting is expected to be created.
    cal_com_event = await calendar_events_controller.upsert(
        CalendarEvent(ics_uid="abc123@Cal.com", **shared_fields)
    )

    async def create_first_meeting(room_name_prefix, *, end_date, room):
        return AsyncMock(
            meeting_id="meeting-1",
            room_name="dedup-test-room-abc",
            room_url="https://mock.video/dedup-test-room-abc",
            host_room_url="https://mock.video/dedup-test-room-abc?host=true",
        )

    with patch(platform_factory_path) as platform_factory:
        first_client = AsyncMock()
        first_client.create_meeting = create_first_meeting
        first_client.upload_logo = AsyncMock()
        platform_factory.return_value = first_client

        await create_upcoming_meetings_for_event(cal_com_event, create_window, room)

    results = await fetch_room_meetings()
    assert len(results) == 1, f"Expected 1 meeting, got {len(results)}"

    # Second occurrence (Google Calendar UUID), same time window: must be skipped.
    google_event = await calendar_events_controller.upsert(
        CalendarEvent(
            ics_uid="550e8400-e29b-41d4-a716-446655440000", **shared_fields
        )
    )

    unexpected_calls = []

    async def record_unexpected_create(room_name_prefix, *, end_date, room):
        unexpected_calls.append(room_name_prefix)

    with patch(platform_factory_path) as platform_factory:
        second_client = AsyncMock()
        second_client.create_meeting = record_unexpected_create
        second_client.upload_logo = AsyncMock()
        platform_factory.return_value = second_client

        await create_upcoming_meetings_for_event(google_event, create_window, room)

    # Platform client should NOT have been called for the duplicate
    assert not unexpected_calls, "create_meeting should not be called for duplicate"

    results = await fetch_room_meetings()
    assert len(results) == 1, f"Expected 1 meeting after dedup, got {len(results)}"
@pytest.mark.asyncio
async def test_different_time_windows_create_separate_meetings():
    """Same-titled events in different time slots each get their own meeting."""
    room = await rooms_controller.add(
        name="dedup-diff-time-room",
        user_id="test-user",
        zulip_auto_post=False,
        zulip_stream="",
        zulip_topic="",
        is_locked=False,
        room_mode="normal",
        recording_type="cloud",
        recording_trigger="automatic-2nd-participant",
        is_shared=False,
        ics_url="https://calendar.example.com/dedup2.ics",
        ics_enabled=True,
    )

    now = datetime.now(timezone.utc)
    create_window = now - timedelta(minutes=6)

    # (ics_uid, start offset in hours, end offset in hours)
    slots = (
        ("event-morning@Cal.com", 1, 2),
        ("event-afternoon@Cal.com", 3, 4),
    )
    events = []
    for ics_uid, start_offset, end_offset in slots:
        stored_event = await calendar_events_controller.upsert(
            CalendarEvent(
                room_id=room.id,
                ics_uid=ics_uid,
                title="Team Standup",
                start_time=now + timedelta(hours=start_offset),
                end_time=now + timedelta(hours=end_offset),
            )
        )
        events.append(stored_event)

    created_prefixes = []

    async def mock_create_meeting(room_name_prefix, *, end_date, room):
        # The 1-based call index keeps every fake meeting unique.
        created_prefixes.append(room_name_prefix)
        call_count = len(created_prefixes)
        return AsyncMock(
            meeting_id=f"meeting-{call_count}",
            room_name=f"dedup-diff-time-room-{call_count}",
            room_url=f"https://mock.video/dedup-diff-time-room-{call_count}",
            host_room_url=f"https://mock.video/dedup-diff-time-room-{call_count}?host=true",
        )

    with patch("reflector.worker.ics_sync.create_platform_client") as platform_factory:
        fake_client = AsyncMock()
        fake_client.create_meeting = mock_create_meeting
        fake_client.upload_logo = AsyncMock()
        platform_factory.return_value = fake_client

        for stored_event in events:
            await create_upcoming_meetings_for_event(
                stored_event, create_window, room
            )

    room_meetings = meetings.select().where(meetings.c.room_id == room.id)
    results = await get_database().fetch_all(room_meetings)
    assert (
        len(results) == 2
    ), f"Expected 2 meetings for different times, got {len(results)}"

View File

@@ -1,6 +1,6 @@
import asyncio
import time
from unittest.mock import patch
from unittest.mock import AsyncMock, patch
import pytest
from httpx import ASGITransport, AsyncClient
@@ -142,17 +142,17 @@ async def test_whereby_recording_uses_file_pipeline(client):
"reflector.services.transcript_process.task_pipeline_file_process"
) as mock_file_pipeline,
patch(
"reflector.services.transcript_process.task_pipeline_multitrack_process"
) as mock_multitrack_pipeline,
"reflector.services.transcript_process.HatchetClientManager"
) as mock_hatchet,
):
response = await client.post(f"/transcripts/{transcript.id}/process")
assert response.status_code == 200
assert response.json()["status"] == "ok"
# Whereby recordings should use file pipeline
# Whereby recordings should use file pipeline, not Hatchet
mock_file_pipeline.delay.assert_called_once_with(transcript_id=transcript.id)
mock_multitrack_pipeline.delay.assert_not_called()
mock_hatchet.start_workflow.assert_not_called()
@pytest.mark.usefixtures("setup_database")
@@ -177,8 +177,6 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
recording_trigger="automatic-2nd-participant",
is_shared=False,
)
# Force Celery backend for test
await rooms_controller.update(room, {"use_celery": True})
transcript = await transcripts_controller.add(
"",
@@ -213,18 +211,23 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
"reflector.services.transcript_process.task_pipeline_file_process"
) as mock_file_pipeline,
patch(
"reflector.services.transcript_process.task_pipeline_multitrack_process"
) as mock_multitrack_pipeline,
"reflector.services.transcript_process.HatchetClientManager"
) as mock_hatchet,
):
mock_hatchet.start_workflow = AsyncMock(return_value="test-workflow-id")
response = await client.post(f"/transcripts/{transcript.id}/process")
assert response.status_code == 200
assert response.json()["status"] == "ok"
# Daily.co multitrack recordings should use multitrack pipeline
mock_multitrack_pipeline.delay.assert_called_once_with(
transcript_id=transcript.id,
bucket_name="daily-bucket",
track_keys=track_keys,
)
# Daily.co multitrack recordings should use Hatchet workflow
mock_hatchet.start_workflow.assert_called_once()
call_kwargs = mock_hatchet.start_workflow.call_args.kwargs
assert call_kwargs["workflow_name"] == "DiarizationPipeline"
assert call_kwargs["input_data"]["transcript_id"] == transcript.id
assert call_kwargs["input_data"]["bucket_name"] == "daily-bucket"
assert call_kwargs["input_data"]["tracks"] == [
{"s3_key": k} for k in track_keys
]
mock_file_pipeline.delay.assert_not_called()

View File

@@ -8,7 +8,7 @@ import {
useRef,
useState,
} from "react";
import { Box, Spinner, Center, Text, Button, VStack } from "@chakra-ui/react";
import { Box, Spinner, Center, Text } from "@chakra-ui/react";
import { useRouter, useParams } from "next/navigation";
import DailyIframe, {
DailyCall,
@@ -16,13 +16,10 @@ import DailyIframe, {
DailyCustomTrayButton,
DailyCustomTrayButtons,
DailyEventObjectCustomButtonClick,
DailyEventObjectFatalError,
DailyFatalErrorType,
DailyFactoryOptions,
DailyParticipantsObject,
} from "@daily-co/daily-js";
import type { components } from "../../reflector-api";
import { printApiError, ApiError } from "../../api/_error";
import { useAuth } from "../../lib/AuthProvider";
import { useConsentDialog } from "../../lib/consent";
import {
@@ -48,63 +45,6 @@ const RAW_TRACKS_NAMESPACE = "a1b2c3d4-e5f6-7890-abcd-ef1234567890";
const RECORDING_START_DELAY_MS = 2000;
const RECORDING_START_MAX_RETRIES = 5;
// User-facing text for Daily fatal error types. The table is Partial: types
// without an entry fall back to a generic "Something went wrong" message in
// FatalErrorScreen. `rejoinable: true` makes that screen offer a
// "Try Rejoining" button (page reload) next to the way back to the room.
const FATAL_ERROR_MESSAGES: Partial<
Record<DailyFatalErrorType, { message: string; rejoinable?: boolean }>
> = {
"connection-error": {
message: "Connection lost. Please check your network.",
rejoinable: true,
},
"exp-room": { message: "The meeting time has ended." },
"exp-token": { message: "Your session has expired.", rejoinable: true },
ejected: { message: "You were removed from this meeting." },
"meeting-full": { message: "This meeting is full." },
"not-allowed": { message: "You are not allowed to join this meeting." },
"nbf-room": { message: "This meeting hasn't started yet." },
"nbf-token": { message: "This meeting hasn't started yet." },
"no-room": { message: "This room does not exist." },
"end-of-life": { message: "This meeting room is no longer available." },
};
/**
 * Full-screen message shown after a fatal Daily error.
 *
 * Looks the error type up in FATAL_ERROR_MESSAGES; unknown or unlisted types
 * get a generic message built from the raw error text. Rejoinable errors
 * offer a page reload plus a "Leave" link, all others a single
 * "Back to Room" button.
 */
function FatalErrorScreen({
  error,
  roomName,
}: {
  error: FatalError;
  roomName: string;
}) {
  const router = useRouter();

  // "unknown" is not a Daily error type, so it is never looked up in the table.
  const known =
    error.type === "unknown" ? undefined : FATAL_ERROR_MESSAGES[error.type];
  const text = known?.message ?? `Something went wrong: ${error.message}`;
  const goToRoom = () => router.push(`/${roomName}`);

  const actions = known?.rejoinable ? (
    <>
      <Button onClick={() => window.location.reload()}>Try Rejoining</Button>
      <Button variant="outline" onClick={goToRoom}>
        Leave
      </Button>
    </>
  ) : (
    <Button onClick={goToRoom}>Back to Room</Button>
  );

  return (
    <Center width="100vw" height="100vh">
      <VStack gap={4}>
        <Text color="red.500">{text}</Text>
        {actions}
      </VStack>
    </Center>
  );
}
type Meeting = components["schemas"]["Meeting"];
type Room = components["schemas"]["RoomDetails"];
@@ -142,8 +82,6 @@ const USE_FRAME_INIT_STATE = {
joined: false as boolean,
} as const;
type FatalError = { type: DailyFatalErrorType | "unknown"; message: string };
// Daily js and not Daily react used right now because daily-js allows for prebuild interface vs. -react is customizable but has no nice defaults
const useFrame = (
container: HTMLDivElement | null,
@@ -151,7 +89,6 @@ const useFrame = (
onLeftMeeting: () => void;
onCustomButtonClick: (ev: DailyEventObjectCustomButtonClick) => void;
onJoinMeeting: () => void;
onError: (ev: DailyEventObjectFatalError) => void;
},
) => {
const [{ frame, joined }, setState] = useState(USE_FRAME_INIT_STATE);
@@ -197,7 +134,6 @@ const useFrame = (
if (!frame) return;
frame.on("left-meeting", cbs.onLeftMeeting);
frame.on("custom-button-click", cbs.onCustomButtonClick);
frame.on("error", cbs.onError);
const joinCb = () => {
if (!frame) {
console.error("frame is null in joined-meeting callback");
@@ -209,7 +145,6 @@ const useFrame = (
return () => {
frame.off("left-meeting", cbs.onLeftMeeting);
frame.off("custom-button-click", cbs.onCustomButtonClick);
frame.off("error", cbs.onError);
frame.off("joined-meeting", joinCb);
};
}, [frame, cbs]);
@@ -253,7 +188,6 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) {
const joinMutation = useRoomJoinMeeting();
const startRecordingMutation = useMeetingStartRecording();
const [joinedMeeting, setJoinedMeeting] = useState<Meeting | null>(null);
const [fatalError, setFatalError] = useState<FatalError | null>(null);
// Generate deterministic instanceIds so all participants use SAME IDs
const cloudInstanceId = parseNonEmptyString(meeting.id);
@@ -300,18 +234,8 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) {
const roomUrl = joinedMeeting?.room_url;
const handleLeave = useCallback(() => {
// If a fatal error occurred, don't redirect — let the error UI show
if (fatalError) return;
router.push("/browse");
}, [router, fatalError]);
const handleError = useCallback((ev: DailyEventObjectFatalError) => {
const error: FatalError = {
type: ev.error?.type ?? "unknown",
message: ev.errorMsg,
};
setFatalError(error);
}, []);
}, [router]);
const handleCustomButtonClick = useCallback(
(ev: DailyEventObjectCustomButtonClick) => {
@@ -400,7 +324,6 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) {
onLeftMeeting: handleLeave,
onCustomButtonClick: handleCustomButtonClick,
onJoinMeeting: handleFrameJoinMeeting,
onError: handleError,
});
useEffect(() => {
@@ -457,27 +380,13 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) {
}
if (joinMutation.isError) {
const apiDetail = printApiError(
joinMutation.error as /*ref 095959E6-01CC-4CF0-B3A9-F65F12F046D3*/ ApiError,
);
return (
<Center width="100vw" height="100vh">
<VStack gap={4}>
<Text color="red.500">
{apiDetail ?? "Failed to join meeting. Please try again."}
</Text>
<Button onClick={() => router.push(`/${roomName}`)}>
Back to Room
</Button>
</VStack>
<Text color="red.500">Failed to join meeting. Please try again.</Text>
</Center>
);
}
if (fatalError) {
return <FatalErrorScreen error={fatalError} roomName={roomName} />;
}
if (!roomUrl) {
return null;
}

View File

@@ -9,7 +9,6 @@ import { MeetingId } from "./types";
import { NonEmptyString } from "./utils";
/*
* ref 095959E6-01CC-4CF0-B3A9-F65F12F046D3
* XXX error types returned from the hooks are not always correct; declared types are ValidationError but real type could be string or any other
* this is either a limitation or incorrect usage of Python json schema generator
* or, limitation or incorrect usage of .d type generator from json schema