Mirror of https://github.com/Monadical-SAS/reflector.git (synced 2026-02-06 10:46:46 +00:00)

Compare commits: fix-room-q ... main (1 commit)

Commit 15ab2e306e
@@ -0,0 +1,35 @@
+"""drop_use_celery_column
+
+Revision ID: 3aa20b96d963
+Revises: e69f08ead8ea
+Create Date: 2026-02-05 10:12:44.065279
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "3aa20b96d963"
+down_revision: Union[str, None] = "e69f08ead8ea"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    with op.batch_alter_table("room", schema=None) as batch_op:
+        batch_op.drop_column("use_celery")
+
+
+def downgrade() -> None:
+    with op.batch_alter_table("room", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "use_celery",
+                sa.Boolean(),
+                server_default=sa.text("false"),
+                nullable=False,
+            )
+        )
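For reviewers who want to exercise this migration locally, a minimal sketch using Alembic's command API; the alembic.ini location is an assumption about the project layout, not part of this change:

# Sketch: apply and revert this revision via Alembic's command API.
import os
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")  # assumption: config at the working directory root
command.upgrade(cfg, "3aa20b96d963")    # drops room.use_celery
command.downgrade(cfg, "e69f08ead8ea")  # re-adds it, NOT NULL DEFAULT false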
@@ -57,12 +57,6 @@ rooms = sqlalchemy.Table(
         sqlalchemy.String,
         nullable=False,
     ),
-    sqlalchemy.Column(
-        "use_celery",
-        sqlalchemy.Boolean,
-        nullable=False,
-        server_default=false(),
-    ),
     sqlalchemy.Column(
         "skip_consent",
         sqlalchemy.Boolean,
@@ -97,7 +91,6 @@ class Room(BaseModel):
     ics_last_sync: datetime | None = None
     ics_last_etag: str | None = None
     platform: Platform = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)
-    use_celery: bool = False
     skip_consent: bool = False
 
 
@@ -15,14 +15,10 @@ from hatchet_sdk.clients.rest.exceptions import ApiException, NotFoundException
 from hatchet_sdk.clients.rest.models import V1TaskStatus
 
 from reflector.db.recordings import recordings_controller
-from reflector.db.rooms import rooms_controller
 from reflector.db.transcripts import Transcript, transcripts_controller
 from reflector.hatchet.client import HatchetClientManager
 from reflector.logger import logger
 from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
-from reflector.pipelines.main_multitrack_pipeline import (
-    task_pipeline_multitrack_process,
-)
 from reflector.utils.string import NonEmptyString
 
 
@@ -181,21 +177,7 @@ async def dispatch_transcript_processing(
     Returns AsyncResult for Celery tasks, None for Hatchet workflows.
     """
     if isinstance(config, MultitrackProcessingConfig):
-        use_celery = False
-        if config.room_id:
-            room = await rooms_controller.get_by_id(config.room_id)
-            use_celery = room.use_celery if room else False
-
-        use_hatchet = not use_celery
-
-        if use_celery:
-            logger.info(
-                "Room uses legacy Celery processing",
-                room_id=config.room_id,
-                transcript_id=config.transcript_id,
-            )
-
-        if use_hatchet:
+        # Multitrack processing always uses Hatchet (no Celery fallback)
         # First check if we can replay (outside transaction since it's read-only)
         transcript = await transcripts_controller.get_by_id(config.transcript_id)
         if transcript and transcript.workflow_run_id and not force:
@@ -203,9 +185,7 @@ async def dispatch_transcript_processing(
                 transcript.workflow_run_id
             )
             if can_replay:
-                await HatchetClientManager.replay_workflow(
-                    transcript.workflow_run_id
-                )
+                await HatchetClientManager.replay_workflow(transcript.workflow_run_id)
                 logger.info(
                     "Replaying Hatchet workflow",
                     workflow_id=transcript.workflow_run_id,
@@ -233,9 +213,7 @@ async def dispatch_transcript_processing(
         # Force: cancel old workflow if exists
         if force and transcript and transcript.workflow_run_id:
             try:
-                await HatchetClientManager.cancel_workflow(
-                    transcript.workflow_run_id
-                )
+                await HatchetClientManager.cancel_workflow(transcript.workflow_run_id)
                 logger.info(
                     "Cancelled old workflow (--force)",
                     workflow_id=transcript.workflow_run_id,
@@ -245,9 +223,7 @@ async def dispatch_transcript_processing(
                     "Old workflow already deleted (--force)",
                     workflow_id=transcript.workflow_run_id,
                 )
-            await transcripts_controller.update(
-                transcript, {"workflow_run_id": None}
-            )
+            await transcripts_controller.update(transcript, {"workflow_run_id": None})
 
         # Re-fetch and check for concurrent dispatch (optimistic approach).
         # No database lock - worst case is duplicate dispatch, but Hatchet
@@ -293,12 +269,6 @@ async def dispatch_transcript_processing(
         logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
         return None
 
-        # Celery pipeline (durable workflows disabled)
-        return task_pipeline_multitrack_process.delay(
-            transcript_id=config.transcript_id,
-            bucket_name=config.bucket_name,
-            track_keys=config.track_keys,
-        )
     elif isinstance(config, FileProcessingConfig):
         return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
     else:
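Taken together, the hunks above reduce dispatch_transcript_processing to a single Hatchet path. A self-contained sketch of the resulting replay-or-start decision, with a stubbed manager; the call that computes can_replay is elided by the diff context, so it is passed in as a plain flag here:

import asyncio

class StubHatchetClientManager:
    # Stand-in for reflector.hatchet.client.HatchetClientManager.
    @staticmethod
    async def replay_workflow(run_id: str) -> None:
        print("replaying", run_id)

    @staticmethod
    async def cancel_workflow(run_id: str) -> None:
        print("cancelling", run_id)

async def dispatch(workflow_run_id: str | None, can_replay: bool, force: bool) -> None:
    if workflow_run_id and not force and can_replay:
        # Existing run, not forced: replay instead of dispatching a duplicate.
        await StubHatchetClientManager.replay_workflow(workflow_run_id)
        return
    if force and workflow_run_id:
        # Forced: cancel the old run and clear the stored id before starting fresh.
        await StubHatchetClientManager.cancel_workflow(workflow_run_id)
        workflow_run_id = None
    print("starting new workflow")

asyncio.run(dispatch("run-123", can_replay=True, force=False))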
@@ -1,7 +1,7 @@
 from pydantic.types import PositiveInt
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
-from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
+from reflector.schemas.platform import DAILY_PLATFORM, Platform
 from reflector.utils.string import NonEmptyString
 
 
@@ -155,7 +155,7 @@ class Settings(BaseSettings):
         None  # Webhook UUID for this environment. Not used by production code
     )
     # Platform Configuration
-    DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM
+    DEFAULT_VIDEO_PLATFORM: Platform = DAILY_PLATFORM
 
     # Zulip integration
     ZULIP_REALM: str | None = None
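The default-platform switch above reaches rooms through the Field(default_factory=...) seen earlier in the Room model. A self-contained illustration with stand-in types (the real Platform and Settings live in reflector.schemas.platform and reflector.settings):

from pydantic import BaseModel, Field

DAILY_PLATFORM = "daily"  # stand-in for reflector.schemas.platform.DAILY_PLATFORM

class StubSettings:
    DEFAULT_VIDEO_PLATFORM = DAILY_PLATFORM

settings = StubSettings()

class Room(BaseModel):
    # default_factory runs at instantiation time, so each new Room picks up
    # whatever DEFAULT_VIDEO_PLATFORM is at that moment (e.g. a test override).
    platform: str = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)

assert Room().platform == "daily"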
@@ -27,9 +27,6 @@ from reflector.db.transcripts import (
 from reflector.hatchet.client import HatchetClientManager
 from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
 from reflector.pipelines.main_live_pipeline import asynctask
-from reflector.pipelines.main_multitrack_pipeline import (
-    task_pipeline_multitrack_process,
-)
 from reflector.pipelines.topic_processing import EmptyPipeline
 from reflector.processors import AudioFileWriterProcessor
 from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
@@ -351,17 +348,7 @@ async def _process_multitrack_recording_inner(
         room_id=room.id,
     )
 
-    use_celery = room and room.use_celery
-    use_hatchet = not use_celery
-
-    if use_celery:
-        logger.info(
-            "Room uses legacy Celery processing",
-            room_id=room.id,
-            transcript_id=transcript.id,
-        )
-
-    if use_hatchet:
+    # Multitrack processing always uses Hatchet (no Celery fallback)
     workflow_id = await HatchetClientManager.start_workflow(
         workflow_name="DiarizationPipeline",
         input_data={
@@ -383,17 +370,7 @@ async def _process_multitrack_recording_inner(
         transcript_id=transcript.id,
     )
 
-    await transcripts_controller.update(
-        transcript, {"workflow_run_id": workflow_id}
-    )
-    return
-
-    # Celery pipeline (runs when durable workflows disabled)
-    task_pipeline_multitrack_process.delay(
-        transcript_id=transcript.id,
-        bucket_name=bucket_name,
-        track_keys=filter_cam_audio_tracks(track_keys),
-    )
+    await transcripts_controller.update(transcript, {"workflow_run_id": workflow_id})
 
 
 @shared_task
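The input_data passed to start_workflow is elided by the diff context above; its shape can be inferred from the test assertions near the end of this compare (transcript_id, bucket_name, and tracks as {"s3_key": ...} dicts). A stubbed, self-contained sketch under that assumption:

import asyncio
import uuid

class StubHatchetClientManager:
    # Stand-in for reflector.hatchet.client.HatchetClientManager.
    @staticmethod
    async def start_workflow(workflow_name: str, input_data: dict) -> str:
        # The real manager submits to Hatchet and returns the workflow run id.
        return str(uuid.uuid4())

async def main() -> None:
    track_keys = ["room/track-a.webm", "room/track-b.webm"]  # hypothetical keys
    workflow_id = await StubHatchetClientManager.start_workflow(
        workflow_name="DiarizationPipeline",
        input_data={
            "transcript_id": "t-123",  # hypothetical id
            "bucket_name": "daily-bucket",
            "tracks": [{"s3_key": k} for k in track_keys],
        },
    )
    print("dispatched", workflow_id)

asyncio.run(main())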
@@ -1072,10 +1049,7 @@ async def reprocess_failed_daily_recordings():
             )
             continue
 
-        use_celery = room and room.use_celery
-        use_hatchet = not use_celery
-
-        if use_hatchet:
+        # Multitrack reprocessing always uses Hatchet (no Celery fallback)
         if not transcript:
             logger.warning(
                 "No transcript for Hatchet reprocessing, skipping",
@@ -1112,26 +1086,6 @@ async def reprocess_failed_daily_recordings():
                 room_name=meeting.room_name,
                 track_count=len(recording.track_keys),
             )
-        else:
-            logger.info(
-                "Queueing Daily recording for Celery reprocessing",
-                recording_id=recording.id,
-                room_name=meeting.room_name,
-                track_count=len(recording.track_keys),
-                transcript_status=transcript.status if transcript else None,
-            )
-
-            # For reprocessing, pass actual recording time (though it's ignored - see _process_multitrack_recording_inner)
-            # Reprocessing uses recording.meeting_id directly instead of time-based matching
-            recording_start_ts = int(recording.recorded_at.timestamp())
-
-            process_multitrack_recording.delay(
-                bucket_name=bucket_name,
-                daily_room_name=meeting.room_name,
-                recording_id=recording.id,
-                track_keys=recording.track_keys,
-                recording_start_ts=recording_start_ts,
-            )
 
         reprocessed_count += 1
 
@@ -4,7 +4,7 @@ from unittest.mock import patch
 
 import pytest
 
-from reflector.schemas.platform import WHEREBY_PLATFORM
+from reflector.schemas.platform import DAILY_PLATFORM, WHEREBY_PLATFORM
 
 
 @pytest.fixture(scope="session", autouse=True)
@@ -14,6 +14,7 @@ def register_mock_platform():
     from reflector.video_platforms.registry import register_platform
 
     register_platform(WHEREBY_PLATFORM, MockPlatformClient)
+    register_platform(DAILY_PLATFORM, MockPlatformClient)
     yield
 
 
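Registering the same mock under both platform names keeps the registry lookup working whichever platform a test room uses. A minimal sketch of the registry pattern; the names here are stand-ins, and the real implementation lives in reflector.video_platforms.registry:

_PLATFORMS: dict[str, type] = {}

def register_platform(name: str, client_cls: type) -> None:
    # Maps a platform name to the client class used to talk to it.
    _PLATFORMS[name] = client_cls

class MockPlatformClient:
    pass

for name in ("whereby", "daily"):  # stand-ins for WHEREBY_PLATFORM / DAILY_PLATFORM
    register_platform(name, MockPlatformClient)

assert _PLATFORMS["daily"] is MockPlatformClient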
@@ -1,6 +1,6 @@
 import asyncio
 import time
-from unittest.mock import patch
+from unittest.mock import AsyncMock, patch
 
 import pytest
 from httpx import ASGITransport, AsyncClient
@@ -142,17 +142,17 @@ async def test_whereby_recording_uses_file_pipeline(client):
             "reflector.services.transcript_process.task_pipeline_file_process"
         ) as mock_file_pipeline,
         patch(
-            "reflector.services.transcript_process.task_pipeline_multitrack_process"
-        ) as mock_multitrack_pipeline,
+            "reflector.services.transcript_process.HatchetClientManager"
+        ) as mock_hatchet,
     ):
         response = await client.post(f"/transcripts/{transcript.id}/process")
 
         assert response.status_code == 200
         assert response.json()["status"] == "ok"
 
-        # Whereby recordings should use file pipeline
+        # Whereby recordings should use file pipeline, not Hatchet
         mock_file_pipeline.delay.assert_called_once_with(transcript_id=transcript.id)
-        mock_multitrack_pipeline.delay.assert_not_called()
+        mock_hatchet.start_workflow.assert_not_called()
 
 
 @pytest.mark.usefixtures("setup_database")
@@ -177,8 +177,6 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
         recording_trigger="automatic-2nd-participant",
         is_shared=False,
     )
-    # Force Celery backend for test
-    await rooms_controller.update(room, {"use_celery": True})
 
     transcript = await transcripts_controller.add(
         "",
@@ -213,18 +211,23 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
             "reflector.services.transcript_process.task_pipeline_file_process"
         ) as mock_file_pipeline,
         patch(
-            "reflector.services.transcript_process.task_pipeline_multitrack_process"
-        ) as mock_multitrack_pipeline,
+            "reflector.services.transcript_process.HatchetClientManager"
+        ) as mock_hatchet,
     ):
+        mock_hatchet.start_workflow = AsyncMock(return_value="test-workflow-id")
+
         response = await client.post(f"/transcripts/{transcript.id}/process")
 
         assert response.status_code == 200
         assert response.json()["status"] == "ok"
 
-        # Daily.co multitrack recordings should use multitrack pipeline
-        mock_multitrack_pipeline.delay.assert_called_once_with(
-            transcript_id=transcript.id,
-            bucket_name="daily-bucket",
-            track_keys=track_keys,
-        )
+        # Daily.co multitrack recordings should use Hatchet workflow
+        mock_hatchet.start_workflow.assert_called_once()
+        call_kwargs = mock_hatchet.start_workflow.call_args.kwargs
+        assert call_kwargs["workflow_name"] == "DiarizationPipeline"
+        assert call_kwargs["input_data"]["transcript_id"] == transcript.id
+        assert call_kwargs["input_data"]["bucket_name"] == "daily-bucket"
+        assert call_kwargs["input_data"]["tracks"] == [
+            {"s3_key": k} for k in track_keys
+        ]
         mock_file_pipeline.delay.assert_not_called()
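The explicit AsyncMock assignment matters because patch() replaces HatchetClientManager with a MagicMock, and calling a plain MagicMock attribute returns a non-awaitable mock. A standalone illustration of the pattern used above:

import asyncio
from unittest.mock import AsyncMock, MagicMock

manager = MagicMock()
manager.start_workflow = AsyncMock(return_value="test-workflow-id")

async def main() -> None:
    # Awaitable because start_workflow is an AsyncMock, not a plain MagicMock.
    workflow_id = await manager.start_workflow(
        workflow_name="DiarizationPipeline", input_data={}
    )
    assert workflow_id == "test-workflow-id"
    manager.start_workflow.assert_awaited_once()

asyncio.run(main())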