mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-02-06 18:56:48 +00:00
feat: durable (#794)
* durable (no-mistakes) * hatchet no-mistake * hatchet no-mistake * hatchet no-mistake, better logging * remove conductor and add hatchet tests (no-mistakes) * self-review (no-mistakes) * hatched logs * remove shadow mode for hatchet * and add hatchet processor setting to room * . * cleanup * hatchet init db * self-review (no-mistakes) * self-review (no-mistakes) * hatchet: restore zullip report * self-review round * self-review round * self-review round * dry hatchet with celery * dry hatched with celery - 2 * self-review round * more NES instead of str * self-review wip * self-review round * self-review round * self-review round * can_replay cancelled * add forgotten file * pr autoreviewer fixes * better log webhook events * durable_started return * migration sync * latest changes feature parity * migration merge * pr review --------- Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
This commit is contained in:
@@ -11,13 +11,19 @@ from typing import Literal, Union, assert_never
|
||||
|
||||
import celery
|
||||
from celery.result import AsyncResult
|
||||
from hatchet_sdk.clients.rest.exceptions import ApiException
|
||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||
|
||||
from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.transcripts import Transcript
|
||||
from reflector.db.rooms import rooms_controller
|
||||
from reflector.db.transcripts import Transcript, transcripts_controller
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||
from reflector.pipelines.main_multitrack_pipeline import (
|
||||
task_pipeline_multitrack_process,
|
||||
)
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.string import NonEmptyString
|
||||
|
||||
|
||||
@@ -37,6 +43,8 @@ class MultitrackProcessingConfig:
|
||||
transcript_id: NonEmptyString
|
||||
bucket_name: NonEmptyString
|
||||
track_keys: list[str]
|
||||
recording_id: NonEmptyString | None = None
|
||||
room_id: NonEmptyString | None = None
|
||||
mode: Literal["multitrack"] = "multitrack"
|
||||
|
||||
|
||||
@@ -49,6 +57,7 @@ class ValidationOk:
|
||||
# transcript currently doesnt always have recording_id
|
||||
recording_id: NonEmptyString | None
|
||||
transcript_id: NonEmptyString
|
||||
room_id: NonEmptyString | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -96,6 +105,7 @@ async def validate_transcript_for_processing(
|
||||
if transcript.status == "idle":
|
||||
return ValidationNotReady(detail="Recording is not ready for processing")
|
||||
|
||||
# Check Celery tasks
|
||||
if task_is_scheduled_or_active(
|
||||
"reflector.pipelines.main_file_pipeline.task_pipeline_file_process",
|
||||
transcript_id=transcript.id,
|
||||
@@ -105,8 +115,25 @@ async def validate_transcript_for_processing(
|
||||
):
|
||||
return ValidationAlreadyScheduled(detail="already running")
|
||||
|
||||
# Check Hatchet workflows (if enabled)
|
||||
if settings.HATCHET_ENABLED and transcript.workflow_run_id:
|
||||
try:
|
||||
status = await HatchetClientManager.get_workflow_run_status(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
# If workflow is running or queued, don't allow new processing
|
||||
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
|
||||
return ValidationAlreadyScheduled(
|
||||
detail="Hatchet workflow already running"
|
||||
)
|
||||
except ApiException:
|
||||
# Workflow might be gone (404) or API issue - allow processing
|
||||
pass
|
||||
|
||||
return ValidationOk(
|
||||
recording_id=transcript.recording_id, transcript_id=transcript.id
|
||||
recording_id=transcript.recording_id,
|
||||
transcript_id=transcript.id,
|
||||
room_id=transcript.room_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -116,6 +143,7 @@ async def prepare_transcript_processing(validation: ValidationOk) -> PrepareResu
|
||||
"""
|
||||
bucket_name: str | None = None
|
||||
track_keys: list[str] | None = None
|
||||
recording_id: str | None = validation.recording_id
|
||||
|
||||
if validation.recording_id:
|
||||
recording = await recordings_controller.get_by_id(validation.recording_id)
|
||||
@@ -137,6 +165,8 @@ async def prepare_transcript_processing(validation: ValidationOk) -> PrepareResu
|
||||
bucket_name=bucket_name, # type: ignore (validated above)
|
||||
track_keys=track_keys,
|
||||
transcript_id=validation.transcript_id,
|
||||
recording_id=recording_id,
|
||||
room_id=validation.room_id,
|
||||
)
|
||||
|
||||
return FileProcessingConfig(
|
||||
@@ -144,8 +174,115 @@ async def prepare_transcript_processing(validation: ValidationOk) -> PrepareResu
|
||||
)
|
||||
|
||||
|
||||
def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult:
|
||||
async def dispatch_transcript_processing(
|
||||
config: ProcessingConfig, force: bool = False
|
||||
) -> AsyncResult | None:
|
||||
"""Dispatch transcript processing to appropriate backend (Hatchet or Celery).
|
||||
|
||||
Returns AsyncResult for Celery tasks, None for Hatchet workflows.
|
||||
"""
|
||||
if isinstance(config, MultitrackProcessingConfig):
|
||||
# Check if room has use_hatchet=True (overrides env vars)
|
||||
room_forces_hatchet = False
|
||||
if config.room_id:
|
||||
room = await rooms_controller.get_by_id(config.room_id)
|
||||
room_forces_hatchet = room.use_hatchet if room else False
|
||||
|
||||
# Start durable workflow if enabled (Hatchet)
|
||||
# or if room has use_hatchet=True
|
||||
use_hatchet = settings.HATCHET_ENABLED or room_forces_hatchet
|
||||
|
||||
if room_forces_hatchet:
|
||||
logger.info(
|
||||
"Room forces Hatchet workflow",
|
||||
room_id=config.room_id,
|
||||
transcript_id=config.transcript_id,
|
||||
)
|
||||
|
||||
if use_hatchet:
|
||||
# First check if we can replay (outside transaction since it's read-only)
|
||||
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
||||
if transcript and transcript.workflow_run_id and not force:
|
||||
can_replay = await HatchetClientManager.can_replay(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
if can_replay:
|
||||
await HatchetClientManager.replay_workflow(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
logger.info(
|
||||
"Replaying Hatchet workflow",
|
||||
workflow_id=transcript.workflow_run_id,
|
||||
)
|
||||
return None
|
||||
else:
|
||||
# Workflow exists but can't replay (CANCELLED, COMPLETED, etc.)
|
||||
# Log and proceed to start new workflow
|
||||
status = await HatchetClientManager.get_workflow_run_status(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
logger.info(
|
||||
"Old workflow not replayable, starting new",
|
||||
old_workflow_id=transcript.workflow_run_id,
|
||||
old_status=status.value,
|
||||
)
|
||||
|
||||
# Force: cancel old workflow if exists
|
||||
if force and transcript and transcript.workflow_run_id:
|
||||
await HatchetClientManager.cancel_workflow(transcript.workflow_run_id)
|
||||
logger.info(
|
||||
"Cancelled old workflow (--force)",
|
||||
workflow_id=transcript.workflow_run_id,
|
||||
)
|
||||
await transcripts_controller.update(
|
||||
transcript, {"workflow_run_id": None}
|
||||
)
|
||||
|
||||
# Re-fetch and check for concurrent dispatch (optimistic approach).
|
||||
# No database lock - worst case is duplicate dispatch, but Hatchet
|
||||
# workflows are idempotent so this is acceptable.
|
||||
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
||||
if transcript and transcript.workflow_run_id:
|
||||
# Another process started a workflow between validation and now
|
||||
try:
|
||||
status = await HatchetClientManager.get_workflow_run_status(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
|
||||
logger.info(
|
||||
"Concurrent workflow detected, skipping dispatch",
|
||||
workflow_id=transcript.workflow_run_id,
|
||||
)
|
||||
return None
|
||||
except ApiException:
|
||||
# Workflow might be gone (404) or API issue - proceed with new workflow
|
||||
pass
|
||||
|
||||
workflow_id = await HatchetClientManager.start_workflow(
|
||||
workflow_name="DiarizationPipeline",
|
||||
input_data={
|
||||
"recording_id": config.recording_id,
|
||||
"tracks": [{"s3_key": k} for k in config.track_keys],
|
||||
"bucket_name": config.bucket_name,
|
||||
"transcript_id": config.transcript_id,
|
||||
"room_id": config.room_id,
|
||||
},
|
||||
additional_metadata={
|
||||
"transcript_id": config.transcript_id,
|
||||
"recording_id": config.recording_id,
|
||||
"daily_recording_id": config.recording_id,
|
||||
},
|
||||
)
|
||||
|
||||
if transcript:
|
||||
await transcripts_controller.update(
|
||||
transcript, {"workflow_run_id": workflow_id}
|
||||
)
|
||||
|
||||
logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
|
||||
return None
|
||||
|
||||
# Celery pipeline (durable workflows disabled)
|
||||
return task_pipeline_multitrack_process.delay(
|
||||
transcript_id=config.transcript_id,
|
||||
bucket_name=config.bucket_name,
|
||||
|
||||
Reference in New Issue
Block a user