feat: durable (#794)

* durable (no-mistakes)

* hatchet no-mistake

* hatchet no-mistake

* hatchet no-mistake, better logging

* remove conductor and add hatchet tests (no-mistakes)

* self-review (no-mistakes)

* hatched logs

* remove shadow mode for hatchet

* and add hatchet processor setting to room

* .

* cleanup

* hatchet init db

* self-review (no-mistakes)

* self-review (no-mistakes)

* hatchet: restore zullip report

* self-review round

* self-review round

* self-review round

* dry hatchet with celery

* dry hatched with celery - 2

* self-review round

* more NES instead of str

* self-review wip

* self-review round

* self-review round

* self-review round

* can_replay cancelled

* add forgotten file

* pr autoreviewer fixes

* better log webhook events

* durable_started return

* migration sync

* latest changes feature parity

* migration merge

* pr review

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
This commit is contained in:
Igor Monadical
2025-12-22 12:09:20 -05:00
committed by GitHub
parent f580b996ee
commit 1dac999b56
36 changed files with 4908 additions and 2009 deletions

View File

@@ -15,8 +15,11 @@ import time
from typing import Callable
from celery.result import AsyncResult
from hatchet_sdk.clients.rest.models import V1TaskStatus
from reflector.db import get_database
from reflector.db.transcripts import Transcript, transcripts_controller
from reflector.hatchet.client import HatchetClientManager
from reflector.services.transcript_process import (
FileProcessingConfig,
MultitrackProcessingConfig,
@@ -34,24 +37,26 @@ async def process_transcript_inner(
transcript: Transcript,
on_validation: Callable[[ValidationResult], None],
on_preprocess: Callable[[PrepareResult], None],
) -> AsyncResult:
force: bool = False,
) -> AsyncResult | None:
validation = await validate_transcript_for_processing(transcript)
on_validation(validation)
config = await prepare_transcript_processing(validation)
on_preprocess(config)
return dispatch_transcript_processing(config)
return await dispatch_transcript_processing(config, force=force)
async def process_transcript(transcript_id: str, sync: bool = False) -> None:
async def process_transcript(
transcript_id: str, sync: bool = False, force: bool = False
) -> None:
"""
Process a transcript by ID, auto-detecting multitrack vs file pipeline.
Args:
transcript_id: The transcript UUID
sync: If True, wait for task completion. If False, dispatch and exit.
force: If True, cancel old workflow and start new (latest code). If False, replay failed workflow.
"""
from reflector.db import get_database
database = get_database()
await database.connect()
@@ -82,10 +87,42 @@ async def process_transcript(transcript_id: str, sync: bool = False) -> None:
print(f"Dispatching file pipeline", file=sys.stderr)
result = await process_transcript_inner(
transcript, on_validation=on_validation, on_preprocess=on_preprocess
transcript,
on_validation=on_validation,
on_preprocess=on_preprocess,
force=force,
)
if sync:
if result is None:
# Hatchet workflow dispatched
if sync:
# Re-fetch transcript to get workflow_run_id
transcript = await transcripts_controller.get_by_id(transcript_id)
if not transcript or not transcript.workflow_run_id:
print("Error: workflow_run_id not found", file=sys.stderr)
sys.exit(1)
print("Waiting for Hatchet workflow...", file=sys.stderr)
while True:
status = await HatchetClientManager.get_workflow_run_status(
transcript.workflow_run_id
)
print(f" Status: {status.value}", file=sys.stderr)
if status == V1TaskStatus.COMPLETED:
print("Workflow completed successfully", file=sys.stderr)
break
elif status in (V1TaskStatus.FAILED, V1TaskStatus.CANCELLED):
print(f"Workflow failed: {status}", file=sys.stderr)
sys.exit(1)
await asyncio.sleep(5)
else:
print(
"Task dispatched (use --sync to wait for completion)",
file=sys.stderr,
)
elif sync:
print("Waiting for task completion...", file=sys.stderr)
while not result.ready():
print(f" Status: {result.state}", file=sys.stderr)
@@ -118,9 +155,16 @@ def main():
action="store_true",
help="Wait for task completion instead of just dispatching",
)
parser.add_argument(
"--force",
action="store_true",
help="Cancel old workflow and start new (uses latest code instead of replaying)",
)
args = parser.parse_args()
asyncio.run(process_transcript(args.transcript_id, sync=args.sync))
asyncio.run(
process_transcript(args.transcript_id, sync=args.sync, force=args.force)
)
if __name__ == "__main__":