hatchet no-mistake

Igor Loskutov
2025-12-16 00:48:30 -05:00
parent 243ff2177c
commit c5498d26bf
18 changed files with 2189 additions and 1952 deletions

TASKS.md: 2073 changed lines (file diff suppressed because it is too large)

View File

@@ -48,6 +48,20 @@ services:
conductor:
condition: service_healthy
hatchet-worker:
build:
context: server
volumes:
- ./server/:/app/
- /app/.venv
env_file:
- ./server/.env
environment:
ENTRYPOINT: hatchet-worker
depends_on:
hatchet:
condition: service_healthy
redis:
image: redis:7.2
ports:
@@ -81,8 +95,8 @@ services:
conductor:
image: conductoross/conductor-standalone:3.15.0
ports:
- 8180:8080
- 5001:5000
- "8180:8080"
- "5001:5000"
environment:
- conductor.db.type=memory
healthcheck:
@@ -91,6 +105,54 @@ services:
timeout: 10s
retries: 5
hatchet-postgres:
image: postgres:15.6
command: postgres -c 'max_connections=200'
restart: always
environment:
- POSTGRES_USER=hatchet
- POSTGRES_PASSWORD=hatchet
- POSTGRES_DB=hatchet
ports:
- "5436:5432"
volumes:
- ./data/hatchet-postgres:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -d hatchet -U hatchet"]
interval: 10s
timeout: 10s
retries: 5
start_period: 10s
hatchet:
image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
ports:
- "8889:8888"
- "7078:7077"
depends_on:
hatchet-postgres:
condition: service_healthy
environment:
DATABASE_URL: "postgresql://hatchet:hatchet@hatchet-postgres:5432/hatchet?sslmode=disable"
SERVER_AUTH_COOKIE_DOMAIN: localhost
SERVER_AUTH_COOKIE_INSECURE: "t"
SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
SERVER_GRPC_INSECURE: "t"
SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
SERVER_GRPC_PORT: "7077"
SERVER_URL: http://localhost:8889
SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
SERVER_DEFAULT_ENGINE_VERSION: "V1"
SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
volumes:
- ./data/hatchet-config:/config
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
networks:
default:
attachable: true

View File

@@ -0,0 +1,339 @@
# Hatchet Migration - LLM Debugging Observations
This document captures hard-won debugging insights from implementing the multitrack diarization pipeline with Hatchet. These observations are particularly relevant for LLM assistants working on this codebase.
## Architecture Context
- **Hatchet SDK v1.21+** uses async workers with gRPC for task polling
- Workers connect to Hatchet server via gRPC (port 7077) and trigger workflows via REST (port 8888)
- `hatchet-lite` image bundles the Hatchet server and engine in one container (Postgres runs separately; this compose setup adds a `hatchet-postgres` service for it)
- Tasks are decorated with `@workflow.task()` (not `@hatchet.step()` as in older examples)
- Workflow input is validated via Pydantic models with `input_validator=` parameter
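A minimal sketch of that registration pattern, with illustrative names (the real definitions live in `server/reflector/hatchet/workflows/`):
```python
from hatchet_sdk import Context, Hatchet
from pydantic import BaseModel

hatchet = Hatchet()  # reads HATCHET_CLIENT_TOKEN from the environment

class PipelineInput(BaseModel):
    transcript_id: str

pipeline = hatchet.workflow(name="DiarizationPipeline", input_validator=PipelineInput)

@pipeline.task()
async def get_recording(input: PipelineInput, ctx: Context) -> dict:
    # Task output is a plain dict; downstream tasks read it via ctx.task_output(...)
    return {"transcript_id": input.transcript_id}
```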
---
## Challenge 1: SDK Version API Breaking Changes
### Symptoms
```
AttributeError: 'V1WorkflowRunDetails' object has no attribute 'workflow_run_id'
```
### Root Cause
Hatchet SDK v1.21+ changed the response structure for workflow creation. Old examples show:
```python
result = await client.runs.aio_create(workflow_name, input_data)
return result.workflow_run_id # OLD - doesn't work
```
### Resolution
Access the run ID through the new nested structure:
```python
result = await client.runs.aio_create(workflow_name, input_data)
return result.run.metadata.id # NEW - SDK v1.21+
```
### Key Insight
**Don't trust documentation or examples.** Read the SDK source code or use IDE autocomplete to discover actual attribute names. The SDK evolves faster than docs.
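If the code has to tolerate more than one SDK version, a small defensive accessor can bridge the gap. A hypothetical helper that only knows the two attribute layouts shown above:
```python
def extract_run_id(result) -> str:
    """Return the workflow run ID from either response shape (hypothetical helper)."""
    run = getattr(result, "run", None)
    if run is not None:
        # SDK v1.21+: V1WorkflowRunDetails exposes run.metadata.id
        return run.metadata.id
    # Older SDKs exposed workflow_run_id directly on the result
    return result.workflow_run_id
```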
---
## Challenge 2: Worker Appears Hung at "starting runner..."
### Symptoms
```
[INFO] Starting Hatchet workers
[INFO] Starting Hatchet worker polling...
[INFO] STARTING HATCHET...
[INFO] starting runner...
# ... nothing else, appears stuck
```
### Root Cause
Without debug mode, Hatchet SDK doesn't log:
- Workflow registration
- gRPC connection status
- Heartbeat activity
- Action listener acquisition
The worker IS working; you just can't see it.
### Resolution
Always enable debug mode during development:
```bash
HATCHET_DEBUG=true
```
With debug enabled, you'll see the actual activity:
```
[DEBUG] 'worker-name' waiting for ['workflow:task1', 'workflow:task2']
[DEBUG] starting action listener: worker-name
[DEBUG] acquired action listener: 562d00a8-8895-42a1-b65b-46f905c902f9
[DEBUG] sending heartbeat
```
### Key Insight
**Start every Hatchet debugging session with `HATCHET_DEBUG=true`.** Silent workers waste hours of debugging time.
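The same flag is available on the client constructor; this commit's `reflector/hatchet/client.py` passes `settings.HATCHET_DEBUG` through it. A minimal sketch:
```python
from hatchet_sdk import Hatchet

# Equivalent to exporting HATCHET_DEBUG=true in this codebase's settings
hatchet = Hatchet(debug=True)
```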
---
## Challenge 3: Docker Networking + JWT Token URL Conflicts
### Symptoms
```
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses"
```
### Root Cause
The Hatchet API token embeds URLs:
```json
{
  "aud": "http://localhost:8889",
  "grpc_broadcast_address": "localhost:7077",
  "server_url": "http://localhost:8889"
}
```
Inside Docker containers, `localhost` refers to the container itself, not the Hatchet server.
### Resolution
Override the token-embedded URLs with environment variables:
```bash
# In .env or docker-compose environment
HATCHET_CLIENT_HOST_PORT=hatchet:7077
HATCHET_CLIENT_SERVER_URL=http://hatchet:8888
HATCHET_CLIENT_TLS_STRATEGY=none
```
### Key Insight
**The JWT token is not the final word on connection settings.** Environment variables override token-embedded URLs, which is essential for Docker networking.
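When it is unclear what a token actually embeds, decoding its (unverified) payload is a quick check. A sketch assuming the standard three-segment JWT layout and the claim names shown above:
```python
import base64
import json
import os

def jwt_claims(token: str) -> dict:
    """Decode the JWT payload without verifying the signature (debugging only)."""
    payload = token.split(".")[1]
    payload += "=" * (-len(payload) % 4)  # restore stripped base64 padding
    return json.loads(base64.urlsafe_b64decode(payload))

claims = jwt_claims(os.environ["HATCHET_CLIENT_TOKEN"])
print(claims.get("grpc_broadcast_address"), claims.get("server_url"))
```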
---
## Challenge 4: Workflow Name Case Sensitivity
### Symptoms
```
BadRequestException: (400)
HTTP response body: errors=[APIError(description='workflow names not found: diarizationpipeline')]
```
### Root Cause
Hatchet uses the exact workflow name you define for triggering:
```python
diarization_pipeline = hatchet.workflow(
    name="DiarizationPipeline",  # Use THIS exact name to trigger
    input_validator=PipelineInput,
)
```
Internally, task identifiers are lowercased (`diarizationpipeline:get_recording`), but workflow triggers must match the defined name.
### Resolution
```python
# Correct
await client.start_workflow('DiarizationPipeline', input_data)
# Wrong
await client.start_workflow('diarizationpipeline', input_data)
```
### Key Insight
**Workflow names are case-sensitive for triggering, but task refs are lowercase.** Don't conflate the two.
---
## Challenge 5: Pydantic Response Object Iteration
### Symptoms
```
AttributeError: 'tuple' object has no attribute 'participant_id'
```
### Root Cause
When API responses return Pydantic models with list fields:
```python
class MeetingParticipantsResponse(BaseModel):
    data: List[MeetingParticipant]
```
Iterating the response object directly is wrong:
```python
for p in participants: # WRONG - iterates over model fields as tuples
```
### Resolution
Access the `.data` attribute explicitly:
```python
for p in participants.data:  # CORRECT - iterates over list items
    print(p.participant_id)
```
### Key Insight
**Pydantic models with list fields require explicit `.data` access.** The model itself is not iterable in the expected way.
---
## Challenge 6: Database Connections in Async Workers
### Symptoms
```
InterfaceError: cannot perform operation: another operation is in progress
```
### Root Cause
Similar to Conductor, Hatchet workers may inherit stale database connections. Each task runs in an async context that may not share the same event loop as cached connections.
### Resolution
Create fresh database connections per task:
```python
async def _get_fresh_db_connection():
    """Create fresh database connection for worker task."""
    import databases

    from reflector.db import _database_context
    from reflector.settings import settings

    _database_context.set(None)
    db = databases.Database(settings.DATABASE_URL)
    _database_context.set(db)
    await db.connect()
    return db

async def _close_db_connection(db):
    await db.disconnect()
    _database_context.set(None)
### Key Insight
**Cached singletons (DB, HTTP clients) are unsafe in workflow workers.** Always create fresh connections.
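The per-task usage mirrors the `finalize` task in this commit: open a fresh connection, do the work, and always close it in `finally`. A minimal sketch (`mark_transcript_ended` is an illustrative name):
```python
async def mark_transcript_ended(transcript_id: str) -> None:
    from reflector.db.transcripts import transcripts_controller

    db = await _get_fresh_db_connection()
    try:
        transcript = await transcripts_controller.get_by_id(transcript_id)
        if transcript is not None:
            await transcripts_controller.update(transcript, {"status": "ended"})
    finally:
        await _close_db_connection(db)
```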
---
## Challenge 7: Child Workflow Fan-out Pattern
### Symptoms
Child workflows spawn, but the parent doesn't wait for their completion, or results aren't collected.
### Root Cause
Hatchet child workflows need explicit spawning and result collection:
```python
# Spawning children
child_runs = await asyncio.gather(*[
    child_workflow.aio_run(child_input)
    for child_input in inputs
])
# Results are returned directly from aio_run()
```
### Resolution
Use `aio_run()` for child workflows and `asyncio.gather()` for parallelism:
```python
@parent_workflow.task(parents=[setup_task])
async def process_tracks(input: ParentInput, ctx: Context) -> dict:
    child_coroutines = [
        track_workflow.aio_run(TrackInput(track_index=i, ...))
        for i in range(len(input.tracks))
    ]
    results = await asyncio.gather(*child_coroutines, return_exceptions=True)

    # Handle failures
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            logger.error(f"Track {i} failed: {result}")

    return {"track_results": [r for r in results if not isinstance(r, Exception)]}
```
### Key Insight
**Child workflows in Hatchet return results directly.** No need to poll for completion like in Conductor.
---
## Debugging Workflow
### 1. Enable Debug Mode First
```bash
HATCHET_DEBUG=true
```
### 2. Verify Worker Registration
Look for this in debug logs:
```
[DEBUG] 'worker-name' waiting for ['workflow:task1', 'workflow:task2', ...]
[DEBUG] acquired action listener: {uuid}
```
### 3. Test Workflow Trigger Separately
```bash
docker exec server uv run python -c "
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.workflows.diarization_pipeline import PipelineInput
import asyncio
async def test():
    input_data = PipelineInput(
        transcript_id='test',
        recording_id=None,
        room_name='test-room',
        bucket_name='bucket',
        tracks=[],
    )
    run_id = await HatchetClientManager.start_workflow(
        'DiarizationPipeline',
        input_data.model_dump()
    )
    print(f'Triggered: {run_id}')

asyncio.run(test())
"
```
### 4. Check Hatchet Server Logs
```bash
docker logs reflector-hatchet-1 --tail 50
```
Look for `WRN` entries indicating API errors or connection issues.
### 5. Verify gRPC Connectivity
```bash
docker exec worker python -c "
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('hatchet', 7077))
print(f'gRPC port 7077: {\"reachable\" if result == 0 else \"blocked\"}')"
```
### 6. Force Container Rebuild
Volume mounts may cache old bytecode:
```bash
docker compose up -d --build --force-recreate hatchet-worker
```
---
## Common Gotchas Summary
| Issue | Signal | Fix |
|-------|--------|-----|
| SDK API changed | `AttributeError` on result | Check SDK source for actual attributes |
| Worker appears stuck | Only "starting runner..." | Enable `HATCHET_DEBUG=true` |
| Can't connect from Docker | gRPC unavailable | Set `HATCHET_CLIENT_HOST_PORT` and `_SERVER_URL` |
| Workflow not found | 400 Bad Request | Use exact case-sensitive workflow name |
| Tuple iteration error | `'tuple' has no attribute` | Access `.data` on Pydantic response models |
| DB conflicts | "another operation in progress" | Fresh DB connection per task |
| Old code running | Fixed code but same error | Force rebuild container, clear `__pycache__` |
---
## Files Most Likely to Need Hatchet-Specific Handling
- `server/reflector/hatchet/workflows/*.py` - Workflow and task definitions
- `server/reflector/hatchet/client.py` - Client wrapper, SDK version compatibility
- `server/reflector/hatchet/run_workers.py` - Worker startup and registration
- `server/reflector/hatchet/progress.py` - Progress emission for UI updates
- `docker-compose.yml` - Hatchet infrastructure services

View File

@@ -40,6 +40,7 @@ dependencies = [
"webvtt-py>=0.5.0",
"icalendar>=6.0.0",
"conductor-python>=1.2.3",
"hatchet-sdk>=0.47.0",
]
[dependency-groups]
@@ -135,5 +136,10 @@ select = [
"reflector/processors/summary/summary_builder.py" = ["E501"]
"gpu/modal_deployments/**.py" = ["PLC0415"]
"reflector/tools/**.py" = ["PLC0415"]
"reflector/hatchet/run_workers.py" = ["PLC0415"]
"reflector/hatchet/workflows/**.py" = ["PLC0415"]
"reflector/conductor/run_workers.py" = ["PLC0415"]
"reflector/conductor/workers/**.py" = ["PLC0415"]
"reflector/views/hatchet.py" = ["PLC0415"]
"migrations/versions/**.py" = ["PLC0415"]
"tests/**.py" = ["PLC0415"]

View File

@@ -14,6 +14,7 @@ from reflector.metrics import metrics_init
from reflector.settings import settings
from reflector.views.conductor import router as conductor_router
from reflector.views.daily import router as daily_router
from reflector.views.hatchet import router as hatchet_router
from reflector.views.meetings import router as meetings_router
from reflector.views.rooms import router as rooms_router
from reflector.views.rtc_offer import router as rtc_offer_router
@@ -100,6 +101,7 @@ app.include_router(zulip_router, prefix="/v1")
app.include_router(whereby_router, prefix="/v1")
app.include_router(daily_router, prefix="/v1/daily")
app.include_router(conductor_router, prefix="/v1")
app.include_router(hatchet_router, prefix="/v1")
add_pagination(app)
# prepare celery

View File

@@ -0,0 +1,6 @@
"""Hatchet workflow orchestration for Reflector."""
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress, emit_progress_async
__all__ = ["HatchetClientManager", "emit_progress", "emit_progress_async"]

View File

@@ -0,0 +1,48 @@
"""Hatchet Python client wrapper."""
from hatchet_sdk import Hatchet
from reflector.settings import settings
class HatchetClientManager:
"""Singleton manager for Hatchet client connections."""
_instance: Hatchet | None = None
@classmethod
def get_client(cls) -> Hatchet:
"""Get or create the Hatchet client."""
if cls._instance is None:
if not settings.HATCHET_CLIENT_TOKEN:
raise ValueError("HATCHET_CLIENT_TOKEN must be set")
cls._instance = Hatchet(
debug=settings.HATCHET_DEBUG,
)
return cls._instance
@classmethod
async def start_workflow(
cls, workflow_name: str, input_data: dict, key: str | None = None
) -> str:
"""Start a workflow and return the workflow run ID."""
client = cls.get_client()
result = await client.runs.aio_create(
workflow_name,
input_data,
)
# SDK v1.21+ returns V1WorkflowRunDetails with run.metadata.id
return result.run.metadata.id
@classmethod
async def get_workflow_status(cls, workflow_run_id: str) -> dict:
"""Get the current status of a workflow run."""
client = cls.get_client()
run = await client.runs.aio_get(workflow_run_id)
return run.to_dict()
@classmethod
def reset(cls) -> None:
"""Reset the client instance (for testing)."""
cls._instance = None

View File

@@ -0,0 +1,120 @@
"""Progress event emission for Hatchet workers."""
import asyncio
from typing import Literal
from reflector.db.transcripts import PipelineProgressData
from reflector.logger import logger
from reflector.ws_manager import get_ws_manager
# Step mapping for progress tracking (matches Conductor pipeline)
PIPELINE_STEPS = {
"get_recording": 1,
"get_participants": 2,
"pad_track": 3, # Fork tasks share same step
"mixdown_tracks": 4,
"generate_waveform": 5,
"transcribe_track": 6, # Fork tasks share same step
"merge_transcripts": 7,
"detect_topics": 8,
"generate_title": 9, # Fork tasks share same step
"generate_summary": 9, # Fork tasks share same step
"finalize": 10,
"cleanup_consent": 11,
"post_zulip": 12,
"send_webhook": 13,
}
TOTAL_STEPS = 13
async def _emit_progress_async(
transcript_id: str,
step: str,
status: Literal["pending", "in_progress", "completed", "failed"],
workflow_id: str | None = None,
) -> None:
"""Async implementation of progress emission."""
ws_manager = get_ws_manager()
step_index = PIPELINE_STEPS.get(step, 0)
data = PipelineProgressData(
workflow_id=workflow_id,
current_step=step,
step_index=step_index,
total_steps=TOTAL_STEPS,
step_status=status,
)
await ws_manager.send_json(
room_id=f"ts:{transcript_id}",
message={
"event": "PIPELINE_PROGRESS",
"data": data.model_dump(),
},
)
logger.debug(
"[Hatchet Progress] Emitted",
transcript_id=transcript_id,
step=step,
status=status,
step_index=step_index,
)
def emit_progress(
transcript_id: str,
step: str,
status: Literal["pending", "in_progress", "completed", "failed"],
workflow_id: str | None = None,
) -> None:
"""Emit a pipeline progress event (sync wrapper for Hatchet workers).
Args:
transcript_id: The transcript ID to emit progress for
step: The current step name (e.g., "transcribe_track")
status: The step status
workflow_id: Optional workflow run ID
"""
try:
# Get or create event loop for sync context
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
if loop is not None and loop.is_running():
# Already in async context, schedule the coroutine
asyncio.create_task(
_emit_progress_async(transcript_id, step, status, workflow_id)
)
else:
# Not in async context, run synchronously
asyncio.run(_emit_progress_async(transcript_id, step, status, workflow_id))
except Exception as e:
# Progress emission should never break the pipeline
logger.warning(
"[Hatchet Progress] Failed to emit progress event",
error=str(e),
transcript_id=transcript_id,
step=step,
)
async def emit_progress_async(
transcript_id: str,
step: str,
status: Literal["pending", "in_progress", "completed", "failed"],
workflow_id: str | None = None,
) -> None:
"""Async version of emit_progress for use in async Hatchet tasks."""
try:
await _emit_progress_async(transcript_id, step, status, workflow_id)
except Exception as e:
logger.warning(
"[Hatchet Progress] Failed to emit progress event",
error=str(e),
transcript_id=transcript_id,
step=step,
)

View File

@@ -0,0 +1,59 @@
"""
Run Hatchet workers for the diarization pipeline.
Usage:
uv run -m reflector.hatchet.run_workers
# Or via docker:
docker compose exec server uv run -m reflector.hatchet.run_workers
"""
import signal
import sys
from reflector.logger import logger
from reflector.settings import settings
def main() -> None:
"""Start Hatchet worker polling."""
if not settings.HATCHET_ENABLED:
logger.error("HATCHET_ENABLED is False, not starting workers")
sys.exit(1)
if not settings.HATCHET_CLIENT_TOKEN:
logger.error("HATCHET_CLIENT_TOKEN is not set")
sys.exit(1)
logger.info(
"Starting Hatchet workers",
debug=settings.HATCHET_DEBUG,
)
# Import workflows to register them
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.workflows import diarization_pipeline, track_workflow
hatchet = HatchetClientManager.get_client()
# Create worker with both workflows
worker = hatchet.worker(
"reflector-diarization-worker",
workflows=[diarization_pipeline, track_workflow],
)
# Handle graceful shutdown
def shutdown_handler(signum: int, frame) -> None:
logger.info("Received shutdown signal, stopping workers...")
# Worker cleanup happens automatically on exit
sys.exit(0)
signal.signal(signal.SIGINT, shutdown_handler)
signal.signal(signal.SIGTERM, shutdown_handler)
logger.info("Starting Hatchet worker polling...")
worker.start()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,14 @@
"""Hatchet workflow definitions."""
from reflector.hatchet.workflows.diarization_pipeline import (
PipelineInput,
diarization_pipeline,
)
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
__all__ = [
"diarization_pipeline",
"track_workflow",
"PipelineInput",
"TrackInput",
]

View File

@@ -0,0 +1,808 @@
"""
Hatchet main workflow: DiarizationPipeline
Multitrack diarization pipeline for Daily.co recordings.
Orchestrates the full processing flow from recording metadata to final transcript.
"""
import asyncio
import tempfile
from datetime import timedelta
from pathlib import Path
import av
from hatchet_sdk import Context
from pydantic import BaseModel
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress_async
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
from reflector.logger import logger
# Audio constants
OPUS_STANDARD_SAMPLE_RATE = 48000
OPUS_DEFAULT_BIT_RATE = 64000
PRESIGNED_URL_EXPIRATION_SECONDS = 7200
class PipelineInput(BaseModel):
"""Input to trigger the diarization pipeline."""
recording_id: str | None
room_name: str | None
tracks: list[dict] # List of {"s3_key": str}
bucket_name: str
transcript_id: str
room_id: str | None = None
# Get hatchet client and define workflow
hatchet = HatchetClientManager.get_client()
diarization_pipeline = hatchet.workflow(
name="DiarizationPipeline", input_validator=PipelineInput
)
# ============================================================================
# Helper Functions
# ============================================================================
async def _get_fresh_db_connection():
"""Create fresh database connection for subprocess."""
import databases
from reflector.db import _database_context
from reflector.settings import settings
_database_context.set(None)
db = databases.Database(settings.DATABASE_URL)
_database_context.set(db)
await db.connect()
return db
async def _close_db_connection(db):
"""Close database connection."""
from reflector.db import _database_context
await db.disconnect()
_database_context.set(None)
def _get_storage():
"""Create fresh storage instance."""
from reflector.settings import settings
from reflector.storage.storage_aws import AwsStorage
return AwsStorage(
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
)
# ============================================================================
# Pipeline Tasks
# ============================================================================
@diarization_pipeline.task(execution_timeout=timedelta(seconds=60), retries=3)
async def get_recording(input: PipelineInput, ctx: Context) -> dict:
"""Fetch recording metadata from Daily.co API."""
logger.info("[Hatchet] get_recording", recording_id=input.recording_id)
await emit_progress_async(
input.transcript_id, "get_recording", "in_progress", ctx.workflow_run_id
)
try:
from reflector.dailyco_api.client import DailyApiClient
from reflector.settings import settings
if not input.recording_id:
# No recording_id in reprocess path - return minimal data
await emit_progress_async(
input.transcript_id, "get_recording", "completed", ctx.workflow_run_id
)
return {
"id": None,
"mtg_session_id": None,
"room_name": input.room_name,
"duration": 0,
}
if not settings.DAILY_API_KEY:
raise ValueError("DAILY_API_KEY not configured")
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
recording = await client.get_recording(input.recording_id)
logger.info(
"[Hatchet] get_recording complete",
recording_id=input.recording_id,
room_name=recording.room_name,
duration=recording.duration,
)
await emit_progress_async(
input.transcript_id, "get_recording", "completed", ctx.workflow_run_id
)
return {
"id": recording.id,
"mtg_session_id": recording.mtgSessionId,
"room_name": recording.room_name,
"duration": recording.duration,
}
except Exception as e:
logger.error("[Hatchet] get_recording failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "get_recording", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[get_recording], execution_timeout=timedelta(seconds=60), retries=3
)
async def get_participants(input: PipelineInput, ctx: Context) -> dict:
"""Fetch participant list from Daily.co API."""
logger.info("[Hatchet] get_participants", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "get_participants", "in_progress", ctx.workflow_run_id
)
try:
recording_data = ctx.task_output(get_recording)
mtg_session_id = recording_data.get("mtg_session_id")
from reflector.dailyco_api.client import DailyApiClient
from reflector.settings import settings
if not mtg_session_id or not settings.DAILY_API_KEY:
# Return empty participants if no session ID
await emit_progress_async(
input.transcript_id,
"get_participants",
"completed",
ctx.workflow_run_id,
)
return {"participants": [], "num_tracks": len(input.tracks)}
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
participants = await client.get_meeting_participants(mtg_session_id)
participants_list = [
{"participant_id": p.participant_id, "user_name": p.user_name}
for p in participants.data
]
logger.info(
"[Hatchet] get_participants complete",
participant_count=len(participants_list),
)
await emit_progress_async(
input.transcript_id, "get_participants", "completed", ctx.workflow_run_id
)
return {"participants": participants_list, "num_tracks": len(input.tracks)}
except Exception as e:
logger.error("[Hatchet] get_participants failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "get_participants", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[get_participants], execution_timeout=timedelta(seconds=600), retries=3
)
async def process_tracks(input: PipelineInput, ctx: Context) -> dict:
"""Spawn child workflows for each track (dynamic fan-out).
Processes pad_track and transcribe_track for each audio track in parallel.
"""
logger.info(
"[Hatchet] process_tracks",
num_tracks=len(input.tracks),
transcript_id=input.transcript_id,
)
# Spawn child workflows for each track
child_coroutines = [
track_workflow.aio_run(
TrackInput(
track_index=i,
s3_key=track["s3_key"],
bucket_name=input.bucket_name,
transcript_id=input.transcript_id,
)
)
for i, track in enumerate(input.tracks)
]
# Wait for all child workflows to complete
results = await asyncio.gather(*child_coroutines)
# Collect all track results
all_words = []
padded_urls = []
for result in results:
transcribe_result = result.get("transcribe_track", {})
all_words.extend(transcribe_result.get("words", []))
pad_result = result.get("pad_track", {})
padded_urls.append(pad_result.get("padded_url"))
# Sort words by start time
all_words.sort(key=lambda w: w.get("start", 0))
logger.info(
"[Hatchet] process_tracks complete",
num_tracks=len(input.tracks),
total_words=len(all_words),
)
return {
"all_words": all_words,
"padded_urls": padded_urls,
"word_count": len(all_words),
"num_tracks": len(input.tracks),
}
@diarization_pipeline.task(
parents=[process_tracks], execution_timeout=timedelta(seconds=300), retries=3
)
async def mixdown_tracks(input: PipelineInput, ctx: Context) -> dict:
"""Mix all padded tracks into single audio file."""
logger.info("[Hatchet] mixdown_tracks", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "mixdown_tracks", "in_progress", ctx.workflow_run_id
)
try:
track_data = ctx.task_output(process_tracks)
padded_urls = track_data.get("padded_urls", [])
if not padded_urls:
raise ValueError("No padded tracks to mixdown")
storage = _get_storage()
# Download all tracks and mix
temp_inputs = []
try:
for i, url in enumerate(padded_urls):
if not url:
continue
temp_input = tempfile.NamedTemporaryFile(suffix=".webm", delete=False)
temp_inputs.append(temp_input.name)
# Download track
import httpx
async with httpx.AsyncClient() as client:
response = await client.get(url)
response.raise_for_status()
with open(temp_input.name, "wb") as f:
f.write(response.content)
# Mix using PyAV amix filter
if len(temp_inputs) == 0:
raise ValueError("No valid tracks to mixdown")
output_path = tempfile.mktemp(suffix=".mp3")
try:
# Use ffmpeg-style mixing via PyAV
containers = [av.open(path) for path in temp_inputs]
# Get the longest duration
max_duration = 0.0
for container in containers:
if container.duration:
duration = float(container.duration * av.time_base)
max_duration = max(max_duration, duration)
# Close containers for now
for container in containers:
container.close()
# Use subprocess for mixing (simpler than complex PyAV graph)
import subprocess
# Build ffmpeg command
cmd = ["ffmpeg", "-y"]
for path in temp_inputs:
cmd.extend(["-i", path])
# Build filter for N inputs
n = len(temp_inputs)
filter_str = f"amix=inputs={n}:duration=longest:normalize=0"
cmd.extend(["-filter_complex", filter_str])
cmd.extend(["-ac", "2", "-ar", "48000", "-b:a", "128k", output_path])
subprocess.run(cmd, check=True, capture_output=True)
# Upload mixed file
file_size = Path(output_path).stat().st_size
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/mixed.mp3"
with open(output_path, "rb") as mixed_file:
await storage.put_file(storage_path, mixed_file)
logger.info(
"[Hatchet] mixdown_tracks uploaded",
key=storage_path,
size=file_size,
)
finally:
Path(output_path).unlink(missing_ok=True)
finally:
for path in temp_inputs:
Path(path).unlink(missing_ok=True)
await emit_progress_async(
input.transcript_id, "mixdown_tracks", "completed", ctx.workflow_run_id
)
return {
"audio_key": storage_path,
"duration": max_duration,
"tracks_mixed": len(temp_inputs),
}
except Exception as e:
logger.error("[Hatchet] mixdown_tracks failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "mixdown_tracks", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[mixdown_tracks], execution_timeout=timedelta(seconds=120), retries=3
)
async def generate_waveform(input: PipelineInput, ctx: Context) -> dict:
"""Generate audio waveform visualization."""
logger.info("[Hatchet] generate_waveform", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "generate_waveform", "in_progress", ctx.workflow_run_id
)
try:
mixdown_data = ctx.task_output(mixdown_tracks)
audio_key = mixdown_data.get("audio_key")
storage = _get_storage()
audio_url = await storage.get_file_url(
audio_key,
operation="get_object",
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
)
from reflector.pipelines.waveform_helpers import generate_waveform_data
waveform = await generate_waveform_data(audio_url)
# Store waveform
waveform_key = f"file_pipeline_hatchet/{input.transcript_id}/waveform.json"
import json
waveform_bytes = json.dumps(waveform).encode()
import io
await storage.put_file(waveform_key, io.BytesIO(waveform_bytes))
logger.info("[Hatchet] generate_waveform complete")
await emit_progress_async(
input.transcript_id, "generate_waveform", "completed", ctx.workflow_run_id
)
return {"waveform_key": waveform_key}
except Exception as e:
logger.error("[Hatchet] generate_waveform failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "generate_waveform", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[mixdown_tracks], execution_timeout=timedelta(seconds=300), retries=3
)
async def detect_topics(input: PipelineInput, ctx: Context) -> dict:
"""Detect topics using LLM."""
logger.info("[Hatchet] detect_topics", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "detect_topics", "in_progress", ctx.workflow_run_id
)
try:
track_data = ctx.task_output(process_tracks)
words = track_data.get("all_words", [])
from reflector.pipelines import topic_processing
from reflector.processors.types import Transcript as TranscriptType
from reflector.processors.types import Word
# Convert word dicts to Word objects
word_objects = [Word(**w) for w in words]
transcript = TranscriptType(words=word_objects)
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
async def noop_callback(t):
pass
topics = await topic_processing.detect_topics(
transcript,
"en", # target_language
on_topic_callback=noop_callback,
empty_pipeline=empty_pipeline,
)
topics_list = [t.model_dump() for t in topics]
logger.info("[Hatchet] detect_topics complete", topic_count=len(topics_list))
await emit_progress_async(
input.transcript_id, "detect_topics", "completed", ctx.workflow_run_id
)
return {"topics": topics_list}
except Exception as e:
logger.error("[Hatchet] detect_topics failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "detect_topics", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[detect_topics], execution_timeout=timedelta(seconds=120), retries=3
)
async def generate_title(input: PipelineInput, ctx: Context) -> dict:
"""Generate meeting title using LLM."""
logger.info("[Hatchet] generate_title", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "generate_title", "in_progress", ctx.workflow_run_id
)
try:
topics_data = ctx.task_output(detect_topics)
topics = topics_data.get("topics", [])
from reflector.pipelines import topic_processing
from reflector.processors.types import Topic
topic_objects = [Topic(**t) for t in topics]
title = await topic_processing.generate_title(topic_objects)
logger.info("[Hatchet] generate_title complete", title=title)
await emit_progress_async(
input.transcript_id, "generate_title", "completed", ctx.workflow_run_id
)
return {"title": title}
except Exception as e:
logger.error("[Hatchet] generate_title failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "generate_title", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[detect_topics], execution_timeout=timedelta(seconds=300), retries=3
)
async def generate_summary(input: PipelineInput, ctx: Context) -> dict:
"""Generate meeting summary using LLM."""
logger.info("[Hatchet] generate_summary", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "generate_summary", "in_progress", ctx.workflow_run_id
)
try:
track_data = ctx.task_output(process_tracks)
topics_data = ctx.task_output(detect_topics)
words = track_data.get("all_words", [])
topics = topics_data.get("topics", [])
from reflector.pipelines import topic_processing
from reflector.processors.types import Topic, Word
from reflector.processors.types import Transcript as TranscriptType
word_objects = [Word(**w) for w in words]
transcript = TranscriptType(words=word_objects)
topic_objects = [Topic(**t) for t in topics]
summary, short_summary = await topic_processing.generate_summary(
transcript, topic_objects
)
logger.info("[Hatchet] generate_summary complete")
await emit_progress_async(
input.transcript_id, "generate_summary", "completed", ctx.workflow_run_id
)
return {"summary": summary, "short_summary": short_summary}
except Exception as e:
logger.error("[Hatchet] generate_summary failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "generate_summary", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[generate_waveform, generate_title, generate_summary],
execution_timeout=timedelta(seconds=60),
retries=3,
)
async def finalize(input: PipelineInput, ctx: Context) -> dict:
"""Finalize transcript status and update database."""
logger.info("[Hatchet] finalize", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "finalize", "in_progress", ctx.workflow_run_id
)
try:
title_data = ctx.task_output(generate_title)
summary_data = ctx.task_output(generate_summary)
mixdown_data = ctx.task_output(mixdown_tracks)
track_data = ctx.task_output(process_tracks)
title = title_data.get("title", "")
summary = summary_data.get("summary", "")
short_summary = summary_data.get("short_summary", "")
duration = mixdown_data.get("duration", 0)
all_words = track_data.get("all_words", [])
db = await _get_fresh_db_connection()
try:
from reflector.db.transcripts import transcripts_controller
from reflector.processors.types import Word
transcript = await transcripts_controller.get_by_id(input.transcript_id)
if transcript is None:
raise ValueError(
f"Transcript {input.transcript_id} not found in database"
)
# Convert words back to Word objects for storage
word_objects = [Word(**w) for w in all_words]
await transcripts_controller.update(
transcript,
{
"status": "ended",
"title": title,
"long_summary": summary,
"short_summary": short_summary,
"duration": duration,
"words": word_objects,
},
)
logger.info(
"[Hatchet] finalize complete", transcript_id=input.transcript_id
)
finally:
await _close_db_connection(db)
await emit_progress_async(
input.transcript_id, "finalize", "completed", ctx.workflow_run_id
)
return {"status": "COMPLETED"}
except Exception as e:
logger.error("[Hatchet] finalize failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "finalize", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[finalize], execution_timeout=timedelta(seconds=60), retries=3
)
async def cleanup_consent(input: PipelineInput, ctx: Context) -> dict:
"""Check and handle consent requirements."""
logger.info("[Hatchet] cleanup_consent", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "cleanup_consent", "in_progress", ctx.workflow_run_id
)
try:
db = await _get_fresh_db_connection()
try:
from reflector.db.meetings import meetings_controller
from reflector.db.transcripts import transcripts_controller
transcript = await transcripts_controller.get_by_id(input.transcript_id)
if transcript and transcript.meeting_id:
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
if meeting:
# Check consent logic here
# For now just mark as checked
pass
logger.info(
"[Hatchet] cleanup_consent complete", transcript_id=input.transcript_id
)
finally:
await _close_db_connection(db)
await emit_progress_async(
input.transcript_id, "cleanup_consent", "completed", ctx.workflow_run_id
)
return {"consent_checked": True}
except Exception as e:
logger.error("[Hatchet] cleanup_consent failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "cleanup_consent", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[cleanup_consent], execution_timeout=timedelta(seconds=60), retries=5
)
async def post_zulip(input: PipelineInput, ctx: Context) -> dict:
"""Post notification to Zulip."""
logger.info("[Hatchet] post_zulip", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "post_zulip", "in_progress", ctx.workflow_run_id
)
try:
from reflector.settings import settings
if not settings.ZULIP_REALM:
logger.info("[Hatchet] post_zulip skipped (Zulip not configured)")
await emit_progress_async(
input.transcript_id, "post_zulip", "completed", ctx.workflow_run_id
)
return {"zulip_message_id": None, "skipped": True}
from reflector.zulip import post_transcript_notification
db = await _get_fresh_db_connection()
try:
from reflector.db.transcripts import transcripts_controller
transcript = await transcripts_controller.get_by_id(input.transcript_id)
if transcript:
message_id = await post_transcript_notification(transcript)
logger.info(
"[Hatchet] post_zulip complete", zulip_message_id=message_id
)
else:
message_id = None
finally:
await _close_db_connection(db)
await emit_progress_async(
input.transcript_id, "post_zulip", "completed", ctx.workflow_run_id
)
return {"zulip_message_id": message_id}
except Exception as e:
logger.error("[Hatchet] post_zulip failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "post_zulip", "failed", ctx.workflow_run_id
)
raise
@diarization_pipeline.task(
parents=[post_zulip], execution_timeout=timedelta(seconds=120), retries=30
)
async def send_webhook(input: PipelineInput, ctx: Context) -> dict:
"""Send completion webhook to external service."""
logger.info("[Hatchet] send_webhook", transcript_id=input.transcript_id)
await emit_progress_async(
input.transcript_id, "send_webhook", "in_progress", ctx.workflow_run_id
)
try:
if not input.room_id:
logger.info("[Hatchet] send_webhook skipped (no room_id)")
await emit_progress_async(
input.transcript_id, "send_webhook", "completed", ctx.workflow_run_id
)
return {"webhook_sent": False, "skipped": True}
db = await _get_fresh_db_connection()
try:
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import transcripts_controller
room = await rooms_controller.get_by_id(input.room_id)
transcript = await transcripts_controller.get_by_id(input.transcript_id)
if room and room.webhook_url and transcript:
import httpx
webhook_payload = {
"event": "transcript.completed",
"transcript_id": input.transcript_id,
"title": transcript.title,
"duration": transcript.duration,
}
async with httpx.AsyncClient() as client:
response = await client.post(
room.webhook_url, json=webhook_payload, timeout=30
)
response.raise_for_status()
logger.info(
"[Hatchet] send_webhook complete", status_code=response.status_code
)
await emit_progress_async(
input.transcript_id,
"send_webhook",
"completed",
ctx.workflow_run_id,
)
return {"webhook_sent": True, "response_code": response.status_code}
finally:
await _close_db_connection(db)
await emit_progress_async(
input.transcript_id, "send_webhook", "completed", ctx.workflow_run_id
)
return {"webhook_sent": False, "skipped": True}
except Exception as e:
logger.error("[Hatchet] send_webhook failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "send_webhook", "failed", ctx.workflow_run_id
)
raise

View File

@@ -0,0 +1,337 @@
"""
Hatchet child workflow: TrackProcessing
Handles individual audio track processing: padding and transcription.
Spawned dynamically by the main diarization pipeline for each track.
"""
import math
import tempfile
from datetime import timedelta
from fractions import Fraction
from pathlib import Path
import av
from av.audio.resampler import AudioResampler
from hatchet_sdk import Context
from pydantic import BaseModel
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress_async
from reflector.logger import logger
# Audio constants matching existing pipeline
OPUS_STANDARD_SAMPLE_RATE = 48000
OPUS_DEFAULT_BIT_RATE = 64000
PRESIGNED_URL_EXPIRATION_SECONDS = 7200
class TrackInput(BaseModel):
"""Input for individual track processing."""
track_index: int
s3_key: str
bucket_name: str
transcript_id: str
language: str = "en"
# Get hatchet client and define workflow
hatchet = HatchetClientManager.get_client()
track_workflow = hatchet.workflow(name="TrackProcessing", input_validator=TrackInput)
def _extract_stream_start_time_from_container(container, track_idx: int) -> float:
"""Extract meeting-relative start time from WebM stream metadata.
Uses PyAV to read stream.start_time from WebM container.
More accurate than filename timestamps by ~209ms due to network/encoding delays.
"""
start_time_seconds = 0.0
try:
audio_streams = [s for s in container.streams if s.type == "audio"]
stream = audio_streams[0] if audio_streams else container.streams[0]
# 1) Try stream-level start_time (most reliable for Daily.co tracks)
if stream.start_time is not None and stream.time_base is not None:
start_time_seconds = float(stream.start_time * stream.time_base)
# 2) Fallback to container-level start_time
if (start_time_seconds <= 0) and (container.start_time is not None):
start_time_seconds = float(container.start_time * av.time_base)
# 3) Fallback to first packet DTS
if start_time_seconds <= 0:
for packet in container.demux(stream):
if packet.dts is not None:
start_time_seconds = float(packet.dts * stream.time_base)
break
except Exception as e:
logger.warning(
"PyAV metadata read failed; assuming 0 start_time",
track_idx=track_idx,
error=str(e),
)
start_time_seconds = 0.0
logger.info(
f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s",
track_idx=track_idx,
)
return start_time_seconds
def _apply_audio_padding_to_file(
in_container,
output_path: str,
start_time_seconds: float,
track_idx: int,
) -> None:
"""Apply silence padding to audio track using PyAV filter graph."""
delay_ms = math.floor(start_time_seconds * 1000)
logger.info(
f"Padding track {track_idx} with {delay_ms}ms delay using PyAV",
track_idx=track_idx,
delay_ms=delay_ms,
)
with av.open(output_path, "w", format="webm") as out_container:
in_stream = next((s for s in in_container.streams if s.type == "audio"), None)
if in_stream is None:
raise Exception("No audio stream in input")
out_stream = out_container.add_stream("libopus", rate=OPUS_STANDARD_SAMPLE_RATE)
out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE
graph = av.filter.Graph()
abuf_args = (
f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
f"sample_fmt=s16:"
f"channel_layout=stereo"
)
src = graph.add("abuffer", args=abuf_args, name="src")
aresample_f = graph.add("aresample", args="async=1", name="ares")
delays_arg = f"{delay_ms}|{delay_ms}"
adelay_f = graph.add("adelay", args=f"delays={delays_arg}:all=1", name="delay")
sink = graph.add("abuffersink", name="sink")
src.link_to(aresample_f)
aresample_f.link_to(adelay_f)
adelay_f.link_to(sink)
graph.configure()
resampler = AudioResampler(
format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
)
for frame in in_container.decode(in_stream):
out_frames = resampler.resample(frame) or []
for rframe in out_frames:
rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
src.push(rframe)
while True:
try:
f_out = sink.pull()
except Exception:
break
f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
for packet in out_stream.encode(f_out):
out_container.mux(packet)
# Flush remaining frames
src.push(None)
while True:
try:
f_out = sink.pull()
except Exception:
break
f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
for packet in out_stream.encode(f_out):
out_container.mux(packet)
for packet in out_stream.encode(None):
out_container.mux(packet)
@track_workflow.task(execution_timeout=timedelta(seconds=300), retries=3)
async def pad_track(input: TrackInput, ctx: Context) -> dict:
"""Pad single audio track with silence for alignment.
Extracts stream.start_time from WebM container metadata and applies
silence padding using PyAV filter graph (adelay).
"""
logger.info(
"[Hatchet] pad_track",
track_index=input.track_index,
s3_key=input.s3_key,
transcript_id=input.transcript_id,
)
await emit_progress_async(
input.transcript_id, "pad_track", "in_progress", ctx.workflow_run_id
)
try:
# Create fresh storage instance to avoid aioboto3 fork issues
from reflector.settings import settings
from reflector.storage.storage_aws import AwsStorage
storage = AwsStorage(
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
)
# Get presigned URL for source file
source_url = await storage.get_file_url(
input.s3_key,
operation="get_object",
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
bucket=input.bucket_name,
)
# Open container and extract start time
with av.open(source_url) as in_container:
start_time_seconds = _extract_stream_start_time_from_container(
in_container, input.track_index
)
# If no padding needed, return original URL
if start_time_seconds <= 0:
logger.info(
f"Track {input.track_index} requires no padding",
track_index=input.track_index,
)
await emit_progress_async(
input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
)
return {
"padded_url": source_url,
"size": 0,
"track_index": input.track_index,
}
# Create temp file for padded output
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:
temp_path = temp_file.name
try:
_apply_audio_padding_to_file(
in_container, temp_path, start_time_seconds, input.track_index
)
file_size = Path(temp_path).stat().st_size
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
logger.info(
f"About to upload padded track",
key=storage_path,
size=file_size,
)
with open(temp_path, "rb") as padded_file:
await storage.put_file(storage_path, padded_file)
logger.info(
f"Uploaded padded track to S3",
key=storage_path,
size=file_size,
)
finally:
Path(temp_path).unlink(missing_ok=True)
# Get presigned URL for padded file
padded_url = await storage.get_file_url(
storage_path,
operation="get_object",
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
)
logger.info(
"[Hatchet] pad_track complete",
track_index=input.track_index,
padded_url=padded_url[:50] + "...",
)
await emit_progress_async(
input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
)
return {
"padded_url": padded_url,
"size": file_size,
"track_index": input.track_index,
}
except Exception as e:
logger.error("[Hatchet] pad_track failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "pad_track", "failed", ctx.workflow_run_id
)
raise
@track_workflow.task(
parents=[pad_track], execution_timeout=timedelta(seconds=600), retries=3
)
async def transcribe_track(input: TrackInput, ctx: Context) -> dict:
"""Transcribe audio track using GPU (Modal.com) or local Whisper."""
logger.info(
"[Hatchet] transcribe_track",
track_index=input.track_index,
language=input.language,
)
await emit_progress_async(
input.transcript_id, "transcribe_track", "in_progress", ctx.workflow_run_id
)
try:
pad_result = ctx.task_output(pad_track)
audio_url = pad_result.get("padded_url")
if not audio_url:
raise ValueError("Missing padded_url from pad_track")
from reflector.pipelines.transcription_helpers import (
transcribe_file_with_processor,
)
transcript = await transcribe_file_with_processor(audio_url, input.language)
# Tag all words with speaker index
words = []
for word in transcript.words:
word_dict = word.model_dump()
word_dict["speaker"] = input.track_index
words.append(word_dict)
logger.info(
"[Hatchet] transcribe_track complete",
track_index=input.track_index,
word_count=len(words),
)
await emit_progress_async(
input.transcript_id, "transcribe_track", "completed", ctx.workflow_run_id
)
return {
"words": words,
"track_index": input.track_index,
}
except Exception as e:
logger.error("[Hatchet] transcribe_track failed", error=str(e), exc_info=True)
await emit_progress_async(
input.transcript_id, "transcribe_track", "failed", ctx.workflow_run_id
)
raise

View File

@@ -15,6 +15,7 @@ from celery.result import AsyncResult
from reflector.conductor.client import ConductorClientManager
from reflector.db.recordings import recordings_controller
from reflector.db.transcripts import Transcript
from reflector.hatchet.client import HatchetClientManager
from reflector.logger import logger
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
from reflector.pipelines.main_multitrack_pipeline import (
@@ -156,8 +157,47 @@ async def prepare_transcript_processing(
def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult | None:
if isinstance(config, MultitrackProcessingConfig):
# Start Conductor workflow if enabled
if settings.CONDUCTOR_ENABLED:
# Start durable workflow if enabled (Hatchet or Conductor)
durable_started = False
if settings.HATCHET_ENABLED:
import asyncio
async def _start_hatchet():
return await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": config.recording_id,
"room_name": None, # Not available in reprocess path
"tracks": [{"s3_key": k} for k in config.track_keys],
"bucket_name": config.bucket_name,
"transcript_id": config.transcript_id,
"room_id": config.room_id,
},
)
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
if loop and loop.is_running():
# Already in async context
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor() as pool:
workflow_id = pool.submit(asyncio.run, _start_hatchet()).result()
else:
workflow_id = asyncio.run(_start_hatchet())
logger.info(
"Started Hatchet workflow (reprocess)",
workflow_id=workflow_id,
transcript_id=config.transcript_id,
)
durable_started = True
elif settings.CONDUCTOR_ENABLED:
workflow_id = ConductorClientManager.start_workflow(
name="diarization_pipeline",
version=1,
@@ -175,11 +215,13 @@ def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult | No
workflow_id=workflow_id,
transcript_id=config.transcript_id,
)
durable_started = True
if not settings.CONDUCTOR_SHADOW_MODE:
return None # Conductor-only, no Celery result
# If durable workflow started and not in shadow mode, skip Celery
if durable_started and not settings.DURABLE_WORKFLOW_SHADOW_MODE:
return None
# Celery pipeline (shadow mode or Conductor disabled)
# Celery pipeline (shadow mode or durable workflows disabled)
return task_pipeline_multitrack_process.delay(
transcript_id=config.transcript_id,
bucket_name=config.bucket_name,

View File

@@ -150,11 +150,34 @@ class Settings(BaseSettings):
ZULIP_API_KEY: str | None = None
ZULIP_BOT_EMAIL: str | None = None
# Durable workflow orchestration
# Provider: "hatchet" or "conductor" (or "none" to disable)
DURABLE_WORKFLOW_PROVIDER: str = "none"
DURABLE_WORKFLOW_SHADOW_MODE: bool = False # Run both provider + Celery
# Conductor workflow orchestration
CONDUCTOR_SERVER_URL: str = "http://conductor:8080/api"
CONDUCTOR_DEBUG: bool = False
CONDUCTOR_ENABLED: bool = False
CONDUCTOR_SHADOW_MODE: bool = False
# Hatchet workflow orchestration
HATCHET_CLIENT_TOKEN: str | None = None
HATCHET_CLIENT_TLS_STRATEGY: str = "none" # none, tls, mtls
HATCHET_DEBUG: bool = False
@property
def CONDUCTOR_ENABLED(self) -> bool:
"""Legacy compatibility: True if Conductor is the active provider."""
return self.DURABLE_WORKFLOW_PROVIDER == "conductor"
@property
def HATCHET_ENABLED(self) -> bool:
"""True if Hatchet is the active provider."""
return self.DURABLE_WORKFLOW_PROVIDER == "hatchet"
@property
def CONDUCTOR_SHADOW_MODE(self) -> bool:
"""Legacy compatibility for shadow mode."""
return self.DURABLE_WORKFLOW_SHADOW_MODE and self.CONDUCTOR_ENABLED
settings = Settings()

View File

@@ -0,0 +1,57 @@
"""Hatchet health and status endpoints."""
from fastapi import APIRouter
from reflector.settings import settings
router = APIRouter(prefix="/hatchet", tags=["hatchet"])
@router.get("/health")
async def hatchet_health():
"""Check Hatchet connectivity and status."""
if not settings.HATCHET_ENABLED:
return {"status": "disabled", "connected": False}
if not settings.HATCHET_CLIENT_TOKEN:
return {
"status": "unhealthy",
"connected": False,
"error": "HATCHET_CLIENT_TOKEN not configured",
}
try:
from reflector.hatchet.client import HatchetClientManager
# Get client to verify token is valid
client = HatchetClientManager.get_client()
# Try to get the client's gRPC connection status
# The SDK doesn't have a simple health check, so we just verify we can create the client
if client is not None:
return {"status": "healthy", "connected": True}
else:
return {
"status": "unhealthy",
"connected": False,
"error": "Failed to create client",
}
except ValueError as e:
return {"status": "unhealthy", "connected": False, "error": str(e)}
except Exception as e:
return {"status": "unhealthy", "connected": False, "error": str(e)}
@router.get("/workflow/{workflow_run_id}")
async def get_workflow_status(workflow_run_id: str):
"""Get the status of a workflow run."""
if not settings.HATCHET_ENABLED:
return {"error": "Hatchet is disabled"}
try:
from reflector.hatchet.client import HatchetClientManager
status = await HatchetClientManager.get_workflow_status(workflow_run_id)
return status
except Exception as e:
return {"error": str(e)}

View File

@@ -286,8 +286,34 @@ async def _process_multitrack_recording_inner(
room_id=room.id,
)
# Start Conductor workflow if enabled
if settings.CONDUCTOR_ENABLED:
# Start durable workflow if enabled (Hatchet or Conductor)
durable_started = False
if settings.HATCHET_ENABLED:
from reflector.hatchet.client import HatchetClientManager # noqa: PLC0415
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording_id,
"room_name": daily_room_name,
"tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id,
},
)
logger.info(
"Started Hatchet workflow",
workflow_id=workflow_id,
transcript_id=transcript.id,
)
# Store workflow_id on recording for status tracking
await recordings_controller.update(recording, {"workflow_id": workflow_id})
durable_started = True
elif settings.CONDUCTOR_ENABLED:
from reflector.conductor.client import ConductorClientManager # noqa: PLC0415
workflow_id = ConductorClientManager.start_workflow(
@@ -310,11 +336,13 @@ async def _process_multitrack_recording_inner(
# Store workflow_id on recording for status tracking
await recordings_controller.update(recording, {"workflow_id": workflow_id})
durable_started = True
if not settings.CONDUCTOR_SHADOW_MODE:
return # Don't trigger Celery
# If durable workflow started and not in shadow mode, skip Celery
if durable_started and not settings.DURABLE_WORKFLOW_SHADOW_MODE:
return
# Celery pipeline (runs when Conductor disabled OR in shadow mode)
# Celery pipeline (runs when durable workflows disabled OR in shadow mode)
task_pipeline_multitrack_process.delay(
transcript_id=transcript.id,
bucket_name=bucket_name,

View File

@@ -9,6 +9,8 @@ elif [ "${ENTRYPOINT}" = "beat" ]; then
    uv run celery -A reflector.worker.app beat --loglevel=info
elif [ "${ENTRYPOINT}" = "conductor-worker" ]; then
    uv run python -m reflector.conductor.run_workers
elif [ "${ENTRYPOINT}" = "hatchet-worker" ]; then
    uv run python -m reflector.hatchet.run_workers
else
    echo "Unknown command"
fi

87
server/uv.lock generated
View File

@@ -1218,6 +1218,70 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/bf/c4/a839fcc28bebfa72925d9121c4d39398f77f95bcba0cf26c972a0cfb1de7/griffe-1.8.0-py3-none-any.whl", hash = "sha256:110faa744b2c5c84dd432f4fa9aa3b14805dd9519777dd55e8db214320593b02", size = 132487 },
]
[[package]]
name = "grpcio"
version = "1.76.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/00/8163a1beeb6971f66b4bbe6ac9457b97948beba8dd2fc8e1281dce7f79ec/grpcio-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a", size = 5843567 },
{ url = "https://files.pythonhosted.org/packages/10/c1/934202f5cf335e6d852530ce14ddb0fef21be612ba9ecbbcbd4d748ca32d/grpcio-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c", size = 11848017 },
{ url = "https://files.pythonhosted.org/packages/11/0b/8dec16b1863d74af6eb3543928600ec2195af49ca58b16334972f6775663/grpcio-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465", size = 6412027 },
{ url = "https://files.pythonhosted.org/packages/d7/64/7b9e6e7ab910bea9d46f2c090380bab274a0b91fb0a2fe9b0cd399fffa12/grpcio-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48", size = 7075913 },
{ url = "https://files.pythonhosted.org/packages/68/86/093c46e9546073cefa789bd76d44c5cb2abc824ca62af0c18be590ff13ba/grpcio-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da", size = 6615417 },
{ url = "https://files.pythonhosted.org/packages/f7/b6/5709a3a68500a9c03da6fb71740dcdd5ef245e39266461a03f31a57036d8/grpcio-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397", size = 7199683 },
{ url = "https://files.pythonhosted.org/packages/91/d3/4b1f2bf16ed52ce0b508161df3a2d186e4935379a159a834cb4a7d687429/grpcio-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749", size = 8163109 },
{ url = "https://files.pythonhosted.org/packages/5c/61/d9043f95f5f4cf085ac5dd6137b469d41befb04bd80280952ffa2a4c3f12/grpcio-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00", size = 7626676 },
{ url = "https://files.pythonhosted.org/packages/36/95/fd9a5152ca02d8881e4dd419cdd790e11805979f499a2e5b96488b85cf27/grpcio-1.76.0-cp311-cp311-win32.whl", hash = "sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054", size = 3997688 },
{ url = "https://files.pythonhosted.org/packages/60/9c/5c359c8d4c9176cfa3c61ecd4efe5affe1f38d9bae81e81ac7186b4c9cc8/grpcio-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d", size = 4709315 },
{ url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718 },
{ url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627 },
{ url = "https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167 },
{ url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267 },
{ url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963 },
{ url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484 },
{ url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777 },
{ url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014 },
{ url = "https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750 },
{ url = "https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003 },
]
[[package]]
name = "grpcio-tools"
version = "1.76.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "grpcio" },
{ name = "protobuf" },
{ name = "setuptools" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a0/77/17d60d636ccd86a0db0eccc24d02967bbc3eea86b9db7324b04507ebaa40/grpcio_tools-1.76.0.tar.gz", hash = "sha256:ce80169b5e6adf3e8302f3ebb6cb0c3a9f08089133abca4b76ad67f751f5ad88", size = 5390807 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/73/d1/efbeed1a864c846228c0a3b322e7a2d6545f025e35246aebf96496a36004/grpcio_tools-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6480f6af6833850a85cca1c6b435ef4ffd2ac8e88ef683b4065233827950243", size = 2545931 },
{ url = "https://files.pythonhosted.org/packages/af/8e/f257c0f565d9d44658301238b01a9353bc6f3b272bb4191faacae042579d/grpcio_tools-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c7c23fe1dc09818e16a48853477806ad77dd628b33996f78c05a293065f8210c", size = 5844794 },
{ url = "https://files.pythonhosted.org/packages/c7/c0/6c1e89c67356cb20e19ed670c5099b13e40fd678cac584c778f931666a86/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fcdce7f7770ff052cd4e60161764b0b3498c909bde69138f8bd2e7b24a3ecd8f", size = 2591772 },
{ url = "https://files.pythonhosted.org/packages/c0/10/5f33aa7bc3ddaad0cfd2f4e950ac4f1a310e8d0c7b1358622a581e8b7a2f/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b598fdcebffa931c7da5c9e90b5805fff7e9bc6cf238319358a1b85704c57d33", size = 2905140 },
{ url = "https://files.pythonhosted.org/packages/f4/3e/23e3a52a77368f47188ed83c34eb53866d3ce0f73835b2f6764844ae89eb/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6a9818ff884796b12dcf8db32126e40ec1098cacf5697f27af9cfccfca1c1fae", size = 2656475 },
{ url = "https://files.pythonhosted.org/packages/51/85/a74ae87ec7dbd3d2243881f5c548215aed1148660df7945be3a125ba9a21/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:105e53435b2eed3961da543db44a2a34479d98d18ea248219856f30a0ca4646b", size = 3106158 },
{ url = "https://files.pythonhosted.org/packages/54/d5/a6ed1e5823bc5d55a1eb93e0c14ccee0b75951f914832ab51fb64d522a0f/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:454a1232c7f99410d92fa9923c7851fd4cdaf657ee194eac73ea1fe21b406d6e", size = 3654980 },
{ url = "https://files.pythonhosted.org/packages/f9/29/c05d5501ba156a242079ef71d073116d2509c195b5e5e74c545f0a3a3a69/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ca9ccf667afc0268d45ab202af4556c72e57ea36ebddc93535e1a25cbd4f8aba", size = 3322658 },
{ url = "https://files.pythonhosted.org/packages/02/b6/ee0317b91da19a7537d93c4161cbc2a45a165c8893209b0bbd470d830ffa/grpcio_tools-1.76.0-cp311-cp311-win32.whl", hash = "sha256:a83c87513b708228b4cad7619311daba65b40937745103cadca3db94a6472d9c", size = 993837 },
{ url = "https://files.pythonhosted.org/packages/81/63/9623cadf0406b264737f16d4ed273bb2d65001d87fbd803b565c45d665d1/grpcio_tools-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:2ce5e87ec71f2e4041dce4351f2a8e3b713e3bca6b54c69c3fbc6c7ad1f4c386", size = 1158634 },
{ url = "https://files.pythonhosted.org/packages/4f/ca/a931c1439cabfe305c9afd07e233150cd0565aa062c20d1ee412ed188852/grpcio_tools-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:4ad555b8647de1ebaffb25170249f89057721ffb74f7da96834a07b4855bb46a", size = 2546852 },
{ url = "https://files.pythonhosted.org/packages/4c/07/935cfbb7dccd602723482a86d43fbd992f91e9867bca0056a1e9f348473e/grpcio_tools-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:243af7c8fc7ff22a40a42eb8e0f6f66963c1920b75aae2a2ec503a9c3c8b31c1", size = 5841777 },
{ url = "https://files.pythonhosted.org/packages/e4/92/8fcb5acebdccb647e0fa3f002576480459f6cf81e79692d7b3c4d6e29605/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8207b890f423142cc0025d041fb058f7286318df6a049565c27869d73534228b", size = 2594004 },
{ url = "https://files.pythonhosted.org/packages/9d/ea/64838e8113b7bfd4842b15c815a7354cb63242fdce9d6648d894b5d50897/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3dafa34c2626a6691d103877e8a145f54c34cf6530975f695b396ed2fc5c98f8", size = 2905563 },
{ url = "https://files.pythonhosted.org/packages/a6/d6/53798827d821098219e58518b6db52161ce4985620850aa74ce3795da8a7/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:30f1d2dda6ece285b3d9084e94f66fa721ebdba14ae76b2bc4c581c8a166535c", size = 2656936 },
{ url = "https://files.pythonhosted.org/packages/89/a3/d9c1cefc46a790eec520fe4e70e87279abb01a58b1a3b74cf93f62b824a2/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a889af059dc6dbb82d7b417aa581601316e364fe12eb54c1b8d95311ea50916d", size = 3109811 },
{ url = "https://files.pythonhosted.org/packages/50/75/5997752644b73b5d59377d333a51c8a916606df077f5a487853e37dca289/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c3f2c3c44c56eb5d479ab178f0174595d0a974c37dade442f05bb73dfec02f31", size = 3658786 },
{ url = "https://files.pythonhosted.org/packages/84/47/dcf8380df4bd7931ffba32fc6adc2de635b6569ca27fdec7121733797062/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:479ce02dff684046f909a487d452a83a96b4231f7c70a3b218a075d54e951f56", size = 3325144 },
{ url = "https://files.pythonhosted.org/packages/04/88/ea3e5fdb874d8c2d04488e4b9d05056537fba70915593f0c283ac77df188/grpcio_tools-1.76.0-cp312-cp312-win32.whl", hash = "sha256:9ba4bb539936642a44418b38ee6c3e8823c037699e2cb282bd8a44d76a4be833", size = 993523 },
{ url = "https://files.pythonhosted.org/packages/de/b1/ce7d59d147675ec191a55816be46bc47a343b5ff07279eef5817c09cc53e/grpcio_tools-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:0cd489016766b05f9ed8a6b6596004b62c57d323f49593eac84add032a6d43f7", size = 1158493 },
]
[[package]]
name = "h11"
version = "0.16.0"
@@ -1227,6 +1291,27 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
]
[[package]]
name = "hatchet-sdk"
version = "1.21.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiohttp" },
{ name = "grpcio" },
{ name = "grpcio-tools" },
{ name = "prometheus-client" },
{ name = "protobuf" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "python-dateutil" },
{ name = "tenacity" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7c/df/75dd02e1dc6b99f7151a57f084876c50f739ad4d643b060078f65d51d717/hatchet_sdk-1.21.6.tar.gz", hash = "sha256:b65741324ad721ce57f5fe3f960e2942c4ac2ceec6ca483dd35f84137ff7c46c", size = 219345 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/00/86/e4cd7928bcabd33c634c33d4e878e2454e03f97c87b72947c7ff5762d813/hatchet_sdk-1.21.6-py3-none-any.whl", hash = "sha256:589fba9104a6517e1ba677b9865fa0a20e221863a8c2a2724051198994c11399", size = 529167 },
]
[[package]]
name = "hf-xet"
version = "1.1.5"
@@ -3150,6 +3235,7 @@ dependencies = [
{ name = "databases", extra = ["aiosqlite", "asyncpg"] },
{ name = "fastapi", extra = ["standard"] },
{ name = "fastapi-pagination" },
{ name = "hatchet-sdk" },
{ name = "httpx" },
{ name = "icalendar" },
{ name = "jsonschema" },
@@ -3227,6 +3313,7 @@ requires-dist = [
{ name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
{ name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },
{ name = "fastapi-pagination", specifier = ">=0.12.6" },
{ name = "hatchet-sdk", specifier = ">=0.47.0" },
{ name = "httpx", specifier = ">=0.24.1" },
{ name = "icalendar", specifier = ">=6.0.0" },
{ name = "jsonschema", specifier = ">=4.23.0" },