Mirror of https://github.com/Monadical-SAS/reflector.git (synced 2025-12-21 04:39:06 +00:00)

Commit: hatchet no-mistake
@@ -48,6 +48,20 @@ services:
       conductor:
         condition: service_healthy

+  hatchet-worker:
+    build:
+      context: server
+    volumes:
+      - ./server/:/app/
+      - /app/.venv
+    env_file:
+      - ./server/.env
+    environment:
+      ENTRYPOINT: hatchet-worker
+    depends_on:
+      hatchet:
+        condition: service_healthy
+
   redis:
     image: redis:7.2
     ports:
@@ -81,8 +95,8 @@ services:
   conductor:
     image: conductoross/conductor-standalone:3.15.0
     ports:
-      - 8180:8080
-      - 5001:5000
+      - "8180:8080"
+      - "5001:5000"
     environment:
       - conductor.db.type=memory
     healthcheck:
@@ -91,6 +105,54 @@ services:
       timeout: 10s
       retries: 5

+  hatchet-postgres:
+    image: postgres:15.6
+    command: postgres -c 'max_connections=200'
+    restart: always
+    environment:
+      - POSTGRES_USER=hatchet
+      - POSTGRES_PASSWORD=hatchet
+      - POSTGRES_DB=hatchet
+    ports:
+      - "5436:5432"
+    volumes:
+      - ./data/hatchet-postgres:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -d hatchet -U hatchet"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+      start_period: 10s
+
+  hatchet:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
+    ports:
+      - "8889:8888"
+      - "7078:7077"
+    depends_on:
+      hatchet-postgres:
+        condition: service_healthy
+    environment:
+      DATABASE_URL: "postgresql://hatchet:hatchet@hatchet-postgres:5432/hatchet?sslmode=disable"
+      SERVER_AUTH_COOKIE_DOMAIN: localhost
+      SERVER_AUTH_COOKIE_INSECURE: "t"
+      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
+      SERVER_GRPC_INSECURE: "t"
+      SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
+      SERVER_GRPC_PORT: "7077"
+      SERVER_URL: http://localhost:8889
+      SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
+      SERVER_DEFAULT_ENGINE_VERSION: "V1"
+      SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
+    volumes:
+      - ./data/hatchet-config:/config
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+
 networks:
   default:
     attachable: true

server/HATCHET_LLM_OBSERVATIONS.md (new file, 339 lines)
@@ -0,0 +1,339 @@
# Hatchet Migration - LLM Debugging Observations

This document captures hard-won debugging insights from implementing the multitrack diarization pipeline with Hatchet. These observations are particularly relevant for LLM assistants working on this codebase.

## Architecture Context

- **Hatchet SDK v1.21+** uses async workers with gRPC for task polling
- Workers connect to the Hatchet server via gRPC (port 7077) and trigger workflows via REST (port 8888)
- The `hatchet-lite` image bundles server, engine, and database in one container
- Tasks are decorated with `@workflow.task()` (not `@hatchet.step()` as in older examples)
- Workflow input is validated via Pydantic models passed through the `input_validator=` parameter (see the sketch after this list)
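
A minimal sketch of these conventions with the current SDK style (the names `ExampleInput`, `example_workflow`, and `say_hello` are illustrative only; the real definitions live under `server/reflector/hatchet/workflows/`):

```python
from hatchet_sdk import Context, Hatchet
from pydantic import BaseModel

hatchet = Hatchet()  # reads HATCHET_CLIENT_TOKEN from the environment


class ExampleInput(BaseModel):
    transcript_id: str


# Input payloads are validated against the Pydantic model before any task runs
example_workflow = hatchet.workflow(name="ExampleWorkflow", input_validator=ExampleInput)


@example_workflow.task()
async def say_hello(input: ExampleInput, ctx: Context) -> dict:
    # Each task receives the validated input plus a Context with run metadata
    return {"transcript_id": input.transcript_id}
```
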
---

## Challenge 1: SDK Version API Breaking Changes

### Symptoms

```
AttributeError: 'V1WorkflowRunDetails' object has no attribute 'workflow_run_id'
```

### Root Cause

Hatchet SDK v1.21+ changed the response structure for workflow creation. Old examples show:

```python
result = await client.runs.aio_create(workflow_name, input_data)
return result.workflow_run_id  # OLD - doesn't work
```

### Resolution

Access the run ID through the new nested structure:

```python
result = await client.runs.aio_create(workflow_name, input_data)
return result.run.metadata.id  # NEW - SDK v1.21+
```

### Key Insight

**Don't trust documentation or examples.** Read the SDK source code or use IDE autocomplete to discover actual attribute names. The SDK evolves faster than docs.

---

## Challenge 2: Worker Appears Hung at "starting runner..."

### Symptoms

```
[INFO] Starting Hatchet workers
[INFO] Starting Hatchet worker polling...
[INFO] STARTING HATCHET...
[INFO] starting runner...
# ... nothing else, appears stuck
```

### Root Cause

Without debug mode, the Hatchet SDK doesn't log:

- Workflow registration
- gRPC connection status
- Heartbeat activity
- Action listener acquisition

The worker IS working; you just can't see it.

### Resolution

Always enable debug mode during development:

```bash
HATCHET_DEBUG=true
```

With debug enabled, you'll see the actual activity:

```
[DEBUG] 'worker-name' waiting for ['workflow:task1', 'workflow:task2']
[DEBUG] starting action listener: worker-name
[DEBUG] acquired action listener: 562d00a8-8895-42a1-b65b-46f905c902f9
[DEBUG] sending heartbeat
```

### Key Insight

**Start every Hatchet debugging session with `HATCHET_DEBUG=true`.** Silent workers waste hours of debugging time.
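
In this codebase the flag is wired through the client wrapper rather than set ad hoc; a rough sketch of the pattern used in `server/reflector/hatchet/client.py` (reading the flag straight from the environment here for illustration):

```python
import os

from hatchet_sdk import Hatchet

# The real wrapper reads HATCHET_DEBUG via reflector.settings
debug = os.environ.get("HATCHET_DEBUG", "false").lower() in ("1", "t", "true")
client = Hatchet(debug=debug)  # debug=True surfaces registration and heartbeat logs
```
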
---

## Challenge 3: Docker Networking + JWT Token URL Conflicts

### Symptoms

```
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
    status = StatusCode.UNAVAILABLE
    details = "failed to connect to all addresses"
```

### Root Cause

The Hatchet API token embeds URLs:

```json
{
  "aud": "http://localhost:8889",
  "grpc_broadcast_address": "localhost:7077",
  "server_url": "http://localhost:8889"
}
```

Inside Docker containers, `localhost` refers to the container itself, not the Hatchet server.
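
To check what a particular token embeds, the JWT payload can be decoded locally; a hedged sketch assuming the standard three-part `header.payload.signature` layout shown above:

```python
import base64
import json
import os

token = os.environ["HATCHET_CLIENT_TOKEN"]
payload = token.split(".")[1]
payload += "=" * (-len(payload) % 4)  # restore stripped base64 padding
claims = json.loads(base64.urlsafe_b64decode(payload))
print(claims.get("server_url"), claims.get("grpc_broadcast_address"))
```
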
### Resolution

Override the token-embedded URLs with environment variables:

```bash
# In .env or docker-compose environment
HATCHET_CLIENT_HOST_PORT=hatchet:7077
HATCHET_CLIENT_SERVER_URL=http://hatchet:8888
HATCHET_CLIENT_TLS_STRATEGY=none
```

### Key Insight

**The JWT token is not the final word on connection settings.** Environment variables override token-embedded URLs, which is essential for Docker networking.

---

## Challenge 4: Workflow Name Case Sensitivity

### Symptoms

```
BadRequestException: (400)
HTTP response body: errors=[APIError(description='workflow names not found: diarizationpipeline')]
```

### Root Cause

Hatchet uses the exact workflow name you define for triggering:

```python
diarization_pipeline = hatchet.workflow(
    name="DiarizationPipeline",  # Use THIS exact name to trigger
    input_validator=PipelineInput
)
```

Internally, task identifiers are lowercased (`diarizationpipeline:get_recording`), but workflow triggers must match the defined name.

### Resolution

```python
# Correct
await client.start_workflow('DiarizationPipeline', input_data)

# Wrong
await client.start_workflow('diarizationpipeline', input_data)
```

### Key Insight

**Workflow names are case-sensitive for triggering, but task refs are lowercase.** Don't conflate the two.

---

## Challenge 5: Pydantic Response Object Iteration

### Symptoms

```
AttributeError: 'tuple' object has no attribute 'participant_id'
```

### Root Cause

When API responses return Pydantic models with list fields:

```python
class MeetingParticipantsResponse(BaseModel):
    data: List[MeetingParticipant]
```

Iterating the response object directly is wrong:

```python
for p in participants:  # WRONG - iterates over model fields as tuples
```

### Resolution

Access the `.data` attribute explicitly:

```python
for p in participants.data:  # CORRECT - iterates over list items
    print(p.participant_id)
```

### Key Insight

**Pydantic models with list fields require explicit `.data` access.** The model itself is not iterable in the expected way.

---

## Challenge 6: Database Connections in Async Workers

### Symptoms

```
InterfaceError: cannot perform operation: another operation is in progress
```

### Root Cause

Similar to Conductor, Hatchet workers may inherit stale database connections. Each task runs in an async context that may not share the same event loop as cached connections.

### Resolution

Create fresh database connections per task:

```python
async def _get_fresh_db_connection():
    """Create fresh database connection for worker task."""
    import databases

    from reflector.db import _database_context
    from reflector.settings import settings

    _database_context.set(None)
    db = databases.Database(settings.DATABASE_URL)
    _database_context.set(db)
    await db.connect()
    return db


async def _close_db_connection(db):
    from reflector.db import _database_context

    await db.disconnect()
    _database_context.set(None)
```

### Key Insight

**Cached singletons (DB, HTTP clients) are unsafe in workflow workers.** Always create fresh connections.
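
The helpers above are used with `try`/`finally` inside each task so the connection is always released; a condensed sketch of the per-task pattern (`load_transcript` is an illustrative task body, not a function in this repo):

```python
async def load_transcript(transcript_id: str) -> None:
    # One fresh connection per task invocation, always closed afterwards
    db = await _get_fresh_db_connection()
    try:
        from reflector.db.transcripts import transcripts_controller

        transcript = await transcripts_controller.get_by_id(transcript_id)
        # ... work with the transcript ...
    finally:
        await _close_db_connection(db)
```
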
---

## Challenge 7: Child Workflow Fan-out Pattern

### Symptoms

Child workflows spawn but parent doesn't wait for completion, or results aren't collected.

### Root Cause

Hatchet child workflows need explicit spawning and result collection:

```python
# Spawning children
child_runs = await asyncio.gather(*[
    child_workflow.aio_run(child_input)
    for child_input in inputs
])

# Results are returned directly from aio_run()
```

### Resolution

Use `aio_run()` for child workflows and `asyncio.gather()` for parallelism:

```python
@parent_workflow.task(parents=[setup_task])
async def process_tracks(input: ParentInput, ctx: Context) -> dict:
    child_coroutines = [
        track_workflow.aio_run(TrackInput(track_index=i, ...))
        for i in range(len(input.tracks))
    ]

    results = await asyncio.gather(*child_coroutines, return_exceptions=True)

    # Handle failures
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            logger.error(f"Track {i} failed: {result}")

    return {"track_results": [r for r in results if not isinstance(r, Exception)]}
```

### Key Insight

**Child workflows in Hatchet return results directly.** No need to poll for completion like in Conductor.

---

## Debugging Workflow

### 1. Enable Debug Mode First

```bash
HATCHET_DEBUG=true
```

### 2. Verify Worker Registration

Look for this in debug logs:

```
[DEBUG] 'worker-name' waiting for ['workflow:task1', 'workflow:task2', ...]
[DEBUG] acquired action listener: {uuid}
```

### 3. Test Workflow Trigger Separately

```bash
docker exec server uv run python -c "
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.workflows.diarization_pipeline import PipelineInput
import asyncio

async def test():
    input_data = PipelineInput(
        transcript_id='test',
        recording_id=None,
        room_name='test-room',
        bucket_name='bucket',
        tracks=[],
    )
    run_id = await HatchetClientManager.start_workflow(
        'DiarizationPipeline',
        input_data.model_dump()
    )
    print(f'Triggered: {run_id}')

asyncio.run(test())
"
```

### 4. Check Hatchet Server Logs

```bash
docker logs reflector-hatchet-1 --tail 50
```

Look for `WRN` entries indicating API errors or connection issues.

### 5. Verify gRPC Connectivity

```bash
docker exec worker python -c "
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('hatchet', 7077))
print(f'gRPC port 7077: {\"reachable\" if result == 0 else \"blocked\"}')"
```

### 6. Force Container Rebuild

Volume mounts may cache old bytecode:

```bash
docker compose up -d --build --force-recreate hatchet-worker
```

---

## Common Gotchas Summary

| Issue | Signal | Fix |
|-------|--------|-----|
| SDK API changed | `AttributeError` on result | Check SDK source for actual attributes |
| Worker appears stuck | Only "starting runner..." | Enable `HATCHET_DEBUG=true` |
| Can't connect from Docker | gRPC unavailable | Set `HATCHET_CLIENT_HOST_PORT` and `_SERVER_URL` |
| Workflow not found | 400 Bad Request | Use exact case-sensitive workflow name |
| Tuple iteration error | `'tuple' has no attribute` | Access `.data` on Pydantic response models |
| DB conflicts | "another operation in progress" | Fresh DB connection per task |
| Old code running | Fixed code but same error | Force rebuild container, clear `__pycache__` |

---

## Files Most Likely to Need Hatchet-Specific Handling

- `server/reflector/hatchet/workflows/*.py` - Workflow and task definitions
- `server/reflector/hatchet/client.py` - Client wrapper, SDK version compatibility
- `server/reflector/hatchet/run_workers.py` - Worker startup and registration
- `server/reflector/hatchet/progress.py` - Progress emission for UI updates
- `docker-compose.yml` - Hatchet infrastructure services

@@ -40,6 +40,7 @@ dependencies = [
     "webvtt-py>=0.5.0",
     "icalendar>=6.0.0",
     "conductor-python>=1.2.3",
+    "hatchet-sdk>=0.47.0",
 ]

 [dependency-groups]
@@ -135,5 +136,10 @@ select = [
 "reflector/processors/summary/summary_builder.py" = ["E501"]
 "gpu/modal_deployments/**.py" = ["PLC0415"]
 "reflector/tools/**.py" = ["PLC0415"]
+"reflector/hatchet/run_workers.py" = ["PLC0415"]
+"reflector/hatchet/workflows/**.py" = ["PLC0415"]
+"reflector/conductor/run_workers.py" = ["PLC0415"]
+"reflector/conductor/workers/**.py" = ["PLC0415"]
+"reflector/views/hatchet.py" = ["PLC0415"]
 "migrations/versions/**.py" = ["PLC0415"]
 "tests/**.py" = ["PLC0415"]
@@ -14,6 +14,7 @@ from reflector.metrics import metrics_init
 from reflector.settings import settings
 from reflector.views.conductor import router as conductor_router
 from reflector.views.daily import router as daily_router
+from reflector.views.hatchet import router as hatchet_router
 from reflector.views.meetings import router as meetings_router
 from reflector.views.rooms import router as rooms_router
 from reflector.views.rtc_offer import router as rtc_offer_router

@@ -100,6 +101,7 @@ app.include_router(zulip_router, prefix="/v1")
 app.include_router(whereby_router, prefix="/v1")
 app.include_router(daily_router, prefix="/v1/daily")
 app.include_router(conductor_router, prefix="/v1")
+app.include_router(hatchet_router, prefix="/v1")
 add_pagination(app)

 # prepare celery
server/reflector/hatchet/__init__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
"""Hatchet workflow orchestration for Reflector."""

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress, emit_progress_async

__all__ = ["HatchetClientManager", "emit_progress", "emit_progress_async"]
server/reflector/hatchet/client.py (new file, 48 lines)
@@ -0,0 +1,48 @@
"""Hatchet Python client wrapper."""

from hatchet_sdk import Hatchet

from reflector.settings import settings


class HatchetClientManager:
    """Singleton manager for Hatchet client connections."""

    _instance: Hatchet | None = None

    @classmethod
    def get_client(cls) -> Hatchet:
        """Get or create the Hatchet client."""
        if cls._instance is None:
            if not settings.HATCHET_CLIENT_TOKEN:
                raise ValueError("HATCHET_CLIENT_TOKEN must be set")

            cls._instance = Hatchet(
                debug=settings.HATCHET_DEBUG,
            )
        return cls._instance

    @classmethod
    async def start_workflow(
        cls, workflow_name: str, input_data: dict, key: str | None = None
    ) -> str:
        """Start a workflow and return the workflow run ID."""
        client = cls.get_client()
        result = await client.runs.aio_create(
            workflow_name,
            input_data,
        )
        # SDK v1.21+ returns V1WorkflowRunDetails with run.metadata.id
        return result.run.metadata.id

    @classmethod
    async def get_workflow_status(cls, workflow_run_id: str) -> dict:
        """Get the current status of a workflow run."""
        client = cls.get_client()
        run = await client.runs.aio_get(workflow_run_id)
        return run.to_dict()

    @classmethod
    def reset(cls) -> None:
        """Reset the client instance (for testing)."""
        cls._instance = None
server/reflector/hatchet/progress.py (new file, 120 lines)
@@ -0,0 +1,120 @@
"""Progress event emission for Hatchet workers."""

import asyncio
from typing import Literal

from reflector.db.transcripts import PipelineProgressData
from reflector.logger import logger
from reflector.ws_manager import get_ws_manager

# Step mapping for progress tracking (matches Conductor pipeline)
PIPELINE_STEPS = {
    "get_recording": 1,
    "get_participants": 2,
    "pad_track": 3,  # Fork tasks share same step
    "mixdown_tracks": 4,
    "generate_waveform": 5,
    "transcribe_track": 6,  # Fork tasks share same step
    "merge_transcripts": 7,
    "detect_topics": 8,
    "generate_title": 9,  # Fork tasks share same step
    "generate_summary": 9,  # Fork tasks share same step
    "finalize": 10,
    "cleanup_consent": 11,
    "post_zulip": 12,
    "send_webhook": 13,
}

TOTAL_STEPS = 13


async def _emit_progress_async(
    transcript_id: str,
    step: str,
    status: Literal["pending", "in_progress", "completed", "failed"],
    workflow_id: str | None = None,
) -> None:
    """Async implementation of progress emission."""
    ws_manager = get_ws_manager()
    step_index = PIPELINE_STEPS.get(step, 0)

    data = PipelineProgressData(
        workflow_id=workflow_id,
        current_step=step,
        step_index=step_index,
        total_steps=TOTAL_STEPS,
        step_status=status,
    )

    await ws_manager.send_json(
        room_id=f"ts:{transcript_id}",
        message={
            "event": "PIPELINE_PROGRESS",
            "data": data.model_dump(),
        },
    )

    logger.debug(
        "[Hatchet Progress] Emitted",
        transcript_id=transcript_id,
        step=step,
        status=status,
        step_index=step_index,
    )


def emit_progress(
    transcript_id: str,
    step: str,
    status: Literal["pending", "in_progress", "completed", "failed"],
    workflow_id: str | None = None,
) -> None:
    """Emit a pipeline progress event (sync wrapper for Hatchet workers).

    Args:
        transcript_id: The transcript ID to emit progress for
        step: The current step name (e.g., "transcribe_track")
        status: The step status
        workflow_id: Optional workflow run ID
    """
    try:
        # Get or create event loop for sync context
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        if loop is not None and loop.is_running():
            # Already in async context, schedule the coroutine
            asyncio.create_task(
                _emit_progress_async(transcript_id, step, status, workflow_id)
            )
        else:
            # Not in async context, run synchronously
            asyncio.run(_emit_progress_async(transcript_id, step, status, workflow_id))
    except Exception as e:
        # Progress emission should never break the pipeline
        logger.warning(
            "[Hatchet Progress] Failed to emit progress event",
            error=str(e),
            transcript_id=transcript_id,
            step=step,
        )


async def emit_progress_async(
    transcript_id: str,
    step: str,
    status: Literal["pending", "in_progress", "completed", "failed"],
    workflow_id: str | None = None,
) -> None:
    """Async version of emit_progress for use in async Hatchet tasks."""
    try:
        await _emit_progress_async(transcript_id, step, status, workflow_id)
    except Exception as e:
        logger.warning(
            "[Hatchet Progress] Failed to emit progress event",
            error=str(e),
            transcript_id=transcript_id,
            step=step,
        )
server/reflector/hatchet/run_workers.py (new file, 59 lines)
@@ -0,0 +1,59 @@
"""
Run Hatchet workers for the diarization pipeline.

Usage:
    uv run -m reflector.hatchet.run_workers

    # Or via docker:
    docker compose exec server uv run -m reflector.hatchet.run_workers
"""

import signal
import sys

from reflector.logger import logger
from reflector.settings import settings


def main() -> None:
    """Start Hatchet worker polling."""
    if not settings.HATCHET_ENABLED:
        logger.error("HATCHET_ENABLED is False, not starting workers")
        sys.exit(1)

    if not settings.HATCHET_CLIENT_TOKEN:
        logger.error("HATCHET_CLIENT_TOKEN is not set")
        sys.exit(1)

    logger.info(
        "Starting Hatchet workers",
        debug=settings.HATCHET_DEBUG,
    )

    # Import workflows to register them
    from reflector.hatchet.client import HatchetClientManager
    from reflector.hatchet.workflows import diarization_pipeline, track_workflow

    hatchet = HatchetClientManager.get_client()

    # Create worker with both workflows
    worker = hatchet.worker(
        "reflector-diarization-worker",
        workflows=[diarization_pipeline, track_workflow],
    )

    # Handle graceful shutdown
    def shutdown_handler(signum: int, frame) -> None:
        logger.info("Received shutdown signal, stopping workers...")
        # Worker cleanup happens automatically on exit
        sys.exit(0)

    signal.signal(signal.SIGINT, shutdown_handler)
    signal.signal(signal.SIGTERM, shutdown_handler)

    logger.info("Starting Hatchet worker polling...")
    worker.start()


if __name__ == "__main__":
    main()
server/reflector/hatchet/workflows/__init__.py (new file, 14 lines)
@@ -0,0 +1,14 @@
"""Hatchet workflow definitions."""

from reflector.hatchet.workflows.diarization_pipeline import (
    PipelineInput,
    diarization_pipeline,
)
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow

__all__ = [
    "diarization_pipeline",
    "track_workflow",
    "PipelineInput",
    "TrackInput",
]
server/reflector/hatchet/workflows/diarization_pipeline.py (new file, 808 lines)
@@ -0,0 +1,808 @@
|
|||||||
|
"""
|
||||||
|
Hatchet main workflow: DiarizationPipeline
|
||||||
|
|
||||||
|
Multitrack diarization pipeline for Daily.co recordings.
|
||||||
|
Orchestrates the full processing flow from recording metadata to final transcript.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import tempfile
|
||||||
|
from datetime import timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import av
|
||||||
|
from hatchet_sdk import Context
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from reflector.hatchet.client import HatchetClientManager
|
||||||
|
from reflector.hatchet.progress import emit_progress_async
|
||||||
|
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
|
||||||
|
from reflector.logger import logger
|
||||||
|
|
||||||
|
# Audio constants
|
||||||
|
OPUS_STANDARD_SAMPLE_RATE = 48000
|
||||||
|
OPUS_DEFAULT_BIT_RATE = 64000
|
||||||
|
PRESIGNED_URL_EXPIRATION_SECONDS = 7200
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineInput(BaseModel):
|
||||||
|
"""Input to trigger the diarization pipeline."""
|
||||||
|
|
||||||
|
recording_id: str | None
|
||||||
|
room_name: str | None
|
||||||
|
tracks: list[dict] # List of {"s3_key": str}
|
||||||
|
bucket_name: str
|
||||||
|
transcript_id: str
|
||||||
|
room_id: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# Get hatchet client and define workflow
|
||||||
|
hatchet = HatchetClientManager.get_client()
|
||||||
|
|
||||||
|
diarization_pipeline = hatchet.workflow(
|
||||||
|
name="DiarizationPipeline", input_validator=PipelineInput
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Helper Functions
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_fresh_db_connection():
|
||||||
|
"""Create fresh database connection for subprocess."""
|
||||||
|
import databases
|
||||||
|
|
||||||
|
from reflector.db import _database_context
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
_database_context.set(None)
|
||||||
|
db = databases.Database(settings.DATABASE_URL)
|
||||||
|
_database_context.set(db)
|
||||||
|
await db.connect()
|
||||||
|
return db
|
||||||
|
|
||||||
|
|
||||||
|
async def _close_db_connection(db):
|
||||||
|
"""Close database connection."""
|
||||||
|
from reflector.db import _database_context
|
||||||
|
|
||||||
|
await db.disconnect()
|
||||||
|
_database_context.set(None)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_storage():
|
||||||
|
"""Create fresh storage instance."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
from reflector.storage.storage_aws import AwsStorage
|
||||||
|
|
||||||
|
return AwsStorage(
|
||||||
|
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
|
||||||
|
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
|
||||||
|
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
|
||||||
|
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Pipeline Tasks
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(execution_timeout=timedelta(seconds=60), retries=3)
|
||||||
|
async def get_recording(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Fetch recording metadata from Daily.co API."""
|
||||||
|
logger.info("[Hatchet] get_recording", recording_id=input.recording_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from reflector.dailyco_api.client import DailyApiClient
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
if not input.recording_id:
|
||||||
|
# No recording_id in reprocess path - return minimal data
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"id": None,
|
||||||
|
"mtg_session_id": None,
|
||||||
|
"room_name": input.room_name,
|
||||||
|
"duration": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
if not settings.DAILY_API_KEY:
|
||||||
|
raise ValueError("DAILY_API_KEY not configured")
|
||||||
|
|
||||||
|
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
|
||||||
|
recording = await client.get_recording(input.recording_id)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] get_recording complete",
|
||||||
|
recording_id=input.recording_id,
|
||||||
|
room_name=recording.room_name,
|
||||||
|
duration=recording.duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"id": recording.id,
|
||||||
|
"mtg_session_id": recording.mtgSessionId,
|
||||||
|
"room_name": recording.room_name,
|
||||||
|
"duration": recording.duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] get_recording failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[get_recording], execution_timeout=timedelta(seconds=60), retries=3
|
||||||
|
)
|
||||||
|
async def get_participants(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Fetch participant list from Daily.co API."""
|
||||||
|
logger.info("[Hatchet] get_participants", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_participants", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
recording_data = ctx.task_output(get_recording)
|
||||||
|
mtg_session_id = recording_data.get("mtg_session_id")
|
||||||
|
|
||||||
|
from reflector.dailyco_api.client import DailyApiClient
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
if not mtg_session_id or not settings.DAILY_API_KEY:
|
||||||
|
# Return empty participants if no session ID
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id,
|
||||||
|
"get_participants",
|
||||||
|
"completed",
|
||||||
|
ctx.workflow_run_id,
|
||||||
|
)
|
||||||
|
return {"participants": [], "num_tracks": len(input.tracks)}
|
||||||
|
|
||||||
|
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
|
||||||
|
participants = await client.get_meeting_participants(mtg_session_id)
|
||||||
|
|
||||||
|
participants_list = [
|
||||||
|
{"participant_id": p.participant_id, "user_name": p.user_name}
|
||||||
|
for p in participants.data
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] get_participants complete",
|
||||||
|
participant_count=len(participants_list),
|
||||||
|
)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_participants", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"participants": participants_list, "num_tracks": len(input.tracks)}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] get_participants failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_participants", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[get_participants], execution_timeout=timedelta(seconds=600), retries=3
|
||||||
|
)
|
||||||
|
async def process_tracks(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Spawn child workflows for each track (dynamic fan-out).
|
||||||
|
|
||||||
|
Processes pad_track and transcribe_track for each audio track in parallel.
|
||||||
|
"""
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] process_tracks",
|
||||||
|
num_tracks=len(input.tracks),
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Spawn child workflows for each track
|
||||||
|
child_coroutines = [
|
||||||
|
track_workflow.aio_run(
|
||||||
|
TrackInput(
|
||||||
|
track_index=i,
|
||||||
|
s3_key=track["s3_key"],
|
||||||
|
bucket_name=input.bucket_name,
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for i, track in enumerate(input.tracks)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Wait for all child workflows to complete
|
||||||
|
results = await asyncio.gather(*child_coroutines)
|
||||||
|
|
||||||
|
# Collect all track results
|
||||||
|
all_words = []
|
||||||
|
padded_urls = []
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
transcribe_result = result.get("transcribe_track", {})
|
||||||
|
all_words.extend(transcribe_result.get("words", []))
|
||||||
|
|
||||||
|
pad_result = result.get("pad_track", {})
|
||||||
|
padded_urls.append(pad_result.get("padded_url"))
|
||||||
|
|
||||||
|
# Sort words by start time
|
||||||
|
all_words.sort(key=lambda w: w.get("start", 0))
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] process_tracks complete",
|
||||||
|
num_tracks=len(input.tracks),
|
||||||
|
total_words=len(all_words),
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"all_words": all_words,
|
||||||
|
"padded_urls": padded_urls,
|
||||||
|
"word_count": len(all_words),
|
||||||
|
"num_tracks": len(input.tracks),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[process_tracks], execution_timeout=timedelta(seconds=300), retries=3
|
||||||
|
)
|
||||||
|
async def mixdown_tracks(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Mix all padded tracks into single audio file."""
|
||||||
|
logger.info("[Hatchet] mixdown_tracks", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "mixdown_tracks", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
padded_urls = track_data.get("padded_urls", [])
|
||||||
|
|
||||||
|
if not padded_urls:
|
||||||
|
raise ValueError("No padded tracks to mixdown")
|
||||||
|
|
||||||
|
storage = _get_storage()
|
||||||
|
|
||||||
|
# Download all tracks and mix
|
||||||
|
temp_inputs = []
|
||||||
|
try:
|
||||||
|
for i, url in enumerate(padded_urls):
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
temp_input = tempfile.NamedTemporaryFile(suffix=".webm", delete=False)
|
||||||
|
temp_inputs.append(temp_input.name)
|
||||||
|
|
||||||
|
# Download track
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
response = await client.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
with open(temp_input.name, "wb") as f:
|
||||||
|
f.write(response.content)
|
||||||
|
|
||||||
|
# Mix using PyAV amix filter
|
||||||
|
if len(temp_inputs) == 0:
|
||||||
|
raise ValueError("No valid tracks to mixdown")
|
||||||
|
|
||||||
|
output_path = tempfile.mktemp(suffix=".mp3")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use ffmpeg-style mixing via PyAV
|
||||||
|
containers = [av.open(path) for path in temp_inputs]
|
||||||
|
|
||||||
|
# Get the longest duration
|
||||||
|
max_duration = 0.0
|
||||||
|
for container in containers:
|
||||||
|
if container.duration:
|
||||||
|
duration = float(container.duration * av.time_base)
|
||||||
|
max_duration = max(max_duration, duration)
|
||||||
|
|
||||||
|
# Close containers for now
|
||||||
|
for container in containers:
|
||||||
|
container.close()
|
||||||
|
|
||||||
|
# Use subprocess for mixing (simpler than complex PyAV graph)
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
# Build ffmpeg command
|
||||||
|
cmd = ["ffmpeg", "-y"]
|
||||||
|
for path in temp_inputs:
|
||||||
|
cmd.extend(["-i", path])
|
||||||
|
|
||||||
|
# Build filter for N inputs
|
||||||
|
n = len(temp_inputs)
|
||||||
|
filter_str = f"amix=inputs={n}:duration=longest:normalize=0"
|
||||||
|
cmd.extend(["-filter_complex", filter_str])
|
||||||
|
cmd.extend(["-ac", "2", "-ar", "48000", "-b:a", "128k", output_path])
|
||||||
|
|
||||||
|
subprocess.run(cmd, check=True, capture_output=True)
|
||||||
|
|
||||||
|
# Upload mixed file
|
||||||
|
file_size = Path(output_path).stat().st_size
|
||||||
|
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/mixed.mp3"
|
||||||
|
|
||||||
|
with open(output_path, "rb") as mixed_file:
|
||||||
|
await storage.put_file(storage_path, mixed_file)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] mixdown_tracks uploaded",
|
||||||
|
key=storage_path,
|
||||||
|
size=file_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
Path(output_path).unlink(missing_ok=True)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
for path in temp_inputs:
|
||||||
|
Path(path).unlink(missing_ok=True)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "mixdown_tracks", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"audio_key": storage_path,
|
||||||
|
"duration": max_duration,
|
||||||
|
"tracks_mixed": len(temp_inputs),
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] mixdown_tracks failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "mixdown_tracks", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[mixdown_tracks], execution_timeout=timedelta(seconds=120), retries=3
|
||||||
|
)
|
||||||
|
async def generate_waveform(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Generate audio waveform visualization."""
|
||||||
|
logger.info("[Hatchet] generate_waveform", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_waveform", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
mixdown_data = ctx.task_output(mixdown_tracks)
|
||||||
|
audio_key = mixdown_data.get("audio_key")
|
||||||
|
|
||||||
|
storage = _get_storage()
|
||||||
|
audio_url = await storage.get_file_url(
|
||||||
|
audio_key,
|
||||||
|
operation="get_object",
|
||||||
|
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||||
|
)
|
||||||
|
|
||||||
|
from reflector.pipelines.waveform_helpers import generate_waveform_data
|
||||||
|
|
||||||
|
waveform = await generate_waveform_data(audio_url)
|
||||||
|
|
||||||
|
# Store waveform
|
||||||
|
waveform_key = f"file_pipeline_hatchet/{input.transcript_id}/waveform.json"
|
||||||
|
import json
|
||||||
|
|
||||||
|
waveform_bytes = json.dumps(waveform).encode()
|
||||||
|
import io
|
||||||
|
|
||||||
|
await storage.put_file(waveform_key, io.BytesIO(waveform_bytes))
|
||||||
|
|
||||||
|
logger.info("[Hatchet] generate_waveform complete")
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_waveform", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"waveform_key": waveform_key}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] generate_waveform failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_waveform", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[mixdown_tracks], execution_timeout=timedelta(seconds=300), retries=3
|
||||||
|
)
|
||||||
|
async def detect_topics(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Detect topics using LLM."""
|
||||||
|
logger.info("[Hatchet] detect_topics", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "detect_topics", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
words = track_data.get("all_words", [])
|
||||||
|
|
||||||
|
from reflector.pipelines import topic_processing
|
||||||
|
from reflector.processors.types import Transcript as TranscriptType
|
||||||
|
from reflector.processors.types import Word
|
||||||
|
|
||||||
|
# Convert word dicts to Word objects
|
||||||
|
word_objects = [Word(**w) for w in words]
|
||||||
|
transcript = TranscriptType(words=word_objects)
|
||||||
|
|
||||||
|
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||||
|
|
||||||
|
async def noop_callback(t):
|
||||||
|
pass
|
||||||
|
|
||||||
|
topics = await topic_processing.detect_topics(
|
||||||
|
transcript,
|
||||||
|
"en", # target_language
|
||||||
|
on_topic_callback=noop_callback,
|
||||||
|
empty_pipeline=empty_pipeline,
|
||||||
|
)
|
||||||
|
|
||||||
|
topics_list = [t.model_dump() for t in topics]
|
||||||
|
|
||||||
|
logger.info("[Hatchet] detect_topics complete", topic_count=len(topics_list))
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "detect_topics", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"topics": topics_list}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] detect_topics failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "detect_topics", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[detect_topics], execution_timeout=timedelta(seconds=120), retries=3
|
||||||
|
)
|
||||||
|
async def generate_title(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Generate meeting title using LLM."""
|
||||||
|
logger.info("[Hatchet] generate_title", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_title", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
topics_data = ctx.task_output(detect_topics)
|
||||||
|
topics = topics_data.get("topics", [])
|
||||||
|
|
||||||
|
from reflector.pipelines import topic_processing
|
||||||
|
from reflector.processors.types import Topic
|
||||||
|
|
||||||
|
topic_objects = [Topic(**t) for t in topics]
|
||||||
|
|
||||||
|
title = await topic_processing.generate_title(topic_objects)
|
||||||
|
|
||||||
|
logger.info("[Hatchet] generate_title complete", title=title)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_title", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"title": title}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] generate_title failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_title", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[detect_topics], execution_timeout=timedelta(seconds=300), retries=3
|
||||||
|
)
|
||||||
|
async def generate_summary(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Generate meeting summary using LLM."""
|
||||||
|
logger.info("[Hatchet] generate_summary", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_summary", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
topics_data = ctx.task_output(detect_topics)
|
||||||
|
|
||||||
|
words = track_data.get("all_words", [])
|
||||||
|
topics = topics_data.get("topics", [])
|
||||||
|
|
||||||
|
from reflector.pipelines import topic_processing
|
||||||
|
from reflector.processors.types import Topic, Word
|
||||||
|
from reflector.processors.types import Transcript as TranscriptType
|
||||||
|
|
||||||
|
word_objects = [Word(**w) for w in words]
|
||||||
|
transcript = TranscriptType(words=word_objects)
|
||||||
|
topic_objects = [Topic(**t) for t in topics]
|
||||||
|
|
||||||
|
summary, short_summary = await topic_processing.generate_summary(
|
||||||
|
transcript, topic_objects
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info("[Hatchet] generate_summary complete")
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_summary", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"summary": summary, "short_summary": short_summary}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] generate_summary failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_summary", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[generate_waveform, generate_title, generate_summary],
|
||||||
|
execution_timeout=timedelta(seconds=60),
|
||||||
|
retries=3,
|
||||||
|
)
|
||||||
|
async def finalize(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Finalize transcript status and update database."""
|
||||||
|
logger.info("[Hatchet] finalize", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "finalize", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
title_data = ctx.task_output(generate_title)
|
||||||
|
summary_data = ctx.task_output(generate_summary)
|
||||||
|
mixdown_data = ctx.task_output(mixdown_tracks)
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
|
||||||
|
title = title_data.get("title", "")
|
||||||
|
summary = summary_data.get("summary", "")
|
||||||
|
short_summary = summary_data.get("short_summary", "")
|
||||||
|
duration = mixdown_data.get("duration", 0)
|
||||||
|
all_words = track_data.get("all_words", [])
|
||||||
|
|
||||||
|
db = await _get_fresh_db_connection()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from reflector.db.transcripts import transcripts_controller
|
||||||
|
from reflector.processors.types import Word
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||||
|
if transcript is None:
|
||||||
|
raise ValueError(
|
||||||
|
f"Transcript {input.transcript_id} not found in database"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Convert words back to Word objects for storage
|
||||||
|
word_objects = [Word(**w) for w in all_words]
|
||||||
|
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript,
|
||||||
|
{
|
||||||
|
"status": "ended",
|
||||||
|
"title": title,
|
||||||
|
"long_summary": summary,
|
||||||
|
"short_summary": short_summary,
|
||||||
|
"duration": duration,
|
||||||
|
"words": word_objects,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] finalize complete", transcript_id=input.transcript_id
|
||||||
|
)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
await _close_db_connection(db)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "finalize", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"status": "COMPLETED"}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] finalize failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "finalize", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[finalize], execution_timeout=timedelta(seconds=60), retries=3
|
||||||
|
)
|
||||||
|
async def cleanup_consent(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Check and handle consent requirements."""
|
||||||
|
logger.info("[Hatchet] cleanup_consent", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "cleanup_consent", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
db = await _get_fresh_db_connection()
|
||||||
|
|
||||||
|
try:
|
||||||
|
            from reflector.db.meetings import meetings_controller
            from reflector.db.transcripts import transcripts_controller

            transcript = await transcripts_controller.get_by_id(input.transcript_id)
            if transcript and transcript.meeting_id:
                meeting = await meetings_controller.get_by_id(transcript.meeting_id)
                if meeting:
                    # Check consent logic here
                    # For now just mark as checked
                    pass

            logger.info(
                "[Hatchet] cleanup_consent complete", transcript_id=input.transcript_id
            )

        finally:
            await _close_db_connection(db)

        await emit_progress_async(
            input.transcript_id, "cleanup_consent", "completed", ctx.workflow_run_id
        )

        return {"consent_checked": True}

    except Exception as e:
        logger.error("[Hatchet] cleanup_consent failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "cleanup_consent", "failed", ctx.workflow_run_id
        )
        raise


@diarization_pipeline.task(
    parents=[cleanup_consent], execution_timeout=timedelta(seconds=60), retries=5
)
async def post_zulip(input: PipelineInput, ctx: Context) -> dict:
    """Post notification to Zulip."""
    logger.info("[Hatchet] post_zulip", transcript_id=input.transcript_id)

    await emit_progress_async(
        input.transcript_id, "post_zulip", "in_progress", ctx.workflow_run_id
    )

    try:
        from reflector.settings import settings

        if not settings.ZULIP_REALM:
            logger.info("[Hatchet] post_zulip skipped (Zulip not configured)")
            await emit_progress_async(
                input.transcript_id, "post_zulip", "completed", ctx.workflow_run_id
            )
            return {"zulip_message_id": None, "skipped": True}

        from reflector.zulip import post_transcript_notification

        db = await _get_fresh_db_connection()

        try:
            from reflector.db.transcripts import transcripts_controller

            transcript = await transcripts_controller.get_by_id(input.transcript_id)
            if transcript:
                message_id = await post_transcript_notification(transcript)
                logger.info(
                    "[Hatchet] post_zulip complete", zulip_message_id=message_id
                )
            else:
                message_id = None

        finally:
            await _close_db_connection(db)

        await emit_progress_async(
            input.transcript_id, "post_zulip", "completed", ctx.workflow_run_id
        )

        return {"zulip_message_id": message_id}

    except Exception as e:
        logger.error("[Hatchet] post_zulip failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "post_zulip", "failed", ctx.workflow_run_id
        )
        raise


@diarization_pipeline.task(
    parents=[post_zulip], execution_timeout=timedelta(seconds=120), retries=30
)
async def send_webhook(input: PipelineInput, ctx: Context) -> dict:
    """Send completion webhook to external service."""
    logger.info("[Hatchet] send_webhook", transcript_id=input.transcript_id)

    await emit_progress_async(
        input.transcript_id, "send_webhook", "in_progress", ctx.workflow_run_id
    )

    try:
        if not input.room_id:
            logger.info("[Hatchet] send_webhook skipped (no room_id)")
            await emit_progress_async(
                input.transcript_id, "send_webhook", "completed", ctx.workflow_run_id
            )
            return {"webhook_sent": False, "skipped": True}

        db = await _get_fresh_db_connection()

        try:
            from reflector.db.rooms import rooms_controller
            from reflector.db.transcripts import transcripts_controller

            room = await rooms_controller.get_by_id(input.room_id)
            transcript = await transcripts_controller.get_by_id(input.transcript_id)

            if room and room.webhook_url and transcript:
                import httpx

                webhook_payload = {
                    "event": "transcript.completed",
                    "transcript_id": input.transcript_id,
                    "title": transcript.title,
                    "duration": transcript.duration,
                }

                async with httpx.AsyncClient() as client:
                    response = await client.post(
                        room.webhook_url, json=webhook_payload, timeout=30
                    )
                    response.raise_for_status()

                logger.info(
                    "[Hatchet] send_webhook complete", status_code=response.status_code
                )

                await emit_progress_async(
                    input.transcript_id,
                    "send_webhook",
                    "completed",
                    ctx.workflow_run_id,
                )

                return {"webhook_sent": True, "response_code": response.status_code}

        finally:
            await _close_db_connection(db)

        await emit_progress_async(
            input.transcript_id, "send_webhook", "completed", ctx.workflow_run_id
        )

        return {"webhook_sent": False, "skipped": True}

    except Exception as e:
        logger.error("[Hatchet] send_webhook failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "send_webhook", "failed", ctx.workflow_run_id
        )
        raise
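Editor's note: the webhook body assembled in send_webhook above is a small JSON object with four fields. A minimal sketch of a receiving endpoint is shown below; the FastAPI app, route path, and model name are illustrative assumptions, not part of this commit.

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class TranscriptCompletedEvent(BaseModel):
    # Mirrors the payload built by send_webhook: event, transcript_id, title, duration
    event: str
    transcript_id: str
    title: str | None = None
    duration: float | None = None


@app.post("/reflector-webhook")
async def receive_reflector_webhook(payload: TranscriptCompletedEvent):
    # A real consumer would verify the caller and enqueue its own processing here.
    return {"received": payload.transcript_id}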
server/reflector/hatchet/workflows/track_processing.py (new file, 337 lines)
@@ -0,0 +1,337 @@
"""
Hatchet child workflow: TrackProcessing

Handles individual audio track processing: padding and transcription.
Spawned dynamically by the main diarization pipeline for each track.
"""

import math
import tempfile
from datetime import timedelta
from fractions import Fraction
from pathlib import Path

import av
from av.audio.resampler import AudioResampler
from hatchet_sdk import Context
from pydantic import BaseModel

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress_async
from reflector.logger import logger

# Audio constants matching existing pipeline
OPUS_STANDARD_SAMPLE_RATE = 48000
OPUS_DEFAULT_BIT_RATE = 64000
PRESIGNED_URL_EXPIRATION_SECONDS = 7200


class TrackInput(BaseModel):
    """Input for individual track processing."""

    track_index: int
    s3_key: str
    bucket_name: str
    transcript_id: str
    language: str = "en"


# Get hatchet client and define workflow
hatchet = HatchetClientManager.get_client()

track_workflow = hatchet.workflow(name="TrackProcessing", input_validator=TrackInput)


def _extract_stream_start_time_from_container(container, track_idx: int) -> float:
    """Extract meeting-relative start time from WebM stream metadata.

    Uses PyAV to read stream.start_time from WebM container.
    More accurate than filename timestamps by ~209ms due to network/encoding delays.
    """
    start_time_seconds = 0.0
    try:
        audio_streams = [s for s in container.streams if s.type == "audio"]
        stream = audio_streams[0] if audio_streams else container.streams[0]

        # 1) Try stream-level start_time (most reliable for Daily.co tracks)
        if stream.start_time is not None and stream.time_base is not None:
            start_time_seconds = float(stream.start_time * stream.time_base)

        # 2) Fallback to container-level start_time
        if (start_time_seconds <= 0) and (container.start_time is not None):
            start_time_seconds = float(container.start_time * av.time_base)

        # 3) Fallback to first packet DTS
        if start_time_seconds <= 0:
            for packet in container.demux(stream):
                if packet.dts is not None:
                    start_time_seconds = float(packet.dts * stream.time_base)
                    break
    except Exception as e:
        logger.warning(
            "PyAV metadata read failed; assuming 0 start_time",
            track_idx=track_idx,
            error=str(e),
        )
        start_time_seconds = 0.0

    logger.info(
        f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s",
        track_idx=track_idx,
    )
    return start_time_seconds


def _apply_audio_padding_to_file(
    in_container,
    output_path: str,
    start_time_seconds: float,
    track_idx: int,
) -> None:
    """Apply silence padding to audio track using PyAV filter graph."""
    delay_ms = math.floor(start_time_seconds * 1000)

    logger.info(
        f"Padding track {track_idx} with {delay_ms}ms delay using PyAV",
        track_idx=track_idx,
        delay_ms=delay_ms,
    )

    with av.open(output_path, "w", format="webm") as out_container:
        in_stream = next((s for s in in_container.streams if s.type == "audio"), None)
        if in_stream is None:
            raise Exception("No audio stream in input")

        out_stream = out_container.add_stream("libopus", rate=OPUS_STANDARD_SAMPLE_RATE)
        out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE
        graph = av.filter.Graph()

        abuf_args = (
            f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
            f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
            f"sample_fmt=s16:"
            f"channel_layout=stereo"
        )
        src = graph.add("abuffer", args=abuf_args, name="src")
        aresample_f = graph.add("aresample", args="async=1", name="ares")
        delays_arg = f"{delay_ms}|{delay_ms}"
        adelay_f = graph.add("adelay", args=f"delays={delays_arg}:all=1", name="delay")
        sink = graph.add("abuffersink", name="sink")

        src.link_to(aresample_f)
        aresample_f.link_to(adelay_f)
        adelay_f.link_to(sink)
        graph.configure()

        resampler = AudioResampler(
            format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
        )

        for frame in in_container.decode(in_stream):
            out_frames = resampler.resample(frame) or []
            for rframe in out_frames:
                rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                src.push(rframe)

            while True:
                try:
                    f_out = sink.pull()
                except Exception:
                    break
                f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                for packet in out_stream.encode(f_out):
                    out_container.mux(packet)

        # Flush remaining frames
        src.push(None)
        while True:
            try:
                f_out = sink.pull()
            except Exception:
                break
            f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
            f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
            for packet in out_stream.encode(f_out):
                out_container.mux(packet)

        for packet in out_stream.encode(None):
            out_container.mux(packet)


@track_workflow.task(execution_timeout=timedelta(seconds=300), retries=3)
async def pad_track(input: TrackInput, ctx: Context) -> dict:
    """Pad single audio track with silence for alignment.

    Extracts stream.start_time from WebM container metadata and applies
    silence padding using PyAV filter graph (adelay).
    """
    logger.info(
        "[Hatchet] pad_track",
        track_index=input.track_index,
        s3_key=input.s3_key,
        transcript_id=input.transcript_id,
    )

    await emit_progress_async(
        input.transcript_id, "pad_track", "in_progress", ctx.workflow_run_id
    )

    try:
        # Create fresh storage instance to avoid aioboto3 fork issues
        from reflector.settings import settings
        from reflector.storage.storage_aws import AwsStorage

        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )

        # Get presigned URL for source file
        source_url = await storage.get_file_url(
            input.s3_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=input.bucket_name,
        )

        # Open container and extract start time
        with av.open(source_url) as in_container:
            start_time_seconds = _extract_stream_start_time_from_container(
                in_container, input.track_index
            )

            # If no padding needed, return original URL
            if start_time_seconds <= 0:
                logger.info(
                    f"Track {input.track_index} requires no padding",
                    track_index=input.track_index,
                )
                await emit_progress_async(
                    input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
                )
                return {
                    "padded_url": source_url,
                    "size": 0,
                    "track_index": input.track_index,
                }

            # Create temp file for padded output
            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:
                temp_path = temp_file.name

            try:
                _apply_audio_padding_to_file(
                    in_container, temp_path, start_time_seconds, input.track_index
                )

                file_size = Path(temp_path).stat().st_size
                storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"

                logger.info(
                    f"About to upload padded track",
                    key=storage_path,
                    size=file_size,
                )

                with open(temp_path, "rb") as padded_file:
                    await storage.put_file(storage_path, padded_file)

                logger.info(
                    f"Uploaded padded track to S3",
                    key=storage_path,
                    size=file_size,
                )
            finally:
                Path(temp_path).unlink(missing_ok=True)

            # Get presigned URL for padded file
            padded_url = await storage.get_file_url(
                storage_path,
                operation="get_object",
                expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            )

            logger.info(
                "[Hatchet] pad_track complete",
                track_index=input.track_index,
                padded_url=padded_url[:50] + "...",
            )

            await emit_progress_async(
                input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
            )

            return {
                "padded_url": padded_url,
                "size": file_size,
                "track_index": input.track_index,
            }

    except Exception as e:
        logger.error("[Hatchet] pad_track failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "pad_track", "failed", ctx.workflow_run_id
        )
        raise


@track_workflow.task(
    parents=[pad_track], execution_timeout=timedelta(seconds=600), retries=3
)
async def transcribe_track(input: TrackInput, ctx: Context) -> dict:
    """Transcribe audio track using GPU (Modal.com) or local Whisper."""
    logger.info(
        "[Hatchet] transcribe_track",
        track_index=input.track_index,
        language=input.language,
    )

    await emit_progress_async(
        input.transcript_id, "transcribe_track", "in_progress", ctx.workflow_run_id
    )

    try:
        pad_result = ctx.task_output(pad_track)
        audio_url = pad_result.get("padded_url")

        if not audio_url:
            raise ValueError("Missing padded_url from pad_track")

        from reflector.pipelines.transcription_helpers import (
            transcribe_file_with_processor,
        )

        transcript = await transcribe_file_with_processor(audio_url, input.language)

        # Tag all words with speaker index
        words = []
        for word in transcript.words:
            word_dict = word.model_dump()
            word_dict["speaker"] = input.track_index
            words.append(word_dict)

        logger.info(
            "[Hatchet] transcribe_track complete",
            track_index=input.track_index,
            word_count=len(words),
        )

        await emit_progress_async(
            input.transcript_id, "transcribe_track", "completed", ctx.workflow_run_id
        )

        return {
            "words": words,
            "track_index": input.track_index,
        }

    except Exception as e:
        logger.error("[Hatchet] transcribe_track failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "transcribe_track", "failed", ctx.workflow_run_id
        )
        raise
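Editor's note: this module only defines the child workflow; per its docstring, the main diarization pipeline spawns one TrackProcessing run per track. A rough sketch of what that fan-out could look like with the hatchet-sdk v1 Python API is shown below; the aio_run call, the result shape, and the surrounding function are assumptions, not code from this commit.

import asyncio

from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow


async def run_all_tracks(tracks: list[dict], bucket_name: str, transcript_id: str) -> list:
    # One child workflow run per audio track, awaited concurrently.
    runs = [
        track_workflow.aio_run(
            TrackInput(
                track_index=index,
                s3_key=track["s3_key"],
                bucket_name=bucket_name,
                transcript_id=transcript_id,
            )
        )
        for index, track in enumerate(tracks)
    ]
    # Each result is expected to hold per-task outputs,
    # e.g. result["transcribe_track"]["words"] (shape assumed, not verified here).
    return await asyncio.gather(*runs)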
@@ -15,6 +15,7 @@ from celery.result import AsyncResult
 from reflector.conductor.client import ConductorClientManager
 from reflector.db.recordings import recordings_controller
 from reflector.db.transcripts import Transcript
+from reflector.hatchet.client import HatchetClientManager
 from reflector.logger import logger
 from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
 from reflector.pipelines.main_multitrack_pipeline import (
@@ -156,8 +157,47 @@ async def prepare_transcript_processing(
 
 def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult | None:
     if isinstance(config, MultitrackProcessingConfig):
-        # Start Conductor workflow if enabled
-        if settings.CONDUCTOR_ENABLED:
+        # Start durable workflow if enabled (Hatchet or Conductor)
+        durable_started = False
+
+        if settings.HATCHET_ENABLED:
+            import asyncio
+
+            async def _start_hatchet():
+                return await HatchetClientManager.start_workflow(
+                    workflow_name="DiarizationPipeline",
+                    input_data={
+                        "recording_id": config.recording_id,
+                        "room_name": None,  # Not available in reprocess path
+                        "tracks": [{"s3_key": k} for k in config.track_keys],
+                        "bucket_name": config.bucket_name,
+                        "transcript_id": config.transcript_id,
+                        "room_id": config.room_id,
+                    },
+                )
+
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:
+                loop = None
+
+            if loop and loop.is_running():
+                # Already in async context
+                import concurrent.futures
+
+                with concurrent.futures.ThreadPoolExecutor() as pool:
+                    workflow_id = pool.submit(asyncio.run, _start_hatchet()).result()
+            else:
+                workflow_id = asyncio.run(_start_hatchet())
+
+            logger.info(
+                "Started Hatchet workflow (reprocess)",
+                workflow_id=workflow_id,
+                transcript_id=config.transcript_id,
+            )
+            durable_started = True
+
+        elif settings.CONDUCTOR_ENABLED:
             workflow_id = ConductorClientManager.start_workflow(
                 name="diarization_pipeline",
                 version=1,
@@ -175,11 +215,13 @@ def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult | No
                 workflow_id=workflow_id,
                 transcript_id=config.transcript_id,
             )
+            durable_started = True
 
-            if not settings.CONDUCTOR_SHADOW_MODE:
-                return None  # Conductor-only, no Celery result
+        # If durable workflow started and not in shadow mode, skip Celery
+        if durable_started and not settings.DURABLE_WORKFLOW_SHADOW_MODE:
+            return None
 
-        # Celery pipeline (shadow mode or Conductor disabled)
+        # Celery pipeline (shadow mode or durable workflows disabled)
         return task_pipeline_multitrack_process.delay(
             transcript_id=config.transcript_id,
             bucket_name=config.bucket_name,
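Editor's note: the reprocess path above has to start an async Hatchet workflow from a caller that may or may not already be inside a running event loop; that is what the get_running_loop / ThreadPoolExecutor dance does. Distilled into a standalone helper for clarity (the helper name is mine; the logic mirrors the diff):

import asyncio
import concurrent.futures
from typing import Any, Coroutine


def run_coroutine_blocking(coro: Coroutine[Any, Any, Any]) -> Any:
    """Run a coroutine to completion whether or not an event loop is already running."""
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop and loop.is_running():
        # Inside a running loop: hand the coroutine to a worker thread with its own loop.
        with concurrent.futures.ThreadPoolExecutor() as pool:
            return pool.submit(asyncio.run, coro).result()
    # No loop running: asyncio.run is safe to call directly.
    return asyncio.run(coro)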
@@ -150,11 +150,34 @@ class Settings(BaseSettings):
     ZULIP_API_KEY: str | None = None
     ZULIP_BOT_EMAIL: str | None = None
 
+    # Durable workflow orchestration
+    # Provider: "hatchet" or "conductor" (or "none" to disable)
+    DURABLE_WORKFLOW_PROVIDER: str = "none"
+    DURABLE_WORKFLOW_SHADOW_MODE: bool = False  # Run both provider + Celery
+
     # Conductor workflow orchestration
     CONDUCTOR_SERVER_URL: str = "http://conductor:8080/api"
     CONDUCTOR_DEBUG: bool = False
-    CONDUCTOR_ENABLED: bool = False
-    CONDUCTOR_SHADOW_MODE: bool = False
+
+    # Hatchet workflow orchestration
+    HATCHET_CLIENT_TOKEN: str | None = None
+    HATCHET_CLIENT_TLS_STRATEGY: str = "none"  # none, tls, mtls
+    HATCHET_DEBUG: bool = False
+
+    @property
+    def CONDUCTOR_ENABLED(self) -> bool:
+        """Legacy compatibility: True if Conductor is the active provider."""
+        return self.DURABLE_WORKFLOW_PROVIDER == "conductor"
+
+    @property
+    def HATCHET_ENABLED(self) -> bool:
+        """True if Hatchet is the active provider."""
+        return self.DURABLE_WORKFLOW_PROVIDER == "hatchet"
+
+    @property
+    def CONDUCTOR_SHADOW_MODE(self) -> bool:
+        """Legacy compatibility for shadow mode."""
+        return self.DURABLE_WORKFLOW_SHADOW_MODE and self.CONDUCTOR_ENABLED
 
 
 settings = Settings()
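Editor's note: with this change the provider is selected by a single setting, and the old Conductor flags become derived properties. A small illustration of the resulting behavior, assuming DURABLE_WORKFLOW_PROVIDER is read from the environment by pydantic-settings and that any other required settings are already present:

import os

os.environ["DURABLE_WORKFLOW_PROVIDER"] = "hatchet"

from reflector.settings import Settings

settings = Settings()
assert settings.HATCHET_ENABLED is True
assert settings.CONDUCTOR_ENABLED is False
# Shadow mode is scoped to the active provider: with Hatchet selected,
# the legacy CONDUCTOR_SHADOW_MODE property stays False even if the shadow flag is set.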
server/reflector/views/hatchet.py (new file, 57 lines)
@@ -0,0 +1,57 @@
"""Hatchet health and status endpoints."""

from fastapi import APIRouter

from reflector.settings import settings

router = APIRouter(prefix="/hatchet", tags=["hatchet"])


@router.get("/health")
async def hatchet_health():
    """Check Hatchet connectivity and status."""
    if not settings.HATCHET_ENABLED:
        return {"status": "disabled", "connected": False}

    if not settings.HATCHET_CLIENT_TOKEN:
        return {
            "status": "unhealthy",
            "connected": False,
            "error": "HATCHET_CLIENT_TOKEN not configured",
        }

    try:
        from reflector.hatchet.client import HatchetClientManager

        # Get client to verify token is valid
        client = HatchetClientManager.get_client()

        # Try to get the client's gRPC connection status.
        # The SDK doesn't have a simple health check, so we just verify we can create the client.
        if client is not None:
            return {"status": "healthy", "connected": True}
        else:
            return {
                "status": "unhealthy",
                "connected": False,
                "error": "Failed to create client",
            }
    except ValueError as e:
        return {"status": "unhealthy", "connected": False, "error": str(e)}
    except Exception as e:
        return {"status": "unhealthy", "connected": False, "error": str(e)}


@router.get("/workflow/{workflow_run_id}")
async def get_workflow_status(workflow_run_id: str):
    """Get the status of a workflow run."""
    if not settings.HATCHET_ENABLED:
        return {"error": "Hatchet is disabled"}

    try:
        from reflector.hatchet.client import HatchetClientManager

        status = await HatchetClientManager.get_workflow_status(workflow_run_id)
        return status
    except Exception as e:
        return {"error": str(e)}
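Editor's note: once the router above is included in the main FastAPI application (the include_router call is not part of this hunk), the health check can be probed directly. A short sketch; the base URL is an assumption about the local deployment:

import httpx

# Hypothetical local API address; substitute the real host/port.
response = httpx.get("http://localhost:8000/hatchet/health")
print(response.json())
# -> {"status": "disabled", "connected": False} when DURABLE_WORKFLOW_PROVIDER != "hatchet"
# -> {"status": "healthy", "connected": True} once a valid HATCHET_CLIENT_TOKEN is configured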
@@ -286,8 +286,34 @@ async def _process_multitrack_recording_inner(
        room_id=room.id,
    )
 
-    # Start Conductor workflow if enabled
-    if settings.CONDUCTOR_ENABLED:
+    # Start durable workflow if enabled (Hatchet or Conductor)
+    durable_started = False
+
+    if settings.HATCHET_ENABLED:
+        from reflector.hatchet.client import HatchetClientManager  # noqa: PLC0415
+
+        workflow_id = await HatchetClientManager.start_workflow(
+            workflow_name="DiarizationPipeline",
+            input_data={
+                "recording_id": recording_id,
+                "room_name": daily_room_name,
+                "tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
+                "bucket_name": bucket_name,
+                "transcript_id": transcript.id,
+                "room_id": room.id,
+            },
+        )
+        logger.info(
+            "Started Hatchet workflow",
+            workflow_id=workflow_id,
+            transcript_id=transcript.id,
+        )
+
+        # Store workflow_id on recording for status tracking
+        await recordings_controller.update(recording, {"workflow_id": workflow_id})
+        durable_started = True
+
+    elif settings.CONDUCTOR_ENABLED:
         from reflector.conductor.client import ConductorClientManager  # noqa: PLC0415
 
         workflow_id = ConductorClientManager.start_workflow(
@@ -310,11 +336,13 @@ async def _process_multitrack_recording_inner(
 
         # Store workflow_id on recording for status tracking
         await recordings_controller.update(recording, {"workflow_id": workflow_id})
+        durable_started = True
 
-    if not settings.CONDUCTOR_SHADOW_MODE:
-        return  # Don't trigger Celery
+    # If durable workflow started and not in shadow mode, skip Celery
+    if durable_started and not settings.DURABLE_WORKFLOW_SHADOW_MODE:
+        return
 
-    # Celery pipeline (runs when Conductor disabled OR in shadow mode)
+    # Celery pipeline (runs when durable workflows disabled OR in shadow mode)
     task_pipeline_multitrack_process.delay(
         transcript_id=transcript.id,
         bucket_name=bucket_name,
@@ -9,6 +9,8 @@ elif [ "${ENTRYPOINT}" = "beat" ]; then
     uv run celery -A reflector.worker.app beat --loglevel=info
 elif [ "${ENTRYPOINT}" = "conductor-worker" ]; then
     uv run python -m reflector.conductor.run_workers
+elif [ "${ENTRYPOINT}" = "hatchet-worker" ]; then
+    uv run python -m reflector.hatchet.run_workers
 else
     echo "Unknown command"
 fi
server/uv.lock (generated, 87 lines)
@@ -1218,6 +1218,70 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/bf/c4/a839fcc28bebfa72925d9121c4d39398f77f95bcba0cf26c972a0cfb1de7/griffe-1.8.0-py3-none-any.whl", hash = "sha256:110faa744b2c5c84dd432f4fa9aa3b14805dd9519777dd55e8db214320593b02", size = 132487 },
 ]
 
+[[package]]
+name = "grpcio"
+version = "1.76.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/00/8163a1beeb6971f66b4bbe6ac9457b97948beba8dd2fc8e1281dce7f79ec/grpcio-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a", size = 5843567 },
+    { url = "https://files.pythonhosted.org/packages/10/c1/934202f5cf335e6d852530ce14ddb0fef21be612ba9ecbbcbd4d748ca32d/grpcio-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c", size = 11848017 },
+    { url = "https://files.pythonhosted.org/packages/11/0b/8dec16b1863d74af6eb3543928600ec2195af49ca58b16334972f6775663/grpcio-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465", size = 6412027 },
+    { url = "https://files.pythonhosted.org/packages/d7/64/7b9e6e7ab910bea9d46f2c090380bab274a0b91fb0a2fe9b0cd399fffa12/grpcio-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48", size = 7075913 },
+    { url = "https://files.pythonhosted.org/packages/68/86/093c46e9546073cefa789bd76d44c5cb2abc824ca62af0c18be590ff13ba/grpcio-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da", size = 6615417 },
+    { url = "https://files.pythonhosted.org/packages/f7/b6/5709a3a68500a9c03da6fb71740dcdd5ef245e39266461a03f31a57036d8/grpcio-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397", size = 7199683 },
+    { url = "https://files.pythonhosted.org/packages/91/d3/4b1f2bf16ed52ce0b508161df3a2d186e4935379a159a834cb4a7d687429/grpcio-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749", size = 8163109 },
+    { url = "https://files.pythonhosted.org/packages/5c/61/d9043f95f5f4cf085ac5dd6137b469d41befb04bd80280952ffa2a4c3f12/grpcio-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00", size = 7626676 },
+    { url = "https://files.pythonhosted.org/packages/36/95/fd9a5152ca02d8881e4dd419cdd790e11805979f499a2e5b96488b85cf27/grpcio-1.76.0-cp311-cp311-win32.whl", hash = "sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054", size = 3997688 },
+    { url = "https://files.pythonhosted.org/packages/60/9c/5c359c8d4c9176cfa3c61ecd4efe5affe1f38d9bae81e81ac7186b4c9cc8/grpcio-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d", size = 4709315 },
+    { url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718 },
+    { url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627 },
+    { url = "https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167 },
+    { url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267 },
+    { url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963 },
+    { url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484 },
+    { url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777 },
+    { url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014 },
+    { url = "https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750 },
+    { url = "https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003 },
+]
+
+[[package]]
+name = "grpcio-tools"
+version = "1.76.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "grpcio" },
+    { name = "protobuf" },
+    { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a0/77/17d60d636ccd86a0db0eccc24d02967bbc3eea86b9db7324b04507ebaa40/grpcio_tools-1.76.0.tar.gz", hash = "sha256:ce80169b5e6adf3e8302f3ebb6cb0c3a9f08089133abca4b76ad67f751f5ad88", size = 5390807 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/73/d1/efbeed1a864c846228c0a3b322e7a2d6545f025e35246aebf96496a36004/grpcio_tools-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6480f6af6833850a85cca1c6b435ef4ffd2ac8e88ef683b4065233827950243", size = 2545931 },
+    { url = "https://files.pythonhosted.org/packages/af/8e/f257c0f565d9d44658301238b01a9353bc6f3b272bb4191faacae042579d/grpcio_tools-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c7c23fe1dc09818e16a48853477806ad77dd628b33996f78c05a293065f8210c", size = 5844794 },
+    { url = "https://files.pythonhosted.org/packages/c7/c0/6c1e89c67356cb20e19ed670c5099b13e40fd678cac584c778f931666a86/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fcdce7f7770ff052cd4e60161764b0b3498c909bde69138f8bd2e7b24a3ecd8f", size = 2591772 },
+    { url = "https://files.pythonhosted.org/packages/c0/10/5f33aa7bc3ddaad0cfd2f4e950ac4f1a310e8d0c7b1358622a581e8b7a2f/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b598fdcebffa931c7da5c9e90b5805fff7e9bc6cf238319358a1b85704c57d33", size = 2905140 },
+    { url = "https://files.pythonhosted.org/packages/f4/3e/23e3a52a77368f47188ed83c34eb53866d3ce0f73835b2f6764844ae89eb/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6a9818ff884796b12dcf8db32126e40ec1098cacf5697f27af9cfccfca1c1fae", size = 2656475 },
+    { url = "https://files.pythonhosted.org/packages/51/85/a74ae87ec7dbd3d2243881f5c548215aed1148660df7945be3a125ba9a21/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:105e53435b2eed3961da543db44a2a34479d98d18ea248219856f30a0ca4646b", size = 3106158 },
+    { url = "https://files.pythonhosted.org/packages/54/d5/a6ed1e5823bc5d55a1eb93e0c14ccee0b75951f914832ab51fb64d522a0f/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:454a1232c7f99410d92fa9923c7851fd4cdaf657ee194eac73ea1fe21b406d6e", size = 3654980 },
+    { url = "https://files.pythonhosted.org/packages/f9/29/c05d5501ba156a242079ef71d073116d2509c195b5e5e74c545f0a3a3a69/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ca9ccf667afc0268d45ab202af4556c72e57ea36ebddc93535e1a25cbd4f8aba", size = 3322658 },
+    { url = "https://files.pythonhosted.org/packages/02/b6/ee0317b91da19a7537d93c4161cbc2a45a165c8893209b0bbd470d830ffa/grpcio_tools-1.76.0-cp311-cp311-win32.whl", hash = "sha256:a83c87513b708228b4cad7619311daba65b40937745103cadca3db94a6472d9c", size = 993837 },
+    { url = "https://files.pythonhosted.org/packages/81/63/9623cadf0406b264737f16d4ed273bb2d65001d87fbd803b565c45d665d1/grpcio_tools-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:2ce5e87ec71f2e4041dce4351f2a8e3b713e3bca6b54c69c3fbc6c7ad1f4c386", size = 1158634 },
+    { url = "https://files.pythonhosted.org/packages/4f/ca/a931c1439cabfe305c9afd07e233150cd0565aa062c20d1ee412ed188852/grpcio_tools-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:4ad555b8647de1ebaffb25170249f89057721ffb74f7da96834a07b4855bb46a", size = 2546852 },
+    { url = "https://files.pythonhosted.org/packages/4c/07/935cfbb7dccd602723482a86d43fbd992f91e9867bca0056a1e9f348473e/grpcio_tools-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:243af7c8fc7ff22a40a42eb8e0f6f66963c1920b75aae2a2ec503a9c3c8b31c1", size = 5841777 },
+    { url = "https://files.pythonhosted.org/packages/e4/92/8fcb5acebdccb647e0fa3f002576480459f6cf81e79692d7b3c4d6e29605/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8207b890f423142cc0025d041fb058f7286318df6a049565c27869d73534228b", size = 2594004 },
+    { url = "https://files.pythonhosted.org/packages/9d/ea/64838e8113b7bfd4842b15c815a7354cb63242fdce9d6648d894b5d50897/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3dafa34c2626a6691d103877e8a145f54c34cf6530975f695b396ed2fc5c98f8", size = 2905563 },
+    { url = "https://files.pythonhosted.org/packages/a6/d6/53798827d821098219e58518b6db52161ce4985620850aa74ce3795da8a7/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:30f1d2dda6ece285b3d9084e94f66fa721ebdba14ae76b2bc4c581c8a166535c", size = 2656936 },
+    { url = "https://files.pythonhosted.org/packages/89/a3/d9c1cefc46a790eec520fe4e70e87279abb01a58b1a3b74cf93f62b824a2/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a889af059dc6dbb82d7b417aa581601316e364fe12eb54c1b8d95311ea50916d", size = 3109811 },
+    { url = "https://files.pythonhosted.org/packages/50/75/5997752644b73b5d59377d333a51c8a916606df077f5a487853e37dca289/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c3f2c3c44c56eb5d479ab178f0174595d0a974c37dade442f05bb73dfec02f31", size = 3658786 },
+    { url = "https://files.pythonhosted.org/packages/84/47/dcf8380df4bd7931ffba32fc6adc2de635b6569ca27fdec7121733797062/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:479ce02dff684046f909a487d452a83a96b4231f7c70a3b218a075d54e951f56", size = 3325144 },
+    { url = "https://files.pythonhosted.org/packages/04/88/ea3e5fdb874d8c2d04488e4b9d05056537fba70915593f0c283ac77df188/grpcio_tools-1.76.0-cp312-cp312-win32.whl", hash = "sha256:9ba4bb539936642a44418b38ee6c3e8823c037699e2cb282bd8a44d76a4be833", size = 993523 },
+    { url = "https://files.pythonhosted.org/packages/de/b1/ce7d59d147675ec191a55816be46bc47a343b5ff07279eef5817c09cc53e/grpcio_tools-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:0cd489016766b05f9ed8a6b6596004b62c57d323f49593eac84add032a6d43f7", size = 1158493 },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"
@@ -1227,6 +1291,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
 ]
 
+[[package]]
+name = "hatchet-sdk"
+version = "1.21.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp" },
+    { name = "grpcio" },
+    { name = "grpcio-tools" },
+    { name = "prometheus-client" },
+    { name = "protobuf" },
+    { name = "pydantic" },
+    { name = "pydantic-settings" },
+    { name = "python-dateutil" },
+    { name = "tenacity" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7c/df/75dd02e1dc6b99f7151a57f084876c50f739ad4d643b060078f65d51d717/hatchet_sdk-1.21.6.tar.gz", hash = "sha256:b65741324ad721ce57f5fe3f960e2942c4ac2ceec6ca483dd35f84137ff7c46c", size = 219345 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/86/e4cd7928bcabd33c634c33d4e878e2454e03f97c87b72947c7ff5762d813/hatchet_sdk-1.21.6-py3-none-any.whl", hash = "sha256:589fba9104a6517e1ba677b9865fa0a20e221863a8c2a2724051198994c11399", size = 529167 },
+]
+
 [[package]]
 name = "hf-xet"
 version = "1.1.5"
@@ -3150,6 +3235,7 @@ dependencies = [
     { name = "databases", extra = ["aiosqlite", "asyncpg"] },
     { name = "fastapi", extra = ["standard"] },
     { name = "fastapi-pagination" },
+    { name = "hatchet-sdk" },
     { name = "httpx" },
     { name = "icalendar" },
     { name = "jsonschema" },
@@ -3227,6 +3313,7 @@ requires-dist = [
     { name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
     { name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },
     { name = "fastapi-pagination", specifier = ">=0.12.6" },
+    { name = "hatchet-sdk", specifier = ">=0.47.0" },
    { name = "httpx", specifier = ">=0.24.1" },
     { name = "icalendar", specifier = ">=6.0.0" },
     { name = "jsonschema", specifier = ">=4.23.0" },