mirror of https://github.com/Monadical-SAS/reflector.git

self-review (no-mistakes)

TASKS.md (115 lines deleted)

@@ -1,115 +0,0 @@
# Durable Workflow Migration Tasks

This document defines atomic, isolated work items for migrating the Daily.co multitrack diarization pipeline from Celery to durable workflow orchestration using **Hatchet**.

---

## Provider Selection

```bash
# .env
DURABLE_WORKFLOW_PROVIDER=none # Celery only (default)
DURABLE_WORKFLOW_PROVIDER=hatchet # Use Hatchet
DURABLE_WORKFLOW_SHADOW_MODE=true # Run both Hatchet + Celery (for comparison)
```
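
A minimal sketch of how dispatch code might consume these flags, assuming the settings object exposes them under the same names; `dispatch_celery` and `dispatch_hatchet` are hypothetical stand-ins for the real trigger functions:

```python
from reflector.settings import settings  # assumed to mirror the env vars above


def dispatch(recording_id: str) -> None:
    """Route a recording to Celery, Hatchet, or both (shadow mode)."""
    if settings.DURABLE_WORKFLOW_SHADOW_MODE:
        # Shadow mode: run both pipelines so outputs can be compared.
        dispatch_celery(recording_id)   # hypothetical Celery trigger
        dispatch_hatchet(recording_id)  # hypothetical Hatchet trigger
    elif settings.DURABLE_WORKFLOW_PROVIDER == "hatchet":
        dispatch_hatchet(recording_id)
    else:  # "none" -> Celery only (default)
        dispatch_celery(recording_id)
```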

---

## Task Index

| ID | Title | Status |
|----|-------|--------|
| INFRA-001 | Add container to docker-compose | Done |
| INFRA-002 | Create Python client wrapper | Done |
| INFRA-003 | Add environment configuration | Done |
| TASK-001 | Create workflow definition | Done |
| TASK-002 | get_recording task | Done |
| TASK-003 | get_participants task | Done |
| TASK-004 | pad_track task | Done |
| TASK-005 | mixdown_tracks task | Done |
| TASK-006 | generate_waveform task | Done |
| TASK-007 | transcribe_track task | Done |
| TASK-008 | merge_transcripts task | Done (in process_tracks) |
| TASK-009 | detect_topics task | Done |
| TASK-010 | generate_title task | Done |
| TASK-011 | generate_summary task | Done |
| TASK-012 | finalize task | Done |
| TASK-013 | cleanup_consent task | Done |
| TASK-014 | post_zulip task | Done |
| TASK-015 | send_webhook task | Done |
| EVENT-001 | Progress WebSocket events | Done |
| INTEG-001 | Pipeline trigger integration | Done |
| SHADOW-001 | Shadow mode toggle | Done |
| TEST-001 | Integration tests | Pending |
| TEST-002 | E2E workflow test | Pending |
| CUTOVER-001 | Production cutover | Pending |
| CLEANUP-001 | Remove Celery code | Pending |

---

## File Structure

```
server/reflector/hatchet/
├── client.py                    # SDK wrapper
├── progress.py                  # WebSocket progress emission
├── run_workers.py               # Worker startup
└── workflows/
    ├── diarization_pipeline.py  # Main workflow with all tasks
    └── track_processing.py      # Child workflow (pad + transcribe)
```

---

## Remaining Work

### TEST-001: Integration Tests
- [ ] Test each task with mocked external services
- [ ] Test error handling and retries

### TEST-002: E2E Workflow Test
- [ ] Complete workflow run with real Daily.co recording
- [ ] Verify output matches Celery pipeline
- [ ] Performance comparison

### CUTOVER-001: Production Cutover
- [ ] Deploy with `DURABLE_WORKFLOW_PROVIDER=hatchet`
- [ ] Monitor for failures
- [ ] Compare results with shadow mode if needed

### CLEANUP-001: Remove Celery Code
- [ ] Remove `main_multitrack_pipeline.py`
- [ ] Remove Celery task triggers
- [ ] Update documentation

---

## Known Issues

### Hatchet
- See `HATCHET_LLM_OBSERVATIONS.md` for debugging notes
- SDK v1.21+ API changes (breaking; see the sketch after this list)
- JWT token Docker networking issues
- Worker appears hung without debug mode
- Workflow replay is version-locked (use `--force` to run latest code)
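
The v1.21+ breaking change is the one visible in this commit's `client.py` diff further below: `aio_create` now returns a `V1WorkflowRunDetails` object, so the run ID must be read from `run.metadata.id`. A minimal sketch of the new shape (variable names are illustrative):

```python
# SDK v1.21+: aio_create returns V1WorkflowRunDetails, not a bare run ID.
result = await client.runs.aio_create(workflow_name, input_data)
run_id = result.run.metadata.id  # pre-1.21 code that expected an ID directly breaks here
```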

---

## Quick Start

### Hatchet
```bash
# Start infrastructure
docker compose up -d hatchet hatchet-worker

# Workers auto-register on startup
```

### Trigger Workflow
```bash
# Set provider in .env
DURABLE_WORKFLOW_PROVIDER=hatchet

# Process a Daily.co recording via webhook or API
# The pipeline trigger automatically uses the configured provider
```
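
The workflow can also be started programmatically. A sketch based on the dispatch code further below in this commit (the diff elides some `input_data` keys, and the surrounding variables are assumed to be in scope):

```python
from reflector.hatchet.client import HatchetClientManager

# Mirrors the call in dispatch_transcript_processing below; input_data
# carries additional keys that are elided in the diff.
workflow_run_id = await HatchetClientManager.start_workflow(
    workflow_name="DiarizationPipeline",
    input_data={
        "transcript_id": transcript_id,
        "room_id": room_id,
    },
    additional_metadata={
        # Surfaces in the Hatchet dashboard for filtering runs.
        "transcript_id": transcript_id,
        "recording_id": recording_id,
        "daily_recording_id": recording_id,
    },
)
```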

server/reflector/hatchet/client.py

@@ -1,37 +1,71 @@
"""Hatchet Python client wrapper."""
"""Hatchet Python client wrapper.

from hatchet_sdk import Hatchet
Uses singleton pattern because:
1. Hatchet client maintains persistent gRPC connections for workflow registration
2. Creating multiple clients would cause registration conflicts and resource leaks
3. The SDK is designed for a single client instance per process
4. Tests use `HatchetClientManager.reset()` to isolate state between tests
"""

import logging

from hatchet_sdk import ClientConfig, Hatchet

from reflector.logger import logger
from reflector.settings import settings


class HatchetClientManager:
    """Singleton manager for Hatchet client connections."""
    """Singleton manager for Hatchet client connections.

    Singleton pattern is used because Hatchet SDK maintains persistent gRPC
    connections for workflow registration, and multiple clients would conflict.

    For testing, use the `reset()` method or the `reset_hatchet_client` fixture
    to ensure test isolation.
    """

    _instance: Hatchet | None = None

    @classmethod
    def get_client(cls) -> Hatchet:
        """Get or create the Hatchet client."""
        """Get or create the Hatchet client.

        Configures root logger so all logger.info() calls in workflows
        appear in the Hatchet dashboard logs.
        """
        if cls._instance is None:
            if not settings.HATCHET_CLIENT_TOKEN:
                raise ValueError("HATCHET_CLIENT_TOKEN must be set")

            # Pass root logger to Hatchet so workflow logs appear in dashboard
            root_logger = logging.getLogger()
            cls._instance = Hatchet(
                debug=settings.HATCHET_DEBUG,
                config=ClientConfig(logger=root_logger),
            )
        return cls._instance

    @classmethod
    async def start_workflow(
        cls, workflow_name: str, input_data: dict, key: str | None = None
        cls,
        workflow_name: str,
        input_data: dict,
        additional_metadata: dict | None = None,
    ) -> str:
        """Start a workflow and return the workflow run ID."""
        """Start a workflow and return the workflow run ID.

        Args:
            workflow_name: Name of the workflow to trigger.
            input_data: Input data for the workflow run.
            additional_metadata: Optional metadata for filtering in dashboard
                (e.g., transcript_id, recording_id).
        """
        client = cls.get_client()
        result = await client.runs.aio_create(
            workflow_name,
            input_data,
            additional_metadata=additional_metadata,
        )
        # SDK v1.21+ returns V1WorkflowRunDetails with run.metadata.id
        return result.run.metadata.id

File diff suppressed because it is too large.

server/reflector/hatchet/workflows/models.py (new file, 123 lines)

@@ -0,0 +1,123 @@
"""
Pydantic models for Hatchet workflow task return types.

Provides static typing for all task outputs, enabling type checking
and better IDE support.
"""

from typing import Any

from pydantic import BaseModel

# ============================================================================
# Track Processing Results (track_processing.py)
# ============================================================================


class PadTrackResult(BaseModel):
    """Result from pad_track task."""

    padded_url: str
    size: int
    track_index: int


class TranscribeTrackResult(BaseModel):
    """Result from transcribe_track task."""

    words: list[dict[str, Any]]
    track_index: int


# ============================================================================
# Diarization Pipeline Results (diarization_pipeline.py)
# ============================================================================


class RecordingResult(BaseModel):
    """Result from get_recording task."""

    id: str | None
    mtg_session_id: str | None
    room_name: str | None
    duration: float


class ParticipantsResult(BaseModel):
    """Result from get_participants task."""

    participants: list[dict[str, Any]]
    num_tracks: int
    source_language: str
    target_language: str


class ProcessTracksResult(BaseModel):
    """Result from process_tracks task."""

    all_words: list[dict[str, Any]]
    padded_urls: list[str | None]
    word_count: int
    num_tracks: int
    target_language: str
    created_padded_files: list[str]


class MixdownResult(BaseModel):
    """Result from mixdown_tracks task."""

    audio_key: str
    duration: float
    tracks_mixed: int


class WaveformResult(BaseModel):
    """Result from generate_waveform task."""

    waveform_generated: bool


class TopicsResult(BaseModel):
    """Result from detect_topics task."""

    topics: list[dict[str, Any]]


class TitleResult(BaseModel):
    """Result from generate_title task."""

    title: str | None


class SummaryResult(BaseModel):
    """Result from generate_summary task."""

    summary: str | None
    short_summary: str | None


class FinalizeResult(BaseModel):
    """Result from finalize task."""

    status: str


class ConsentResult(BaseModel):
    """Result from cleanup_consent task."""

    consent_checked: bool


class ZulipResult(BaseModel):
    """Result from post_zulip task."""

    zulip_message_id: int | None = None
    skipped: bool = False


class WebhookResult(BaseModel):
    """Result from send_webhook task."""

    webhook_sent: bool
    skipped: bool = False
    response_code: int | None = None

server/reflector/hatchet/workflows/track_processing.py

@@ -18,8 +18,17 @@ from pydantic import BaseModel

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress_async
from reflector.hatchet.workflows.models import PadTrackResult, TranscribeTrackResult
from reflector.logger import logger


def _to_dict(output) -> dict:
    """Convert task output to dict, handling both dict and Pydantic model returns."""
    if isinstance(output, dict):
        return output
    return output.model_dump()


# Audio constants matching existing pipeline
OPUS_STANDARD_SAMPLE_RATE = 48000
OPUS_DEFAULT_BIT_RATE = 64000
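
The `_to_dict` helper above lets downstream code consume both legacy dict returns and the new Pydantic models uniformly during the migration. A quick illustration (field values are made up; `PadTrackResult` comes from the new models.py above):

```python
# Both return shapes normalize to the same dict.
legacy = {"padded_url": "https://example.test/track0.webm", "size": 1024, "track_index": 0}
typed = PadTrackResult(padded_url="https://example.test/track0.webm", size=1024, track_index=0)

assert _to_dict(legacy) == _to_dict(typed)
assert _to_dict(typed)["track_index"] == 0
```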

@@ -161,7 +170,7 @@ def _apply_audio_padding_to_file(


@track_workflow.task(execution_timeout=timedelta(seconds=300), retries=3)
async def pad_track(input: TrackInput, ctx: Context) -> dict:
async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
    """Pad single audio track with silence for alignment.

    Extracts stream.start_time from WebM container metadata and applies

@@ -213,11 +222,11 @@ async def pad_track(input: TrackInput, ctx: Context) -> dict:
        await emit_progress_async(
            input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
        )
        return {
            "padded_url": source_url,
            "size": 0,
            "track_index": input.track_index,
        }
        return PadTrackResult(
            padded_url=source_url,
            size=0,
            track_index=input.track_index,
        )

        # Create temp file for padded output
        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:

@@ -265,11 +274,11 @@ async def pad_track(input: TrackInput, ctx: Context) -> dict:
            input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
        )

        return {
            "padded_url": padded_url,
            "size": file_size,
            "track_index": input.track_index,
        }
        return PadTrackResult(
            padded_url=padded_url,
            size=file_size,
            track_index=input.track_index,
        )

    except Exception as e:
        logger.error("[Hatchet] pad_track failed", error=str(e), exc_info=True)

@@ -282,7 +291,7 @@ async def pad_track(input: TrackInput, ctx: Context) -> dict:

@track_workflow.task(
    parents=[pad_track], execution_timeout=timedelta(seconds=600), retries=3
)
async def transcribe_track(input: TrackInput, ctx: Context) -> dict:
async def transcribe_track(input: TrackInput, ctx: Context) -> TranscribeTrackResult:
    """Transcribe audio track using GPU (Modal.com) or local Whisper."""
    logger.info(
        "[Hatchet] transcribe_track",

@@ -295,7 +304,7 @@ async def transcribe_track(input: TrackInput, ctx: Context) -> dict:
    )

    try:
        pad_result = ctx.task_output(pad_track)
        pad_result = _to_dict(ctx.task_output(pad_track))
        audio_url = pad_result.get("padded_url")

        if not audio_url:

@@ -324,10 +333,10 @@ async def transcribe_track(input: TrackInput, ctx: Context) -> dict:
            input.transcript_id, "transcribe_track", "completed", ctx.workflow_run_id
        )

        return {
            "words": words,
            "track_index": input.track_index,
        }
        return TranscribeTrackResult(
            words=words,
            track_index=input.track_index,
        )

    except Exception as e:
        logger.error("[Hatchet] transcribe_track failed", error=str(e), exc_info=True)

@@ -224,6 +224,26 @@ def dispatch_transcript_processing(
        transcript, {"workflow_run_id": None}
    )

    # Re-fetch transcript to check for concurrent dispatch (TOCTOU protection)
    transcript = await transcripts_controller.get_by_id(
        config.transcript_id
    )
    if transcript and transcript.workflow_run_id:
        # Another process started a workflow between validation and now
        try:
            status = await HatchetClientManager.get_workflow_run_status(
                transcript.workflow_run_id
            )
            if "RUNNING" in status or "QUEUED" in status:
                logger.info(
                    "Concurrent workflow detected, skipping dispatch",
                    workflow_id=transcript.workflow_run_id,
                )
                return transcript.workflow_run_id
        except Exception:
            # If we can't get status, proceed with new workflow
            pass

    workflow_id = await HatchetClientManager.start_workflow(
        workflow_name="DiarizationPipeline",
        input_data={

@@ -234,6 +254,11 @@ def dispatch_transcript_processing(
            "transcript_id": config.transcript_id,
            "room_id": config.room_id,
        },
        additional_metadata={
            "transcript_id": config.transcript_id,
            "recording_id": config.recording_id,
            "daily_recording_id": config.recording_id,
        },
    )

    if transcript:

@@ -302,6 +302,11 @@ async def _process_multitrack_recording_inner(
            "transcript_id": transcript.id,
            "room_id": room.id,
        },
        additional_metadata={
            "transcript_id": transcript.id,
            "recording_id": recording_id,
            "daily_recording_id": recording_id,
        },
    )
    logger.info(
        "Started Hatchet workflow",

conftest.py

@@ -527,6 +527,22 @@ def fake_mp3_upload():
    yield


@pytest.fixture(autouse=True)
def reset_hatchet_client():
    """Reset HatchetClientManager singleton before and after each test.

    This ensures test isolation - each test starts with a fresh client state.
    The fixture is autouse=True so it applies to all tests automatically.
    """
    from reflector.hatchet.client import HatchetClientManager

    # Reset before test
    HatchetClientManager.reset()
    yield
    # Reset after test to clean up
    HatchetClientManager.reset()


@pytest.fixture
async def fake_transcript_with_topics(tmpdir, client):
    import shutil

@@ -2,6 +2,9 @@
Tests for HatchetClientManager error handling and validation.

Only tests that catch real bugs - not mock verification tests.

Note: The `reset_hatchet_client` fixture (autouse=True in conftest.py)
automatically resets the singleton before and after each test.
"""

from unittest.mock import AsyncMock, MagicMock, patch

@@ -18,8 +21,6 @@ async def test_hatchet_client_can_replay_handles_exception():
    """
    from reflector.hatchet.client import HatchetClientManager

    HatchetClientManager._instance = None

    with patch("reflector.hatchet.client.settings") as mock_settings:
        mock_settings.HATCHET_CLIENT_TOKEN = "test-token"
        mock_settings.HATCHET_DEBUG = False

@@ -37,8 +38,6 @@
        # Should return False on error (workflow might be gone)
        assert can_replay is False

    HatchetClientManager._instance = None


def test_hatchet_client_raises_without_token():
    """Test that get_client raises ValueError without token.

@@ -48,12 +47,8 @@
    """
    from reflector.hatchet.client import HatchetClientManager

    HatchetClientManager._instance = None

    with patch("reflector.hatchet.client.settings") as mock_settings:
        mock_settings.HATCHET_CLIENT_TOKEN = None

        with pytest.raises(ValueError, match="HATCHET_CLIENT_TOKEN must be set"):
            HatchetClientManager.get_client()

    HatchetClientManager._instance = None