dry hatched with celery - 2

Igor Loskutov
2025-12-17 15:11:33 -05:00
parent d683a83906
commit f7f2957fc9
6 changed files with 426 additions and 6 deletions


@@ -0,0 +1,17 @@
"""
Hatchet workflow utilities.
Shared helpers for Hatchet task implementations.
"""
def to_dict(output) -> dict:
"""Convert task output to dict, handling both dict and Pydantic model returns.
Hatchet SDK can return task outputs as either raw dicts or Pydantic models
depending on serialization context. This normalizes the output for consistent
downstream processing.
"""
if isinstance(output, dict):
return output
return output.model_dump()
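
A minimal usage sketch for to_dict (ExampleOutput is a hypothetical stand-in for a task's Pydantic return model; only to_dict itself comes from this commit):

from pydantic import BaseModel

class ExampleOutput(BaseModel):  # hypothetical task output model
    transcript_id: str
    status: str

# Both shapes normalize to the same plain dict
assert to_dict({"transcript_id": "t1", "status": "done"}) == to_dict(
    ExampleOutput(transcript_id="t1", status="done")
)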


@@ -166,7 +166,6 @@ class SummaryBuilder:
         self.model_name: str = llm.model_name
         self.logger = logger or structlog.get_logger()
         self.participant_instructions: str | None = None
-        self._logged_participant_instructions: bool = False
         if filename:
             self.read_transcript_from_file(filename)
@@ -209,9 +208,7 @@ class SummaryBuilder:
     def _enhance_prompt_with_participants(self, prompt: str) -> str:
         """Add participant instructions to any prompt if participants are known."""
         if self.participant_instructions:
-            if not self._logged_participant_instructions:
-                self.logger.debug("Adding participant instructions to prompts")
-                self._logged_participant_instructions = True
+            self.logger.debug("Adding participant instructions to prompts")
             return f"{prompt}\n\n{self.participant_instructions}"
         return prompt


@@ -0,0 +1,221 @@
"""
Audio track mixdown utilities.
Shared PyAV-based functions for mixing multiple audio tracks into a single output.
Used by both Hatchet workflows and Celery pipelines.
"""
from fractions import Fraction
import av
from av.audio.resampler import AudioResampler
def detect_sample_rate_from_tracks(track_urls: list[str], logger=None) -> int | None:
"""Detect sample rate from first decodable audio frame.
Args:
track_urls: List of URLs to audio files (S3 presigned or local)
logger: Optional logger instance
Returns:
Sample rate in Hz, or None if no decodable frames found
"""
for url in track_urls:
if not url:
continue
container = None
try:
container = av.open(url)
for frame in container.decode(audio=0):
return frame.sample_rate
except Exception:
continue
finally:
if container is not None:
container.close()
    return None


async def mixdown_tracks_pyav(
track_urls: list[str],
writer,
target_sample_rate: int,
offsets_seconds: list[float] | None = None,
logger=None,
) -> None:
"""Multi-track mixdown using PyAV filter graph (amix).
Builds a filter graph: N abuffer -> optional adelay -> amix -> aformat -> sink
Reads from S3 presigned URLs or local files, pushes mixed frames to writer.
Args:
track_urls: List of URLs to audio tracks (S3 presigned or local)
writer: AudioFileWriterProcessor instance with async push() method
target_sample_rate: Sample rate for output (Hz)
offsets_seconds: Optional per-track delays in seconds for alignment.
If provided, must have same length as track_urls. Delays are relative
to the minimum offset (earliest track has delay=0).
logger: Optional logger instance
Raises:
ValueError: If no valid tracks or containers can be opened
"""
valid_track_urls = [url for url in track_urls if url]
if not valid_track_urls:
if logger:
logger.error("Mixdown failed - no valid track URLs provided")
raise ValueError("Mixdown failed: No valid track URLs")
# Calculate per-input delays if offsets provided
input_offsets_seconds = None
if offsets_seconds is not None:
input_offsets_seconds = [
offsets_seconds[i] for i, url in enumerate(track_urls) if url
]
# Build PyAV filter graph:
# N abuffer (s32/stereo)
# -> optional adelay per input (for alignment)
# -> amix (s32)
# -> aformat(s16)
# -> sink
graph = av.filter.Graph()
inputs = []
for idx, url in enumerate(valid_track_urls):
args = (
f"time_base=1/{target_sample_rate}:"
f"sample_rate={target_sample_rate}:"
f"sample_fmt=s32:"
f"channel_layout=stereo"
)
in_ctx = graph.add("abuffer", args=args, name=f"in{idx}")
inputs.append(in_ctx)
if not inputs:
if logger:
logger.error("Mixdown failed - no valid inputs for graph")
raise ValueError("Mixdown failed: No valid inputs for filter graph")
mixer = graph.add("amix", args=f"inputs={len(inputs)}:normalize=0", name="mix")
fmt = graph.add(
"aformat",
args=f"sample_fmts=s32:channel_layouts=stereo:sample_rates={target_sample_rate}",
name="fmt",
)
sink = graph.add("abuffersink", name="out")
# Optional per-input delay before mixing
delays_ms: list[int] = []
if input_offsets_seconds is not None:
base = min(input_offsets_seconds) if input_offsets_seconds else 0.0
delays_ms = [
max(0, int(round((o - base) * 1000))) for o in input_offsets_seconds
]
else:
delays_ms = [0 for _ in inputs]
for idx, in_ctx in enumerate(inputs):
delay_ms = delays_ms[idx] if idx < len(delays_ms) else 0
if delay_ms > 0:
# adelay requires one value per channel; use same for stereo
adelay = graph.add(
"adelay",
args=f"delays={delay_ms}|{delay_ms}:all=1",
name=f"delay{idx}",
)
in_ctx.link_to(adelay)
adelay.link_to(mixer, 0, idx)
else:
in_ctx.link_to(mixer, 0, idx)
mixer.link_to(fmt)
fmt.link_to(sink)
graph.configure()
containers = []
try:
# Open all containers with cleanup guaranteed
for i, url in enumerate(valid_track_urls):
try:
c = av.open(
url,
options={
# S3 streaming options
"reconnect": "1",
"reconnect_streamed": "1",
"reconnect_delay_max": "5",
},
)
containers.append(c)
except Exception as e:
if logger:
logger.warning(
"Mixdown: failed to open container from URL",
input=i,
url=url,
error=str(e),
)
if not containers:
if logger:
logger.error("Mixdown failed - no valid containers opened")
raise ValueError("Mixdown failed: Could not open any track containers")
decoders = [c.decode(audio=0) for c in containers]
active = [True] * len(decoders)
resamplers = [
AudioResampler(format="s32", layout="stereo", rate=target_sample_rate)
for _ in decoders
]
while any(active):
for i, (dec, is_active) in enumerate(zip(decoders, active)):
if not is_active:
continue
try:
frame = next(dec)
except StopIteration:
active[i] = False
# Signal end of stream to filter graph
inputs[i].push(None)
continue
                    # Skip frames at an unexpected rate; all tracks are assumed
                    # to share the detected target rate, so mismatches are dropped
                    if frame.sample_rate != target_sample_rate:
                        continue
out_frames = resamplers[i].resample(frame) or []
for rf in out_frames:
rf.sample_rate = target_sample_rate
rf.time_base = Fraction(1, target_sample_rate)
inputs[i].push(rf)
while True:
try:
mixed = sink.pull()
except Exception:
break
mixed.sample_rate = target_sample_rate
mixed.time_base = Fraction(1, target_sample_rate)
await writer.push(mixed)
# Flush remaining frames from filter graph
while True:
try:
mixed = sink.pull()
except Exception:
break
mixed.sample_rate = target_sample_rate
mixed.time_base = Fraction(1, target_sample_rate)
await writer.push(mixed)
finally:
# Cleanup all containers, even if processing failed
for c in containers:
if c is not None:
try:
c.close()
except Exception:
pass # Best effort cleanup
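
For orientation, a sketch of how a caller might drive these two helpers end to end. ListWriter is a hypothetical stand-in for the AudioFileWriterProcessor the docstring mentions, and the file names are made up:

import asyncio

class ListWriter:
    """Hypothetical writer: collects mixed frames in memory via async push()."""

    def __init__(self):
        self.frames = []

    async def push(self, frame):
        self.frames.append(frame)

async def main():
    urls = ["track_a.webm", "track_b.webm"]  # hypothetical local files
    # Fall back to 48kHz (the Opus standard rate) if no frame is decodable
    rate = detect_sample_rate_from_tracks(urls) or 48000
    await mixdown_tracks_pyav(urls, ListWriter(), rate, offsets_seconds=[0.0, 1.5])

asyncio.run(main())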


@@ -0,0 +1,186 @@
"""
Audio track padding utilities.
Shared PyAV-based functions for extracting stream metadata and applying
silence padding to audio tracks. Used by both Hatchet workflows and Celery pipelines.
"""
import math
from fractions import Fraction
import av
from av.audio.resampler import AudioResampler
from reflector.utils.audio_constants import (
OPUS_DEFAULT_BIT_RATE,
OPUS_STANDARD_SAMPLE_RATE,
)
def extract_stream_start_time_from_container(
container,
track_idx: int,
logger=None,
) -> float:
"""Extract meeting-relative start time from WebM stream metadata.
Uses PyAV to read stream.start_time from WebM container.
More accurate than filename timestamps by ~209ms due to network/encoding delays.
Args:
container: PyAV container opened from audio file/URL
track_idx: Track index for logging context
logger: Optional logger instance (structlog or stdlib compatible)
Returns:
Start time in seconds (0.0 if extraction fails)
"""
start_time_seconds = 0.0
try:
audio_streams = [s for s in container.streams if s.type == "audio"]
stream = audio_streams[0] if audio_streams else container.streams[0]
# 1) Try stream-level start_time (most reliable for Daily.co tracks)
if stream.start_time is not None and stream.time_base is not None:
start_time_seconds = float(stream.start_time * stream.time_base)
# 2) Fallback to container-level start_time (in av.time_base units)
if (start_time_seconds <= 0) and (container.start_time is not None):
start_time_seconds = float(container.start_time * av.time_base)
# 3) Fallback to first packet DTS in stream.time_base
if start_time_seconds <= 0:
for packet in container.demux(stream):
if packet.dts is not None:
start_time_seconds = float(packet.dts * stream.time_base)
break
except Exception as e:
if logger:
logger.warning(
"PyAV metadata read failed; assuming 0 start_time",
track_idx=track_idx,
error=str(e),
)
start_time_seconds = 0.0
if logger:
logger.info(
f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s",
track_idx=track_idx,
)
    return start_time_seconds


def apply_audio_padding_to_file(
in_container,
output_path: str,
start_time_seconds: float,
track_idx: int,
logger=None,
) -> None:
"""Apply silence padding to audio track using PyAV filter graph.
Uses adelay filter to prepend silence, aligning track to meeting start time.
Output is WebM/Opus format.
Args:
in_container: PyAV container opened from source audio
output_path: Path for output WebM file
start_time_seconds: Amount of silence to prepend (in seconds)
track_idx: Track index for logging context
logger: Optional logger instance (structlog or stdlib compatible)
Raises:
Exception: If no audio stream found or PyAV processing fails
"""
delay_ms = math.floor(start_time_seconds * 1000)
if logger:
logger.info(
f"Padding track {track_idx} with {delay_ms}ms delay using PyAV",
track_idx=track_idx,
delay_ms=delay_ms,
)
try:
with av.open(output_path, "w", format="webm") as out_container:
in_stream = next(
(s for s in in_container.streams if s.type == "audio"), None
)
if in_stream is None:
raise Exception("No audio stream in input")
out_stream = out_container.add_stream(
"libopus", rate=OPUS_STANDARD_SAMPLE_RATE
)
out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE
graph = av.filter.Graph()
abuf_args = (
f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
f"sample_fmt=s16:"
f"channel_layout=stereo"
)
src = graph.add("abuffer", args=abuf_args, name="src")
aresample_f = graph.add("aresample", args="async=1", name="ares")
# adelay requires one delay value per channel separated by '|'
delays_arg = f"{delay_ms}|{delay_ms}"
adelay_f = graph.add(
"adelay", args=f"delays={delays_arg}:all=1", name="delay"
)
sink = graph.add("abuffersink", name="sink")
src.link_to(aresample_f)
aresample_f.link_to(adelay_f)
adelay_f.link_to(sink)
graph.configure()
resampler = AudioResampler(
format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
)
# Decode -> resample -> push through graph -> encode Opus
for frame in in_container.decode(in_stream):
out_frames = resampler.resample(frame) or []
for rframe in out_frames:
rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
src.push(rframe)
while True:
try:
f_out = sink.pull()
except Exception:
break
f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
for packet in out_stream.encode(f_out):
out_container.mux(packet)
# Flush remaining frames from filter graph
src.push(None)
while True:
try:
f_out = sink.pull()
except Exception:
break
f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
for packet in out_stream.encode(f_out):
out_container.mux(packet)
# Flush encoder
for packet in out_stream.encode(None):
out_container.mux(packet)
except Exception as e:
if logger:
logger.error(
"PyAV padding failed for track",
track_idx=track_idx,
delay_ms=delay_ms,
error=str(e),
exc_info=True,
)
raise
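
A sketch of how the two helpers compose (paths are hypothetical; the source is re-opened before padding because the start-time probe may already have demuxed packets from the container):

import av

with av.open("raw_track_0.webm") as probe:
    start = extract_stream_start_time_from_container(probe, track_idx=0)

with av.open("raw_track_0.webm") as in_container:
    apply_audio_padding_to_file(in_container, "padded_track_0.webm", start, track_idx=0)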


@@ -320,13 +320,11 @@ async def _process_multitrack_recording_inner(
transcript_id=transcript.id,
)
# Store workflow_run_id on transcript for replay/resume
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
durable_started = True
# If durable workflow started, skip Celery
if durable_started:
return


@@ -119,6 +119,7 @@ async def post_transcript_notification(transcript: Transcript) -> int | None:
Uses transcript.room_id directly (Hatchet flow).
    Celery's pipeline_post_to_zulip uses the recording→meeting→room path instead.
    DUPLICATION NOTE: This function will remain once Celery is retired; the
    Celery counterpart will be removed.
"""
if not transcript.room_id:
return None