feat: durable (#794)

* durable (no-mistakes)

* hatchet no-mistake

* hatchet no-mistake

* hatchet no-mistake, better logging

* remove conductor and add hatchet tests (no-mistakes)

* self-review (no-mistakes)

* hatched logs

* remove shadow mode for hatchet

* and add hatchet processor setting to room

* .

* cleanup

* hatchet init db

* self-review (no-mistakes)

* self-review (no-mistakes)

* hatchet: restore zullip report

* self-review round

* self-review round

* self-review round

* dry hatchet with celery

* dry hatched with celery - 2

* self-review round

* more NES instead of str

* self-review wip

* self-review round

* self-review round

* self-review round

* can_replay cancelled

* add forgotten file

* pr autoreviewer fixes

* better log webhook events

* durable_started return

* migration sync

* latest changes feature parity

* migration merge

* pr review

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
This commit is contained in:
2025-12-22 12:09:20 -05:00
committed by GitHub
parent f580b996ee
commit 1dac999b56
36 changed files with 4908 additions and 2009 deletions

View File

@@ -0,0 +1,15 @@
"""
Shared audio processing constants.
Used by both Hatchet workflows and Celery pipelines for consistent audio encoding.
"""
# Opus codec settings
OPUS_STANDARD_SAMPLE_RATE = 48000
OPUS_DEFAULT_BIT_RATE = 128000 # 128kbps for good speech quality
# S3 presigned URL expiration
PRESIGNED_URL_EXPIRATION_SECONDS = 7200 # 2 hours
# Waveform visualization
WAVEFORM_SEGMENTS = 255

View File

@@ -0,0 +1,227 @@
"""
Audio track mixdown utilities.
Shared PyAV-based functions for mixing multiple audio tracks into a single output.
Used by both Hatchet workflows and Celery pipelines.
"""
from fractions import Fraction
import av
from av.audio.resampler import AudioResampler
def detect_sample_rate_from_tracks(track_urls: list[str], logger=None) -> int | None:
"""Detect sample rate from first decodable audio frame.
Args:
track_urls: List of URLs to audio files (S3 presigned or local)
logger: Optional logger instance
Returns:
Sample rate in Hz, or None if no decodable frames found
"""
for url in track_urls:
if not url:
continue
container = None
try:
container = av.open(url)
for frame in container.decode(audio=0):
return frame.sample_rate
except Exception:
continue
finally:
if container is not None:
container.close()
return None
async def mixdown_tracks_pyav(
track_urls: list[str],
writer,
target_sample_rate: int,
offsets_seconds: list[float] | None = None,
logger=None,
) -> None:
"""Multi-track mixdown using PyAV filter graph (amix).
Builds a filter graph: N abuffer -> optional adelay -> amix -> aformat -> sink
Reads from S3 presigned URLs or local files, pushes mixed frames to writer.
Args:
track_urls: List of URLs to audio tracks (S3 presigned or local)
writer: AudioFileWriterProcessor instance with async push() method
target_sample_rate: Sample rate for output (Hz)
offsets_seconds: Optional per-track delays in seconds for alignment.
If provided, must have same length as track_urls. Delays are relative
to the minimum offset (earliest track has delay=0).
logger: Optional logger instance
Raises:
ValueError: If offsets_seconds length doesn't match track_urls,
no valid tracks provided, or no containers can be opened
"""
if offsets_seconds is not None and len(offsets_seconds) != len(track_urls):
raise ValueError(
f"offsets_seconds length ({len(offsets_seconds)}) must match track_urls ({len(track_urls)})"
)
valid_track_urls = [url for url in track_urls if url]
if not valid_track_urls:
if logger:
logger.error("Mixdown failed - no valid track URLs provided")
raise ValueError("Mixdown failed: No valid track URLs")
# Calculate per-input delays if offsets provided
input_offsets_seconds = None
if offsets_seconds is not None:
input_offsets_seconds = [
offsets_seconds[i] for i, url in enumerate(track_urls) if url
]
# Build PyAV filter graph:
# N abuffer (s32/stereo)
# -> optional adelay per input (for alignment)
# -> amix (s32)
# -> aformat(s16)
# -> sink
graph = av.filter.Graph()
inputs = []
for idx, url in enumerate(valid_track_urls):
args = (
f"time_base=1/{target_sample_rate}:"
f"sample_rate={target_sample_rate}:"
f"sample_fmt=s32:"
f"channel_layout=stereo"
)
in_ctx = graph.add("abuffer", args=args, name=f"in{idx}")
inputs.append(in_ctx)
if not inputs:
if logger:
logger.error("Mixdown failed - no valid inputs for graph")
raise ValueError("Mixdown failed: No valid inputs for filter graph")
mixer = graph.add("amix", args=f"inputs={len(inputs)}:normalize=0", name="mix")
fmt = graph.add(
"aformat",
args=f"sample_fmts=s32:channel_layouts=stereo:sample_rates={target_sample_rate}",
name="fmt",
)
sink = graph.add("abuffersink", name="out")
# Optional per-input delay before mixing
delays_ms: list[int] = []
if input_offsets_seconds is not None:
base = min(input_offsets_seconds) if input_offsets_seconds else 0.0
delays_ms = [
max(0, int(round((o - base) * 1000))) for o in input_offsets_seconds
]
else:
delays_ms = [0 for _ in inputs]
for idx, in_ctx in enumerate(inputs):
delay_ms = delays_ms[idx] if idx < len(delays_ms) else 0
if delay_ms > 0:
# adelay requires one value per channel; use same for stereo
adelay = graph.add(
"adelay",
args=f"delays={delay_ms}|{delay_ms}:all=1",
name=f"delay{idx}",
)
in_ctx.link_to(adelay)
adelay.link_to(mixer, 0, idx)
else:
in_ctx.link_to(mixer, 0, idx)
mixer.link_to(fmt)
fmt.link_to(sink)
graph.configure()
containers = []
try:
# Open all containers with cleanup guaranteed
for i, url in enumerate(valid_track_urls):
try:
c = av.open(
url,
options={
# S3 streaming options
"reconnect": "1",
"reconnect_streamed": "1",
"reconnect_delay_max": "5",
},
)
containers.append(c)
except Exception as e:
if logger:
logger.warning(
"Mixdown: failed to open container from URL",
input=i,
url=url,
error=str(e),
)
if not containers:
if logger:
logger.error("Mixdown failed - no valid containers opened")
raise ValueError("Mixdown failed: Could not open any track containers")
decoders = [c.decode(audio=0) for c in containers]
active = [True] * len(decoders)
resamplers = [
AudioResampler(format="s32", layout="stereo", rate=target_sample_rate)
for _ in decoders
]
while any(active):
for i, (dec, is_active) in enumerate(zip(decoders, active)):
if not is_active:
continue
try:
frame = next(dec)
except StopIteration:
active[i] = False
# Signal end of stream to filter graph
inputs[i].push(None)
continue
if frame.sample_rate != target_sample_rate:
continue
out_frames = resamplers[i].resample(frame) or []
for rf in out_frames:
rf.sample_rate = target_sample_rate
rf.time_base = Fraction(1, target_sample_rate)
inputs[i].push(rf)
while True:
try:
mixed = sink.pull()
except Exception:
break
mixed.sample_rate = target_sample_rate
mixed.time_base = Fraction(1, target_sample_rate)
await writer.push(mixed)
# Flush remaining frames from filter graph
while True:
try:
mixed = sink.pull()
except Exception:
break
mixed.sample_rate = target_sample_rate
mixed.time_base = Fraction(1, target_sample_rate)
await writer.push(mixed)
finally:
# Cleanup all containers, even if processing failed
for c in containers:
if c is not None:
try:
c.close()
except Exception:
pass # Best effort cleanup

View File

@@ -0,0 +1,186 @@
"""
Audio track padding utilities.
Shared PyAV-based functions for extracting stream metadata and applying
silence padding to audio tracks. Used by both Hatchet workflows and Celery pipelines.
"""
import math
from fractions import Fraction
import av
from av.audio.resampler import AudioResampler
from reflector.utils.audio_constants import (
OPUS_DEFAULT_BIT_RATE,
OPUS_STANDARD_SAMPLE_RATE,
)
def extract_stream_start_time_from_container(
container,
track_idx: int,
logger=None,
) -> float:
"""Extract meeting-relative start time from WebM stream metadata.
Uses PyAV to read stream.start_time from WebM container.
More accurate than filename timestamps by ~209ms due to network/encoding delays.
Args:
container: PyAV container opened from audio file/URL
track_idx: Track index for logging context
logger: Optional logger instance (structlog or stdlib compatible)
Returns:
Start time in seconds (0.0 if extraction fails)
"""
start_time_seconds = 0.0
try:
audio_streams = [s for s in container.streams if s.type == "audio"]
stream = audio_streams[0] if audio_streams else container.streams[0]
# 1) Try stream-level start_time (most reliable for Daily.co tracks)
if stream.start_time is not None and stream.time_base is not None:
start_time_seconds = float(stream.start_time * stream.time_base)
# 2) Fallback to container-level start_time (in av.time_base units)
if (start_time_seconds <= 0) and (container.start_time is not None):
start_time_seconds = float(container.start_time * av.time_base)
# 3) Fallback to first packet DTS in stream.time_base
if start_time_seconds <= 0:
for packet in container.demux(stream):
if packet.dts is not None:
start_time_seconds = float(packet.dts * stream.time_base)
break
except Exception as e:
if logger:
logger.warning(
"PyAV metadata read failed; assuming 0 start_time",
track_idx=track_idx,
error=str(e),
)
start_time_seconds = 0.0
if logger:
logger.info(
f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s",
track_idx=track_idx,
)
return start_time_seconds
def apply_audio_padding_to_file(
in_container,
output_path: str,
start_time_seconds: float,
track_idx: int,
logger=None,
) -> None:
"""Apply silence padding to audio track using PyAV filter graph.
Uses adelay filter to prepend silence, aligning track to meeting start time.
Output is WebM/Opus format.
Args:
in_container: PyAV container opened from source audio
output_path: Path for output WebM file
start_time_seconds: Amount of silence to prepend (in seconds)
track_idx: Track index for logging context
logger: Optional logger instance (structlog or stdlib compatible)
Raises:
Exception: If no audio stream found or PyAV processing fails
"""
delay_ms = math.floor(start_time_seconds * 1000)
if logger:
logger.info(
f"Padding track {track_idx} with {delay_ms}ms delay using PyAV",
track_idx=track_idx,
delay_ms=delay_ms,
)
try:
with av.open(output_path, "w", format="webm") as out_container:
in_stream = next(
(s for s in in_container.streams if s.type == "audio"), None
)
if in_stream is None:
raise Exception("No audio stream in input")
out_stream = out_container.add_stream(
"libopus", rate=OPUS_STANDARD_SAMPLE_RATE
)
out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE
graph = av.filter.Graph()
abuf_args = (
f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
f"sample_fmt=s16:"
f"channel_layout=stereo"
)
src = graph.add("abuffer", args=abuf_args, name="src")
aresample_f = graph.add("aresample", args="async=1", name="ares")
# adelay requires one delay value per channel separated by '|'
delays_arg = f"{delay_ms}|{delay_ms}"
adelay_f = graph.add(
"adelay", args=f"delays={delays_arg}:all=1", name="delay"
)
sink = graph.add("abuffersink", name="sink")
src.link_to(aresample_f)
aresample_f.link_to(adelay_f)
adelay_f.link_to(sink)
graph.configure()
resampler = AudioResampler(
format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
)
# Decode -> resample -> push through graph -> encode Opus
for frame in in_container.decode(in_stream):
out_frames = resampler.resample(frame) or []
for rframe in out_frames:
rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
src.push(rframe)
while True:
try:
f_out = sink.pull()
except Exception:
break
f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
for packet in out_stream.encode(f_out):
out_container.mux(packet)
# Flush remaining frames from filter graph
src.push(None)
while True:
try:
f_out = sink.pull()
except Exception:
break
f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
for packet in out_stream.encode(f_out):
out_container.mux(packet)
# Flush encoder
for packet in out_stream.encode(None):
out_container.mux(packet)
except Exception as e:
if logger:
logger.error(
"PyAV padding failed for track",
track_idx=track_idx,
delay_ms=delay_ms,
error=str(e),
exc_info=True,
)
raise

View File

@@ -0,0 +1,4 @@
def assert_not_none[T](value: T | None, message: str = "Value is None") -> T:
if value is None:
raise ValueError(message)
return value

View File

@@ -2,6 +2,17 @@ from typing import Annotated, TypeVar
from pydantic import Field, TypeAdapter, constr
T_NotNone = TypeVar("T_NotNone")
def assert_not_none(
value: T_NotNone | None, message: str = "Value is None"
) -> T_NotNone:
if value is None:
raise ValueError(message)
return value
NonEmptyStringBase = constr(min_length=1, strip_whitespace=False)
NonEmptyString = Annotated[
NonEmptyStringBase,
@@ -23,10 +34,18 @@ def try_parse_non_empty_string(s: str) -> NonEmptyString | None:
return parse_non_empty_string(s)
T = TypeVar("T", bound=str)
T_Str = TypeVar("T_Str", bound=str)
def assert_equal[T](s1: T, s2: T) -> T:
def assert_equal(s1: T_Str, s2: T_Str) -> T_Str:
if s1 != s2:
raise ValueError(f"assert_equal: {s1} != {s2}")
return s1
def assert_non_none_and_non_empty(
value: str | None, error: str | None = None
) -> NonEmptyString:
return parse_non_empty_string(
assert_not_none(value, error or "Value is None"), error
)