mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-19 11:46:55 +00:00
feat: migrate file and live post-processing pipelines from Celery to Hatchet workflow engine (#911)
* feat: migrate file and live post-processing pipelines from Celery to Hatchet workflow engine * fix: always force reprocessing * fix: ci tests with live pipelines * fix: ci tests with live pipelines
This commit is contained in:
committed by
GitHub
parent
72dca7cacc
commit
37a1f01850
@@ -26,6 +26,21 @@ class TaskName(StrEnum):
|
||||
DETECT_CHUNK_TOPIC = "detect_chunk_topic"
|
||||
GENERATE_DETAILED_SUMMARY = "generate_detailed_summary"
|
||||
|
||||
# File pipeline tasks
|
||||
EXTRACT_AUDIO = "extract_audio"
|
||||
UPLOAD_AUDIO = "upload_audio"
|
||||
TRANSCRIBE = "transcribe"
|
||||
DIARIZE = "diarize"
|
||||
ASSEMBLE_TRANSCRIPT = "assemble_transcript"
|
||||
GENERATE_SUMMARIES = "generate_summaries"
|
||||
|
||||
# Live post-processing pipeline tasks
|
||||
WAVEFORM = "waveform"
|
||||
CONVERT_MP3 = "convert_mp3"
|
||||
UPLOAD_MP3 = "upload_mp3"
|
||||
REMOVE_UPLOAD = "remove_upload"
|
||||
FINAL_SUMMARIES = "final_summaries"
|
||||
|
||||
|
||||
# Rate limit key for LLM API calls (shared across all LLM-calling tasks)
|
||||
LLM_RATE_LIMIT_KEY = "llm"
|
||||
|
||||
@@ -10,6 +10,8 @@ from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
daily_multitrack_pipeline,
|
||||
)
|
||||
from reflector.hatchet.workflows.file_pipeline import file_pipeline
|
||||
from reflector.hatchet.workflows.live_post_pipeline import live_post_pipeline
|
||||
from reflector.hatchet.workflows.subject_processing import subject_workflow
|
||||
from reflector.hatchet.workflows.topic_chunk_processing import topic_chunk_workflow
|
||||
from reflector.hatchet.workflows.track_processing import track_workflow
|
||||
@@ -47,6 +49,8 @@ def main():
|
||||
},
|
||||
workflows=[
|
||||
daily_multitrack_pipeline,
|
||||
file_pipeline,
|
||||
live_post_pipeline,
|
||||
topic_chunk_workflow,
|
||||
subject_workflow,
|
||||
track_workflow,
|
||||
|
||||
885
server/reflector/hatchet/workflows/file_pipeline.py
Normal file
885
server/reflector/hatchet/workflows/file_pipeline.py
Normal file
@@ -0,0 +1,885 @@
|
||||
"""
|
||||
Hatchet workflow: FilePipeline
|
||||
|
||||
Processing pipeline for file uploads and Whereby recordings.
|
||||
Orchestrates: extract audio → upload → transcribe/diarize/waveform (parallel)
|
||||
→ assemble → detect topics → title/summaries (parallel) → finalize
|
||||
→ cleanup consent → post zulip / send webhook.
|
||||
|
||||
Note: This file uses deferred imports (inside functions/tasks) intentionally.
|
||||
Hatchet workers run in forked processes; fresh imports per task ensure DB connections
|
||||
are not shared across forks, avoiding connection pooling issues.
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from hatchet_sdk import Context
|
||||
from pydantic import BaseModel
|
||||
|
||||
from reflector.hatchet.broadcast import (
|
||||
append_event_and_broadcast,
|
||||
set_status_and_broadcast,
|
||||
)
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.constants import (
|
||||
TIMEOUT_HEAVY,
|
||||
TIMEOUT_MEDIUM,
|
||||
TIMEOUT_SHORT,
|
||||
TIMEOUT_TITLE,
|
||||
TaskName,
|
||||
)
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
fresh_db_connection,
|
||||
set_workflow_error_status,
|
||||
with_error_handling,
|
||||
)
|
||||
from reflector.hatchet.workflows.models import (
|
||||
ConsentResult,
|
||||
TitleResult,
|
||||
TopicsResult,
|
||||
WaveformResult,
|
||||
WebhookResult,
|
||||
ZulipResult,
|
||||
)
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines import topic_processing
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.audio_constants import WAVEFORM_SEGMENTS
|
||||
from reflector.utils.audio_waveform import get_audio_waveform
|
||||
|
||||
|
||||
class FilePipelineInput(BaseModel):
|
||||
transcript_id: str
|
||||
room_id: str | None = None
|
||||
|
||||
|
||||
# --- Result models specific to file pipeline ---
|
||||
|
||||
|
||||
class ExtractAudioResult(BaseModel):
|
||||
audio_path: str
|
||||
duration_ms: float = 0.0
|
||||
|
||||
|
||||
class UploadAudioResult(BaseModel):
|
||||
audio_url: str
|
||||
audio_path: str
|
||||
|
||||
|
||||
class TranscribeResult(BaseModel):
|
||||
words: list[dict]
|
||||
translation: str | None = None
|
||||
|
||||
|
||||
class DiarizeResult(BaseModel):
|
||||
diarization: list[dict] | None = None
|
||||
|
||||
|
||||
class AssembleTranscriptResult(BaseModel):
|
||||
assembled: bool
|
||||
|
||||
|
||||
class SummariesResult(BaseModel):
|
||||
generated: bool
|
||||
|
||||
|
||||
class FinalizeResult(BaseModel):
|
||||
status: str
|
||||
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
file_pipeline = hatchet.workflow(name="FilePipeline", input_validator=FilePipelineInput)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.EXTRACT_AUDIO)
|
||||
async def extract_audio(input: FilePipelineInput, ctx: Context) -> ExtractAudioResult:
|
||||
"""Extract audio from upload file, convert to MP3."""
|
||||
ctx.log(f"extract_audio: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
await set_status_and_broadcast(input.transcript_id, "processing", logger=logger)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
# Clear transcript as we're going to regenerate everything
|
||||
await transcripts_controller.update(
|
||||
transcript,
|
||||
{
|
||||
"events": [],
|
||||
"topics": [],
|
||||
},
|
||||
)
|
||||
|
||||
# Find upload file
|
||||
audio_file = next(transcript.data_path.glob("upload.*"), None)
|
||||
if not audio_file:
|
||||
audio_file = next(transcript.data_path.glob("audio.*"), None)
|
||||
if not audio_file:
|
||||
raise ValueError("No audio file found to process")
|
||||
|
||||
ctx.log(f"extract_audio: processing {audio_file}")
|
||||
|
||||
# Extract audio and write as MP3
|
||||
import av # noqa: PLC0415
|
||||
|
||||
from reflector.processors import AudioFileWriterProcessor # noqa: PLC0415
|
||||
|
||||
duration_ms_container = [0.0]
|
||||
|
||||
async def capture_duration(d):
|
||||
duration_ms_container[0] = d
|
||||
|
||||
mp3_writer = AudioFileWriterProcessor(
|
||||
path=transcript.audio_mp3_filename,
|
||||
on_duration=capture_duration,
|
||||
)
|
||||
input_container = av.open(str(audio_file))
|
||||
for frame in input_container.decode(audio=0):
|
||||
await mp3_writer.push(frame)
|
||||
await mp3_writer.flush()
|
||||
input_container.close()
|
||||
|
||||
duration_ms = duration_ms_container[0]
|
||||
audio_path = str(transcript.audio_mp3_filename)
|
||||
|
||||
# Persist duration to database and broadcast to websocket clients
|
||||
from reflector.db.transcripts import TranscriptDuration # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller as tc
|
||||
|
||||
await tc.update(transcript, {"duration": duration_ms})
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"DURATION",
|
||||
TranscriptDuration(duration=duration_ms),
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log(f"extract_audio complete: {audio_path}, duration={duration_ms}ms")
|
||||
return ExtractAudioResult(audio_path=audio_path, duration_ms=duration_ms)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[extract_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.UPLOAD_AUDIO)
|
||||
async def upload_audio(input: FilePipelineInput, ctx: Context) -> UploadAudioResult:
|
||||
"""Upload audio to S3/storage, return audio_url."""
|
||||
ctx.log(f"upload_audio: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
extract_result = ctx.task_output(extract_audio)
|
||||
audio_path = extract_result.audio_path
|
||||
|
||||
from reflector.storage import get_transcripts_storage # noqa: PLC0415
|
||||
|
||||
storage = get_transcripts_storage()
|
||||
if not storage:
|
||||
raise ValueError(
|
||||
"Storage backend required for file processing. "
|
||||
"Configure TRANSCRIPT_STORAGE_* settings."
|
||||
)
|
||||
|
||||
with open(audio_path, "rb") as f:
|
||||
audio_data = f.read()
|
||||
|
||||
storage_path = f"file_pipeline/{input.transcript_id}/audio.mp3"
|
||||
await storage.put_file(storage_path, audio_data)
|
||||
audio_url = await storage.get_file_url(storage_path)
|
||||
|
||||
ctx.log(f"upload_audio complete: {audio_url}")
|
||||
return UploadAudioResult(audio_url=audio_url, audio_path=audio_path)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[upload_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.TRANSCRIBE)
|
||||
async def transcribe(input: FilePipelineInput, ctx: Context) -> TranscribeResult:
|
||||
"""Transcribe the audio file using the configured backend."""
|
||||
ctx.log(f"transcribe: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
upload_result = ctx.task_output(upload_audio)
|
||||
audio_url = upload_result.audio_url
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
source_language = transcript.source_language
|
||||
|
||||
from reflector.pipelines.transcription_helpers import ( # noqa: PLC0415
|
||||
transcribe_file_with_processor,
|
||||
)
|
||||
|
||||
result = await transcribe_file_with_processor(audio_url, source_language)
|
||||
|
||||
ctx.log(f"transcribe complete: {len(result.words)} words")
|
||||
return TranscribeResult(
|
||||
words=[w.model_dump() for w in result.words],
|
||||
translation=result.translation,
|
||||
)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[upload_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.DIARIZE)
|
||||
async def diarize(input: FilePipelineInput, ctx: Context) -> DiarizeResult:
|
||||
"""Diarize the audio file (speaker identification)."""
|
||||
ctx.log(f"diarize: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
if not settings.DIARIZATION_BACKEND:
|
||||
ctx.log("diarize: diarization disabled, skipping")
|
||||
return DiarizeResult(diarization=None)
|
||||
|
||||
upload_result = ctx.task_output(upload_audio)
|
||||
audio_url = upload_result.audio_url
|
||||
|
||||
from reflector.processors.file_diarization import ( # noqa: PLC0415
|
||||
FileDiarizationInput,
|
||||
)
|
||||
from reflector.processors.file_diarization_auto import ( # noqa: PLC0415
|
||||
FileDiarizationAutoProcessor,
|
||||
)
|
||||
|
||||
processor = FileDiarizationAutoProcessor()
|
||||
input_data = FileDiarizationInput(audio_url=audio_url)
|
||||
|
||||
result = None
|
||||
|
||||
async def capture_result(diarization_output):
|
||||
nonlocal result
|
||||
result = diarization_output.diarization
|
||||
|
||||
try:
|
||||
processor.on(capture_result)
|
||||
await processor.push(input_data)
|
||||
await processor.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"Diarization failed: {e}")
|
||||
return DiarizeResult(diarization=None)
|
||||
|
||||
ctx.log(f"diarize complete: {len(result) if result else 0} segments")
|
||||
return DiarizeResult(diarization=list(result) if result else None)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[upload_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_WAVEFORM)
|
||||
async def generate_waveform(input: FilePipelineInput, ctx: Context) -> WaveformResult:
|
||||
"""Generate audio waveform visualization."""
|
||||
ctx.log(f"generate_waveform: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
upload_result = ctx.task_output(upload_audio)
|
||||
audio_path = upload_result.audio_path
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptWaveform,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
waveform = get_audio_waveform(
|
||||
path=Path(audio_path), segments_count=WAVEFORM_SEGMENTS
|
||||
)
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
transcript.data_path.mkdir(parents=True, exist_ok=True)
|
||||
with open(transcript.audio_waveform_filename, "w") as f:
|
||||
json.dump(waveform, f)
|
||||
|
||||
waveform_data = TranscriptWaveform(waveform=waveform)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"WAVEFORM",
|
||||
waveform_data,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log("generate_waveform complete")
|
||||
return WaveformResult(waveform_generated=True)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[transcribe, diarize, generate_waveform],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.ASSEMBLE_TRANSCRIPT)
|
||||
async def assemble_transcript(
|
||||
input: FilePipelineInput, ctx: Context
|
||||
) -> AssembleTranscriptResult:
|
||||
"""Merge transcription + diarization results."""
|
||||
ctx.log(f"assemble_transcript: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
transcribe_result = ctx.task_output(transcribe)
|
||||
diarize_result = ctx.task_output(diarize)
|
||||
|
||||
from reflector.processors.transcript_diarization_assembler import ( # noqa: PLC0415
|
||||
TranscriptDiarizationAssemblerInput,
|
||||
TranscriptDiarizationAssemblerProcessor,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
DiarizationSegment,
|
||||
Word,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
Transcript as TranscriptType,
|
||||
)
|
||||
|
||||
words = [Word(**w) for w in transcribe_result.words]
|
||||
transcript_data = TranscriptType(
|
||||
words=words, translation=transcribe_result.translation
|
||||
)
|
||||
|
||||
diarization = None
|
||||
if diarize_result.diarization:
|
||||
diarization = [DiarizationSegment(**s) for s in diarize_result.diarization]
|
||||
|
||||
processor = TranscriptDiarizationAssemblerProcessor()
|
||||
assembler_input = TranscriptDiarizationAssemblerInput(
|
||||
transcript=transcript_data, diarization=diarization or []
|
||||
)
|
||||
|
||||
diarized_transcript = None
|
||||
|
||||
async def capture_result(transcript):
|
||||
nonlocal diarized_transcript
|
||||
diarized_transcript = transcript
|
||||
|
||||
processor.on(capture_result)
|
||||
await processor.push(assembler_input)
|
||||
await processor.flush()
|
||||
|
||||
if not diarized_transcript:
|
||||
raise ValueError("No diarized transcript captured")
|
||||
|
||||
# Save the assembled transcript events to the database
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptText,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
assembled_text = diarized_transcript.text if diarized_transcript else ""
|
||||
assembled_translation = (
|
||||
diarized_transcript.translation if diarized_transcript else None
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"TRANSCRIPT",
|
||||
TranscriptText(text=assembled_text, translation=assembled_translation),
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log("assemble_transcript complete")
|
||||
return AssembleTranscriptResult(assembled=True)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[assemble_transcript],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.DETECT_TOPICS)
|
||||
async def detect_topics(input: FilePipelineInput, ctx: Context) -> TopicsResult:
|
||||
"""Detect topics from the assembled transcript."""
|
||||
ctx.log(f"detect_topics: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
# Re-read the transcript to get the diarized words
|
||||
transcribe_result = ctx.task_output(transcribe)
|
||||
diarize_result = ctx.task_output(diarize)
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptTopic,
|
||||
transcripts_controller,
|
||||
)
|
||||
from reflector.processors.transcript_diarization_assembler import ( # noqa: PLC0415
|
||||
TranscriptDiarizationAssemblerInput,
|
||||
TranscriptDiarizationAssemblerProcessor,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
DiarizationSegment,
|
||||
Word,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
Transcript as TranscriptType,
|
||||
)
|
||||
|
||||
words = [Word(**w) for w in transcribe_result.words]
|
||||
transcript_data = TranscriptType(
|
||||
words=words, translation=transcribe_result.translation
|
||||
)
|
||||
|
||||
diarization = None
|
||||
if diarize_result.diarization:
|
||||
diarization = [DiarizationSegment(**s) for s in diarize_result.diarization]
|
||||
|
||||
# Re-assemble to get the diarized transcript for topic detection
|
||||
processor = TranscriptDiarizationAssemblerProcessor()
|
||||
assembler_input = TranscriptDiarizationAssemblerInput(
|
||||
transcript=transcript_data, diarization=diarization or []
|
||||
)
|
||||
|
||||
diarized_transcript = None
|
||||
|
||||
async def capture_result(transcript):
|
||||
nonlocal diarized_transcript
|
||||
diarized_transcript = transcript
|
||||
|
||||
processor.on(capture_result)
|
||||
await processor.push(assembler_input)
|
||||
await processor.flush()
|
||||
|
||||
if not diarized_transcript:
|
||||
raise ValueError("No diarized transcript for topic detection")
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
target_language = transcript.target_language
|
||||
|
||||
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||
|
||||
async def on_topic_callback(data):
|
||||
topic = TranscriptTopic(
|
||||
title=data.title,
|
||||
summary=data.summary,
|
||||
timestamp=data.timestamp,
|
||||
transcript=data.transcript.text
|
||||
if hasattr(data.transcript, "text")
|
||||
else "",
|
||||
words=data.transcript.words
|
||||
if hasattr(data.transcript, "words")
|
||||
else [],
|
||||
)
|
||||
await transcripts_controller.upsert_topic(transcript, topic)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id, transcript, "TOPIC", topic, logger=logger
|
||||
)
|
||||
|
||||
topics = await topic_processing.detect_topics(
|
||||
diarized_transcript,
|
||||
target_language,
|
||||
on_topic_callback=on_topic_callback,
|
||||
empty_pipeline=empty_pipeline,
|
||||
)
|
||||
|
||||
ctx.log(f"detect_topics complete: {len(topics)} topics")
|
||||
return TopicsResult(topics=topics)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[detect_topics],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_TITLE),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_TITLE)
|
||||
async def generate_title(input: FilePipelineInput, ctx: Context) -> TitleResult:
|
||||
"""Generate meeting title using LLM."""
|
||||
ctx.log(f"generate_title: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
topics_result = ctx.task_output(detect_topics)
|
||||
topics = topics_result.topics
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptFinalTitle,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||
title_result = None
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
async def on_title_callback(data):
|
||||
nonlocal title_result
|
||||
title_result = data.title
|
||||
final_title = TranscriptFinalTitle(title=data.title)
|
||||
if not transcript.title:
|
||||
await transcripts_controller.update(
|
||||
transcript, {"title": final_title.title}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"FINAL_TITLE",
|
||||
final_title,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
await topic_processing.generate_title(
|
||||
topics,
|
||||
on_title_callback=on_title_callback,
|
||||
empty_pipeline=empty_pipeline,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log(f"generate_title complete: '{title_result}'")
|
||||
return TitleResult(title=title_result)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[detect_topics],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_SUMMARIES)
|
||||
async def generate_summaries(input: FilePipelineInput, ctx: Context) -> SummariesResult:
|
||||
"""Generate long/short summaries and action items."""
|
||||
ctx.log(f"generate_summaries: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
topics_result = ctx.task_output(detect_topics)
|
||||
topics = topics_result.topics
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptActionItems,
|
||||
TranscriptFinalLongSummary,
|
||||
TranscriptFinalShortSummary,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
async def on_long_summary_callback(data):
|
||||
final_long = TranscriptFinalLongSummary(long_summary=data.long_summary)
|
||||
await transcripts_controller.update(
|
||||
transcript, {"long_summary": final_long.long_summary}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"FINAL_LONG_SUMMARY",
|
||||
final_long,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
async def on_short_summary_callback(data):
|
||||
final_short = TranscriptFinalShortSummary(short_summary=data.short_summary)
|
||||
await transcripts_controller.update(
|
||||
transcript, {"short_summary": final_short.short_summary}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"FINAL_SHORT_SUMMARY",
|
||||
final_short,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
async def on_action_items_callback(data):
|
||||
action_items = TranscriptActionItems(action_items=data.action_items)
|
||||
await transcripts_controller.update(
|
||||
transcript, {"action_items": action_items.action_items}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"ACTION_ITEMS",
|
||||
action_items,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
await topic_processing.generate_summaries(
|
||||
topics,
|
||||
transcript,
|
||||
on_long_summary_callback=on_long_summary_callback,
|
||||
on_short_summary_callback=on_short_summary_callback,
|
||||
on_action_items_callback=on_action_items_callback,
|
||||
empty_pipeline=empty_pipeline,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log("generate_summaries complete")
|
||||
return SummariesResult(generated=True)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[generate_title, generate_summaries],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=5,
|
||||
)
|
||||
@with_error_handling(TaskName.FINALIZE)
|
||||
async def finalize(input: FilePipelineInput, ctx: Context) -> FinalizeResult:
|
||||
"""Set transcript status to 'ended' and broadcast."""
|
||||
ctx.log("finalize: setting status to 'ended'")
|
||||
|
||||
async with fresh_db_connection():
|
||||
await set_status_and_broadcast(input.transcript_id, "ended", logger=logger)
|
||||
|
||||
ctx.log("finalize complete")
|
||||
return FinalizeResult(status="COMPLETED")
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[finalize],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.CLEANUP_CONSENT, set_error_status=False)
|
||||
async def cleanup_consent(input: FilePipelineInput, ctx: Context) -> ConsentResult:
|
||||
"""Check consent and delete audio files if any participant denied."""
|
||||
ctx.log(f"cleanup_consent: transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.meetings import ( # noqa: PLC0415
|
||||
meeting_consent_controller,
|
||||
meetings_controller,
|
||||
)
|
||||
from reflector.db.recordings import recordings_controller # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.storage import get_transcripts_storage # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
ctx.log("cleanup_consent: transcript not found")
|
||||
return ConsentResult()
|
||||
|
||||
consent_denied = False
|
||||
recording = None
|
||||
if transcript.recording_id:
|
||||
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||
if recording and recording.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||
if meeting:
|
||||
consent_denied = await meeting_consent_controller.has_any_denial(
|
||||
meeting.id
|
||||
)
|
||||
|
||||
if not consent_denied:
|
||||
ctx.log("cleanup_consent: consent approved, keeping all files")
|
||||
return ConsentResult()
|
||||
|
||||
ctx.log("cleanup_consent: consent denied, deleting audio files")
|
||||
|
||||
deletion_errors = []
|
||||
if recording and recording.bucket_name:
|
||||
keys_to_delete = []
|
||||
if recording.track_keys:
|
||||
keys_to_delete = recording.track_keys
|
||||
elif recording.object_key:
|
||||
keys_to_delete = [recording.object_key]
|
||||
|
||||
master_storage = get_transcripts_storage()
|
||||
for key in keys_to_delete:
|
||||
try:
|
||||
await master_storage.delete_file(key, bucket=recording.bucket_name)
|
||||
ctx.log(f"Deleted recording file: {recording.bucket_name}/{key}")
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to delete {key}: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
deletion_errors.append(error_msg)
|
||||
|
||||
if transcript.audio_location == "storage":
|
||||
storage = get_transcripts_storage()
|
||||
try:
|
||||
await storage.delete_file(transcript.storage_audio_path)
|
||||
ctx.log(f"Deleted processed audio: {transcript.storage_audio_path}")
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to delete processed audio: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
deletion_errors.append(error_msg)
|
||||
|
||||
try:
|
||||
if (
|
||||
hasattr(transcript, "audio_mp3_filename")
|
||||
and transcript.audio_mp3_filename
|
||||
):
|
||||
transcript.audio_mp3_filename.unlink(missing_ok=True)
|
||||
if (
|
||||
hasattr(transcript, "audio_wav_filename")
|
||||
and transcript.audio_wav_filename
|
||||
):
|
||||
transcript.audio_wav_filename.unlink(missing_ok=True)
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to delete local audio files: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
deletion_errors.append(error_msg)
|
||||
|
||||
if deletion_errors:
|
||||
logger.warning(
|
||||
"[Hatchet] cleanup_consent completed with errors",
|
||||
transcript_id=input.transcript_id,
|
||||
error_count=len(deletion_errors),
|
||||
)
|
||||
else:
|
||||
await transcripts_controller.update(transcript, {"audio_deleted": True})
|
||||
ctx.log("cleanup_consent: all audio deleted successfully")
|
||||
|
||||
return ConsentResult()
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[cleanup_consent],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.POST_ZULIP, set_error_status=False)
|
||||
async def post_zulip(input: FilePipelineInput, ctx: Context) -> ZulipResult:
|
||||
"""Post notification to Zulip."""
|
||||
ctx.log(f"post_zulip: transcript_id={input.transcript_id}")
|
||||
|
||||
if not settings.ZULIP_REALM:
|
||||
ctx.log("post_zulip skipped (Zulip not configured)")
|
||||
return ZulipResult(zulip_message_id=None, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.zulip import post_transcript_notification # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
message_id = await post_transcript_notification(transcript)
|
||||
ctx.log(f"post_zulip complete: zulip_message_id={message_id}")
|
||||
else:
|
||||
message_id = None
|
||||
|
||||
return ZulipResult(zulip_message_id=message_id)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[cleanup_consent],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.SEND_WEBHOOK, set_error_status=False)
|
||||
async def send_webhook(input: FilePipelineInput, ctx: Context) -> WebhookResult:
|
||||
"""Send completion webhook to external service."""
|
||||
ctx.log(f"send_webhook: transcript_id={input.transcript_id}")
|
||||
|
||||
if not input.room_id:
|
||||
ctx.log("send_webhook skipped (no room_id)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.rooms import rooms_controller # noqa: PLC0415
|
||||
from reflector.utils.webhook import ( # noqa: PLC0415
|
||||
fetch_transcript_webhook_payload,
|
||||
send_webhook_request,
|
||||
)
|
||||
|
||||
room = await rooms_controller.get_by_id(input.room_id)
|
||||
if not room or not room.webhook_url:
|
||||
ctx.log("send_webhook skipped (no webhook_url configured)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
payload = await fetch_transcript_webhook_payload(
|
||||
transcript_id=input.transcript_id,
|
||||
room_id=input.room_id,
|
||||
)
|
||||
|
||||
if isinstance(payload, str):
|
||||
ctx.log(f"send_webhook skipped (could not build payload): {payload}")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
import httpx # noqa: PLC0415
|
||||
|
||||
try:
|
||||
response = await send_webhook_request(
|
||||
url=room.webhook_url,
|
||||
payload=payload,
|
||||
event_type="transcript.completed",
|
||||
webhook_secret=room.webhook_secret,
|
||||
timeout=30.0,
|
||||
)
|
||||
ctx.log(f"send_webhook complete: status_code={response.status_code}")
|
||||
return WebhookResult(webhook_sent=True, response_code=response.status_code)
|
||||
except httpx.HTTPStatusError as e:
|
||||
ctx.log(f"send_webhook failed (HTTP {e.response.status_code}), continuing")
|
||||
return WebhookResult(
|
||||
webhook_sent=False, response_code=e.response.status_code
|
||||
)
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
||||
ctx.log(f"send_webhook failed ({e}), continuing")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
except Exception as e:
|
||||
ctx.log(f"send_webhook unexpected error: {e}")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
|
||||
|
||||
# --- On failure handler ---
|
||||
|
||||
|
||||
async def on_workflow_failure(input: FilePipelineInput, ctx: Context) -> None:
|
||||
"""Set transcript status to 'error' only if not already 'ended'."""
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript and transcript.status == "ended":
|
||||
logger.info(
|
||||
"[Hatchet] FilePipeline on_workflow_failure: transcript already ended, skipping error status",
|
||||
transcript_id=input.transcript_id,
|
||||
)
|
||||
ctx.log(
|
||||
"on_workflow_failure: transcript already ended, skipping error status"
|
||||
)
|
||||
return
|
||||
await set_workflow_error_status(input.transcript_id)
|
||||
|
||||
|
||||
@file_pipeline.on_failure_task()
|
||||
async def _register_on_workflow_failure(input: FilePipelineInput, ctx: Context) -> None:
|
||||
await on_workflow_failure(input, ctx)
|
||||
389
server/reflector/hatchet/workflows/live_post_pipeline.py
Normal file
389
server/reflector/hatchet/workflows/live_post_pipeline.py
Normal file
@@ -0,0 +1,389 @@
|
||||
"""
|
||||
Hatchet workflow: LivePostProcessingPipeline
|
||||
|
||||
Post-processing pipeline for live WebRTC meetings.
|
||||
Triggered after a live meeting ends. Orchestrates:
|
||||
Left branch: waveform → convert_mp3 → upload_mp3 → remove_upload → diarize → cleanup_consent
|
||||
Right branch: generate_title (parallel with left branch)
|
||||
Fan-in: final_summaries → post_zulip → send_webhook
|
||||
|
||||
Note: This file uses deferred imports (inside functions/tasks) intentionally.
|
||||
Hatchet workers run in forked processes; fresh imports per task ensure DB connections
|
||||
are not shared across forks, avoiding connection pooling issues.
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from hatchet_sdk import Context
|
||||
from pydantic import BaseModel
|
||||
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.constants import (
|
||||
TIMEOUT_HEAVY,
|
||||
TIMEOUT_MEDIUM,
|
||||
TIMEOUT_SHORT,
|
||||
TIMEOUT_TITLE,
|
||||
TaskName,
|
||||
)
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
fresh_db_connection,
|
||||
set_workflow_error_status,
|
||||
with_error_handling,
|
||||
)
|
||||
from reflector.hatchet.workflows.models import (
|
||||
ConsentResult,
|
||||
TitleResult,
|
||||
WaveformResult,
|
||||
WebhookResult,
|
||||
ZulipResult,
|
||||
)
|
||||
from reflector.logger import logger
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
class LivePostPipelineInput(BaseModel):
|
||||
transcript_id: str
|
||||
room_id: str | None = None
|
||||
|
||||
|
||||
# --- Result models specific to live post pipeline ---
|
||||
|
||||
|
||||
class ConvertMp3Result(BaseModel):
|
||||
converted: bool
|
||||
|
||||
|
||||
class UploadMp3Result(BaseModel):
|
||||
uploaded: bool
|
||||
|
||||
|
||||
class RemoveUploadResult(BaseModel):
|
||||
removed: bool
|
||||
|
||||
|
||||
class DiarizeResult(BaseModel):
|
||||
diarized: bool
|
||||
|
||||
|
||||
class FinalSummariesResult(BaseModel):
|
||||
generated: bool
|
||||
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
live_post_pipeline = hatchet.workflow(
|
||||
name="LivePostProcessingPipeline", input_validator=LivePostPipelineInput
|
||||
)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.WAVEFORM)
|
||||
async def waveform(input: LivePostPipelineInput, ctx: Context) -> WaveformResult:
|
||||
"""Generate waveform visualization from recorded audio."""
|
||||
ctx.log(f"waveform: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
PipelineMainWaveform,
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
runner = PipelineMainWaveform(transcript_id=transcript.id)
|
||||
await runner.run()
|
||||
|
||||
ctx.log("waveform complete")
|
||||
return WaveformResult(waveform_generated=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_TITLE),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_TITLE)
|
||||
async def generate_title(input: LivePostPipelineInput, ctx: Context) -> TitleResult:
|
||||
"""Generate meeting title from topics (runs in parallel with audio chain)."""
|
||||
ctx.log(f"generate_title: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
PipelineMainTitle,
|
||||
)
|
||||
|
||||
runner = PipelineMainTitle(transcript_id=input.transcript_id)
|
||||
await runner.run()
|
||||
|
||||
ctx.log("generate_title complete")
|
||||
return TitleResult(title=None)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[waveform],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.CONVERT_MP3)
|
||||
async def convert_mp3(input: LivePostPipelineInput, ctx: Context) -> ConvertMp3Result:
|
||||
"""Convert WAV recording to MP3."""
|
||||
ctx.log(f"convert_mp3: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_convert_to_mp3,
|
||||
)
|
||||
|
||||
await pipeline_convert_to_mp3(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("convert_mp3 complete")
|
||||
return ConvertMp3Result(converted=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[convert_mp3],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.UPLOAD_MP3)
|
||||
async def upload_mp3(input: LivePostPipelineInput, ctx: Context) -> UploadMp3Result:
|
||||
"""Upload MP3 to external storage."""
|
||||
ctx.log(f"upload_mp3: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_upload_mp3,
|
||||
)
|
||||
|
||||
await pipeline_upload_mp3(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("upload_mp3 complete")
|
||||
return UploadMp3Result(uploaded=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[upload_mp3],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=5,
|
||||
)
|
||||
@with_error_handling(TaskName.REMOVE_UPLOAD)
|
||||
async def remove_upload(
|
||||
input: LivePostPipelineInput, ctx: Context
|
||||
) -> RemoveUploadResult:
|
||||
"""Remove the original upload file."""
|
||||
ctx.log(f"remove_upload: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_remove_upload,
|
||||
)
|
||||
|
||||
await pipeline_remove_upload(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("remove_upload complete")
|
||||
return RemoveUploadResult(removed=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[remove_upload],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.DIARIZE)
|
||||
async def diarize(input: LivePostPipelineInput, ctx: Context) -> DiarizeResult:
|
||||
"""Run diarization on the recorded audio."""
|
||||
ctx.log(f"diarize: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_diarization,
|
||||
)
|
||||
|
||||
await pipeline_diarization(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("diarize complete")
|
||||
return DiarizeResult(diarized=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[diarize],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.CLEANUP_CONSENT, set_error_status=False)
|
||||
async def cleanup_consent(input: LivePostPipelineInput, ctx: Context) -> ConsentResult:
|
||||
"""Check consent and delete audio files if any participant denied."""
|
||||
ctx.log(f"cleanup_consent: transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
cleanup_consent as _cleanup_consent,
|
||||
)
|
||||
|
||||
await _cleanup_consent(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("cleanup_consent complete")
|
||||
return ConsentResult()
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[cleanup_consent, generate_title],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.FINAL_SUMMARIES)
|
||||
async def final_summaries(
|
||||
input: LivePostPipelineInput, ctx: Context
|
||||
) -> FinalSummariesResult:
|
||||
"""Generate final summaries (fan-in after audio chain + title)."""
|
||||
ctx.log(f"final_summaries: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_summaries,
|
||||
)
|
||||
|
||||
await pipeline_summaries(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("final_summaries complete")
|
||||
return FinalSummariesResult(generated=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[final_summaries],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.POST_ZULIP, set_error_status=False)
|
||||
async def post_zulip(input: LivePostPipelineInput, ctx: Context) -> ZulipResult:
|
||||
"""Post notification to Zulip."""
|
||||
ctx.log(f"post_zulip: transcript_id={input.transcript_id}")
|
||||
|
||||
if not settings.ZULIP_REALM:
|
||||
ctx.log("post_zulip skipped (Zulip not configured)")
|
||||
return ZulipResult(zulip_message_id=None, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.zulip import post_transcript_notification # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
message_id = await post_transcript_notification(transcript)
|
||||
ctx.log(f"post_zulip complete: zulip_message_id={message_id}")
|
||||
else:
|
||||
message_id = None
|
||||
|
||||
return ZulipResult(zulip_message_id=message_id)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[final_summaries],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.SEND_WEBHOOK, set_error_status=False)
|
||||
async def send_webhook(input: LivePostPipelineInput, ctx: Context) -> WebhookResult:
|
||||
"""Send completion webhook to external service."""
|
||||
ctx.log(f"send_webhook: transcript_id={input.transcript_id}")
|
||||
|
||||
if not input.room_id:
|
||||
ctx.log("send_webhook skipped (no room_id)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.rooms import rooms_controller # noqa: PLC0415
|
||||
from reflector.utils.webhook import ( # noqa: PLC0415
|
||||
fetch_transcript_webhook_payload,
|
||||
send_webhook_request,
|
||||
)
|
||||
|
||||
room = await rooms_controller.get_by_id(input.room_id)
|
||||
if not room or not room.webhook_url:
|
||||
ctx.log("send_webhook skipped (no webhook_url configured)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
payload = await fetch_transcript_webhook_payload(
|
||||
transcript_id=input.transcript_id,
|
||||
room_id=input.room_id,
|
||||
)
|
||||
|
||||
if isinstance(payload, str):
|
||||
ctx.log(f"send_webhook skipped (could not build payload): {payload}")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
import httpx # noqa: PLC0415
|
||||
|
||||
try:
|
||||
response = await send_webhook_request(
|
||||
url=room.webhook_url,
|
||||
payload=payload,
|
||||
event_type="transcript.completed",
|
||||
webhook_secret=room.webhook_secret,
|
||||
timeout=30.0,
|
||||
)
|
||||
ctx.log(f"send_webhook complete: status_code={response.status_code}")
|
||||
return WebhookResult(webhook_sent=True, response_code=response.status_code)
|
||||
except httpx.HTTPStatusError as e:
|
||||
ctx.log(f"send_webhook failed (HTTP {e.response.status_code}), continuing")
|
||||
return WebhookResult(
|
||||
webhook_sent=False, response_code=e.response.status_code
|
||||
)
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
||||
ctx.log(f"send_webhook failed ({e}), continuing")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
except Exception as e:
|
||||
ctx.log(f"send_webhook unexpected error: {e}")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
|
||||
|
||||
# --- On failure handler ---
|
||||
|
||||
|
||||
async def on_workflow_failure(input: LivePostPipelineInput, ctx: Context) -> None:
|
||||
"""Set transcript status to 'error' only if not already 'ended'."""
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript and transcript.status == "ended":
|
||||
logger.info(
|
||||
"[Hatchet] LivePostProcessingPipeline on_workflow_failure: transcript already ended",
|
||||
transcript_id=input.transcript_id,
|
||||
)
|
||||
ctx.log(
|
||||
"on_workflow_failure: transcript already ended, skipping error status"
|
||||
)
|
||||
return
|
||||
await set_workflow_error_status(input.transcript_id)
|
||||
|
||||
|
||||
@live_post_pipeline.on_failure_task()
|
||||
async def _register_on_workflow_failure(
|
||||
input: LivePostPipelineInput, ctx: Context
|
||||
) -> None:
|
||||
await on_workflow_failure(input, ctx)
|
||||
@@ -17,7 +17,7 @@ from contextlib import asynccontextmanager
|
||||
from typing import Generic
|
||||
|
||||
import av
|
||||
from celery import chord, current_task, group, shared_task
|
||||
from celery import current_task, shared_task
|
||||
from pydantic import BaseModel
|
||||
from structlog import BoundLogger as Logger
|
||||
|
||||
@@ -397,7 +397,9 @@ class PipelineMainLive(PipelineMainBase):
|
||||
# when the pipeline ends, connect to the post pipeline
|
||||
logger.info("Pipeline main live ended", transcript_id=self.transcript_id)
|
||||
logger.info("Scheduling pipeline main post", transcript_id=self.transcript_id)
|
||||
pipeline_post(transcript_id=self.transcript_id)
|
||||
transcript = await transcripts_controller.get_by_id(self.transcript_id)
|
||||
room_id = transcript.room_id if transcript else None
|
||||
await pipeline_post(transcript_id=self.transcript_id, room_id=room_id)
|
||||
|
||||
|
||||
class PipelineMainDiarization(PipelineMainBase[AudioDiarizationInput]):
|
||||
@@ -792,29 +794,20 @@ async def task_pipeline_post_to_zulip(*, transcript_id: str):
|
||||
await pipeline_post_to_zulip(transcript_id=transcript_id)
|
||||
|
||||
|
||||
def pipeline_post(*, transcript_id: str):
|
||||
async def pipeline_post(*, transcript_id: str, room_id: str | None = None):
|
||||
"""
|
||||
Run the post pipeline
|
||||
Run the post pipeline via Hatchet.
|
||||
"""
|
||||
chain_mp3_and_diarize = (
|
||||
task_pipeline_waveform.si(transcript_id=transcript_id)
|
||||
| task_pipeline_convert_to_mp3.si(transcript_id=transcript_id)
|
||||
| task_pipeline_upload_mp3.si(transcript_id=transcript_id)
|
||||
| task_pipeline_remove_upload.si(transcript_id=transcript_id)
|
||||
| task_pipeline_diarization.si(transcript_id=transcript_id)
|
||||
| task_cleanup_consent.si(transcript_id=transcript_id)
|
||||
)
|
||||
chain_title_preview = task_pipeline_title.si(transcript_id=transcript_id)
|
||||
chain_final_summaries = task_pipeline_final_summaries.si(
|
||||
transcript_id=transcript_id
|
||||
)
|
||||
from reflector.hatchet.client import HatchetClientManager # noqa: PLC0415
|
||||
|
||||
chain = chord(
|
||||
group(chain_mp3_and_diarize, chain_title_preview),
|
||||
chain_final_summaries,
|
||||
) | task_pipeline_post_to_zulip.si(transcript_id=transcript_id)
|
||||
|
||||
return chain.delay()
|
||||
await HatchetClientManager.start_workflow(
|
||||
"LivePostProcessingPipeline",
|
||||
{
|
||||
"transcript_id": str(transcript_id),
|
||||
"room_id": str(room_id) if room_id else None,
|
||||
},
|
||||
additional_metadata={"transcript_id": str(transcript_id)},
|
||||
)
|
||||
|
||||
|
||||
@get_transcript
|
||||
|
||||
@@ -10,7 +10,6 @@ from dataclasses import dataclass
|
||||
from typing import Literal, Union, assert_never
|
||||
|
||||
import celery
|
||||
from celery.result import AsyncResult
|
||||
from hatchet_sdk.clients.rest.exceptions import ApiException, NotFoundException
|
||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||
|
||||
@@ -18,7 +17,6 @@ from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.transcripts import Transcript, transcripts_controller
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||
from reflector.utils.string import NonEmptyString
|
||||
|
||||
|
||||
@@ -105,11 +103,8 @@ async def validate_transcript_for_processing(
|
||||
):
|
||||
return ValidationNotReady(detail="Recording is not ready for processing")
|
||||
|
||||
# Check Celery tasks
|
||||
# Check Celery tasks (multitrack still uses Celery for some paths)
|
||||
if task_is_scheduled_or_active(
|
||||
"reflector.pipelines.main_file_pipeline.task_pipeline_file_process",
|
||||
transcript_id=transcript.id,
|
||||
) or task_is_scheduled_or_active(
|
||||
"reflector.pipelines.main_multitrack_pipeline.task_pipeline_multitrack_process",
|
||||
transcript_id=transcript.id,
|
||||
):
|
||||
@@ -175,11 +170,8 @@ async def prepare_transcript_processing(validation: ValidationOk) -> PrepareResu
|
||||
|
||||
async def dispatch_transcript_processing(
|
||||
config: ProcessingConfig, force: bool = False
|
||||
) -> AsyncResult | None:
|
||||
"""Dispatch transcript processing to appropriate backend (Hatchet or Celery).
|
||||
|
||||
Returns AsyncResult for Celery tasks, None for Hatchet workflows.
|
||||
"""
|
||||
) -> None:
|
||||
"""Dispatch transcript processing to Hatchet workflow engine."""
|
||||
if isinstance(config, MultitrackProcessingConfig):
|
||||
# Multitrack processing always uses Hatchet (no Celery fallback)
|
||||
# First check if we can replay (outside transaction since it's read-only)
|
||||
@@ -275,7 +267,21 @@ async def dispatch_transcript_processing(
|
||||
return None
|
||||
|
||||
elif isinstance(config, FileProcessingConfig):
|
||||
return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
|
||||
# File processing uses Hatchet workflow
|
||||
workflow_id = await HatchetClientManager.start_workflow(
|
||||
workflow_name="FilePipeline",
|
||||
input_data={"transcript_id": config.transcript_id},
|
||||
additional_metadata={"transcript_id": config.transcript_id},
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
||||
if transcript:
|
||||
await transcripts_controller.update(
|
||||
transcript, {"workflow_run_id": workflow_id}
|
||||
)
|
||||
|
||||
logger.info("File pipeline dispatched via Hatchet", workflow_id=workflow_id)
|
||||
return None
|
||||
else:
|
||||
assert_never(config)
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ import asyncio
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
from urllib.parse import unquote, urlparse
|
||||
@@ -15,10 +14,8 @@ from urllib.parse import unquote, urlparse
|
||||
from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
|
||||
|
||||
from reflector.db.transcripts import SourceKind, TranscriptTopic, transcripts_controller
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines.main_file_pipeline import (
|
||||
task_pipeline_file_process as task_pipeline_file_process,
|
||||
)
|
||||
from reflector.pipelines.main_live_pipeline import pipeline_post as live_pipeline_post
|
||||
from reflector.pipelines.main_live_pipeline import (
|
||||
pipeline_process as live_pipeline_process,
|
||||
@@ -237,29 +234,22 @@ async def process_live_pipeline(
|
||||
# assert documented behaviour: after process, the pipeline isn't ended. this is the reason of calling pipeline_post
|
||||
assert pre_final_transcript.status != "ended"
|
||||
|
||||
# at this point, diarization is running but we have no access to it. run diarization in parallel - one will hopefully win after polling
|
||||
result = live_pipeline_post(transcript_id=transcript_id)
|
||||
|
||||
# result.ready() blocks even without await; it mutates result also
|
||||
while not result.ready():
|
||||
print(f"Status: {result.state}")
|
||||
time.sleep(2)
|
||||
# Trigger post-processing via Hatchet (fire-and-forget)
|
||||
await live_pipeline_post(transcript_id=transcript_id)
|
||||
print("Live post-processing pipeline triggered via Hatchet", file=sys.stderr)
|
||||
|
||||
|
||||
async def process_file_pipeline(
|
||||
transcript_id: TranscriptId,
|
||||
):
|
||||
"""Process audio/video file using the optimized file pipeline"""
|
||||
"""Process audio/video file using the optimized file pipeline via Hatchet"""
|
||||
|
||||
# task_pipeline_file_process is a Celery task, need to use .delay() for async execution
|
||||
result = task_pipeline_file_process.delay(transcript_id=transcript_id)
|
||||
|
||||
# Wait for the Celery task to complete
|
||||
while not result.ready():
|
||||
print(f"File pipeline status: {result.state}", file=sys.stderr)
|
||||
time.sleep(2)
|
||||
|
||||
logger.info("File pipeline processing complete")
|
||||
await HatchetClientManager.start_workflow(
|
||||
"FilePipeline",
|
||||
{"transcript_id": str(transcript_id)},
|
||||
additional_metadata={"transcript_id": str(transcript_id)},
|
||||
)
|
||||
print("File pipeline triggered via Hatchet", file=sys.stderr)
|
||||
|
||||
|
||||
async def process(
|
||||
@@ -293,7 +283,16 @@ async def process(
|
||||
|
||||
await handler(transcript_id)
|
||||
|
||||
await extract_result_from_entry(transcript_id, output_path)
|
||||
if pipeline == "file":
|
||||
# File pipeline is async via Hatchet — results not available immediately.
|
||||
# Use reflector.tools.process_transcript with --sync for polling.
|
||||
print(
|
||||
f"File pipeline dispatched for transcript {transcript_id}. "
|
||||
f"Results will be available once the Hatchet workflow completes.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
else:
|
||||
await extract_result_from_entry(transcript_id, output_path)
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
@@ -11,10 +11,8 @@ Usage:
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
from typing import Callable
|
||||
|
||||
from celery.result import AsyncResult
|
||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||
|
||||
import reflector._warnings_filter # noqa: F401 -- side effect: suppress pydantic validate_default warning
|
||||
@@ -39,7 +37,7 @@ async def process_transcript_inner(
|
||||
on_validation: Callable[[ValidationResult], None],
|
||||
on_preprocess: Callable[[PrepareResult], None],
|
||||
force: bool = False,
|
||||
) -> AsyncResult | None:
|
||||
) -> None:
|
||||
validation = await validate_transcript_for_processing(transcript)
|
||||
on_validation(validation)
|
||||
config = await prepare_transcript_processing(validation)
|
||||
@@ -87,56 +85,39 @@ async def process_transcript(
|
||||
elif isinstance(config, FileProcessingConfig):
|
||||
print(f"Dispatching file pipeline", file=sys.stderr)
|
||||
|
||||
result = await process_transcript_inner(
|
||||
await process_transcript_inner(
|
||||
transcript,
|
||||
on_validation=on_validation,
|
||||
on_preprocess=on_preprocess,
|
||||
force=force,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
# Hatchet workflow dispatched
|
||||
if sync:
|
||||
# Re-fetch transcript to get workflow_run_id
|
||||
transcript = await transcripts_controller.get_by_id(transcript_id)
|
||||
if not transcript or not transcript.workflow_run_id:
|
||||
print("Error: workflow_run_id not found", file=sys.stderr)
|
||||
if sync:
|
||||
# Re-fetch transcript to get workflow_run_id
|
||||
transcript = await transcripts_controller.get_by_id(transcript_id)
|
||||
if not transcript or not transcript.workflow_run_id:
|
||||
print("Error: workflow_run_id not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print("Waiting for Hatchet workflow...", file=sys.stderr)
|
||||
while True:
|
||||
status = await HatchetClientManager.get_workflow_run_status(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
print(f" Status: {status.value}", file=sys.stderr)
|
||||
|
||||
if status == V1TaskStatus.COMPLETED:
|
||||
print("Workflow completed successfully", file=sys.stderr)
|
||||
break
|
||||
elif status in (V1TaskStatus.FAILED, V1TaskStatus.CANCELLED):
|
||||
print(f"Workflow failed: {status}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print("Waiting for Hatchet workflow...", file=sys.stderr)
|
||||
while True:
|
||||
status = await HatchetClientManager.get_workflow_run_status(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
print(f" Status: {status.value}", file=sys.stderr)
|
||||
|
||||
if status == V1TaskStatus.COMPLETED:
|
||||
print("Workflow completed successfully", file=sys.stderr)
|
||||
break
|
||||
elif status in (V1TaskStatus.FAILED, V1TaskStatus.CANCELLED):
|
||||
print(f"Workflow failed: {status}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
await asyncio.sleep(5)
|
||||
else:
|
||||
print(
|
||||
"Task dispatched (use --sync to wait for completion)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
elif sync:
|
||||
print("Waiting for task completion...", file=sys.stderr)
|
||||
while not result.ready():
|
||||
print(f" Status: {result.state}", file=sys.stderr)
|
||||
time.sleep(5)
|
||||
|
||||
if result.successful():
|
||||
print("Task completed successfully", file=sys.stderr)
|
||||
else:
|
||||
print(f"Task failed: {result.result}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
await asyncio.sleep(5)
|
||||
else:
|
||||
print(
|
||||
"Task dispatched (use --sync to wait for completion)", file=sys.stderr
|
||||
"Task dispatched (use --sync to wait for completion)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
finally:
|
||||
|
||||
@@ -52,8 +52,5 @@ async def transcript_process(
|
||||
if isinstance(config, ProcessError):
|
||||
raise HTTPException(status_code=500, detail=config.detail)
|
||||
else:
|
||||
# When transcript is in error state, force a new workflow instead of replaying
|
||||
# (replay would re-run from failure point with same conditions and likely fail again)
|
||||
force = transcript.status == "error"
|
||||
await dispatch_transcript_processing(config, force=force)
|
||||
await dispatch_transcript_processing(config, force=True)
|
||||
return ProcessStatus(status="ok")
|
||||
|
||||
@@ -6,7 +6,7 @@ from pydantic import BaseModel
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -95,7 +95,14 @@ async def transcript_record_upload(
|
||||
transcript, {"status": "uploaded", "source_kind": SourceKind.FILE}
|
||||
)
|
||||
|
||||
# launch a background task to process the file
|
||||
task_pipeline_file_process.delay(transcript_id=transcript_id)
|
||||
# launch Hatchet workflow to process the file
|
||||
workflow_id = await HatchetClientManager.start_workflow(
|
||||
"FilePipeline",
|
||||
{"transcript_id": str(transcript_id)},
|
||||
additional_metadata={"transcript_id": str(transcript_id)},
|
||||
)
|
||||
|
||||
# Save workflow_run_id for duplicate detection and status polling
|
||||
await transcripts_controller.update(transcript, {"workflow_run_id": workflow_id})
|
||||
|
||||
return UploadStatus(status="ok")
|
||||
|
||||
@@ -25,7 +25,6 @@ from reflector.db.transcripts import (
|
||||
transcripts_controller,
|
||||
)
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||
from reflector.pipelines.main_live_pipeline import asynctask
|
||||
from reflector.pipelines.topic_processing import EmptyPipeline
|
||||
from reflector.processors import AudioFileWriterProcessor
|
||||
@@ -163,7 +162,14 @@ async def process_recording(bucket_name: str, object_key: str):
|
||||
|
||||
await transcripts_controller.update(transcript, {"status": "uploaded"})
|
||||
|
||||
task_pipeline_file_process.delay(transcript_id=transcript.id)
|
||||
await HatchetClientManager.start_workflow(
|
||||
"FilePipeline",
|
||||
{
|
||||
"transcript_id": str(transcript.id),
|
||||
"room_id": str(room.id) if room else None,
|
||||
},
|
||||
additional_metadata={"transcript_id": str(transcript.id)},
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
|
||||
Reference in New Issue
Block a user