mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-24 14:19:05 +00:00
feat: daily.co support as alternative to whereby (#691)
* llm instructions * vibe dailyco * vibe dailyco * doc update (vibe) * dont show recording ui on call * stub processor (vibe) * stub processor (vibe) self-review * stub processor (vibe) self-review * chore(main): release 0.14.0 (#670) * Add multitrack pipeline * Mixdown audio tracks * Mixdown with pyav filter graph * Trigger multitrack processing for daily recordings * apply platform from envs in priority: non-dry * Use explicit track keys for processing * Align tracks of a multitrack recording * Generate waveforms for the mixed audio * Emit multriack pipeline events * Fix multitrack pipeline track alignment * dailico docs * Enable multitrack reprocessing * modal temp files uniform names, cleanup. remove llm temporary docs * docs cleanup * dont proceed with raw recordings if any of the downloads fail * dry transcription pipelines * remove is_miltitrack * comments * explicit dailyco room name * docs * remove stub data/method * frontend daily/whereby code self-review (no-mistake) * frontend daily/whereby code self-review (no-mistakes) * frontend daily/whereby code self-review (no-mistakes) * consent cleanup for multitrack (no-mistakes) * llm fun * remove extra comments * fix tests * merge migrations * Store participant names * Get participants by meeting session id * pop back main branch migration * s3 paddington (no-mistakes) * comment * pr comments * pr comments * pr comments * platform / meeting cleanup * Use participant names in summary generation * platform assignment to meeting at controller level * pr comment * room playform properly default none * room playform properly default none * restore migration lost * streaming WIP * extract storage / use common storage / proper env vars for storage * fix mocks tests * remove fall back * streaming for multifile * cenrtal storage abstraction (no-mistakes) * remove dead code / vars * Set participant user id for authenticated users * whereby recording name parsing fix * whereby recording name parsing fix * more file 
stream * storage dry + tests * remove homemade boto3 streaming and use proper boto * update migration guide * webhook creation script - print uuid --------- Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com> Co-authored-by: Mathieu Virbel <mat@meltingrocks.com> Co-authored-by: Sergey Mankovsky <sergey@monadical.com>
This commit is contained in:
109
server/reflector/pipelines/topic_processing.py
Normal file
109
server/reflector/pipelines/topic_processing.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Topic processing utilities
|
||||
==========================
|
||||
|
||||
Shared topic detection, title generation, and summarization logic
|
||||
used across file and multitrack pipelines.
|
||||
"""
|
||||
|
||||
from typing import Callable
|
||||
|
||||
import structlog
|
||||
|
||||
from reflector.db.transcripts import Transcript
|
||||
from reflector.processors import (
|
||||
TranscriptFinalSummaryProcessor,
|
||||
TranscriptFinalTitleProcessor,
|
||||
TranscriptTopicDetectorProcessor,
|
||||
)
|
||||
from reflector.processors.types import TitleSummary
|
||||
from reflector.processors.types import Transcript as TranscriptType
|
||||
|
||||
|
||||
class EmptyPipeline:
    """No-op pipeline object.

    It keeps a reference to a logger, answers every preference lookup with
    the caller-supplied default, and discards every event it is asked to
    emit.
    """

    def __init__(self, logger: structlog.BoundLogger):
        # The logger is only stored; nothing in this class calls it.
        self.logger = logger

    def get_pref(self, k, d=None):
        """Return the fallback ``d`` unchanged; the key ``k`` is never looked up."""
        return d

    async def emit(self, event):
        """Accept ``event`` and drop it without any side effect."""
        return None
|
||||
|
||||
|
||||
async def detect_topics(
    transcript: TranscriptType,
    target_language: str,
    *,
    on_topic_callback: Callable,
    empty_pipeline: EmptyPipeline,
    chunk_size: int = 300,
) -> list[TitleSummary]:
    """Run topic detection over a transcript, one chunk of words at a time.

    The transcript's words are cut into consecutive slices of at most
    ``chunk_size`` words. Each slice is wrapped in a new ``TranscriptType``
    (carrying the original ``translation``) and pushed through a
    ``TranscriptTopicDetectorProcessor``; the processor is flushed once all
    slices have been pushed.

    Args:
        transcript: Source transcript; its ``words`` and ``translation``
            attributes are read.
        target_language: Accepted for interface compatibility; this function
            does not currently read it.
        on_topic_callback: Awaited with each topic the processor reports.
        empty_pipeline: Pipeline object attached to the processor via
            ``set_pipeline``.
        chunk_size: Maximum number of words per chunk. Defaults to 300.

    Returns:
        Every topic the processor reported, in the order it was reported.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # A zero step makes range() raise and a negative step silently yields no
    # chunks at all, so reject both up front with a clear message.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    topics: list[TitleSummary] = []

    async def on_topic(topic: TitleSummary):
        # Record the topic before forwarding it, so it is part of the
        # returned list even if the caller's callback raises.
        topics.append(topic)
        return await on_topic_callback(topic)

    topic_detector = TranscriptTopicDetectorProcessor(callback=on_topic)
    topic_detector.set_pipeline(empty_pipeline)

    words = transcript.words
    for start in range(0, len(words), chunk_size):
        # range() stops before len(words), so each slice holds at least one
        # word and no emptiness check is needed.
        chunk_transcript = TranscriptType(
            words=words[start : start + chunk_size],
            translation=transcript.translation,
        )
        await topic_detector.push(chunk_transcript)

    await topic_detector.flush()
    return topics
|
||||
|
||||
|
||||
async def generate_title(
    topics: list[TitleSummary],
    *,
    on_title_callback: Callable,
    empty_pipeline: EmptyPipeline,
    logger: structlog.BoundLogger,
):
    """Feed the detected topics to a title processor.

    When ``topics`` is empty a warning is logged and nothing else happens.
    Otherwise a ``TranscriptFinalTitleProcessor`` is created with
    ``on_title_callback`` as its callback, attached to ``empty_pipeline``,
    given every topic in order, and then flushed.

    Args:
        topics: Topics to push into the processor.
        on_title_callback: Callback handed to the title processor.
        empty_pipeline: Pipeline object attached via ``set_pipeline``.
        logger: Receives the warning when there are no topics.
    """
    if topics:
        title_processor = TranscriptFinalTitleProcessor(callback=on_title_callback)
        title_processor.set_pipeline(empty_pipeline)

        for item in topics:
            await title_processor.push(item)

        await title_processor.flush()
    else:
        # Nothing to build a title from; report it and leave.
        logger.warning("No topics for title generation")
|
||||
|
||||
|
||||
async def generate_summaries(
    topics: list[TitleSummary],
    transcript: Transcript,
    *,
    on_long_summary_callback: Callable,
    on_short_summary_callback: Callable,
    empty_pipeline: EmptyPipeline,
    logger: structlog.BoundLogger,
):
    """Feed the detected topics to a final-summary processor.

    When ``topics`` is empty a warning is logged and nothing else happens.
    Otherwise a ``TranscriptFinalSummaryProcessor`` is created for
    ``transcript`` with ``on_long_summary_callback`` as its ``callback`` and
    ``on_short_summary_callback`` as its ``on_short_summary`` hook, attached
    to ``empty_pipeline``, given every topic in order, and then flushed.

    Args:
        topics: Topics to push into the processor.
        transcript: Transcript record passed to the processor.
        on_long_summary_callback: Passed as the processor's ``callback``.
        on_short_summary_callback: Passed as the processor's
            ``on_short_summary``.
        empty_pipeline: Pipeline object attached via ``set_pipeline``.
        logger: Receives the warning when there are no topics.
    """
    if topics:
        summary_processor = TranscriptFinalSummaryProcessor(
            transcript=transcript,
            callback=on_long_summary_callback,
            on_short_summary=on_short_summary_callback,
        )
        summary_processor.set_pipeline(empty_pipeline)

        for item in topics:
            await summary_processor.push(item)

        await summary_processor.flush()
    else:
        # Nothing to summarise; report it and leave.
        logger.warning("No topics for summary generation")
|
||||
Reference in New Issue
Block a user