mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
* llm instructions * vibe dailyco * vibe dailyco * doc update (vibe) * dont show recording ui on call * stub processor (vibe) * stub processor (vibe) self-review * stub processor (vibe) self-review * chore(main): release 0.14.0 (#670) * Add multitrack pipeline * Mixdown audio tracks * Mixdown with pyav filter graph * Trigger multitrack processing for daily recordings * apply platform from envs in priority: non-dry * Use explicit track keys for processing * Align tracks of a multitrack recording * Generate waveforms for the mixed audio * Emit multriack pipeline events * Fix multitrack pipeline track alignment * dailico docs * Enable multitrack reprocessing * modal temp files uniform names, cleanup. remove llm temporary docs * docs cleanup * dont proceed with raw recordings if any of the downloads fail * dry transcription pipelines * remove is_miltitrack * comments * explicit dailyco room name * docs * remove stub data/method * frontend daily/whereby code self-review (no-mistake) * frontend daily/whereby code self-review (no-mistakes) * frontend daily/whereby code self-review (no-mistakes) * consent cleanup for multitrack (no-mistakes) * llm fun * remove extra comments * fix tests * merge migrations * Store participant names * Get participants by meeting session id * pop back main branch migration * s3 paddington (no-mistakes) * comment * pr comments * pr comments * pr comments * platform / meeting cleanup * Use participant names in summary generation * platform assignment to meeting at controller level * pr comment * room playform properly default none * room playform properly default none * restore migration lost * streaming WIP * extract storage / use common storage / proper env vars for storage * fix mocks tests * remove fall back * streaming for multifile * cenrtal storage abstraction (no-mistakes) * remove dead code / vars * Set participant user id for authenticated users * whereby recording name parsing fix * whereby recording name parsing fix * more file 
stream * storage dry + tests * remove homemade boto3 streaming and use proper boto * update migration guide * webhook creation script - print uuid --------- Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com> Co-authored-by: Mathieu Virbel <mat@meltingrocks.com> Co-authored-by: Sergey Mankovsky <sergey@monadical.com>
110 lines
2.7 KiB
Python
110 lines
2.7 KiB
Python
"""
|
|
Topic processing utilities
|
|
==========================
|
|
|
|
Shared topic detection, title generation, and summarization logic
|
|
used across file and multitrack pipelines.
|
|
"""
|
|
|
|
from typing import Callable
|
|
|
|
import structlog
|
|
|
|
from reflector.db.transcripts import Transcript
|
|
from reflector.processors import (
|
|
TranscriptFinalSummaryProcessor,
|
|
TranscriptFinalTitleProcessor,
|
|
TranscriptTopicDetectorProcessor,
|
|
)
|
|
from reflector.processors.types import TitleSummary
|
|
from reflector.processors.types import Transcript as TranscriptType
|
|
|
|
|
|
class EmptyPipeline:
    """Minimal stand-in pipeline object handed to processors via ``set_pipeline``.

    It only carries a logger; preference lookups always fall back to the
    supplied default and emitted events are discarded.
    """

    def __init__(self, logger: structlog.BoundLogger):
        """Store the logger on the instance as ``self.logger``."""
        self.logger = logger

    def get_pref(self, k, d=None):
        """Return the default ``d`` unchanged; no preference is ever looked up for ``k``."""
        return d

    async def emit(self, event):
        """Accept ``event`` and do nothing with it."""
        return None
|
|
|
|
|
|
async def detect_topics(
    transcript: TranscriptType,
    target_language: str,
    *,
    on_topic_callback: Callable,
    empty_pipeline: EmptyPipeline,
    chunk_size: int = 300,
) -> list[TitleSummary]:
    """Feed a transcript to the topic detector in fixed-size word chunks.

    The transcript's words are sliced into consecutive chunks of at most
    ``chunk_size`` words. Each chunk is wrapped in a new transcript object
    (carrying the original ``translation``) and pushed to a
    ``TranscriptTopicDetectorProcessor``; the processor is flushed once all
    chunks have been pushed.

    Args:
        transcript: Source transcript; its ``words`` and ``translation``
            attributes are read.
        target_language: Accepted for interface compatibility; this function
            does not currently use it.
        on_topic_callback: Awaited with every topic the detector reports,
            after the topic has been appended to the result list.
        empty_pipeline: Pipeline object attached to the processor via
            ``set_pipeline``.
        chunk_size: Maximum number of words per chunk. Defaults to 300.

    Returns:
        All topics reported by the detector, in the order they were reported.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Validate first: a zero step makes ``range`` raise a less helpful error,
    # and a negative step would silently produce no chunks at all.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    topics: list[TitleSummary] = []

    async def on_topic(topic: TitleSummary):
        # Collect locally before forwarding so the returned list is complete
        # even if the caller's callback does nothing with the topic.
        topics.append(topic)
        return await on_topic_callback(topic)

    topic_detector = TranscriptTopicDetectorProcessor(callback=on_topic)
    topic_detector.set_pipeline(empty_pipeline)

    words = transcript.words
    # Stepping through range(0, len(words), chunk_size) never yields an empty
    # slice, so no emptiness check is needed inside the loop.
    for start in range(0, len(words), chunk_size):
        chunk_transcript = TranscriptType(
            words=words[start : start + chunk_size],
            translation=transcript.translation,
        )
        await topic_detector.push(chunk_transcript)

    await topic_detector.flush()
    return topics
|
|
|
|
|
|
async def generate_title(
    topics: list[TitleSummary],
    *,
    on_title_callback: Callable,
    empty_pipeline: EmptyPipeline,
    logger: structlog.BoundLogger,
):
    """Push every topic through a ``TranscriptFinalTitleProcessor`` and flush it.

    Args:
        topics: Topics to feed to the title processor, in order.
        on_title_callback: Passed to the processor as its ``callback``.
        empty_pipeline: Pipeline object attached to the processor via
            ``set_pipeline``.
        logger: Receives a warning when ``topics`` is empty.

    Returns:
        None. When ``topics`` is empty a warning is logged and no processor
        is created.
    """
    if topics:
        title_processor = TranscriptFinalTitleProcessor(callback=on_title_callback)
        title_processor.set_pipeline(empty_pipeline)

        for item in topics:
            await title_processor.push(item)

        await title_processor.flush()
    else:
        logger.warning("No topics for title generation")
|
|
|
|
|
|
async def generate_summaries(
    topics: list[TitleSummary],
    transcript: Transcript,
    *,
    on_long_summary_callback: Callable,
    on_short_summary_callback: Callable,
    empty_pipeline: EmptyPipeline,
    logger: structlog.BoundLogger,
):
    """Push every topic through a ``TranscriptFinalSummaryProcessor`` and flush it.

    Args:
        topics: Topics to feed to the summary processor, in order.
        transcript: Passed to the processor as its ``transcript`` argument.
        on_long_summary_callback: Passed to the processor as its ``callback``.
        on_short_summary_callback: Passed to the processor as
            ``on_short_summary``.
        empty_pipeline: Pipeline object attached to the processor via
            ``set_pipeline``.
        logger: Receives a warning when ``topics`` is empty.

    Returns:
        None. When ``topics`` is empty a warning is logged and no processor
        is created.
    """
    if topics:
        summary_processor = TranscriptFinalSummaryProcessor(
            transcript=transcript,
            callback=on_long_summary_callback,
            on_short_summary=on_short_summary_callback,
        )
        summary_processor.set_pipeline(empty_pipeline)

        for item in topics:
            await summary_processor.push(item)

        await summary_processor.flush()
    else:
        logger.warning("No topics for summary generation")
|