mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
* feat: use llamaindex for transcript final title too * refactor: removed llm backend, replaced with one single class+llamaindex * refactor: self-review * fix: typing * fix: tests * refactor: extract clean_title and add tests * test: fix * test: remove ensure_casing/nltk * fix: tiny mistake
85 lines
2.9 KiB
Python
85 lines
2.9 KiB
Python
from reflector.llm import LLM
|
|
from reflector.processors.base import Processor
|
|
from reflector.processors.summary.summary_builder import SummaryBuilder
|
|
from reflector.processors.types import FinalLongSummary, FinalShortSummary, TitleSummary
|
|
from reflector.settings import settings
|
|
|
|
|
|
class TranscriptFinalSummaryProcessor(Processor):
|
|
"""
|
|
Get the final (long and short) summary
|
|
"""
|
|
|
|
INPUT_TYPE = TitleSummary
|
|
OUTPUT_TYPE = FinalLongSummary
|
|
|
|
def __init__(self, transcript=None, **kwargs):
|
|
super().__init__(**kwargs)
|
|
self.transcript = transcript
|
|
self.chunks: list[TitleSummary] = []
|
|
self.llm = LLM(settings=settings)
|
|
self.builder = None
|
|
|
|
async def _push(self, data: TitleSummary):
|
|
self.chunks.append(data)
|
|
|
|
async def get_summary_builder(self, text) -> SummaryBuilder:
|
|
builder = SummaryBuilder(self.llm, logger=self.logger)
|
|
builder.set_transcript(text)
|
|
await builder.identify_participants()
|
|
await builder.generate_summary()
|
|
return builder
|
|
|
|
async def get_long_summary(self, text) -> str:
|
|
if not self.builder:
|
|
self.builder = await self.get_summary_builder(text)
|
|
return self.builder.as_markdown()
|
|
|
|
async def get_short_summary(self, text) -> str | None:
|
|
if not self.builder:
|
|
self.builder = await self.get_summary_builder(text)
|
|
return self.builder.recap
|
|
|
|
async def _flush(self):
|
|
if not self.chunks:
|
|
self.logger.warning("No summary to output")
|
|
return
|
|
|
|
# build the speakermap from the transcript
|
|
speakermap = {}
|
|
if self.transcript:
|
|
speakermap = {
|
|
participant["speaker"]: participant["name"]
|
|
for participant in self.transcript.participants
|
|
}
|
|
|
|
# build the transcript as a single string
|
|
# XXX: unsure if the participants name as replaced directly in speaker ?
|
|
text_transcript = []
|
|
for topic in self.chunks:
|
|
for segment in topic.transcript.as_segments():
|
|
name = speakermap.get(segment.speaker, f"Speaker {segment.speaker}")
|
|
text_transcript.append(f"{name}: {segment.text}")
|
|
|
|
text_transcript = "\n".join(text_transcript)
|
|
|
|
last_chunk = self.chunks[-1]
|
|
duration = last_chunk.timestamp + last_chunk.duration
|
|
|
|
long_summary = await self.get_long_summary(text_transcript)
|
|
short_summary = await self.get_short_summary(text_transcript)
|
|
|
|
final_long_summary = FinalLongSummary(
|
|
long_summary=long_summary,
|
|
duration=duration,
|
|
)
|
|
|
|
if short_summary:
|
|
final_short_summary = FinalShortSummary(
|
|
short_summary=short_summary,
|
|
duration=duration,
|
|
)
|
|
await self.emit(final_short_summary, name="short_summary")
|
|
|
|
await self.emit(final_long_summary)
|