Feature additions (#210)

* initial

* add LLM features

* update LLM logic

* update llm functions: change control flow

* add generation config

* update return types

* update processors and tests

* update rtc_offer

* revert new title processor change

* fix unit tests

* add comments and fix HTTP 500

* adjust prompt

* test with reflector app

* revert new event for final title

* update

* move onus onto processors

* move onus onto processors

* stash

* add provision for gen config

* dynamically pack the LLM input using context length

* tune final summary params

* update consolidated class structures

* update consolidated class structures

* update precommit

* add broadcast processors

* working baseline

* Organize LLMParams

* minor fixes

* minor fixes

* minor fixes

* fix unit tests

* fix unit tests

* fix unit tests

* update tests

* update tests

* edit pipeline response events

* update summary return types

* configure tests

* alembic db migration

* change LLM response flow

* edit main llm functions

* edit main llm functions

* change llm name and gen cf

* Update transcript_topic_detector.py

* PR review comments

* checkpoint before db event migration

* update DB migration of past events

* update DB migration of past events

* edit LLM classes

* Delete unwanted file

* remove List typing

* remove List typing

* update oobabooga API call

* topic enhancements

* update UI event handling

* move ensure_casing to llm base

* update tests

* update tests
Author: projects-g
Date: 2023-09-13 11:26:08 +05:30
Committed by: GitHub
Parent: 762d7bfc3c
Commit: 9fe261406c
33 changed files with 1334 additions and 202 deletions

reflector/processors/__init__.py

@@ -4,7 +4,20 @@ from .audio_merge import AudioMergeProcessor  # noqa: F401
 from .audio_transcript import AudioTranscriptProcessor  # noqa: F401
 from .audio_transcript_auto import AudioTranscriptAutoProcessor  # noqa: F401
 from .base import Pipeline, PipelineEvent, Processor, ThreadedProcessor  # noqa: F401
-from .transcript_final_summary import TranscriptFinalSummaryProcessor  # noqa: F401
+from .transcript_final_long_summary import (  # noqa: F401
+    TranscriptFinalLongSummaryProcessor,
+)
+from .transcript_final_short_summary import (  # noqa: F401
+    TranscriptFinalShortSummaryProcessor,
+)
+from .transcript_final_title import TranscriptFinalTitleProcessor  # noqa: F401
 from .transcript_liner import TranscriptLinerProcessor  # noqa: F401
 from .transcript_topic_detector import TranscriptTopicDetectorProcessor  # noqa: F401
-from .types import AudioFile, FinalSummary, TitleSummary, Transcript, Word  # noqa: F401
+from .types import (  # noqa: F401
+    AudioFile,
+    FinalLongSummary,
+    FinalShortSummary,
+    TitleSummary,
+    Transcript,
+    Word,
+)

reflector/processors/base.py

@@ -5,6 +5,7 @@ from uuid import uuid4
 from prometheus_client import Counter, Gauge, Histogram
 from pydantic import BaseModel
+from reflector.logger import logger

@@ -296,7 +297,7 @@ class BroadcastProcessor(Processor):
     types of input.
     """

-    def __init__(self, processors: Processor):
+    def __init__(self, processors: list[Processor]):
        super().__init__()
         self.processors = processors
         self.INPUT_TYPE = processors[0].INPUT_TYPE
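
The constructor now takes a list of child processors and fans each input out to all of them. A minimal, runnable sketch of that fan-out behavior; the Processor base class and push contract here are simplified stand-ins, not the real reflector.processors.base implementation:

import asyncio


class Processor:
    INPUT_TYPE = str

    async def push(self, data):
        raise NotImplementedError


class Upper(Processor):
    async def push(self, data: str):
        print("upper:", data.upper())


class Lower(Processor):
    async def push(self, data: str):
        print("lower:", data.lower())


class BroadcastProcessor(Processor):
    """Forward each input to every child processor."""

    def __init__(self, processors: list[Processor]):
        self.processors = processors
        # All children are assumed to consume the same type,
        # so the broadcast borrows its INPUT_TYPE from the first child.
        self.INPUT_TYPE = processors[0].INPUT_TYPE

    async def push(self, data):
        # Fan the same payload out to every child, in order.
        for processor in self.processors:
            await processor.push(data)


asyncio.run(BroadcastProcessor([Upper(), Lower()]).push("Hello"))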

reflector/processors/transcript_final_long_summary.py (new file)

@@ -0,0 +1,59 @@
from reflector.llm import LLM, LLMTaskParams
from reflector.processors.base import Processor
from reflector.processors.types import FinalLongSummary, TitleSummary


class TranscriptFinalLongSummaryProcessor(Processor):
    """
    Get the final long summary
    """

    INPUT_TYPE = TitleSummary
    OUTPUT_TYPE = FinalLongSummary
    TASK = "final_long_summary"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.chunks: list[TitleSummary] = []
        self.llm = LLM.get_instance()
        self.params = LLMTaskParams.get_instance(self.TASK).task_params

    async def _push(self, data: TitleSummary):
        self.chunks.append(data)

    async def get_long_summary(self, text: str) -> str:
        """
        Generate a long version of the final summary
        """
        self.logger.info(f"Smoothing out {len(text)} length summary to a long summary")
        chunks = list(self.llm.split_corpus(corpus=text, task_params=self.params))
        accumulated_summaries = ""
        for chunk in chunks:
            prompt = self.llm.create_prompt(instruct=self.params.instruct, text=chunk)
            summary_result = await self.llm.generate(
                prompt=prompt,
                gen_schema=self.params.gen_schema,
                gen_cfg=self.params.gen_cfg,
                logger=self.logger,
            )
            accumulated_summaries += summary_result["long_summary"]
        return accumulated_summaries

    async def _flush(self):
        if not self.chunks:
            self.logger.warning("No summary to output")
            return
        accumulated_summaries = " ".join([chunk.summary for chunk in self.chunks])
        long_summary = await self.get_long_summary(accumulated_summaries)
        last_chunk = self.chunks[-1]
        duration = last_chunk.timestamp + last_chunk.duration
        final_long_summary = FinalLongSummary(
            long_summary=long_summary,
            duration=duration,
        )
        await self.emit(final_long_summary)
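
This processor leans on LLM.split_corpus, which per the commit log above "dynamically pack[s] the LLM input using context length". Its implementation is not part of this diff, so the following is only a hypothetical sketch of such a greedy packer, using a toy whitespace token counter:

# Hypothetical sketch of a context-length-aware corpus splitter; the real
# reflector.llm.split_corpus is not shown in this diff and may differ.
def split_corpus(corpus: str, max_tokens: int, count_tokens=lambda s: len(s.split())):
    """Greedily pack sentences into chunks that fit the model context."""
    chunk, chunks, used = [], [], 0
    for sentence in corpus.split(". "):
        needed = count_tokens(sentence)
        # Start a new chunk when the next sentence would overflow the budget.
        if chunk and used + needed > max_tokens:
            chunks.append(". ".join(chunk))
            chunk, used = [], 0
        chunk.append(sentence)
        used += needed
    if chunk:
        chunks.append(". ".join(chunk))
    return chunks


print(split_corpus("One two. Three four five. Six.", max_tokens=4))
# -> ['One two', 'Three four five. Six.']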

reflector/processors/transcript_final_short_summary.py (new file)

@@ -0,0 +1,72 @@
from reflector.llm import LLM, LLMTaskParams
from reflector.processors.base import Processor
from reflector.processors.types import FinalShortSummary, TitleSummary


class TranscriptFinalShortSummaryProcessor(Processor):
    """
    Get the final summary using a tree summarizer
    """

    INPUT_TYPE = TitleSummary
    OUTPUT_TYPE = FinalShortSummary
    TASK = "final_short_summary"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.chunks: list[TitleSummary] = []
        self.llm = LLM.get_instance()
        self.params = LLMTaskParams.get_instance(self.TASK).task_params

    async def _push(self, data: TitleSummary):
        self.chunks.append(data)

    async def get_short_summary(self, text: str) -> dict:
        """
        Generate a short summary using a tree summarizer
        """
        self.logger.info(f"Smoothing out {len(text)} length summary to a short summary")
        chunks = list(self.llm.split_corpus(corpus=text, task_params=self.params))
        if len(chunks) == 1:
            chunk = chunks[0]
            prompt = self.llm.create_prompt(instruct=self.params.instruct, text=chunk)
            summary_result = await self.llm.generate(
                prompt=prompt,
                gen_schema=self.params.gen_schema,
                gen_cfg=self.params.gen_cfg,
                logger=self.logger,
            )
            return summary_result
        else:
            accumulated_summaries = ""
            for chunk in chunks:
                prompt = self.llm.create_prompt(
                    instruct=self.params.instruct, text=chunk
                )
                summary_result = await self.llm.generate(
                    prompt=prompt,
                    gen_schema=self.params.gen_schema,
                    gen_cfg=self.params.gen_cfg,
                    logger=self.logger,
                )
                accumulated_summaries += summary_result["short_summary"]
            return await self.get_short_summary(accumulated_summaries)

    async def _flush(self):
        if not self.chunks:
            self.logger.warning("No summary to output")
            return
        accumulated_summaries = " ".join([chunk.summary for chunk in self.chunks])
        short_summary_result = await self.get_short_summary(accumulated_summaries)
        last_chunk = self.chunks[-1]
        duration = last_chunk.timestamp + last_chunk.duration
        final_summary = FinalShortSummary(
            short_summary=short_summary_result["short_summary"],
            duration=duration,
        )
        await self.emit(final_summary)
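
The tree-summarizer recursion above works like this: summarize each chunk, concatenate the partial summaries, and recurse on the concatenation until the whole text fits a single chunk. A toy, runnable illustration of that control flow; summarize() stands in for the llm.generate call, and chunking is by character count rather than by model context length:

# Toy illustration of tree summarization; not the reflector implementation.
def summarize(text: str) -> str:
    return text[:20]  # pretend the model compresses the text


def tree_summary(text: str, chunk_size: int = 100) -> str:
    chunks = [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
    if len(chunks) == 1:
        # Base case: everything fits in one chunk, summarize once and stop.
        return summarize(chunks[0])
    # Summarize each chunk, then recurse on the shorter, merged text.
    merged = "".join(summarize(chunk) for chunk in chunks)
    return tree_summary(merged, chunk_size)


print(tree_summary("lorem ipsum " * 50))

Termination relies on the summarizer producing output shorter than its input, so each recursion level shrinks the text until the base case is reached.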

reflector/processors/transcript_final_summary.py (deleted)

@@ -1,30 +0,0 @@
from reflector.processors.base import Processor
from reflector.processors.types import TitleSummary, FinalSummary


class TranscriptFinalSummaryProcessor(Processor):
    """
    Assemble all summary into a line-based json
    """

    INPUT_TYPE = TitleSummary
    OUTPUT_TYPE = FinalSummary

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.chunks: list[TitleSummary] = []

    async def _push(self, data: TitleSummary):
        self.chunks.append(data)

    async def _flush(self):
        if not self.chunks:
            self.logger.warning("No summary to output")
            return
        # FIXME improve final summary
        result = "\n".join([chunk.summary for chunk in self.chunks])
        last_chunk = self.chunks[-1]
        duration = last_chunk.timestamp + last_chunk.duration
        await self.emit(FinalSummary(summary=result, duration=duration))

reflector/processors/transcript_final_title.py (new file)

@@ -0,0 +1,65 @@
from reflector.llm import LLM, LLMTaskParams
from reflector.processors.base import Processor
from reflector.processors.types import FinalTitle, TitleSummary


class TranscriptFinalTitleProcessor(Processor):
    """
    Generate the final title from the accumulated chunk titles
    """

    INPUT_TYPE = TitleSummary
    OUTPUT_TYPE = FinalTitle
    TASK = "final_title"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.chunks: list[TitleSummary] = []
        self.llm = LLM.get_instance()
        self.params = LLMTaskParams.get_instance(self.TASK).task_params

    async def _push(self, data: TitleSummary):
        self.chunks.append(data)

    async def get_title(self, text: str) -> dict:
        """
        Generate a title for the whole recording
        """
        chunks = list(self.llm.split_corpus(corpus=text, task_params=self.params))
        if len(chunks) == 1:
            chunk = chunks[0]
            prompt = self.llm.create_prompt(instruct=self.params.instruct, text=chunk)
            title_result = await self.llm.generate(
                prompt=prompt,
                gen_schema=self.params.gen_schema,
                gen_cfg=self.params.gen_cfg,
                logger=self.logger,
            )
            return title_result
        else:
            accumulated_titles = ""
            for chunk in chunks:
                prompt = self.llm.create_prompt(
                    instruct=self.params.instruct, text=chunk
                )
                title_result = await self.llm.generate(
                    prompt=prompt,
                    gen_schema=self.params.gen_schema,
                    gen_cfg=self.params.gen_cfg,
                    logger=self.logger,
                )
                accumulated_titles += title_result["summary"]
            return await self.get_title(accumulated_titles)

    async def _flush(self):
        if not self.chunks:
            self.logger.warning("No summary to output")
            return
        accumulated_titles = ".".join([chunk.title for chunk in self.chunks])
        title_result = await self.get_title(accumulated_titles)
        final_title = FinalTitle(title=title_result["title"])
        await self.emit(final_title)

reflector/processors/transcript_topic_detector.py

@@ -1,7 +1,6 @@
-from reflector.llm import LLM
+from reflector.llm import LLM, LLMTaskParams
 from reflector.processors.base import Processor
 from reflector.processors.types import TitleSummary, Transcript
-from reflector.utils.retry import retry


 class TranscriptTopicDetectorProcessor(Processor):

@@ -11,34 +10,14 @@ class TranscriptTopicDetectorProcessor(Processor):
     INPUT_TYPE = Transcript
     OUTPUT_TYPE = TitleSummary
+    TASK = "topic"

-    PROMPT = """
-    ### Human:
-    Create a JSON object as response.The JSON object must have 2 fields:
-    i) title and ii) summary.
-    For the title field, generate a short title for the given text.
-    For the summary field, summarize the given text in a maximum of
-    three sentences.
-
-    {input_text}
-
-    ### Assistant:
-    """
-
-    def __init__(self, min_transcript_length=750, **kwargs):
+    def __init__(self, min_transcript_length: int = 750, **kwargs):
         super().__init__(**kwargs)
         self.transcript = None
         self.min_transcript_length = min_transcript_length
         self.llm = LLM.get_instance()
-        self.topic_detector_schema = {
-            "type": "object",
-            "properties": {
-                "title": {"type": "string"},
-                "summary": {"type": "string"},
-            },
-        }
+        self.params = LLMTaskParams.get_instance(self.TASK).task_params

     async def _warmup(self):
         await self.llm.warmup(logger=self.logger)

@@ -55,18 +34,30 @@ class TranscriptTopicDetectorProcessor(Processor):
             return
         await self.flush()

+    async def get_topic(self, text: str) -> dict:
+        """
+        Generate a topic and description for a transcription excerpt
+        """
+        prompt = self.llm.create_prompt(instruct=self.params.instruct, text=text)
+        topic_result = await self.llm.generate(
+            prompt=prompt,
+            gen_schema=self.params.gen_schema,
+            gen_cfg=self.params.gen_cfg,
+            logger=self.logger,
+        )
+        return topic_result
+
     async def _flush(self):
         if not self.transcript:
             return
         text = self.transcript.text
         self.logger.info(f"Topic detector got {len(text)} length transcript")
-        prompt = self.PROMPT.format(input_text=text)
-        result = await retry(self.llm.generate)(
-            prompt=prompt, schema=self.topic_detector_schema, logger=self.logger
-        )
+        topic_result = await self.get_topic(text=text)
         summary = TitleSummary(
-            title=result["title"],
-            summary=result["summary"],
+            title=self.llm.ensure_casing(topic_result["title"]),
+            summary=topic_result["summary"],
             timestamp=self.transcript.timestamp,
             duration=self.transcript.duration,
             transcript=self.transcript,
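
With this change the inline PROMPT string and topic_detector_schema are gone: each processor now looks up its prompt, JSON schema, and generation config by task name through LLMTaskParams.get_instance(TASK).task_params. The registry itself is not shown in this diff, so here is only a hypothetical sketch of its shape, reusing the schema removed above; the real LLMTaskParams class may be organized differently:

# Hypothetical task-parameter registry with the fields this diff references
# (instruct, gen_schema, gen_cfg); an assumption, not the reflector code.
from dataclasses import dataclass, field


@dataclass
class TaskParams:
    instruct: str
    gen_schema: dict
    gen_cfg: dict = field(default_factory=dict)


TASK_PARAMS = {
    "topic": TaskParams(
        instruct="Generate a short title and a three-sentence summary.",
        gen_schema={
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "summary": {"type": "string"},
            },
        },
    ),
}


def get_task_params(task: str) -> TaskParams:
    return TASK_PARAMS[task]

Centralizing the parameters this way lets the "tune final summary params" and "add generation config" commits adjust prompts and decoding settings without touching the processor control flow.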

reflector/processors/types.py

@@ -103,11 +103,20 @@ class TitleSummary(BaseModel):
         return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"


-class FinalSummary(BaseModel):
-    summary: str
+class FinalLongSummary(BaseModel):
+    long_summary: str
+    duration: float
+
+
+class FinalShortSummary(BaseModel):
+    short_summary: str
     duration: float


+class FinalTitle(BaseModel):
+    title: str
+
+
 class TranslationLanguages(BaseModel):
     language_to_id_mapping: dict = {
         "Afrikaans": "af",