feat: use llamaindex everywhere (#525)

* feat: use llamaindex for transcript final title too

* refactor: removed llm backend, replaced with one single class+llamaindex

* refactor: self-review

* fix: typing

* fix: tests

* refactor: extract clean_title and add tests

* test: fix

* test: remove ensure_casing/nltk

* fix: tiny mistake

Committed via GitHub on 2025-08-01 12:13:00 -06:00
parent 1878834ce6
commit 28ac031ff6
25 changed files with 284 additions and 1539 deletions

View File

@@ -12,15 +12,9 @@ from textwrap import dedent
 from typing import Type, TypeVar
 import structlog
-from llama_index.core import Settings
-from llama_index.core.output_parsers import PydanticOutputParser
-from llama_index.core.program import LLMTextCompletionProgram
-from llama_index.core.response_synthesizers import TreeSummarize
-from llama_index.llms.openai_like import OpenAILike
 from pydantic import BaseModel, Field
-from reflector.llm.base import LLM
-from reflector.llm.openai_llm import OpenAILLM
+from reflector.llm import LLM
 from reflector.settings import settings
 T = TypeVar("T", bound=BaseModel)
@@ -168,23 +162,12 @@ class SummaryBuilder:
         self.summaries: list[dict[str, str]] = []
         self.subjects: list[str] = []
         self.transcription_type: TranscriptionType | None = None
-        self.llm_instance: LLM = llm
+        self.llm: LLM = llm
         self.model_name: str = llm.model_name
         self.logger = logger or structlog.get_logger()
         if filename:
             self.read_transcript_from_file(filename)
-        Settings.llm = OpenAILike(
-            model=llm.model_name,
-            api_base=llm.url,
-            api_key=llm.api_key,
-            context_window=settings.SUMMARY_LLM_CONTEXT_SIZE_TOKENS,
-            is_chat_model=True,
-            is_function_calling_model=llm.has_structured_output,
-            temperature=llm.temperature,
-            max_tokens=llm.max_tokens,
-        )
     def read_transcript_from_file(self, filename: str) -> None:
         """
         Load a transcript from a text file.
@@ -202,40 +185,16 @@ class SummaryBuilder:
         self.transcript = transcript
     def set_llm_instance(self, llm: LLM) -> None:
-        self.llm_instance = llm
+        self.llm = llm
     async def _get_structured_response(
         self, prompt: str, output_cls: Type[T], tone_name: str | None = None
-    ) -> Type[T]:
+    ) -> T:
         """Generic function to get structured output from LLM for non-function-calling models."""
-        # First, use TreeSummarize to get the response
-        summarizer = TreeSummarize(verbose=True)
-        response = await summarizer.aget_response(
-            prompt, [self.transcript], tone_name=tone_name
+        return await self.llm.get_structured_response(
+            prompt, [self.transcript], output_cls, tone_name=tone_name
         )
-        # Then, use PydanticOutputParser to structure the response
-        output_parser = PydanticOutputParser(output_cls)
-        prompt_template_str = STRUCTURED_RESPONSE_PROMPT_TEMPLATE
-        program = LLMTextCompletionProgram.from_defaults(
-            output_parser=output_parser,
-            prompt_template_str=prompt_template_str,
-            verbose=False,
-        )
-        format_instructions = output_parser.format(
-            "Please structure the above information in the following JSON format:"
-        )
-        output = await program.acall(
-            analysis=str(response), format_instructions=format_instructions
-        )
-        return output
     # ----------------------------------------------------------------------------
     # Participants
     # ----------------------------------------------------------------------------
@@ -354,19 +313,18 @@ class SummaryBuilder:
     async def generate_subject_summaries(self) -> None:
         """Generate detailed summaries for each extracted subject."""
         assert self.transcript is not None
-        summarizer = TreeSummarize(verbose=False)
         summaries = []
         for subject in self.subjects:
             detailed_prompt = DETAILED_SUBJECT_PROMPT_TEMPLATE.format(subject=subject)
-            detailed_response = await summarizer.aget_response(
+            detailed_response = await self.llm.get_response(
                 detailed_prompt, [self.transcript], tone_name="Topic assistant"
             )
             paragraph_prompt = PARAGRAPH_SUMMARY_PROMPT
-            paragraph_response = await summarizer.aget_response(
+            paragraph_response = await self.llm.get_response(
                 paragraph_prompt, [str(detailed_response)], tone_name="Topic summarizer"
             )
@@ -377,7 +335,6 @@ class SummaryBuilder:
     async def generate_recap(self) -> None:
         """Generate a quick recap from the subject summaries."""
-        summarizer = TreeSummarize(verbose=True)
         summaries_text = "\n\n".join(
             [
@@ -388,7 +345,7 @@ class SummaryBuilder:
         recap_prompt = RECAP_PROMPT
-        recap_response = await summarizer.aget_response(
+        recap_response = await self.llm.get_response(
             recap_prompt, [summaries_text], tone_name="Recap summarizer"
         )
@@ -483,7 +440,7 @@ if __name__ == "__main__":
     async def main():
         # build the summary
-        llm = OpenAILLM(config_prefix="SUMMARY", settings=settings)
+        llm = LLM(settings=settings)
         sm = SummaryBuilder(llm=llm, filename=args.transcript)
         if args.subjects:
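
Note: the call sites above imply the shape of the new unified class in reflector/llm.py. What follows is a hedged sketch, not the actual implementation: the settings key names (SUMMARY_LLM_MODEL, SUMMARY_LLM_URL, SUMMARY_LLM_API_KEY) and the prompt template string are assumptions, while the OpenAILike / TreeSummarize / PydanticOutputParser wiring mirrors the code removed above.

# Sketch only: the unified LLM class as implied by this diff's call sites
# (get_response, get_structured_response, model_name/url/api_key attributes).
from typing import Type, TypeVar

from llama_index.core.output_parsers import PydanticOutputParser
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.llms.openai_like import OpenAILike
from pydantic import BaseModel

T = TypeVar("T", bound=BaseModel)

class LLM:
    def __init__(self, settings, temperature: float = 0.7, max_tokens: int = 1000):
        self.model_name = settings.SUMMARY_LLM_MODEL  # assumed key name
        self.url = settings.SUMMARY_LLM_URL  # assumed key name
        self.api_key = settings.SUMMARY_LLM_API_KEY  # assumed key name
        self.temperature = temperature
        self.max_tokens = max_tokens
        # One client per instance replaces the old global Settings.llm assignment.
        self._llm = OpenAILike(
            model=self.model_name,
            api_base=self.url,
            api_key=self.api_key,
            is_chat_model=True,
            temperature=temperature,
            max_tokens=max_tokens,
        )

    async def get_response(
        self, prompt: str, texts: list[str], tone_name: str | None = None
    ) -> str:
        # TreeSummarize chunks and merges long inputs itself, which is what
        # made the old split_corpus loop unnecessary.
        summarizer = TreeSummarize(llm=self._llm, verbose=False)
        response = await summarizer.aget_response(prompt, texts, tone_name=tone_name)
        return str(response)

    async def get_structured_response(
        self,
        prompt: str,
        texts: list[str],
        output_cls: Type[T],
        tone_name: str | None = None,
    ) -> T:
        # Same two-step pattern as the removed _get_structured_response body:
        # summarize to free text first, then parse into the Pydantic schema.
        analysis = await self.get_response(prompt, texts, tone_name=tone_name)
        parser = PydanticOutputParser(output_cls)
        program = LLMTextCompletionProgram.from_defaults(
            output_parser=parser,
            # Stand-in for the STRUCTURED_RESPONSE_PROMPT_TEMPLATE in the real code.
            prompt_template_str="{analysis}\n{format_instructions}",
            llm=self._llm,
        )
        return await program.acall(
            analysis=analysis,
            format_instructions=parser.format(
                "Please structure the above information in the following JSON format:"
            ),
        )

The key design change is that each processor now owns its own LLM instance instead of mutating the global llama_index Settings.llm singleton in SummaryBuilder's constructor.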

View File

@@ -1,4 +1,4 @@
-from reflector.llm.openai_llm import OpenAILLM
+from reflector.llm import LLM
 from reflector.processors.base import Processor
 from reflector.processors.summary.summary_builder import SummaryBuilder
 from reflector.processors.types import FinalLongSummary, FinalShortSummary, TitleSummary
@@ -17,7 +17,7 @@ class TranscriptFinalSummaryProcessor(Processor):
         super().__init__(**kwargs)
         self.transcript = transcript
         self.chunks: list[TitleSummary] = []
-        self.llm = OpenAILLM(config_prefix="SUMMARY", settings=settings)
+        self.llm = LLM(settings=settings)
         self.builder = None
     async def _push(self, data: TitleSummary):

View File

@@ -1,67 +1,72 @@
-from reflector.llm import LLM, LLMTaskParams
+from textwrap import dedent
+from reflector.llm import LLM
 from reflector.processors.base import Processor
 from reflector.processors.types import FinalTitle, TitleSummary
 from reflector.settings import settings
+from reflector.utils.text import clean_title
+TITLE_PROMPT = dedent(
+    """
+    Generate a concise title for this meeting based on the following topic titles.
+    Ignore casual conversation, greetings, or administrative matters.
+    The title must:
+    - Be maximum 10 words
+    - Use noun phrases when possible (e.g., "Q1 Budget Review" not "Reviewing the Q1 Budget")
+    - Avoid generic terms like "Team Meeting" or "Discussion"
+    If multiple unrelated topics were discussed, prioritize the most significant one,
+    or create a compound title (e.g., "Product Launch and Budget Planning").
+    <topics_discussed>
+    {titles}
+    </topics_discussed>
+    Do not explain, just output the meeting title as a single line.
+    """
+).strip()
 class TranscriptFinalTitleProcessor(Processor):
     """
-    Assemble all summary into a line-based json
+    Generate a final title from topic titles using LlamaIndex
     """
     INPUT_TYPE = TitleSummary
     OUTPUT_TYPE = FinalTitle
-    TASK = "final_title"
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.chunks: list[TitleSummary] = []
-        self.llm = LLM.get_instance()
-        self.params = LLMTaskParams.get_instance(self.TASK).task_params
+        self.llm = LLM(settings=settings, temperature=0.5, max_tokens=200)
     async def _push(self, data: TitleSummary):
         self.chunks.append(data)
-    async def get_title(self, text: str) -> dict:
+    async def get_title(self, accumulated_titles: str) -> str:
         """
-        Generate a title for the whole recording
+        Generate a title for the whole recording using LLM
         """
-        chunks = list(self.llm.split_corpus(corpus=text, task_params=self.params))
+        prompt = TITLE_PROMPT.format(titles=accumulated_titles)
+        response = await self.llm.get_response(
+            prompt,
+            [accumulated_titles],
+            tone_name="Title generator",
+        )
-        if len(chunks) == 1:
-            chunk = chunks[0]
-            prompt = self.llm.create_prompt(instruct=self.params.instruct, text=chunk)
-            title_result = await self.llm.generate(
-                prompt=prompt,
-                gen_schema=self.params.gen_schema,
-                gen_cfg=self.params.gen_cfg,
-                logger=self.logger,
-            )
-            return title_result
-        else:
-            accumulated_titles = ""
-            for chunk in chunks:
-                prompt = self.llm.create_prompt(
-                    instruct=self.params.instruct, text=chunk
-                )
-                title_result = await self.llm.generate(
-                    prompt=prompt,
-                    gen_schema=self.params.gen_schema,
-                    gen_cfg=self.params.gen_cfg,
-                    logger=self.logger,
-                )
-                accumulated_titles += title_result["title"]
+        self.logger.info(f"Generated title response: {response}")
-            return await self.get_title(accumulated_titles)
+        return response
     async def _flush(self):
         if not self.chunks:
             self.logger.warning("No summary to output")
             return
-        accumulated_titles = ".".join([chunk.title for chunk in self.chunks])
-        title_result = await self.get_title(accumulated_titles)
-        final_title = self.llm.trim_title(title_result["title"])
-        final_title = self.llm.ensure_casing(final_title)
+        accumulated_titles = "\n".join([f"- {chunk.title}" for chunk in self.chunks])
+        title = await self.get_title(accumulated_titles)
+        title = clean_title(title)
-        final_title = FinalTitle(title=final_title)
+        final_title = FinalTitle(title=title)
         await self.emit(final_title)
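
The clean_title helper used above is new in reflector/utils/text.py, extracted with tests per the commit message; it replaces the old trim_title/ensure_casing pair, and the nltk-backed ensure_casing was removed outright. A minimal sketch, with the exact cleanup rules assumed rather than taken from the source:

# Sketch only: plausible post-processing for model-generated titles.
import re

def clean_title(title: str) -> str:
    # Keep only the first line, since the prompt asks for a single-line title.
    lines = title.strip().splitlines()
    title = lines[0] if lines else ""
    # Strip wrapping quotes that models often add, collapse whitespace,
    # and drop a trailing period.
    title = title.strip("\"'")
    title = re.sub(r"\s+", " ", title).strip()
    return title.rstrip(".").strip()

# e.g. clean_title(' "Q1 Budget Review." ') -> 'Q1 Budget Review'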

View File

@@ -1,7 +1,41 @@
-from reflector.llm import LLM, LLMTaskParams
+from textwrap import dedent
+from pydantic import BaseModel, Field
+from reflector.llm import LLM
 from reflector.processors.base import Processor
 from reflector.processors.types import TitleSummary, Transcript
 from reflector.settings import settings
+from reflector.utils.text import clean_title
+TOPIC_PROMPT = dedent(
+    """
+    Analyze the following transcript segment and extract the main topic being discussed.
+    Focus on the substantive content and ignore small talk or administrative chatter.
+    Create a title that:
+    - Captures the specific subject matter being discussed
+    - Is descriptive and self-explanatory
+    - Uses professional language
+    - Is specific rather than generic
+    For the summary:
+    - Summarize the key points in maximum two sentences
+    - Focus on what was discussed, decided, or accomplished
+    - Be concise but informative
+    <transcript>
+    {text}
+    </transcript>
+    """
+).strip()
+class TopicResponse(BaseModel):
+    """Structured response for topic detection"""
+    title: str = Field(description="A descriptive title for the topic being discussed")
+    summary: str = Field(description="A concise 1-2 sentence summary of the discussion")
 class TranscriptTopicDetectorProcessor(Processor):
@@ -11,7 +45,6 @@ class TranscriptTopicDetectorProcessor(Processor):
     INPUT_TYPE = Transcript
     OUTPUT_TYPE = TitleSummary
-    TASK = "topic"
     def __init__(
         self, min_transcript_length: int = int(settings.MIN_TRANSCRIPT_LENGTH), **kwargs
@@ -19,8 +52,7 @@ class TranscriptTopicDetectorProcessor(Processor):
         super().__init__(**kwargs)
         self.transcript = None
         self.min_transcript_length = min_transcript_length
-        self.llm = LLM.get_instance()
-        self.params = LLMTaskParams.get_instance(self.TASK).task_params
+        self.llm = LLM(settings=settings, temperature=0.9, max_tokens=500)
     async def _push(self, data: Transcript):
         if self.transcript is None:
@@ -34,18 +66,15 @@ class TranscriptTopicDetectorProcessor(Processor):
             return
         await self.flush()
-    async def get_topic(self, text: str) -> dict:
+    async def get_topic(self, text: str) -> TopicResponse:
         """
-        Generate a topic and description for a transcription excerpt
+        Generate a topic and description for a transcription excerpt using LLM
        """
-        prompt = self.llm.create_prompt(instruct=self.params.instruct, text=text)
-        topic_result = await self.llm.generate(
-            prompt=prompt,
-            gen_schema=self.params.gen_schema,
-            gen_cfg=self.params.gen_cfg,
-            logger=self.logger,
+        prompt = TOPIC_PROMPT.format(text=text)
+        response = await self.llm.get_structured_response(
+            prompt, [text], TopicResponse, tone_name="Topic analyzer"
        )
-        return topic_result
+        return response
     async def _flush(self):
         if not self.transcript:
@@ -53,13 +82,13 @@ class TranscriptTopicDetectorProcessor(Processor):
         text = self.transcript.text
         self.logger.info(f"Topic detector got {len(text)} length transcript")
         topic_result = await self.get_topic(text=text)
-        title = self.llm.trim_title(topic_result["title"])
-        title = self.llm.ensure_casing(title)
+        title = clean_title(topic_result.title)
         summary = TitleSummary(
             title=title,
-            summary=topic_result["summary"],
+            summary=topic_result.summary,
             timestamp=self.transcript.timestamp,
             duration=self.transcript.duration,
             transcript=self.transcript,
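
A brief usage sketch of the new structured call path, assuming TOPIC_PROMPT and TopicResponse are importable from the processor module above and using hypothetical transcript text: get_structured_response returns a validated TopicResponse, so callers use attribute access instead of the old dict indexing.

# Sketch only; TOPIC_PROMPT and TopicResponse are defined in the module above.
import asyncio

from reflector.llm import LLM
from reflector.settings import settings
from reflector.utils.text import clean_title

async def demo() -> None:
    text = "We reviewed the Q1 budget and agreed to cut cloud spend by ten percent."
    llm = LLM(settings=settings, temperature=0.9, max_tokens=500)
    topic = await llm.get_structured_response(
        TOPIC_PROMPT.format(text=text), [text], TopicResponse, tone_name="Topic analyzer"
    )
    # Attribute access on the Pydantic model, not topic_result["title"]:
    print(clean_title(topic.title), "-", topic.summary)

asyncio.run(demo())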

View File

@@ -13,14 +13,13 @@ class TranscriptTranslatorProcessor(Processor):
     INPUT_TYPE = Transcript
     OUTPUT_TYPE = Transcript
-    TASK = "translate"
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.transcript = None
         self.translate_url = settings.TRANSLATE_URL
         self.timeout = settings.TRANSLATE_TIMEOUT
-        self.headers = {"Authorization": f"Bearer {settings.LLM_MODAL_API_KEY}"}
+        self.headers = {"Authorization": f"Bearer {settings.TRANSCRIPT_MODAL_API_KEY}"}
     async def _push(self, data: Transcript):
         self.transcript = data