feat: parallelize hatchet (#804)

* parallelize hatchet (no-mistakes) * dry (no-mistakes) (minimal) * comments * self-review * self-review * self-review * self-review * pr comments * pr comments --------- Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-02-05 02:16:46 +00:00 · 2025-12-23 11:03:36 -05:00
parent 7c2d0698ed
commit 594bcc09e0
15 changed files with 849 additions and 287 deletions
--- a/server/reflector/processors/prompts.py
+++ b/server/reflector/processors/prompts.py
@@ -0,0 +1,30 @@
+"""
+LLM prompts for transcript processing.
+
+Extracted to a separate module to avoid circular imports when importing
+from processor modules (which import LLM/settings at module level).
+"""
+
+from textwrap import dedent
+
+TOPIC_PROMPT = dedent(  # title + summary request; {text} sits in <transcript> tags
+    """
+    Analyze the following transcript segment and extract the main topic being discussed.
+    Focus on the substantive content and ignore small talk or administrative chatter.
+
+    Create a title that:
+    - Captures the specific subject matter being discussed
+    - Is descriptive and self-explanatory
+    - Uses professional language
+    - Is specific rather than generic
+
+    For the summary:
+    - Summarize the key points in maximum two sentences
+    - Focus on what was discussed, decided, or accomplished
+    - Be concise but informative
+
+    <transcript>
+    {text}
+    </transcript>
+    """
+).strip()
--- a/server/reflector/processors/summary/prompts.py
+++ b/server/reflector/processors/summary/prompts.py
@@ -0,0 +1,91 @@
+"""
+LLM prompts for summary generation.
+
+Extracted to a separate module to avoid circular imports when importing
+from summary_builder.py (which imports LLM/settings at module level).
+"""
+
+from textwrap import dedent
+
+
+def build_participant_instructions(participant_names: list[str]) -> str:
+    """Return the participant-naming prompt section (empty string if no names).
+
+    Names are inserted after dedent() so a multi-line name cannot defeat it."""
+    if not participant_names:
+        return ""
+    return dedent(
+        """
+        # IMPORTANT: Participant Names
+        The following participants are identified in this conversation: {participants_list}
+
+        You MUST use these specific participant names when referring to people in your response.
+        Do NOT use generic terms like "a participant", "someone", "attendee", "Speaker 1", "Speaker 2", etc.
+        Always refer to people by their actual names (e.g., "John suggested..." not "A participant suggested...").
+        """
+    ).strip().format(participants_list=", ".join(participant_names))
+
+
+DETAILED_SUBJECT_PROMPT_TEMPLATE = dedent(  # per-topic prompt; uses {subject} twice
+    """
+    Get me information about the topic "{subject}"
+
+    # RESPONSE GUIDELINES
+    Follow this structured approach to create the topic summary:
+    - Highlight important arguments, insights, or data presented.
+    - Outline decisions made.
+    - Indicate any decisions reached, including any rationale or key factors
+      that influenced these decisions.
+    - Detail action items and responsibilities.
+    - For each decision or unresolved issue, list specific action items agreed
+      upon, along with assigned individuals or teams responsible for each task.
+    - Specify deadlines or timelines if mentioned. For each action item,
+      include any deadlines or timeframes discussed for completion or follow-up.
+    - Mention unresolved issues or topics needing further discussion, aiding in
+      planning future meetings or follow-up actions.
+    - Do not include topic unrelated to {subject}.
+
+    # OUTPUT
+    Your summary should be clear, concise, and structured, covering all major
+    points, decisions, and action items from the meeting. It should be easy to
+    understand for someone not present, providing a comprehensive understanding
+    of what transpired and what needs to be done next. The summary should not
+    exceed one page to ensure brevity and focus.
+    """
+).strip()
+
+PARAGRAPH_SUMMARY_PROMPT = dedent(  # one-paragraph topic summary; no placeholders
+    """
+    Summarize the mentioned topic in 1 paragraph.
+    It will be integrated into the final summary, so just for this topic.
+    """
+).strip()
+
+RECAP_PROMPT = dedent(  # one-paragraph meeting recap; no placeholders
+    """
+    Provide a high-level quick recap of the following meeting, fitting in one paragraph.
+    Do not include decisions, action items or unresolved issue, just highlight the high moments.
+    Just dive into the meeting, be concise and do not include unnecessary details.
+    As we already know it is a meeting, do not start with 'During the meeting' or equivalent.
+    """
+).strip()
+
+
+def build_summary_markdown(recap: str, summaries: list[dict[str, str]]) -> str:
+    """Render the recap and per-subject summaries as one markdown document.
+
+    An empty recap or an empty summaries list drops its whole section; every
+    entry in summaries must provide the "subject" and "summary" keys.
+    """
+    parts: list[str] = []
+
+    if recap:
+        parts.extend(["# Quick recap", "", recap, ""])
+
+    if summaries:
+        parts.extend(["# Summary", ""])
+        for entry in summaries:
+            # Bold subject line, then its summary text, then a blank separator.
+            parts.extend([f"**{entry['subject']}**", entry["summary"], ""])
+
+    return "\n".join(parts)
--- a/server/reflector/processors/summary/summary_builder.py
+++ b/server/reflector/processors/summary/summary_builder.py
@@ -15,6 +15,13 @@ import structlog
 from pydantic import BaseModel, Field

 from reflector.llm import LLM
+from reflector.processors.summary.prompts import (
+    DETAILED_SUBJECT_PROMPT_TEMPLATE,
+    PARAGRAPH_SUMMARY_PROMPT,
+    RECAP_PROMPT,
+    build_participant_instructions,
+    build_summary_markdown,
+)
 from reflector.settings import settings

 T = TypeVar("T", bound=BaseModel)
@@ -52,50 +59,6 @@ SUBJECTS_PROMPT = dedent(
    """
 ).strip()

-DETAILED_SUBJECT_PROMPT_TEMPLATE = dedent(
-    """
-    Get me information about the topic "{subject}"
-
-    # RESPONSE GUIDELINES
-    Follow this structured approach to create the topic summary:
-    - Highlight important arguments, insights, or data presented.
-    - Outline decisions made.
-    - Indicate any decisions reached, including any rationale or key factors
-      that influenced these decisions.
-    - Detail action items and responsibilities.
-    - For each decision or unresolved issue, list specific action items agreed
-      upon, along with assigned individuals or teams responsible for each task.
-    - Specify deadlines or timelines if mentioned. For each action item,
-      include any deadlines or timeframes discussed for completion or follow-up.
-    - Mention unresolved issues or topics needing further discussion, aiding in
-      planning future meetings or follow-up actions.
-    - Do not include topic unrelated to {subject}.
-
-    # OUTPUT
-    Your summary should be clear, concise, and structured, covering all major
-    points, decisions, and action items from the meeting. It should be easy to
-    understand for someone not present, providing a comprehensive understanding
-    of what transpired and what needs to be done next. The summary should not
-    exceed one page to ensure brevity and focus.
-    """
-).strip()
-
-PARAGRAPH_SUMMARY_PROMPT = dedent(
-    """
-    Summarize the mentioned topic in 1 paragraph.
-    It will be integrated into the final summary, so just for this topic.
-    """
-).strip()
-
-RECAP_PROMPT = dedent(
-    """
-    Provide a high-level quick recap of the following meeting, fitting in one paragraph.
-    Do not include decisions, action items or unresolved issue, just highlight the high moments.
-    Just dive into the meeting, be concise and do not include unnecessary details.
-    As we already know it is a meeting, do not start with 'During the meeting' or equivalent.
-    """
-).strip()
-
 ACTION_ITEMS_PROMPT = dedent(
    """
    Identify action items from this meeting transcript. Your goal is to identify what was decided and what needs to happen next.
@@ -331,17 +294,7 @@ class SummaryBuilder:
        participants_md = self.format_list_md(participants)
        self.transcript += f"\n\n# Participants\n\n{participants_md}"

-        participants_list = ", ".join(participants)
-        self.participant_instructions = dedent(
-            f"""
-            # IMPORTANT: Participant Names
-            The following participants are identified in this conversation: {participants_list}
-
-            You MUST use these specific participant names when referring to people in your response.
-            Do NOT use generic terms like "a participant", "someone", "attendee", "Speaker 1", "Speaker 2", etc.
-            Always refer to people by their actual names (e.g., "John suggested..." not "A participant suggested...").
-            """
-        ).strip()
+        self.participant_instructions = build_participant_instructions(participants)

    async def identify_participants(self) -> None:
        """
@@ -377,18 +330,9 @@ class SummaryBuilder:
                participants_md = self.format_list_md(unique_participants)
                self.transcript += f"\n\n# Participants\n\n{participants_md}"

-                # Set instructions that will be automatically added to all prompts
-                participants_list = ", ".join(unique_participants)
-                self.participant_instructions = dedent(
-                    f"""
-                    # IMPORTANT: Participant Names
-                    The following participants are identified in this conversation: {participants_list}
-
-                    You MUST use these specific participant names when referring to people in your response.
-                    Do NOT use generic terms like "a participant", "someone", "attendee", "Speaker 1", "Speaker 2", etc.
-                    Always refer to people by their actual names (e.g., "John suggested..." not "A participant suggested...").
-                    """
-                ).strip()
+                self.participant_instructions = build_participant_instructions(
+                    unique_participants
+                )
            else:
                self.logger.warning("No participants identified in the transcript")

@@ -613,22 +557,7 @@ class SummaryBuilder:
    # ----------------------------------------------------------------------------

    def as_markdown(self) -> str:
-        lines: list[str] = []
-        if self.recap:
-            lines.append("# Quick recap")
-            lines.append("")
-            lines.append(self.recap)
-            lines.append("")
-
-        if self.summaries:
-            lines.append("# Summary")
-            lines.append("")
-            for summary in self.summaries:
-                lines.append(f"**{summary['subject']}**")
-                lines.append(summary["summary"])
-                lines.append("")
-
-        return "\n".join(lines)
+        return build_summary_markdown(self.recap, self.summaries)

    def format_list_md(self, data: list[str]) -> str:
        return "\n".join([f"- {item}" for item in data])
--- a/server/reflector/processors/transcript_topic_detector.py
+++ b/server/reflector/processors/transcript_topic_detector.py
@@ -1,35 +1,12 @@
-from textwrap import dedent
-
 from pydantic import AliasChoices, BaseModel, Field

 from reflector.llm import LLM
 from reflector.processors.base import Processor
+from reflector.processors.prompts import TOPIC_PROMPT
 from reflector.processors.types import TitleSummary, Transcript
 from reflector.settings import settings
 from reflector.utils.text import clean_title

-TOPIC_PROMPT = dedent(
-    """
-    Analyze the following transcript segment and extract the main topic being discussed.
-    Focus on the substantive content and ignore small talk or administrative chatter.
-
-    Create a title that:
-    - Captures the specific subject matter being discussed
-    - Is descriptive and self-explanatory
-    - Uses professional language
-    - Is specific rather than generic
-
-    For the summary:
-    - Summarize the key points in maximum two sentences
-    - Focus on what was discussed, decided, or accomplished
-    - Be concise but informative
-
-    <transcript>
-    {text}
-    </transcript>
-    """
-).strip()
-

 class TopicResponse(BaseModel):
    """Structured response for topic detection"""