dry (no-mistakes) (minimal)

This commit is contained in:
Igor Loskutov
2025-12-22 15:58:13 -05:00
parent 1f41f16928
commit b9698c2aaf
5 changed files with 38 additions and 30 deletions

View File

@@ -558,8 +558,11 @@ async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
TranscriptTopic, TranscriptTopic,
transcripts_controller, transcripts_controller,
) )
from reflector.utils.transcript_constants import ( # noqa: PLC0415
TOPIC_CHUNK_WORD_COUNT,
)
chunk_size = 300 chunk_size = TOPIC_CHUNK_WORD_COUNT
chunks = [] chunks = []
for i in range(0, len(words), chunk_size): for i in range(0, len(words), chunk_size):
chunk_words = words[i : i + chunk_size] chunk_words = words[i : i + chunk_size]
@@ -850,6 +853,7 @@ async def generate_recap(input: PipelineInput, ctx: Context) -> RecapResult:
from reflector.processors.summary.prompts import ( # noqa: PLC0415 from reflector.processors.summary.prompts import ( # noqa: PLC0415
RECAP_PROMPT, RECAP_PROMPT,
build_participant_instructions, build_participant_instructions,
build_summary_markdown,
) )
subject_summaries = process_result.subject_summaries subject_summaries = process_result.subject_summaries
@@ -883,18 +887,7 @@ async def generate_recap(input: PipelineInput, ctx: Context) -> RecapResult:
) )
short_summary = str(recap_response) short_summary = str(recap_response)
lines = [] long_summary = build_summary_markdown(short_summary, summaries)
lines.append("# Quick recap")
lines.append("")
lines.append(short_summary)
lines.append("")
lines.append("# Summary")
lines.append("")
for s in summaries:
lines.append(f"**{s['subject']}**")
lines.append(s["summary"])
lines.append("")
long_summary = "\n".join(lines)
async with fresh_db_connection(): async with fresh_db_connection():
transcript = await transcripts_controller.get_by_id(input.transcript_id) transcript = await transcripts_controller.get_by_id(input.transcript_id)

View File

@@ -18,6 +18,7 @@ from reflector.processors import (
) )
from reflector.processors.types import TitleSummary from reflector.processors.types import TitleSummary
from reflector.processors.types import Transcript as TranscriptType from reflector.processors.types import Transcript as TranscriptType
from reflector.utils.transcript_constants import TOPIC_CHUNK_WORD_COUNT
class EmptyPipeline: class EmptyPipeline:
@@ -38,7 +39,7 @@ async def detect_topics(
on_topic_callback: Callable, on_topic_callback: Callable,
empty_pipeline: EmptyPipeline, empty_pipeline: EmptyPipeline,
) -> list[TitleSummary]: ) -> list[TitleSummary]:
chunk_size = 300 chunk_size = TOPIC_CHUNK_WORD_COUNT
topics: list[TitleSummary] = [] topics: list[TitleSummary] = []
async def on_topic(topic: TitleSummary): async def on_topic(topic: TitleSummary):

View File

@@ -69,3 +69,23 @@ RECAP_PROMPT = dedent(
As we already know it is a meeting, do not start with 'During the meeting' or equivalent. As we already know it is a meeting, do not start with 'During the meeting' or equivalent.
""" """
).strip() ).strip()
def build_summary_markdown(recap: str, summaries: list[dict[str, str]]) -> str:
    """Build the markdown summary from a recap and per-subject summaries.

    Args:
        recap: Short recap text. When empty, the "# Quick recap" section
            is omitted entirely.
        summaries: Dicts that each provide a ``"subject"`` and a
            ``"summary"`` key. When the list is empty, the "# Summary"
            section is omitted entirely.

    Returns:
        The markdown document as a single string. Every emitted section
        ends with a blank line, so a non-empty result ends with a newline;
        the result is ``""`` when both inputs are empty.

    Raises:
        KeyError: If a summary dict lacks ``"subject"`` or ``"summary"``.
    """
    lines: list[str] = []

    if recap:
        lines.extend(["# Quick recap", "", recap, ""])

    if summaries:
        lines.extend(["# Summary", ""])
        for summary in summaries:
            # Bold subject heading, its text, then a blank separator line.
            lines.extend([f"**{summary['subject']}**", summary["summary"], ""])

    return "\n".join(lines)

View File

@@ -20,6 +20,7 @@ from reflector.processors.summary.prompts import (
PARAGRAPH_SUMMARY_PROMPT, PARAGRAPH_SUMMARY_PROMPT,
RECAP_PROMPT, RECAP_PROMPT,
build_participant_instructions, build_participant_instructions,
build_summary_markdown,
) )
from reflector.settings import settings from reflector.settings import settings
@@ -556,22 +557,7 @@ class SummaryBuilder:
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
def as_markdown(self) -> str: def as_markdown(self) -> str:
lines: list[str] = [] return build_summary_markdown(self.recap, self.summaries)
if self.recap:
lines.append("# Quick recap")
lines.append("")
lines.append(self.recap)
lines.append("")
if self.summaries:
lines.append("# Summary")
lines.append("")
for summary in self.summaries:
lines.append(f"**{summary['subject']}**")
lines.append(summary["summary"])
lines.append("")
return "\n".join(lines)
def format_list_md(self, data: list[str]) -> str: def format_list_md(self, data: list[str]) -> str:
return "\n".join([f"- {item}" for item in data]) return "\n".join([f"- {item}" for item in data])

View File

@@ -0,0 +1,8 @@
"""
Shared transcript processing constants.
Used by both Hatchet workflows and Celery pipelines for consistent processing.
"""
# Topic detection: number of words per chunk for topic extraction
TOPIC_CHUNK_WORD_COUNT = 300