mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-23 05:39:05 +00:00
dry (no-mistakes) (minimal)
This commit is contained in:
@@ -558,8 +558,11 @@ async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
|
|||||||
TranscriptTopic,
|
TranscriptTopic,
|
||||||
transcripts_controller,
|
transcripts_controller,
|
||||||
)
|
)
|
||||||
|
from reflector.utils.transcript_constants import ( # noqa: PLC0415
|
||||||
|
TOPIC_CHUNK_WORD_COUNT,
|
||||||
|
)
|
||||||
|
|
||||||
chunk_size = 300
|
chunk_size = TOPIC_CHUNK_WORD_COUNT
|
||||||
chunks = []
|
chunks = []
|
||||||
for i in range(0, len(words), chunk_size):
|
for i in range(0, len(words), chunk_size):
|
||||||
chunk_words = words[i : i + chunk_size]
|
chunk_words = words[i : i + chunk_size]
|
||||||
@@ -850,6 +853,7 @@ async def generate_recap(input: PipelineInput, ctx: Context) -> RecapResult:
|
|||||||
from reflector.processors.summary.prompts import ( # noqa: PLC0415
|
from reflector.processors.summary.prompts import ( # noqa: PLC0415
|
||||||
RECAP_PROMPT,
|
RECAP_PROMPT,
|
||||||
build_participant_instructions,
|
build_participant_instructions,
|
||||||
|
build_summary_markdown,
|
||||||
)
|
)
|
||||||
|
|
||||||
subject_summaries = process_result.subject_summaries
|
subject_summaries = process_result.subject_summaries
|
||||||
@@ -883,18 +887,7 @@ async def generate_recap(input: PipelineInput, ctx: Context) -> RecapResult:
|
|||||||
)
|
)
|
||||||
short_summary = str(recap_response)
|
short_summary = str(recap_response)
|
||||||
|
|
||||||
lines = []
|
long_summary = build_summary_markdown(short_summary, summaries)
|
||||||
lines.append("# Quick recap")
|
|
||||||
lines.append("")
|
|
||||||
lines.append(short_summary)
|
|
||||||
lines.append("")
|
|
||||||
lines.append("# Summary")
|
|
||||||
lines.append("")
|
|
||||||
for s in summaries:
|
|
||||||
lines.append(f"**{s['subject']}**")
|
|
||||||
lines.append(s["summary"])
|
|
||||||
lines.append("")
|
|
||||||
long_summary = "\n".join(lines)
|
|
||||||
|
|
||||||
async with fresh_db_connection():
|
async with fresh_db_connection():
|
||||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ from reflector.processors import (
|
|||||||
)
|
)
|
||||||
from reflector.processors.types import TitleSummary
|
from reflector.processors.types import TitleSummary
|
||||||
from reflector.processors.types import Transcript as TranscriptType
|
from reflector.processors.types import Transcript as TranscriptType
|
||||||
|
from reflector.utils.transcript_constants import TOPIC_CHUNK_WORD_COUNT
|
||||||
|
|
||||||
|
|
||||||
class EmptyPipeline:
|
class EmptyPipeline:
|
||||||
@@ -38,7 +39,7 @@ async def detect_topics(
|
|||||||
on_topic_callback: Callable,
|
on_topic_callback: Callable,
|
||||||
empty_pipeline: EmptyPipeline,
|
empty_pipeline: EmptyPipeline,
|
||||||
) -> list[TitleSummary]:
|
) -> list[TitleSummary]:
|
||||||
chunk_size = 300
|
chunk_size = TOPIC_CHUNK_WORD_COUNT
|
||||||
topics: list[TitleSummary] = []
|
topics: list[TitleSummary] = []
|
||||||
|
|
||||||
async def on_topic(topic: TitleSummary):
|
async def on_topic(topic: TitleSummary):
|
||||||
|
|||||||
@@ -69,3 +69,23 @@ RECAP_PROMPT = dedent(
|
|||||||
As we already know it is a meeting, do not start with 'During the meeting' or equivalent.
|
As we already know it is a meeting, do not start with 'During the meeting' or equivalent.
|
||||||
"""
|
"""
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def build_summary_markdown(recap: str, summaries: list[dict[str, str]]) -> str:
|
||||||
|
"""Build markdown summary from recap and subject summaries."""
|
||||||
|
lines: list[str] = []
|
||||||
|
if recap:
|
||||||
|
lines.append("# Quick recap")
|
||||||
|
lines.append("")
|
||||||
|
lines.append(recap)
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
if summaries:
|
||||||
|
lines.append("# Summary")
|
||||||
|
lines.append("")
|
||||||
|
for summary in summaries:
|
||||||
|
lines.append(f"**{summary['subject']}**")
|
||||||
|
lines.append(summary["summary"])
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
return "\n".join(lines)
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from reflector.processors.summary.prompts import (
|
|||||||
PARAGRAPH_SUMMARY_PROMPT,
|
PARAGRAPH_SUMMARY_PROMPT,
|
||||||
RECAP_PROMPT,
|
RECAP_PROMPT,
|
||||||
build_participant_instructions,
|
build_participant_instructions,
|
||||||
|
build_summary_markdown,
|
||||||
)
|
)
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
|
|
||||||
@@ -556,22 +557,7 @@ class SummaryBuilder:
|
|||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
def as_markdown(self) -> str:
|
def as_markdown(self) -> str:
|
||||||
lines: list[str] = []
|
return build_summary_markdown(self.recap, self.summaries)
|
||||||
if self.recap:
|
|
||||||
lines.append("# Quick recap")
|
|
||||||
lines.append("")
|
|
||||||
lines.append(self.recap)
|
|
||||||
lines.append("")
|
|
||||||
|
|
||||||
if self.summaries:
|
|
||||||
lines.append("# Summary")
|
|
||||||
lines.append("")
|
|
||||||
for summary in self.summaries:
|
|
||||||
lines.append(f"**{summary['subject']}**")
|
|
||||||
lines.append(summary["summary"])
|
|
||||||
lines.append("")
|
|
||||||
|
|
||||||
return "\n".join(lines)
|
|
||||||
|
|
||||||
def format_list_md(self, data: list[str]) -> str:
|
def format_list_md(self, data: list[str]) -> str:
|
||||||
return "\n".join([f"- {item}" for item in data])
|
return "\n".join([f"- {item}" for item in data])
|
||||||
|
|||||||
8
server/reflector/utils/transcript_constants.py
Normal file
8
server/reflector/utils/transcript_constants.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
"""
|
||||||
|
Shared transcript processing constants.
|
||||||
|
|
||||||
|
Used by both Hatchet workflows and Celery pipelines for consistent processing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Topic detection: number of words per chunk for topic extraction
|
||||||
|
TOPIC_CHUNK_WORD_COUNT = 300
|
||||||
Reference in New Issue
Block a user