mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
* Identify action items * Add action items to mock summary * Add action items validator * Remove final prefix from action items * Make on action items callback required * Don't mutate action items response * Assign action items to none on error * Use timeout constant * Exclude action items from transcript list
729 lines
26 KiB
Python
729 lines
26 KiB
Python
"""
|
|
# Summary meeting notes
|
|
|
|
This script is used to generate a summary of a meeting notes transcript.
|
|
"""
|
|
|
|
import asyncio
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from enum import Enum
|
|
from textwrap import dedent
|
|
from typing import Type, TypeVar
|
|
|
|
import structlog
|
|
from pydantic import BaseModel, Field
|
|
|
|
from reflector.llm import LLM
|
|
from reflector.settings import settings
|
|
|
|
# Type variable for the Pydantic model class that a structured LLM call is
# asked to return; bound to BaseModel so only model subclasses are accepted.
T = TypeVar("T", bound=BaseModel)
|
|
|
|
# Instruction used by SummaryBuilder.identify_participants to make the LLM
# list the people in the conversation. One tuple entry per prompt line.
PARTICIPANTS_PROMPT = "\n".join(
    (
        "Identify all participants in this conversation.",
        "Distinguish between people who actually spoke in the transcript and those who were only mentioned.",
        "Each participant should only be listed once.",
        "Do not include company names, only people's names.",
    )
)
|
|
|
|
# Instruction used by SummaryBuilder.identify_transcription_type to classify
# the transcript as a meeting, podcast or interview.
# NOTE(review): the wording contains typos ("severals participants",
# "one or more interviewer"). They are kept verbatim because this text is sent
# to the LLM at runtime; fix them in a dedicated, evaluated prompt change.
TRANSCRIPTION_TYPE_PROMPT = "\n".join(
    (
        "Analyze the transcript to determine if it is a meeting, podcast, or interview.",
        "A meeting typically involves severals participants engaging in discussions,",
        "making decisions, and planning actions. A podcast often includes hosts",
        "discussing topics or interviewing guests for an audience in a structured format.",
        "An interview generally features one or more interviewer questioning one or",
        "more interviewees, often for hiring, research, or journalism. Deliver your",
        "classification with a confidence score and reasoning.",
    )
)
|
|
|
|
# Instruction used by SummaryBuilder.extract_subjects to obtain the main
# topics of the transcript as short noun phrases.
# NOTE(review): the first sentence is ungrammatical ("What are the main / high
# level topic"); kept verbatim because it is runtime prompt text.
SUBJECTS_PROMPT = "\n".join(
    (
        "What are the main / high level topic of the meeting.",
        "Do not include direct quotes or unnecessary details.",
        "Be concise and focused on the main ideas.",
        "A subject briefly mentioned should not be included.",
        "There should be maximum 6 subjects.",
        "Do not write complete narrative sentences for the subject,",
        "you must write a concise subject using noun phrases.",
    )
)
|
|
|
|
# Template for the per-subject deep dive requested by
# SummaryBuilder.generate_subject_summaries; it is rendered with
# ``.format(subject=...)``, so ``{subject}`` is the only placeholder.
# NOTE(review): the bullet list below may originally have had nested
# (indented) sub-bullets and continuation lines; that indentation is not
# recoverable from this view of the file — confirm against the repository
# before relying on the exact whitespace of this literal.
DETAILED_SUBJECT_PROMPT_TEMPLATE = dedent(
    """
    Get me information about the topic "{subject}"

    # RESPONSE GUIDELINES
    Follow this structured approach to create the topic summary:
    - Highlight important arguments, insights, or data presented.
    - Outline decisions made.
    - Indicate any decisions reached, including any rationale or key factors
    that influenced these decisions.
    - Detail action items and responsibilities.
    - For each decision or unresolved issue, list specific action items agreed
    upon, along with assigned individuals or teams responsible for each task.
    - Specify deadlines or timelines if mentioned. For each action item,
    include any deadlines or timeframes discussed for completion or follow-up.
    - Mention unresolved issues or topics needing further discussion, aiding in
    planning future meetings or follow-up actions.
    - Do not include topic unrelated to {subject}.

    # OUTPUT
    Your summary should be clear, concise, and structured, covering all major
    points, decisions, and action items from the meeting. It should be easy to
    understand for someone not present, providing a comprehensive understanding
    of what transpired and what needs to be done next. The summary should not
    exceed one page to ensure brevity and focus.
    """
).strip()
|
|
|
|
# Instruction used by SummaryBuilder.generate_subject_summaries to condense a
# detailed topic write-up into a single paragraph.
PARAGRAPH_SUMMARY_PROMPT = "\n".join(
    (
        "Summarize the mentioned topic in 1 paragraph.",
        "It will be integrated into the final summary, so just for this topic.",
    )
)
|
|
|
|
# Instruction used by SummaryBuilder.generate_recap to turn the per-subject
# summaries into a one-paragraph overview.
RECAP_PROMPT = "\n".join(
    (
        "Provide a high-level quick recap of the following meeting, fitting in one paragraph.",
        "Do not include decisions, action items or unresolved issue, just highlight the high moments.",
        "Just dive into the meeting, be concise and do not include unnecessary details.",
        "As we already know it is a meeting, do not start with 'During the meeting' or equivalent.",
    )
)
|
|
|
|
# Instruction used by SummaryBuilder.identify_action_items; the answer is
# parsed into ActionItemsResponse (``decisions`` and ``next_steps``).
# NOTE(review): the "- ..." lines under the numbered items may originally have
# been indented as sub-bullets; that indentation is not recoverable from this
# view of the file — confirm against the repository before relying on the
# exact whitespace of this literal.
ACTION_ITEMS_PROMPT = dedent(
    """
    Identify action items from this meeting transcript. Your goal is to identify what was decided and what needs to happen next.

    Look for:

    1. **Decisions Made**: Any decisions, choices, or conclusions reached during the meeting. For each decision:
    - What was decided? (be specific)
    - Who made the decision or was involved? (use actual participant names)
    - Why was this decision made? (key factors, reasoning, or rationale)

    2. **Next Steps / Action Items**: Any tasks, follow-ups, or actions that were mentioned or assigned. For each action item:
    - What specific task needs to be done? (be concrete and actionable)
    - Who is responsible? (use actual participant names if mentioned, or "team" if unclear)
    - When is it due? (any deadlines, timeframes, or "by next meeting" type commitments)
    - What context is needed? (any additional details that help understand the task)

    Guidelines:
    - Be thorough and identify all action items, even if they seem minor
    - Include items that were agreed upon, assigned, or committed to
    - Include decisions even if they seem obvious or implicit
    - If someone says "I'll do X" or "We should do Y", that's an action item
    - If someone says "Let's go with option A", that's a decision
    - Use the exact participant names from the transcript
    - If no participant name is mentioned, you can leave assigned_to/decided_by as null

    Only return empty lists if the transcript contains NO decisions and NO action items whatsoever.
    """
).strip()
|
|
|
|
# Template with two ``str.format`` placeholders, ``{analysis}`` and
# ``{format_instructions}``, asking for a free-text analysis to be restated as
# JSON. It is not referenced by any other code visible in this module.
STRUCTURED_RESPONSE_PROMPT_TEMPLATE = "\n".join(
    (
        "Based on the following analysis, provide the information in the requested JSON format:",
        "",
        "Analysis:",
        "{analysis}",
        "",
        "{format_instructions}",
    )
)
|
|
|
|
|
|
class TranscriptionType(Enum):
    """Kinds of recording a transcript can be classified as.

    The values are the lower-case labels that
    SummaryBuilder.identify_transcription_type compares the LLM answer with.
    """

    MEETING = "meeting"  # also the fallback when classification fails
    PODCAST = "podcast"
    INTERVIEW = "interview"
|
|
|
|
|
|
# Structured-output schema requested from the LLM by
# SummaryBuilder.identify_transcription_type.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exposed to the LLM through the generated JSON schema, so treat them as
# prompt text rather than plain documentation — confirm before rewording.
class TranscriptionTypeResponse(BaseModel):
    """Pydantic model for transcription type classification"""

    # Plain string rather than TranscriptionType: the caller lower-cases it
    # and falls back to MEETING when the value is not one of the three labels.
    transcription_type: str = Field(
        description="The type of transcription - either 'meeting', 'podcast', or 'interview'"
    )
    # The ge/le constraints make validation reject values outside [0.0, 1.0].
    confidence: float = Field(
        description="Confidence score between 0 and 1", ge=0.0, le=1.0
    )
    # Only written to the debug log by the caller.
    reasoning: str = Field(description="Brief explanation for the classification")
|
|
|
|
|
|
# One entry of ParticipantsResponse.participants.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exposed to the LLM through the generated JSON schema — confirm before
# rewording them.
class ParticipantInfo(BaseModel):
    """Information about a single participant"""

    name: str = Field(description="The name of the participant")
    # Defaults to True, so an entry without this flag is counted as a speaker.
    is_speaker: bool = Field(
        default=True, description="Whether this person spoke in the transcript"
    )
|
|
|
|
|
|
# Structured-output schema requested from the LLM by
# SummaryBuilder.identify_participants.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exposed to the LLM through the generated JSON schema — confirm before
# rewording them.
class ParticipantsResponse(BaseModel):
    """Pydantic model for participants identification"""

    participants: list[ParticipantInfo] = Field(
        description="List of all participants in the conversation"
    )
    # Reported by the LLM and only logged; it is not cross-checked against
    # the number of entries flagged is_speaker.
    total_speakers: int = Field(description="Total number of people who spoke")
    # The caller merges these names with ``participants`` into one list.
    mentioned_only: list[str] = Field(
        default_factory=list, description="Names mentioned but who didn't speak"
    )
|
|
|
|
|
|
# Structured-output schema requested from the LLM by
# SummaryBuilder.extract_subjects.
# NOTE(review): the class docstring and the Field description are presumably
# exposed to the LLM through the generated JSON schema — confirm before
# rewording them.
class SubjectsResponse(BaseModel):
    """Pydantic model for extracted subjects/topics"""

    # The "maximum 6 items" limit is stated in the description only; no
    # validation constraint on the list length is declared here.
    subjects: list[str] = Field(
        description="List of main subjects/topics discussed, maximum 6 items",
    )
|
|
|
|
|
|
# One entry of ActionItemsResponse.next_steps.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exposed to the LLM through the generated JSON schema — confirm before
# rewording them.
class ActionItem(BaseModel):
    """A single action item from the meeting"""

    task: str = Field(description="The task or action item to be completed")
    assigned_to: str | None = Field(
        default=None, description="Person or team assigned to this task (name)"
    )
    # Overwritten by SummaryBuilder._map_participant_names_to_ids when
    # ``assigned_to`` exactly matches a known participant name.
    assigned_to_participant_id: str | None = Field(
        default=None, description="Participant ID if assigned_to matches a participant"
    )
    # Kept as free text; no date parsing is applied in this module.
    deadline: str | None = Field(
        default=None, description="Deadline or timeframe mentioned for this task"
    )
    context: str | None = Field(
        default=None, description="Additional context or notes about this task"
    )
|
|
|
|
|
|
# One entry of ActionItemsResponse.decisions.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exposed to the LLM through the generated JSON schema — confirm before
# rewording them.
class Decision(BaseModel):
    """A decision made during the meeting"""

    decision: str = Field(description="What was decided")
    rationale: str | None = Field(
        default=None,
        description="Reasoning or key factors that influenced this decision",
    )
    decided_by: str | None = Field(
        default=None, description="Person or group who made the decision (name)"
    )
    # Overwritten by SummaryBuilder._map_participant_names_to_ids when
    # ``decided_by`` exactly matches a known participant name.
    decided_by_participant_id: str | None = Field(
        default=None, description="Participant ID if decided_by matches a participant"
    )
|
|
|
|
|
|
# Structured-output schema requested from the LLM by
# SummaryBuilder.identify_action_items. Both lists default to empty, so a
# response that omits either key still validates.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exposed to the LLM through the generated JSON schema — confirm before
# rewording them.
class ActionItemsResponse(BaseModel):
    """Pydantic model for identified action items"""

    decisions: list[Decision] = Field(
        default_factory=list,
        description="List of decisions made during the meeting",
    )
    next_steps: list[ActionItem] = Field(
        default_factory=list,
        description="List of action items and next steps to be taken",
    )
|
|
|
|
|
|
class SummaryBuilder:
    """Build a meeting summary from a transcript through a series of LLM calls.

    Typical flow: load a transcript (``filename`` argument,
    ``read_transcript_from_file`` or ``set_transcript``), optionally register
    participants, then ``await generate_summary()`` and read the results from
    ``subjects``, ``summaries``, ``recap``, ``action_items`` or
    ``as_markdown()``.
    """

    def __init__(self, llm: LLM, filename: str | None = None, logger=None) -> None:
        """Create a builder.

        Args:
            llm: Client used for every request made by this builder.
            filename: Optional path of a transcript file to load immediately.
            logger: Logger to use; a structlog logger is created when omitted.
        """
        self.transcript: str | None = None
        self.recap: str | None = None
        # One {"subject": ..., "summary": ...} dict per extracted subject.
        self.summaries: list[dict[str, str]] = []
        self.subjects: list[str] = []
        self.transcription_type: TranscriptionType | None = None
        self.llm: LLM = llm
        self.model_name: str = llm.model_name
        self.logger = logger or structlog.get_logger()
        # Extra prompt text appended to every request once participants are known.
        self.participant_instructions: str | None = None
        self.action_items: ActionItemsResponse | None = None
        self.participant_name_to_id: dict[str, str] = {}
        if filename:
            self.read_transcript_from_file(filename)

    def read_transcript_from_file(self, filename: str) -> None:
        """
        Load a transcript from a UTF-8 text file.
        Must be formatted as:

            speaker: message
            speaker2: message2

        Surrounding whitespace of the file content is stripped.
        """
        with open(filename, "r", encoding="utf-8") as f:
            self.transcript = f.read().strip()

    def set_transcript(self, transcript: str) -> None:
        """Use ``transcript`` as the text to summarize (must be a ``str``)."""
        assert isinstance(transcript, str)
        self.transcript = transcript

    def set_llm_instance(self, llm: LLM) -> None:
        """Replace the LLM client used for subsequent requests."""
        self.llm = llm

    async def _get_structured_response(
        self,
        prompt: str,
        output_cls: Type[T],
        tone_name: str | None = None,
        timeout: int | None = None,
    ) -> T:
        """Ask the LLM about the transcript and get the answer as ``output_cls``.

        The participant instructions (if any) are appended to ``prompt`` and
        the current transcript is the only text passed along with it.
        ``tone_name`` and ``timeout`` are forwarded unchanged to the LLM client.
        """
        enhanced_prompt = self._enhance_prompt_with_participants(prompt)
        return await self.llm.get_structured_response(
            enhanced_prompt,
            [self.transcript],
            output_cls,
            tone_name=tone_name,
            timeout=timeout,
        )

    async def _get_response(
        self, prompt: str, texts: list[str], tone_name: str | None = None
    ) -> str:
        """Get text response with automatic participant instructions injection."""
        enhanced_prompt = self._enhance_prompt_with_participants(prompt)
        return await self.llm.get_response(enhanced_prompt, texts, tone_name=tone_name)

    def _enhance_prompt_with_participants(self, prompt: str) -> str:
        """Add participant instructions to any prompt if participants are known."""
        if self.participant_instructions:
            self.logger.debug("Adding participant instructions to prompt")
            return f"{prompt}\n\n{self.participant_instructions}"
        return prompt

    # ----------------------------------------------------------------------------
    # Participants
    # ----------------------------------------------------------------------------

    def _register_participants(self, participants: list[str]) -> None:
        """Append ``participants`` to the transcript and build the prompt instructions.

        Shared by ``set_known_participants`` and ``identify_participants`` so
        both produce exactly the same transcript section and instructions.
        """
        participants_md = self.format_list_md(participants)
        if self.transcript is None:
            # ``None += str`` would raise TypeError; the instructions below are
            # still useful once a transcript is set, so only skip the append.
            self.logger.warning(
                "No transcript loaded, participants not appended to transcript"
            )
        else:
            self.transcript += f"\n\n# Participants\n\n{participants_md}"

        participants_list = ", ".join(participants)
        self.participant_instructions = dedent(
            f"""
            # IMPORTANT: Participant Names
            The following participants are identified in this conversation: {participants_list}

            You MUST use these specific participant names when referring to people in your response.
            Do NOT use generic terms like "a participant", "someone", "attendee", "Speaker 1", "Speaker 2", etc.
            Always refer to people by their actual names (e.g., "John suggested..." not "A participant suggested...").
            """
        ).strip()

    def set_known_participants(
        self,
        participants: list[str],
        participant_name_to_id: dict[str, str] | None = None,
    ) -> None:
        """
        Set known participants directly without LLM identification.
        This is used when participants are already identified and stored.
        They are appended at the end of the transcript, providing more context for the assistant.

        Args:
            participants: List of participant names
            participant_name_to_id: Optional mapping of participant names to their IDs
        """
        if not participants:
            self.logger.warning("No participants provided")
            return

        self.logger.info(
            "Using known participants",
            participants=participants,
        )

        if participant_name_to_id:
            # Copy so later changes to the caller's dict do not leak in here.
            self.participant_name_to_id = dict(participant_name_to_id)

        self._register_participants(participants)

    async def identify_participants(self) -> None:
        """
        From a transcript, try to identify the participants using the LLM with structured output.
        This might not give the best result without good diarization, but it's a start.
        They are appended at the end of the transcript, providing more context for the assistant.

        Best effort: any failure is logged and the builder continues without
        participant information.
        """

        self.logger.debug("--- identify_participants using TreeSummarize with Pydantic")

        participants_prompt = PARTICIPANTS_PROMPT

        try:
            response = await self._get_structured_response(
                participants_prompt,
                ParticipantsResponse,
                tone_name="Participant identifier",
            )

            all_participants = [p.name for p in response.participants]

            self.logger.info(
                "Participants analysis complete",
                total_speakers=response.total_speakers,
                speakers=[p.name for p in response.participants if p.is_speaker],
                mentioned_only=response.mentioned_only,
                total_identified=len(all_participants) + len(response.mentioned_only),
            )

            # dict.fromkeys removes duplicates while keeping first-seen order,
            # so the participant list (and every prompt built from it) is the
            # same on every run; a set would reorder names arbitrarily.
            unique_participants = list(
                dict.fromkeys(all_participants + response.mentioned_only)
            )

            if unique_participants:
                # Sets the instructions that are automatically added to all prompts
                self._register_participants(unique_participants)
            else:
                self.logger.warning("No participants identified in the transcript")

        except Exception as e:
            self.logger.error(
                f"Error in participant identification: {e}", exc_info=True
            )
            self.logger.warning(
                "Failed to identify participants, continuing without them"
            )

    # ----------------------------------------------------------------------------
    # Transcription identification
    # ----------------------------------------------------------------------------

    async def identify_transcription_type(self) -> None:
        """
        Identify the type of transcription (meeting, podcast or interview) using structured output.

        The result is stored in ``self.transcription_type``; it falls back to
        ``TranscriptionType.MEETING`` when the answer is not a known label or
        when the request fails.
        """

        self.logger.debug(
            "--- identify transcription type using TreeSummarizer with Pydantic"
        )

        transcription_type_prompt = TRANSCRIPTION_TYPE_PROMPT

        try:
            response = await self._get_structured_response(
                transcription_type_prompt,
                TranscriptionTypeResponse,
                tone_name="Transcription type classifier",
            )

            self.logger.info(
                f"Transcription type identified: {response.transcription_type} "
                f"(confidence: {response.confidence:.2f})"
            )
            self.logger.debug(f"Reasoning: {response.reasoning}")

            try:
                # Lookup by value covers every enum member; stray whitespace
                # and letter case in the LLM answer are tolerated.
                self.transcription_type = TranscriptionType(
                    response.transcription_type.strip().lower()
                )
            except ValueError:
                self.logger.warning(
                    f"Unexpected transcription type: {response.transcription_type}, "
                    f"defaulting to meeting"
                )
                self.transcription_type = TranscriptionType.MEETING

        except Exception as e:
            self.logger.error(
                f"Error in transcription type identification: {e}", exc_info=True
            )
            self.transcription_type = TranscriptionType.MEETING

    # ----------------------------------------------------------------------------
    # Summary
    # ----------------------------------------------------------------------------

    async def extract_subjects(self) -> None:
        """Extract main subjects/topics from the transcript.

        Stores the result in ``self.subjects``; on failure the error is logged
        and ``self.subjects`` is reset to an empty list.
        """
        self.logger.info("--- extract main subjects using TreeSummarize")

        subjects_prompt = SUBJECTS_PROMPT

        try:
            response = await self._get_structured_response(
                subjects_prompt,
                SubjectsResponse,
                tone_name="Meeting assistant that talk only as list item",
            )

            self.subjects = response.subjects
            self.logger.info(f"Extracted subjects: {self.subjects}")

        except Exception as e:
            self.logger.error(f"Error extracting subjects: {e}", exc_info=True)
            self.subjects = []

    async def generate_subject_summaries(self) -> None:
        """Generate detailed summaries for each extracted subject.

        For every subject, a detailed write-up is requested from the
        transcript and then condensed into one paragraph. Errors from the LLM
        client are not caught here and propagate to the caller.
        """
        assert self.transcript is not None
        summaries = []

        for subject in self.subjects:
            detailed_prompt = DETAILED_SUBJECT_PROMPT_TEMPLATE.format(subject=subject)

            detailed_response = await self._get_response(
                detailed_prompt, [self.transcript], tone_name="Topic assistant"
            )

            paragraph_prompt = PARAGRAPH_SUMMARY_PROMPT

            paragraph_response = await self._get_response(
                paragraph_prompt, [str(detailed_response)], tone_name="Topic summarizer"
            )

            summaries.append({"subject": subject, "summary": str(paragraph_response)})
            self.logger.debug(f"Summary for {subject}: {paragraph_response}")

        # Assigned only after every subject succeeded, so a failure mid-way
        # leaves the previous ``self.summaries`` untouched.
        self.summaries = summaries

    async def generate_recap(self) -> None:
        """Generate a quick recap from the subject summaries."""

        # NOTE(review): when ``self.summaries`` is empty the LLM is still
        # asked to recap an empty text — consider skipping the call.
        summaries_text = "\n\n".join(
            [
                f"{summary['subject']}: {summary['summary']}"
                for summary in self.summaries
            ]
        )

        recap_prompt = RECAP_PROMPT

        recap_response = await self._get_response(
            recap_prompt, [summaries_text], tone_name="Recap summarizer"
        )

        self.recap = str(recap_response)
        self.logger.info(f"Quick recap: {self.recap}")

    def _map_participant_names_to_ids(
        self, response: ActionItemsResponse
    ) -> ActionItemsResponse:
        """Map participant names in action items to participant IDs.

        Returns ``response`` itself when no name-to-id mapping is known;
        otherwise returns a new response built from copies, leaving the input
        unmodified. Names are matched exactly (case-sensitive).
        """
        if not self.participant_name_to_id:
            return response

        decisions = []
        for decision in response.decisions:
            new_decision = decision.model_copy()
            if (
                decision.decided_by
                and decision.decided_by in self.participant_name_to_id
            ):
                new_decision.decided_by_participant_id = self.participant_name_to_id[
                    decision.decided_by
                ]
            decisions.append(new_decision)

        next_steps = []
        for item in response.next_steps:
            new_item = item.model_copy()
            if item.assigned_to and item.assigned_to in self.participant_name_to_id:
                new_item.assigned_to_participant_id = self.participant_name_to_id[
                    item.assigned_to
                ]
            next_steps.append(new_item)

        return ActionItemsResponse(decisions=decisions, next_steps=next_steps)

    async def identify_action_items(self) -> ActionItemsResponse | None:
        """Identify action items (decisions and next steps) from the transcript.

        Returns:
            The identified items (also stored in ``self.action_items``), or
            ``None`` when there is no transcript or the request fails; in both
            cases ``self.action_items`` is set to ``None`` as well.
        """
        self.logger.info("--- identify action items using TreeSummarize")

        if not self.transcript:
            self.logger.warning(
                "No transcript available for action items identification"
            )
            self.action_items = None
            return None

        action_items_prompt = ACTION_ITEMS_PROMPT

        try:
            response = await self._get_structured_response(
                action_items_prompt,
                ActionItemsResponse,
                tone_name="Action item identifier",
                timeout=settings.LLM_STRUCTURED_RESPONSE_TIMEOUT,
            )

            response = self._map_participant_names_to_ids(response)

            self.action_items = response
            self.logger.info(
                f"Identified {len(response.decisions)} decisions and {len(response.next_steps)} action items",
                decisions_count=len(response.decisions),
                next_steps_count=len(response.next_steps),
            )

            if response.decisions:
                self.logger.debug(
                    "Decisions identified",
                    decisions=[d.decision for d in response.decisions],
                )
            if response.next_steps:
                self.logger.debug(
                    "Action items identified",
                    tasks=[item.task for item in response.next_steps],
                )
            if not response.decisions and not response.next_steps:
                self.logger.warning(
                    "No action items identified from transcript",
                    transcript_length=len(self.transcript),
                )

            return response

        except Exception as e:
            self.logger.error(
                f"Error identifying action items: {e}",
                exc_info=True,
            )
            self.action_items = None
            return None

    async def generate_summary(self, only_subjects: bool = False) -> None:
        """
        Generate summary by extracting subjects, creating summaries for each,
        generating a recap and identifying action items.

        Args:
            only_subjects: Stop after subject extraction when True.
        """
        await self.extract_subjects()

        if only_subjects:
            return

        await self.generate_subject_summaries()
        await self.generate_recap()
        await self.identify_action_items()

    # ----------------------------------------------------------------------------
    # Markdown
    # ----------------------------------------------------------------------------

    def as_markdown(self) -> str:
        """Render the recap and the subject summaries as Markdown.

        Sections without content are omitted; action items are not part of
        this output.
        """
        lines: list[str] = []
        if self.recap:
            lines.append("# Quick recap")
            lines.append("")
            lines.append(self.recap)
            lines.append("")

        if self.summaries:
            lines.append("# Summary")
            lines.append("")
            for summary in self.summaries:
                lines.append(f"**{summary['subject']}**")
                lines.append(summary["summary"])
                lines.append("")

        return "\n".join(lines)

    def format_list_md(self, data: list[str]) -> str:
        """Format ``data`` as a Markdown bullet list, one ``- item`` per line."""
        return "\n".join([f"- {item}" for item in data])
|
|
|
|
|
|
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate a summary of a meeting transcript"
    )

    parser.add_argument(
        "transcript",
        type=str,
        nargs="?",
        help="The transcript of the meeting",
        default="transcript.txt",
    )

    parser.add_argument(
        "--transcription-type",
        action="store_true",
        help="Identify the type of the transcript (meeting, interview, podcast...)",
    )

    parser.add_argument(
        "--save",
        action="store_true",
        help="Save the summary to a file",
    )

    # Kept for command-line compatibility: the summary is generated whenever
    # none of the single-purpose flags below is given, with or without it.
    parser.add_argument(
        "--summary",
        action="store_true",
        help="Generate a summary",
    )

    parser.add_argument(
        "--subjects",
        help="Generate a list of subjects",
        action="store_true",
    )

    parser.add_argument(
        "--participants",
        help="Generate a list of participants",
        action="store_true",
    )

    args = parser.parse_args()

    async def main() -> None:
        """Run the single action selected on the command line."""
        llm = LLM(settings=settings)
        sm = SummaryBuilder(llm=llm, filename=args.transcript)

        if args.subjects:
            await sm.generate_summary(only_subjects=True)
            print("# Subjects\n")
            print("\n".join(sm.subjects))
            return

        if args.transcription_type:
            await sm.identify_transcription_type()
            print(sm.transcription_type)
            return

        if args.participants:
            # NOTE(review): the identified participants are only reported
            # through the builder's logger; nothing is printed here.
            await sm.identify_participants()
            return

        # No single-purpose flag was given: build the full summary.
        await sm.generate_summary()
        markdown = sm.as_markdown()

        print("")
        print("-" * 80)
        print("")
        print(markdown)

        if args.save:
            # Write the summary to summary-<UTC timestamp>.md. The compact ISO
            # 8601 form has no ":" characters, which are not allowed in file
            # names on Windows.
            timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
            filename = f"summary-{timestamp}.md"
            with open(filename, "w", encoding="utf-8") as f:
                f.write(markdown)

            print("")
            print("-" * 80)
            print("")
            print("Saved to", filename)

    asyncio.run(main())
|