feat: Multitrack segmentation (#747)
* segmentation multitrack (no-mistakes)
* segmentation multitrack (no-mistakes)
* self review
* self review
* recording poll daily doc
* filter cam_audio tracks to remove screensharing from daily processing
* pr review

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
@@ -35,8 +35,15 @@ class Recording(BaseModel):
     status: Literal["pending", "processing", "completed", "failed"] = "pending"
     meeting_id: str | None = None
     # for multitrack reprocessing
+    # track_keys can be empty list [] if recording finished but no audio was captured (silence/muted)
+    # None means not a multitrack recording, [] means multitrack with no tracks
     track_keys: list[str] | None = None
 
+    @property
+    def is_multitrack(self) -> bool:
+        """True if recording has separate audio tracks (1+ tracks counts as multitrack)."""
+        return self.track_keys is not None and len(self.track_keys) > 0
+
 
 class RecordingController:
     async def create(self, recording: Recording):
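A quick sanity check of the new property; a minimal sketch that builds Recording with the same constructor arguments the new tests in this commit use:

    from datetime import datetime, timezone

    from reflector.db.recordings import Recording

    kwargs = dict(
        bucket_name="test-bucket",
        object_key="test-key",
        recorded_at=datetime.now(timezone.utc),
    )

    # None: classic single-file recording, not multitrack
    assert not Recording(**kwargs).is_multitrack
    # []: multitrack recording that finished with no audio captured
    assert not Recording(**kwargs, track_keys=[]).is_multitrack
    # one or more tracks: multitrack
    assert Recording(**kwargs, track_keys=["track1.webm"]).is_multitrack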
@@ -1,6 +1,7 @@
 import io
 import re
 import tempfile
+from collections import defaultdict
 from pathlib import Path
 from typing import Annotated, TypedDict
 
@@ -16,6 +17,17 @@ class DiarizationSegment(TypedDict):
 
 
 PUNC_RE = re.compile(r"[.;:?!…]")
+SENTENCE_END_RE = re.compile(r"[.?!…]$")
+
+# Max segment length for words_to_segments() - breaks on any punctuation (. ; : ? ! …)
+# when segment exceeds this limit. Used for non-multitrack recordings.
+MAX_SEGMENT_CHARS = 120
+
+# Max segment length for words_to_segments_by_sentence() - only breaks on sentence-ending
+# punctuation (. ? ! …) when segment exceeds this limit. Higher threshold allows complete
+# sentences in multitrack recordings where speakers overlap.
+# similar number to server/reflector/processors/transcript_liner.py
+MAX_SENTENCE_SEGMENT_CHARS = 1000
 
 
 class AudioFile(BaseModel):
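The two regexes encode the difference between the modes: PUNC_RE matches any clause punctuation, while SENTENCE_END_RE only matches sentence-ending punctuation anchored at the end of a word. An illustrative comparison, standalone, redefining the patterns from above:

    import re

    PUNC_RE = re.compile(r"[.;:?!…]")
    SENTENCE_END_RE = re.compile(r"[.?!…]$")

    # a semicolon is a break point for words_to_segments() ...
    assert PUNC_RE.search("however;")
    # ... but not a sentence end for words_to_segments_by_sentence()
    assert not SENTENCE_END_RE.search("however;")
    # a period qualifies for both
    assert PUNC_RE.search("done.") and SENTENCE_END_RE.search("done.")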
@@ -76,7 +88,6 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
     # but separate if the speaker changes, or if the punctuation is a . , ; : ? !
     segments = []
     current_segment = None
-    MAX_SEGMENT_LENGTH = 120
 
     for word in words:
         if current_segment is None:
@@ -106,7 +117,7 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
         current_segment.end = word.end
 
         have_punc = PUNC_RE.search(word.text)
-        if have_punc and (len(current_segment.text) > MAX_SEGMENT_LENGTH):
+        if have_punc and (len(current_segment.text) > MAX_SEGMENT_CHARS):
             segments.append(current_segment)
             current_segment = None
 
@@ -116,6 +127,70 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
     return segments
 
 
+def words_to_segments_by_sentence(words: list[Word]) -> list[TranscriptSegment]:
+    """Group words by speaker, then split into sentences.
+
+    For multitrack recordings where words from different speakers are interleaved
+    by timestamp, this function first groups all words by speaker, then creates
+    segments based on sentence boundaries within each speaker's words.
+
+    This produces cleaner output than words_to_segments() which breaks on every
+    speaker change, resulting in many tiny segments when speakers overlap.
+    """
+    if not words:
+        return []
+
+    # Group words by speaker, preserving order within each speaker
+    by_speaker: dict[int, list[Word]] = defaultdict(list)
+    for w in words:
+        by_speaker[w.speaker].append(w)
+
+    segments: list[TranscriptSegment] = []
+
+    for speaker, speaker_words in by_speaker.items():
+        current_text = ""
+        current_start: float | None = None
+        current_end: float = 0.0
+
+        for word in speaker_words:
+            if current_start is None:
+                current_start = word.start
+
+            current_text += word.text
+            current_end = word.end
+
+            # Check for sentence end or max length
+            is_sentence_end = SENTENCE_END_RE.search(word.text.strip())
+            is_too_long = len(current_text) >= MAX_SENTENCE_SEGMENT_CHARS
+
+            if is_sentence_end or is_too_long:
+                segments.append(
+                    TranscriptSegment(
+                        text=current_text,
+                        start=current_start,
+                        end=current_end,
+                        speaker=speaker,
+                    )
+                )
+                current_text = ""
+                current_start = None
+
+        # Flush remaining words for this speaker
+        if current_text and current_start is not None:
+            segments.append(
+                TranscriptSegment(
+                    text=current_text,
+                    start=current_start,
+                    end=current_end,
+                    speaker=speaker,
+                )
+            )
+
+    # Sort segments by start time
+    segments.sort(key=lambda s: s.start)
+    return segments
+
+
 class Transcript(BaseModel):
     translation: str | None = None
     words: list[Word] = []
@@ -154,7 +229,9 @@ class Transcript(BaseModel):
             word.start += offset
             word.end += offset
 
-    def as_segments(self) -> list[TranscriptSegment]:
+    def as_segments(self, is_multitrack: bool = False) -> list[TranscriptSegment]:
+        if is_multitrack:
+            return words_to_segments_by_sentence(self.words)
         return words_to_segments(self.words)
 
 
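The effect of the new flag in one snippet, using the same interleaved words as the unit test added later in this commit:

    from reflector.processors.types import Transcript, Word

    transcript = Transcript(
        words=[
            Word(text="Hello ", start=0.0, end=0.5, speaker=0),
            Word(text="I'm ", start=0.5, end=0.8, speaker=1),
            Word(text="there.", start=0.5, end=1.0, speaker=0),
            Word(text="good.", start=1.0, end=1.5, speaker=1),
        ]
    )

    # default: a new segment on every speaker change -> 4 fragments
    assert len(transcript.as_segments()) == 4
    # multitrack: group by speaker, split on sentence ends -> 2 segments
    assert len(transcript.as_segments(is_multitrack=True)) == 2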
@@ -64,6 +64,11 @@ def recording_lock_key(recording_id: NonEmptyString) -> NonEmptyString:
     return f"recording:{recording_id}"
 
 
+def filter_cam_audio_tracks(track_keys: list[str]) -> list[str]:
+    """Filter track keys to cam-audio tracks only (skip screen-audio, etc.)."""
+    return [k for k in track_keys if "cam-audio" in k]
+
+
 def extract_base_room_name(daily_room_name: DailyRoomName) -> NonEmptyString:
     """
     Extract base room name from Daily.co timestamped room name.
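Usage sketch for the new helper; the key names here are hypothetical, only the "cam-audio" substring matters to the filter:

    tracks = [
        "room/1700000000000_participantA_cam-audio.webm",
        "room/1700000000000_participantA_screen-audio.webm",
        "room/1700000000000_participantB_cam-audio.webm",
    ]
    assert filter_cam_audio_tracks(tracks) == [tracks[0], tracks[2]]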
@@ -6,9 +6,6 @@ from reflector.db.transcripts import TranscriptParticipant, TranscriptTopic
 from reflector.processors.types import (
     Transcript as ProcessorTranscript,
 )
-from reflector.processors.types import (
-    words_to_segments,
-)
 from reflector.schemas.transcript_formats import TranscriptSegment
 from reflector.utils.webvtt import seconds_to_timestamp
 
@@ -32,7 +29,9 @@ def format_timestamp_mmss(seconds: float | int) -> str:
 
 
 def transcript_to_text(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> str:
     """Convert transcript topics to plain text with speaker names."""
     lines = []
@@ -41,7 +40,7 @@ def transcript_to_text(
             continue
 
         transcript = ProcessorTranscript(words=topic.words)
-        segments = transcript.as_segments()
+        segments = transcript.as_segments(is_multitrack)
 
         for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
@@ -52,7 +51,9 @@ def transcript_to_text(
 
 
 def transcript_to_text_timestamped(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> str:
     """Convert transcript topics to timestamped text with speaker names."""
     lines = []
@@ -61,7 +62,7 @@ def transcript_to_text_timestamped(
             continue
 
         transcript = ProcessorTranscript(words=topic.words)
-        segments = transcript.as_segments()
+        segments = transcript.as_segments(is_multitrack)
 
         for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
@@ -73,7 +74,9 @@ def transcript_to_text_timestamped(
 
 
 def topics_to_webvtt_named(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> str:
     """Convert transcript topics to WebVTT format with participant names."""
     vtt = webvtt.WebVTT()
@@ -82,7 +85,8 @@ def topics_to_webvtt_named(
         if not topic.words:
             continue
 
-        segments = words_to_segments(topic.words)
+        transcript = ProcessorTranscript(words=topic.words)
+        segments = transcript.as_segments(is_multitrack)
 
         for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
@@ -100,19 +104,23 @@ def topics_to_webvtt_named(
 
 
 def transcript_to_json_segments(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> list[TranscriptSegment]:
     """Convert transcript topics to a flat list of JSON segments."""
-    segments = []
+    result = []
 
     for topic in topics:
         if not topic.words:
             continue
 
         transcript = ProcessorTranscript(words=topic.words)
-        for segment in transcript.as_segments():
+        segments = transcript.as_segments(is_multitrack)
+
+        for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
-            segments.append(
+            result.append(
                 TranscriptSegment(
                     speaker=segment.speaker,
                     speaker_name=speaker_name,
@@ -122,4 +130,4 @@ def transcript_to_json_segments(
             )
         )
 
-    return segments
+    return result
@@ -16,6 +16,7 @@ from pydantic import (
 
 import reflector.auth as auth
 from reflector.db import get_database
+from reflector.db.recordings import recordings_controller
 from reflector.db.search import (
     DEFAULT_SEARCH_LIMIT,
     SearchLimit,
@@ -60,6 +61,14 @@ ALGORITHM = "HS256"
 DOWNLOAD_EXPIRE_MINUTES = 60
 
 
+async def _get_is_multitrack(transcript) -> bool:
+    """Detect if transcript is from multitrack recording."""
+    if not transcript.recording_id:
+        return False
+    recording = await recordings_controller.get_by_id(transcript.recording_id)
+    return recording is not None and recording.is_multitrack
+
+
 def create_access_token(data: dict, expires_delta: timedelta):
     to_encode = data.copy()
     expire = datetime.now(timezone.utc) + expires_delta
@@ -360,7 +369,7 @@ class GetTranscriptTopic(BaseModel):
     segments: list[GetTranscriptSegmentTopic] = []
 
     @classmethod
-    def from_transcript_topic(cls, topic: TranscriptTopic):
+    def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
         if not topic.words:
             # In previous version, words were missing
             # Just output a segment with speaker 0
@@ -384,7 +393,7 @@ class GetTranscriptTopic(BaseModel):
                 start=segment.start,
                 speaker=segment.speaker,
             )
-            for segment in transcript.as_segments()
+            for segment in transcript.as_segments(is_multitrack)
         ]
         return cls(
             id=topic.id,
@@ -401,8 +410,8 @@ class GetTranscriptTopicWithWords(GetTranscriptTopic):
     words: list[Word] = []
 
     @classmethod
-    def from_transcript_topic(cls, topic: TranscriptTopic):
-        instance = super().from_transcript_topic(topic)
+    def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
+        instance = super().from_transcript_topic(topic, is_multitrack)
         if topic.words:
             instance.words = topic.words
         return instance
@@ -417,8 +426,8 @@ class GetTranscriptTopicWithWordsPerSpeaker(GetTranscriptTopic):
     words_per_speaker: list[SpeakerWords] = []
 
     @classmethod
-    def from_transcript_topic(cls, topic: TranscriptTopic):
-        instance = super().from_transcript_topic(topic)
+    def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
+        instance = super().from_transcript_topic(topic, is_multitrack)
        if topic.words:
             words_per_speakers = []
             # group words by speaker
@@ -457,6 +466,8 @@ async def transcript_get(
         transcript_id, user_id=user_id
     )
 
+    is_multitrack = await _get_is_multitrack(transcript)
+
     base_data = {
         "id": transcript.id,
         "user_id": transcript.user_id,
@@ -483,14 +494,16 @@ async def transcript_get(
         return GetTranscriptWithText(
             **base_data,
             transcript_format="text",
-            transcript=transcript_to_text(transcript.topics, transcript.participants),
+            transcript=transcript_to_text(
+                transcript.topics, transcript.participants, is_multitrack
+            ),
         )
     elif transcript_format == "text-timestamped":
         return GetTranscriptWithTextTimestamped(
             **base_data,
             transcript_format="text-timestamped",
             transcript=transcript_to_text_timestamped(
-                transcript.topics, transcript.participants
+                transcript.topics, transcript.participants, is_multitrack
             ),
         )
     elif transcript_format == "webvtt-named":
@@ -498,7 +511,7 @@ async def transcript_get(
             **base_data,
             transcript_format="webvtt-named",
             transcript=topics_to_webvtt_named(
-                transcript.topics, transcript.participants
+                transcript.topics, transcript.participants, is_multitrack
             ),
         )
     elif transcript_format == "json":
@@ -506,7 +519,7 @@ async def transcript_get(
             **base_data,
             transcript_format="json",
             transcript=transcript_to_json_segments(
-                transcript.topics, transcript.participants
+                transcript.topics, transcript.participants, is_multitrack
             ),
         )
     else:
@@ -565,9 +578,12 @@ async def transcript_get_topics(
         transcript_id, user_id=user_id
     )
 
+    is_multitrack = await _get_is_multitrack(transcript)
+
     # convert to GetTranscriptTopic
     return [
-        GetTranscriptTopic.from_transcript_topic(topic) for topic in transcript.topics
+        GetTranscriptTopic.from_transcript_topic(topic, is_multitrack)
+        for topic in transcript.topics
     ]
 
 
@@ -584,9 +600,11 @@ async def transcript_get_topics_with_words(
         transcript_id, user_id=user_id
     )
 
+    is_multitrack = await _get_is_multitrack(transcript)
+
     # convert to GetTranscriptTopicWithWords
     return [
-        GetTranscriptTopicWithWords.from_transcript_topic(topic)
+        GetTranscriptTopicWithWords.from_transcript_topic(topic, is_multitrack)
         for topic in transcript.topics
     ]
 
@@ -605,13 +623,17 @@ async def transcript_get_topics_with_words_per_speaker(
         transcript_id, user_id=user_id
     )
 
+    is_multitrack = await _get_is_multitrack(transcript)
+
     # get the topic from the transcript
     topic = next((t for t in transcript.topics if t.id == topic_id), None)
     if not topic:
         raise HTTPException(status_code=404, detail="Topic not found")
 
     # convert to GetTranscriptTopicWithWordsPerSpeaker
-    return GetTranscriptTopicWithWordsPerSpeaker.from_transcript_topic(topic)
+    return GetTranscriptTopicWithWordsPerSpeaker.from_transcript_topic(
+        topic, is_multitrack
+    )
 
 
 @router.post("/transcripts/{transcript_id}/zulip")
@@ -2,6 +2,7 @@ import json
 import os
 import re
 from datetime import datetime, timezone
+from typing import List
 from urllib.parse import unquote
 
 import av
@@ -11,7 +12,7 @@ from celery import shared_task
 from celery.utils.log import get_task_logger
 from pydantic import ValidationError
 
-from reflector.dailyco_api import MeetingParticipantsResponse
+from reflector.dailyco_api import MeetingParticipantsResponse, RecordingResponse
 from reflector.db.daily_participant_sessions import (
     DailyParticipantSession,
     daily_participant_sessions_controller,
@@ -38,6 +39,7 @@ from reflector.storage import get_transcripts_storage
 from reflector.utils.daily import (
     DailyRoomName,
     extract_base_room_name,
+    filter_cam_audio_tracks,
     parse_daily_recording_filename,
     recording_lock_key,
 )
@@ -338,7 +340,9 @@ async def _process_multitrack_recording_inner(
             exc_info=True,
         )
 
-    for idx, key in enumerate(track_keys):
+    cam_audio_keys = filter_cam_audio_tracks(track_keys)
+
+    for idx, key in enumerate(cam_audio_keys):
         try:
             parsed = parse_daily_recording_filename(key)
             participant_id = parsed.participant_id
@@ -366,7 +370,7 @@ async def _process_multitrack_recording_inner(
     task_pipeline_multitrack_process.delay(
         transcript_id=transcript.id,
         bucket_name=bucket_name,
-        track_keys=track_keys,
+        track_keys=filter_cam_audio_tracks(track_keys),
     )
 
 
@@ -391,7 +395,7 @@ async def poll_daily_recordings():
 
     async with create_platform_client("daily") as daily_client:
         # latest 100. TODO cursor-based state
-        api_recordings = await daily_client.list_recordings()
+        api_recordings: List[RecordingResponse] = await daily_client.list_recordings()
 
         if not api_recordings:
             logger.debug(
@@ -422,17 +426,19 @@ async def poll_daily_recordings():
 
         for recording in missing_recordings:
             if not recording.tracks:
-                assert recording.status != "finished", (
-                    f"Recording {recording.id} has status='finished' but no tracks. "
-                    f"Daily.co API guarantees finished recordings have tracks available. "
-                    f"room_name={recording.room_name}"
-                )
-                logger.debug(
-                    "No tracks in recording yet",
-                    recording_id=recording.id,
-                    room_name=recording.room_name,
-                    status=recording.status,
-                )
+                if recording.status == "finished":
+                    logger.warning(
+                        "Finished recording has no tracks (no audio captured)",
+                        recording_id=recording.id,
+                        room_name=recording.room_name,
+                    )
+                else:
+                    logger.debug(
+                        "No tracks in recording yet",
+                        recording_id=recording.id,
+                        room_name=recording.room_name,
+                        status=recording.status,
+                    )
                 continue
 
             track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"]
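The hunk above downgrades a hard assert to logging: a finished recording with no tracks is now treated as "no audio captured" and skipped, instead of crashing the poll loop. A minimal sketch of the new branch; the non-finished status string is illustrative:

    def log_level_for_trackless(status: str) -> str:
        # mirrors the poller's branch for recordings without tracks
        if status == "finished":
            return "warning"  # previously an AssertionError
        return "debug"  # tracks not ready yet; recording will be re-polled

    assert log_level_for_trackless("finished") == "warning"
    assert log_level_for_trackless("in-progress") == "debug"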
@@ -159,3 +159,78 @@ def test_processor_transcript_segment():
     assert segments[3].start == 30.72
     assert segments[4].start == 31.56
     assert segments[5].start == 32.38
+
+
+def test_processor_transcript_segment_multitrack_interleaved():
+    """Test as_segments(is_multitrack=True) with interleaved speakers.
+
+    Multitrack recordings have words from different speakers sorted by start time,
+    causing frequent speaker alternation. The multitrack mode should group by
+    speaker first, then split into sentences.
+    """
+    from reflector.processors.types import Transcript, Word
+
+    # Simulate real multitrack data: words sorted by start time, speakers interleave
+    # Speaker 0 says: "Hello there."
+    # Speaker 1 says: "I'm good."
+    # When sorted by time, words interleave
+    transcript = Transcript(
+        words=[
+            Word(text="Hello ", start=0.0, end=0.5, speaker=0),
+            Word(text="I'm ", start=0.5, end=0.8, speaker=1),
+            Word(text="there.", start=0.5, end=1.0, speaker=0),
+            Word(text="good.", start=1.0, end=1.5, speaker=1),
+        ]
+    )
+
+    # Default behavior (is_multitrack=False): breaks on every speaker change = 4 segments
+    segments_default = transcript.as_segments(is_multitrack=False)
+    assert len(segments_default) == 4
+
+    # Multitrack behavior: groups by speaker, then sentences = 2 segments
+    segments_multitrack = transcript.as_segments(is_multitrack=True)
+    assert len(segments_multitrack) == 2
+
+    # Check content - sorted by start time
+    assert segments_multitrack[0].speaker == 0
+    assert segments_multitrack[0].text == "Hello there."
+    assert segments_multitrack[0].start == 0.0
+    assert segments_multitrack[0].end == 1.0
+
+    assert segments_multitrack[1].speaker == 1
+    assert segments_multitrack[1].text == "I'm good."
+    assert segments_multitrack[1].start == 0.5
+    assert segments_multitrack[1].end == 1.5
+
+
+def test_processor_transcript_segment_multitrack_overlapping_timestamps():
+    """Test multitrack with exactly overlapping timestamps (real Daily.co data pattern)."""
+    from reflector.processors.types import Transcript, Word
+
+    # Real pattern from transcript 38d84d57: words with identical timestamps
+    transcript = Transcript(
+        words=[
+            Word(text="speaking ", start=6.71, end=7.11, speaker=0),
+            Word(text="Speaking ", start=6.71, end=7.11, speaker=1),
+            Word(text="at ", start=7.11, end=7.27, speaker=0),
+            Word(text="at ", start=7.11, end=7.27, speaker=1),
+            Word(text="the ", start=7.27, end=7.43, speaker=0),
+            Word(text="the ", start=7.27, end=7.43, speaker=1),
+            Word(text="same ", start=7.43, end=7.59, speaker=0),
+            Word(text="same ", start=7.43, end=7.59, speaker=1),
+            Word(text="time.", start=7.59, end=8.0, speaker=0),
+            Word(text="time.", start=7.59, end=8.0, speaker=1),
+        ]
+    )
+
+    # Default: 10 segments (one per speaker change)
+    segments_default = transcript.as_segments(is_multitrack=False)
+    assert len(segments_default) == 10
+
+    # Multitrack: 2 segments (one per speaker sentence)
+    segments_multitrack = transcript.as_segments(is_multitrack=True)
+    assert len(segments_multitrack) == 2
+
+    # Both should have complete sentences
+    assert "speaking at the same time." in segments_multitrack[0].text
+    assert "Speaking at the same time." in segments_multitrack[1].text
@@ -273,8 +273,17 @@ async def test_transcript_formats_with_multiple_speakers():
 
 
 @pytest.mark.asyncio
-async def test_transcript_formats_with_overlapping_speakers():
-    """Test format conversion when multiple speakers speak at the same time (overlapping timestamps)."""
+async def test_transcript_formats_with_overlapping_speakers_multitrack():
+    """Test format conversion for multitrack recordings with truly interleaved words.
+
+    Multitrack recordings have words from different speakers sorted by start time,
+    causing frequent speaker alternation. This tests the sentence-based segmentation
+    that groups each speaker's words into complete sentences.
+    """
+    # Real multitrack data: words sorted by start time, speakers interleave
+    # Alice says: "Hello there." (0.0-1.0)
+    # Bob says: "I'm good." (0.5-1.5)
+    # When sorted by time, words interleave: Hello, I'm, there., good.
     topics = [
         TranscriptTopic(
             id="1",
@@ -282,11 +291,10 @@ async def test_transcript_formats_with_overlapping_speakers():
             summary="Summary 1",
             timestamp=0.0,
             words=[
-                Word(text="Hello", start=0.0, end=0.5, speaker=0),
-                Word(text=" there.", start=0.5, end=1.0, speaker=0),
-                # Speaker 1 overlaps with speaker 0 at 0.5-1.0
-                Word(text="I'm", start=0.5, end=1.0, speaker=1),
-                Word(text=" good.", start=1.0, end=1.5, speaker=1),
+                Word(text="Hello ", start=0.0, end=0.5, speaker=0),
+                Word(text="I'm ", start=0.5, end=0.8, speaker=1),
+                Word(text="there.", start=0.5, end=1.0, speaker=0),
+                Word(text="good.", start=1.0, end=1.5, speaker=1),
             ],
         ),
     ]
@@ -296,20 +304,9 @@ async def test_transcript_formats_with_overlapping_speakers():
         TranscriptParticipant(id="2", speaker=1, name="Bob"),
     ]
 
-    text_result = transcript_to_text(topics, participants)
-    lines = text_result.split("\n")
-    assert len(lines) >= 2
-    assert any("Alice:" in line for line in lines)
-    assert any("Bob:" in line for line in lines)
-
-    timestamped_result = transcript_to_text_timestamped(topics, participants)
-    timestamped_lines = timestamped_result.split("\n")
-    assert len(timestamped_lines) >= 2
-    assert any("Alice:" in line for line in timestamped_lines)
-    assert any("Bob:" in line for line in timestamped_lines)
-    assert any("[00:00]" in line for line in timestamped_lines)
-
-    webvtt_result = topics_to_webvtt_named(topics, participants)
+    # With is_multitrack=True, should produce 2 segments (one per speaker sentence)
+    # not 4 segments (one per speaker change)
+    webvtt_result = topics_to_webvtt_named(topics, participants, is_multitrack=True)
     expected_webvtt = """WEBVTT
 
 00:00:00.000 --> 00:00:01.000
@@ -320,23 +317,26 @@ async def test_transcript_formats_with_overlapping_speakers():
 """
     assert webvtt_result == expected_webvtt
 
-    segments = transcript_to_json_segments(topics, participants)
-    assert len(segments) >= 2
-    speakers = {seg.speaker for seg in segments}
-    assert 0 in speakers and 1 in speakers
-
-    alice_seg = next(seg for seg in segments if seg.speaker == 0)
-    bob_seg = next(seg for seg in segments if seg.speaker == 1)
-
-    # Verify timestamps overlap: Alice (0.0-1.0) and Bob (0.5-1.5) overlap at 0.5-1.0
-    assert alice_seg.start < bob_seg.end, "Alice segment should start before Bob ends"
-    assert bob_seg.start < alice_seg.end, "Bob segment should start before Alice ends"
-
-    overlap_start = max(alice_seg.start, bob_seg.start)
-    overlap_end = min(alice_seg.end, bob_seg.end)
-    assert (
-        overlap_start < overlap_end
-    ), f"Segments should overlap between {overlap_start} and {overlap_end}"
+    text_result = transcript_to_text(topics, participants, is_multitrack=True)
+    lines = text_result.split("\n")
+    assert len(lines) == 2
+    assert "Alice: Hello there." in lines[0]
+    assert "Bob: I'm good." in lines[1]
+
+    timestamped_result = transcript_to_text_timestamped(
+        topics, participants, is_multitrack=True
+    )
+    timestamped_lines = timestamped_result.split("\n")
+    assert len(timestamped_lines) == 2
+    assert "[00:00] Alice: Hello there." in timestamped_lines[0]
+    assert "[00:00] Bob: I'm good." in timestamped_lines[1]
+
+    segments = transcript_to_json_segments(topics, participants, is_multitrack=True)
+    assert len(segments) == 2
+    assert segments[0].speaker_name == "Alice"
+    assert segments[0].text == "Hello there."
+    assert segments[1].speaker_name == "Bob"
+    assert segments[1].text == "I'm good."
 
 
 @pytest.mark.asyncio
@@ -573,3 +573,207 @@ async def test_api_transcript_format_default_is_text(client):
 
     assert data["transcript_format"] == "text"
     assert "transcript" in data
+
+
+@pytest.mark.asyncio
+async def test_api_topics_endpoint_multitrack_segmentation(client):
+    """Test GET /transcripts/{id}/topics uses sentence-based segmentation for multitrack.
+
+    This tests the fix for TASKS2.md - ensuring /topics endpoints correctly detect
+    multitrack recordings and use sentence-based segmentation instead of fragmenting
+    on every speaker change.
+    """
+    from datetime import datetime, timezone
+
+    from reflector.db.recordings import Recording, recordings_controller
+    from reflector.db.transcripts import (
+        TranscriptParticipant,
+        TranscriptTopic,
+        transcripts_controller,
+    )
+    from reflector.processors.types import Word
+
+    # Create a multitrack recording (has track_keys)
+    recording = Recording(
+        bucket_name="test-bucket",
+        object_key="test-key",
+        recorded_at=datetime.now(timezone.utc),
+        track_keys=["track1.webm", "track2.webm"],  # This makes it multitrack
+    )
+    await recordings_controller.create(recording)
+
+    # Create transcript linked to the recording
+    transcript = await transcripts_controller.add(
+        name="Multitrack Test",
+        source_kind="file",
+        recording_id=recording.id,
+    )
+
+    await transcripts_controller.update(
+        transcript,
+        {
+            "participants": [
+                TranscriptParticipant(id="1", speaker=0, name="Alice").model_dump(),
+                TranscriptParticipant(id="2", speaker=1, name="Bob").model_dump(),
+            ]
+        },
+    )
+
+    # Add interleaved words (as they appear in real multitrack data)
+    await transcripts_controller.upsert_topic(
+        transcript,
+        TranscriptTopic(
+            title="Topic 1",
+            summary="Summary 1",
+            timestamp=0,
+            words=[
+                Word(text="Hello ", start=0.0, end=0.5, speaker=0),
+                Word(text="I'm ", start=0.5, end=0.8, speaker=1),
+                Word(text="there.", start=0.5, end=1.0, speaker=0),
+                Word(text="good.", start=1.0, end=1.5, speaker=1),
+            ],
+        ),
+    )
+
+    # Test /topics endpoint
+    response = await client.get(f"/transcripts/{transcript.id}/topics")
+    assert response.status_code == 200
+    data = response.json()
+
+    assert len(data) == 1
+    topic = data[0]
+
+    # Key assertion: multitrack should produce 2 segments (one per speaker sentence)
+    # Not 4 segments (one per speaker change)
+    assert len(topic["segments"]) == 2
+
+    # Check content
+    segment_texts = [s["text"] for s in topic["segments"]]
+    assert "Hello there." in segment_texts
+    assert "I'm good." in segment_texts
+
+
+@pytest.mark.asyncio
+async def test_api_topics_endpoint_non_multitrack_segmentation(client):
+    """Test GET /transcripts/{id}/topics uses default segmentation for non-multitrack.
+
+    Ensures backward compatibility - transcripts without multitrack recordings
+    should continue using the default speaker-change-based segmentation.
+    """
+    from reflector.db.transcripts import (
+        TranscriptParticipant,
+        TranscriptTopic,
+        transcripts_controller,
+    )
+    from reflector.processors.types import Word
+
+    # Create transcript WITHOUT recording (defaulted as not multitrack) TODO better heuristic
+    response = await client.post("/transcripts", json={"name": "Test transcript"})
+    assert response.status_code == 200
+    tid = response.json()["id"]
+
+    transcript = await transcripts_controller.get_by_id(tid)
+
+    await transcripts_controller.update(
+        transcript,
+        {
+            "participants": [
+                TranscriptParticipant(id="1", speaker=0, name="Alice").model_dump(),
+                TranscriptParticipant(id="2", speaker=1, name="Bob").model_dump(),
+            ]
+        },
+    )
+
+    # Add interleaved words
+    await transcripts_controller.upsert_topic(
+        transcript,
+        TranscriptTopic(
+            title="Topic 1",
+            summary="Summary 1",
+            timestamp=0,
+            words=[
+                Word(text="Hello ", start=0.0, end=0.5, speaker=0),
+                Word(text="I'm ", start=0.5, end=0.8, speaker=1),
+                Word(text="there.", start=0.5, end=1.0, speaker=0),
+                Word(text="good.", start=1.0, end=1.5, speaker=1),
+            ],
+        ),
+    )
+
+    # Test /topics endpoint
+    response = await client.get(f"/transcripts/{tid}/topics")
+    assert response.status_code == 200
+    data = response.json()
+
+    assert len(data) == 1
+    topic = data[0]
+
+    # Non-multitrack: should produce 4 segments (one per speaker change)
+    assert len(topic["segments"]) == 4
+
+
+@pytest.mark.asyncio
+async def test_api_topics_with_words_endpoint_multitrack(client):
+    """Test GET /transcripts/{id}/topics/with-words uses multitrack segmentation."""
+    from datetime import datetime, timezone
+
+    from reflector.db.recordings import Recording, recordings_controller
+    from reflector.db.transcripts import (
+        TranscriptParticipant,
+        TranscriptTopic,
+        transcripts_controller,
+    )
+    from reflector.processors.types import Word
+
+    # Create multitrack recording
+    recording = Recording(
+        bucket_name="test-bucket",
+        object_key="test-key-2",
+        recorded_at=datetime.now(timezone.utc),
+        track_keys=["track1.webm", "track2.webm"],
+    )
+    await recordings_controller.create(recording)
+
+    transcript = await transcripts_controller.add(
+        name="Multitrack Test 2",
+        source_kind="file",
+        recording_id=recording.id,
+    )
+
+    await transcripts_controller.update(
+        transcript,
+        {
+            "participants": [
+                TranscriptParticipant(id="1", speaker=0, name="Alice").model_dump(),
+                TranscriptParticipant(id="2", speaker=1, name="Bob").model_dump(),
+            ]
+        },
+    )
+
+    await transcripts_controller.upsert_topic(
+        transcript,
+        TranscriptTopic(
+            title="Topic 1",
+            summary="Summary 1",
+            timestamp=0,
+            words=[
+                Word(text="Hello ", start=0.0, end=0.5, speaker=0),
+                Word(text="I'm ", start=0.5, end=0.8, speaker=1),
+                Word(text="there.", start=0.5, end=1.0, speaker=0),
+                Word(text="good.", start=1.0, end=1.5, speaker=1),
+            ],
+        ),
+    )
+
+    response = await client.get(f"/transcripts/{transcript.id}/topics/with-words")
+    assert response.status_code == 200
+    data = response.json()
+
+    assert len(data) == 1
+    topic = data[0]
+
+    # Should have 2 segments (multitrack sentence-based)
+    assert len(topic["segments"]) == 2
+    # Should also have words field
+    assert "words" in topic
+    assert len(topic["words"]) == 4