feat: Multitrack segmentation (#747)

* segmentation multitrack (no-mistakes)

* segmentation multitrack (no-mistakes)

* self review

* self review

* recording poll daily doc

* filter cam_audio tracks to remove screensharing from daily processing

* pr review

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
Authored by: Igor Monadical
Date: 2025-11-26 16:21:32 -05:00
Committed by: GitHub
Parent: 8d696aa775
Commit: d63040e2fd
8 changed files with 485 additions and 81 deletions

@@ -6,9 +6,6 @@ from reflector.db.transcripts import TranscriptParticipant, TranscriptTopic
 from reflector.processors.types import (
     Transcript as ProcessorTranscript,
 )
-from reflector.processors.types import (
-    words_to_segments,
-)
 from reflector.schemas.transcript_formats import TranscriptSegment
 from reflector.utils.webvtt import seconds_to_timestamp
@@ -32,7 +29,9 @@ def format_timestamp_mmss(seconds: float | int) -> str:
 def transcript_to_text(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> str:
     """Convert transcript topics to plain text with speaker names."""
     lines = []
@@ -41,7 +40,7 @@ def transcript_to_text(
             continue
         transcript = ProcessorTranscript(words=topic.words)
-        segments = transcript.as_segments()
+        segments = transcript.as_segments(is_multitrack)
         for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
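
Hedged call-site sketch, not code from this commit: because is_multitrack defaults to False, existing callers of transcript_to_text keep the single-track behavior, and multitrack recordings opt in explicitly. The snippet assumes it runs in the same module where transcript_to_text is defined; the variable setup is illustrative.

# Illustrative only; topics/participants would normally be loaded from the database.
topics: list[TranscriptTopic] = []          # TranscriptTopic rows fetched elsewhere
participants: list[TranscriptParticipant] | None = None

plain = transcript_to_text(topics, participants)                           # pre-existing single-track behavior
multitrack = transcript_to_text(topics, participants, is_multitrack=True)  # opt-in multitrack segmentation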
@@ -52,7 +51,9 @@ def transcript_to_text(
 def transcript_to_text_timestamped(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> str:
     """Convert transcript topics to timestamped text with speaker names."""
     lines = []
@@ -61,7 +62,7 @@ def transcript_to_text_timestamped(
             continue
         transcript = ProcessorTranscript(words=topic.words)
-        segments = transcript.as_segments()
+        segments = transcript.as_segments(is_multitrack)
         for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
@@ -73,7 +74,9 @@ def transcript_to_text_timestamped(
 def topics_to_webvtt_named(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> str:
     """Convert transcript topics to WebVTT format with participant names."""
     vtt = webvtt.WebVTT()
@@ -82,7 +85,8 @@ def topics_to_webvtt_named(
         if not topic.words:
             continue
-        segments = words_to_segments(topic.words)
+        transcript = ProcessorTranscript(words=topic.words)
+        segments = transcript.as_segments(is_multitrack)
         for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
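
With this hunk, topics_to_webvtt_named routes through ProcessorTranscript.as_segments like the other formatters instead of calling words_to_segments directly. The sketch below is only a guess at what multitrack-aware segmentation could look like, not the real as_segments implementation (that lives in reflector.processors.types and is not part of this diff): with one cam_audio track per speaker, splitting on speaker changes is a natural segment boundary. The Word dataclass is a stand-in for the real word type.

# Speculative illustration; real logic is in reflector.processors.types.Transcript.as_segments.
from dataclasses import dataclass

@dataclass
class Word:            # stand-in word shape, not the repo's type
    text: str
    start: float
    end: float
    speaker: int

def group_by_speaker_change(words: list[Word]) -> list[list[Word]]:
    """Start a new segment whenever the speaker changes between consecutive words."""
    segments: list[list[Word]] = []
    for word in words:
        if segments and segments[-1][-1].speaker == word.speaker:
            segments[-1].append(word)    # same speaker: extend the current segment
        else:
            segments.append([word])      # speaker changed: open a new segment
    return segments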
@@ -100,19 +104,23 @@ def topics_to_webvtt_named(
 def transcript_to_json_segments(
-    topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
+    topics: list[TranscriptTopic],
+    participants: list[TranscriptParticipant] | None,
+    is_multitrack: bool = False,
 ) -> list[TranscriptSegment]:
     """Convert transcript topics to a flat list of JSON segments."""
-    segments = []
+    result = []
     for topic in topics:
         if not topic.words:
             continue
         transcript = ProcessorTranscript(words=topic.words)
-        for segment in transcript.as_segments():
+        segments = transcript.as_segments(is_multitrack)
+        for segment in segments:
             speaker_name = get_speaker_name(segment.speaker, participants)
-            segments.append(
+            result.append(
                 TranscriptSegment(
                     speaker=segment.speaker,
                     speaker_name=speaker_name,
@@ -122,4 +130,4 @@ def transcript_to_json_segments(
                 )
             )
-    return segments
+    return result
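
The segments-to-result rename above matters because segments is now bound inside the loop to the per-topic list; keeping one name for both roles would rebind the accumulator on every topic and drop everything collected so far. A minimal self-contained illustration of that hazard, with toy data rather than repo types:

# Toy demonstration of the shadowing hazard the rename avoids.
def collect_buggy(topics: list[list[int]]) -> list[int]:
    segments = []                              # accumulator
    for topic in topics:
        segments = [n * 10 for n in topic]     # oops: rebinds the accumulator each topic
    return segments                            # only the last topic survives

def collect_fixed(topics: list[list[int]]) -> list[int]:
    result = []                                # accumulator keeps its own name
    for topic in topics:
        segments = [n * 10 for n in topic]     # per-topic list, separate name
        result.extend(segments)
    return result

assert collect_buggy([[1], [2, 3]]) == [20, 30]        # earlier topic lost
assert collect_fixed([[1], [2, 3]]) == [10, 20, 30]    # all topics kept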