feat: Multitrack segmentation (#747)

* segmentation multitrack (no-mistakes)

* segmentation multitrack (no-mistakes)

* self review

* self review

* recording poll daily doc

* filter cam_audio tracks to remove screensharing from daily processing

* pr review

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
This commit is contained in:
2025-11-26 16:21:32 -05:00
committed by GitHub
parent 8d696aa775
commit d63040e2fd
8 changed files with 485 additions and 81 deletions

View File

@@ -16,6 +16,7 @@ from pydantic import (
import reflector.auth as auth
from reflector.db import get_database
from reflector.db.recordings import recordings_controller
from reflector.db.search import (
DEFAULT_SEARCH_LIMIT,
SearchLimit,
@@ -60,6 +61,14 @@ ALGORITHM = "HS256"
DOWNLOAD_EXPIRE_MINUTES = 60
async def _get_is_multitrack(transcript) -> bool:
"""Detect if transcript is from multitrack recording."""
if not transcript.recording_id:
return False
recording = await recordings_controller.get_by_id(transcript.recording_id)
return recording is not None and recording.is_multitrack
def create_access_token(data: dict, expires_delta: timedelta):
to_encode = data.copy()
expire = datetime.now(timezone.utc) + expires_delta
@@ -360,7 +369,7 @@ class GetTranscriptTopic(BaseModel):
segments: list[GetTranscriptSegmentTopic] = []
@classmethod
def from_transcript_topic(cls, topic: TranscriptTopic):
def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
if not topic.words:
# In previous version, words were missing
# Just output a segment with speaker 0
@@ -384,7 +393,7 @@ class GetTranscriptTopic(BaseModel):
start=segment.start,
speaker=segment.speaker,
)
for segment in transcript.as_segments()
for segment in transcript.as_segments(is_multitrack)
]
return cls(
id=topic.id,
@@ -401,8 +410,8 @@ class GetTranscriptTopicWithWords(GetTranscriptTopic):
words: list[Word] = []
@classmethod
def from_transcript_topic(cls, topic: TranscriptTopic):
instance = super().from_transcript_topic(topic)
def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
instance = super().from_transcript_topic(topic, is_multitrack)
if topic.words:
instance.words = topic.words
return instance
@@ -417,8 +426,8 @@ class GetTranscriptTopicWithWordsPerSpeaker(GetTranscriptTopic):
words_per_speaker: list[SpeakerWords] = []
@classmethod
def from_transcript_topic(cls, topic: TranscriptTopic):
instance = super().from_transcript_topic(topic)
def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
instance = super().from_transcript_topic(topic, is_multitrack)
if topic.words:
words_per_speakers = []
# group words by speaker
@@ -457,6 +466,8 @@ async def transcript_get(
transcript_id, user_id=user_id
)
is_multitrack = await _get_is_multitrack(transcript)
base_data = {
"id": transcript.id,
"user_id": transcript.user_id,
@@ -483,14 +494,16 @@ async def transcript_get(
return GetTranscriptWithText(
**base_data,
transcript_format="text",
transcript=transcript_to_text(transcript.topics, transcript.participants),
transcript=transcript_to_text(
transcript.topics, transcript.participants, is_multitrack
),
)
elif transcript_format == "text-timestamped":
return GetTranscriptWithTextTimestamped(
**base_data,
transcript_format="text-timestamped",
transcript=transcript_to_text_timestamped(
transcript.topics, transcript.participants
transcript.topics, transcript.participants, is_multitrack
),
)
elif transcript_format == "webvtt-named":
@@ -498,7 +511,7 @@ async def transcript_get(
**base_data,
transcript_format="webvtt-named",
transcript=topics_to_webvtt_named(
transcript.topics, transcript.participants
transcript.topics, transcript.participants, is_multitrack
),
)
elif transcript_format == "json":
@@ -506,7 +519,7 @@ async def transcript_get(
**base_data,
transcript_format="json",
transcript=transcript_to_json_segments(
transcript.topics, transcript.participants
transcript.topics, transcript.participants, is_multitrack
),
)
else:
@@ -565,9 +578,12 @@ async def transcript_get_topics(
transcript_id, user_id=user_id
)
is_multitrack = await _get_is_multitrack(transcript)
# convert to GetTranscriptTopic
return [
GetTranscriptTopic.from_transcript_topic(topic) for topic in transcript.topics
GetTranscriptTopic.from_transcript_topic(topic, is_multitrack)
for topic in transcript.topics
]
@@ -584,9 +600,11 @@ async def transcript_get_topics_with_words(
transcript_id, user_id=user_id
)
is_multitrack = await _get_is_multitrack(transcript)
# convert to GetTranscriptTopicWithWords
return [
GetTranscriptTopicWithWords.from_transcript_topic(topic)
GetTranscriptTopicWithWords.from_transcript_topic(topic, is_multitrack)
for topic in transcript.topics
]
@@ -605,13 +623,17 @@ async def transcript_get_topics_with_words_per_speaker(
transcript_id, user_id=user_id
)
is_multitrack = await _get_is_multitrack(transcript)
# get the topic from the transcript
topic = next((t for t in transcript.topics if t.id == topic_id), None)
if not topic:
raise HTTPException(status_code=404, detail="Topic not found")
# convert to GetTranscriptTopicWithWordsPerSpeaker
return GetTranscriptTopicWithWordsPerSpeaker.from_transcript_topic(topic)
return GetTranscriptTopicWithWordsPerSpeaker.from_transcript_topic(
topic, is_multitrack
)
@router.post("/transcripts/{transcript_id}/zulip")