mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 12:49:06 +00:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| dabf7251db | |||
|
|
b51b7aa917 | ||
|
|
a8983b4e7e | ||
|
|
fe47c46489 | ||
| a2bb6a27d6 | |||
| 7f0b728991 | |||
| 692895c859 | |||
|
|
d63040e2fd |
22
CHANGELOG.md
22
CHANGELOG.md
@@ -1,5 +1,27 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## [0.22.2](https://github.com/Monadical-SAS/reflector/compare/v0.22.1...v0.22.2) (2025-12-02)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* daily auto refresh fix ([#755](https://github.com/Monadical-SAS/reflector/issues/755)) ([fe47c46](https://github.com/Monadical-SAS/reflector/commit/fe47c46489c5aa0cc538109f7559cc9accb35c01))
|
||||||
|
* Skip mixdown for multitrack ([#760](https://github.com/Monadical-SAS/reflector/issues/760)) ([b51b7aa](https://github.com/Monadical-SAS/reflector/commit/b51b7aa9176c1a53ba57ad99f5e976c804a1e80c))
|
||||||
|
|
||||||
|
## [0.22.1](https://github.com/Monadical-SAS/reflector/compare/v0.22.0...v0.22.1) (2025-11-27)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* participants update from daily ([#749](https://github.com/Monadical-SAS/reflector/issues/749)) ([7f0b728](https://github.com/Monadical-SAS/reflector/commit/7f0b728991c1b9f9aae702c96297eae63b561ef5))
|
||||||
|
|
||||||
|
## [0.22.0](https://github.com/Monadical-SAS/reflector/compare/v0.21.0...v0.22.0) (2025-11-26)
|
||||||
|
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
* Multitrack segmentation ([#747](https://github.com/Monadical-SAS/reflector/issues/747)) ([d63040e](https://github.com/Monadical-SAS/reflector/commit/d63040e2fdc07e7b272e85a39eb2411cd6a14798))
|
||||||
|
|
||||||
## [0.21.0](https://github.com/Monadical-SAS/reflector/compare/v0.20.0...v0.21.0) (2025-11-26)
|
## [0.21.0](https://github.com/Monadical-SAS/reflector/compare/v0.20.0...v0.21.0) (2025-11-26)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ class MeetingParticipant(BaseModel):
|
|||||||
Reference: https://docs.daily.co/reference/rest-api/meetings/get-meeting-participants
|
Reference: https://docs.daily.co/reference/rest-api/meetings/get-meeting-participants
|
||||||
"""
|
"""
|
||||||
|
|
||||||
user_id: NonEmptyString = Field(description="User identifier")
|
user_id: NonEmptyString | None = Field(None, description="User identifier")
|
||||||
participant_id: NonEmptyString = Field(description="Participant session identifier")
|
participant_id: NonEmptyString = Field(description="Participant session identifier")
|
||||||
user_name: NonEmptyString | None = Field(None, description="User display name")
|
user_name: NonEmptyString | None = Field(None, description="User display name")
|
||||||
join_time: int = Field(description="Join timestamp (Unix epoch seconds)")
|
join_time: int = Field(description="Join timestamp (Unix epoch seconds)")
|
||||||
|
|||||||
@@ -35,8 +35,15 @@ class Recording(BaseModel):
|
|||||||
status: Literal["pending", "processing", "completed", "failed"] = "pending"
|
status: Literal["pending", "processing", "completed", "failed"] = "pending"
|
||||||
meeting_id: str | None = None
|
meeting_id: str | None = None
|
||||||
# for multitrack reprocessing
|
# for multitrack reprocessing
|
||||||
|
# track_keys can be empty list [] if recording finished but no audio was captured (silence/muted)
|
||||||
|
# None means not a multitrack recording, [] means multitrack with no tracks
|
||||||
track_keys: list[str] | None = None
|
track_keys: list[str] | None = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_multitrack(self) -> bool:
|
||||||
|
"""True if recording has separate audio tracks (1+ tracks counts as multitrack)."""
|
||||||
|
return self.track_keys is not None and len(self.track_keys) > 0
|
||||||
|
|
||||||
|
|
||||||
class RecordingController:
|
class RecordingController:
|
||||||
async def create(self, recording: Recording):
|
async def create(self, recording: Recording):
|
||||||
|
|||||||
@@ -9,7 +9,10 @@ from av.audio.resampler import AudioResampler
|
|||||||
from celery import chain, shared_task
|
from celery import chain, shared_task
|
||||||
|
|
||||||
from reflector.asynctask import asynctask
|
from reflector.asynctask import asynctask
|
||||||
|
from reflector.dailyco_api import MeetingParticipantsResponse
|
||||||
from reflector.db.transcripts import (
|
from reflector.db.transcripts import (
|
||||||
|
Transcript,
|
||||||
|
TranscriptParticipant,
|
||||||
TranscriptStatus,
|
TranscriptStatus,
|
||||||
TranscriptWaveform,
|
TranscriptWaveform,
|
||||||
transcripts_controller,
|
transcripts_controller,
|
||||||
@@ -28,8 +31,14 @@ from reflector.processors import AudioFileWriterProcessor
|
|||||||
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
|
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
|
||||||
from reflector.processors.types import TitleSummary
|
from reflector.processors.types import TitleSummary
|
||||||
from reflector.processors.types import Transcript as TranscriptType
|
from reflector.processors.types import Transcript as TranscriptType
|
||||||
|
from reflector.settings import settings
|
||||||
from reflector.storage import Storage, get_transcripts_storage
|
from reflector.storage import Storage, get_transcripts_storage
|
||||||
|
from reflector.utils.daily import (
|
||||||
|
filter_cam_audio_tracks,
|
||||||
|
parse_daily_recording_filename,
|
||||||
|
)
|
||||||
from reflector.utils.string import NonEmptyString
|
from reflector.utils.string import NonEmptyString
|
||||||
|
from reflector.video_platforms.factory import create_platform_client
|
||||||
|
|
||||||
# Audio encoding constants
|
# Audio encoding constants
|
||||||
OPUS_STANDARD_SAMPLE_RATE = 48000
|
OPUS_STANDARD_SAMPLE_RATE = 48000
|
||||||
@@ -494,6 +503,90 @@ class PipelineMainMultitrack(PipelineMainBase):
|
|||||||
transcript=transcript, event="WAVEFORM", data=waveform
|
transcript=transcript, event="WAVEFORM", data=waveform
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def update_participants_from_daily(
|
||||||
|
self, transcript: Transcript, track_keys: list[str]
|
||||||
|
) -> None:
|
||||||
|
"""Update transcript participants with user_id and names from Daily.co API."""
|
||||||
|
if not transcript.recording_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with create_platform_client("daily") as daily_client:
|
||||||
|
id_to_name = {}
|
||||||
|
id_to_user_id = {}
|
||||||
|
|
||||||
|
try:
|
||||||
|
rec_details = await daily_client.get_recording(
|
||||||
|
transcript.recording_id
|
||||||
|
)
|
||||||
|
mtg_session_id = rec_details.mtgSessionId
|
||||||
|
if mtg_session_id:
|
||||||
|
try:
|
||||||
|
payload: MeetingParticipantsResponse = (
|
||||||
|
await daily_client.get_meeting_participants(
|
||||||
|
mtg_session_id
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for p in payload.data:
|
||||||
|
pid = p.participant_id
|
||||||
|
name = p.user_name
|
||||||
|
user_id = p.user_id
|
||||||
|
if name:
|
||||||
|
id_to_name[pid] = name
|
||||||
|
if user_id:
|
||||||
|
id_to_user_id[pid] = user_id
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(
|
||||||
|
"Failed to fetch Daily meeting participants",
|
||||||
|
error=str(e),
|
||||||
|
mtg_session_id=mtg_session_id,
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.logger.warning(
|
||||||
|
"No mtgSessionId found for recording; participant names may be generic",
|
||||||
|
recording_id=transcript.recording_id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(
|
||||||
|
"Failed to fetch Daily recording details",
|
||||||
|
error=str(e),
|
||||||
|
recording_id=transcript.recording_id,
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
cam_audio_keys = filter_cam_audio_tracks(track_keys)
|
||||||
|
|
||||||
|
for idx, key in enumerate(cam_audio_keys):
|
||||||
|
try:
|
||||||
|
parsed = parse_daily_recording_filename(key)
|
||||||
|
participant_id = parsed.participant_id
|
||||||
|
except ValueError as e:
|
||||||
|
self.logger.error(
|
||||||
|
"Failed to parse Daily recording filename",
|
||||||
|
error=str(e),
|
||||||
|
key=key,
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
default_name = f"Speaker {idx}"
|
||||||
|
name = id_to_name.get(participant_id, default_name)
|
||||||
|
user_id = id_to_user_id.get(participant_id)
|
||||||
|
|
||||||
|
participant = TranscriptParticipant(
|
||||||
|
id=participant_id, speaker=idx, name=name, user_id=user_id
|
||||||
|
)
|
||||||
|
await transcripts_controller.upsert_participant(
|
||||||
|
transcript, participant
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(
|
||||||
|
"Failed to map participant names", error=str(e), exc_info=True
|
||||||
|
)
|
||||||
|
|
||||||
async def process(self, bucket_name: str, track_keys: list[str]):
|
async def process(self, bucket_name: str, track_keys: list[str]):
|
||||||
transcript = await self.get_transcript()
|
transcript = await self.get_transcript()
|
||||||
async with self.transaction():
|
async with self.transaction():
|
||||||
@@ -502,9 +595,12 @@ class PipelineMainMultitrack(PipelineMainBase):
|
|||||||
{
|
{
|
||||||
"events": [],
|
"events": [],
|
||||||
"topics": [],
|
"topics": [],
|
||||||
|
"participants": [],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
await self.update_participants_from_daily(transcript, track_keys)
|
||||||
|
|
||||||
source_storage = get_transcripts_storage()
|
source_storage = get_transcripts_storage()
|
||||||
transcript_storage = source_storage
|
transcript_storage = source_storage
|
||||||
|
|
||||||
@@ -536,43 +632,55 @@ class PipelineMainMultitrack(PipelineMainBase):
|
|||||||
|
|
||||||
transcript.data_path.mkdir(parents=True, exist_ok=True)
|
transcript.data_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
mp3_writer = AudioFileWriterProcessor(
|
if settings.SKIP_MIXDOWN:
|
||||||
path=str(transcript.audio_mp3_filename),
|
self.logger.warning(
|
||||||
on_duration=self.on_duration,
|
"SKIP_MIXDOWN enabled: Skipping mixdown and waveform generation. "
|
||||||
)
|
"UI will have no audio playback or waveform.",
|
||||||
await self.mixdown_tracks(padded_track_urls, mp3_writer, offsets_seconds=None)
|
num_tracks=len(padded_track_urls),
|
||||||
await mp3_writer.flush()
|
transcript_id=transcript.id,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
mp3_writer = AudioFileWriterProcessor(
|
||||||
|
path=str(transcript.audio_mp3_filename),
|
||||||
|
on_duration=self.on_duration,
|
||||||
|
)
|
||||||
|
await self.mixdown_tracks(
|
||||||
|
padded_track_urls, mp3_writer, offsets_seconds=None
|
||||||
|
)
|
||||||
|
await mp3_writer.flush()
|
||||||
|
|
||||||
if not transcript.audio_mp3_filename.exists():
|
if not transcript.audio_mp3_filename.exists():
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"Mixdown failed - no MP3 file generated. Cannot proceed without playable audio."
|
"Mixdown failed - no MP3 file generated. Cannot proceed without playable audio."
|
||||||
|
)
|
||||||
|
|
||||||
|
storage_path = f"{transcript.id}/audio.mp3"
|
||||||
|
# Use file handle streaming to avoid loading entire MP3 into memory
|
||||||
|
mp3_size = transcript.audio_mp3_filename.stat().st_size
|
||||||
|
with open(transcript.audio_mp3_filename, "rb") as mp3_file:
|
||||||
|
await transcript_storage.put_file(storage_path, mp3_file)
|
||||||
|
mp3_url = await transcript_storage.get_file_url(storage_path)
|
||||||
|
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript, {"audio_location": "storage"}
|
||||||
)
|
)
|
||||||
|
|
||||||
storage_path = f"{transcript.id}/audio.mp3"
|
self.logger.info(
|
||||||
# Use file handle streaming to avoid loading entire MP3 into memory
|
f"Uploaded mixed audio to storage",
|
||||||
mp3_size = transcript.audio_mp3_filename.stat().st_size
|
storage_path=storage_path,
|
||||||
with open(transcript.audio_mp3_filename, "rb") as mp3_file:
|
size=mp3_size,
|
||||||
await transcript_storage.put_file(storage_path, mp3_file)
|
url=mp3_url,
|
||||||
mp3_url = await transcript_storage.get_file_url(storage_path)
|
)
|
||||||
|
|
||||||
await transcripts_controller.update(transcript, {"audio_location": "storage"})
|
self.logger.info("Generating waveform from mixed audio")
|
||||||
|
waveform_processor = AudioWaveformProcessor(
|
||||||
self.logger.info(
|
audio_path=transcript.audio_mp3_filename,
|
||||||
f"Uploaded mixed audio to storage",
|
waveform_path=transcript.audio_waveform_filename,
|
||||||
storage_path=storage_path,
|
on_waveform=self.on_waveform,
|
||||||
size=mp3_size,
|
)
|
||||||
url=mp3_url,
|
waveform_processor.set_pipeline(self.empty_pipeline)
|
||||||
)
|
await waveform_processor.flush()
|
||||||
|
self.logger.info("Waveform generated successfully")
|
||||||
self.logger.info("Generating waveform from mixed audio")
|
|
||||||
waveform_processor = AudioWaveformProcessor(
|
|
||||||
audio_path=transcript.audio_mp3_filename,
|
|
||||||
waveform_path=transcript.audio_waveform_filename,
|
|
||||||
on_waveform=self.on_waveform,
|
|
||||||
)
|
|
||||||
waveform_processor.set_pipeline(self.empty_pipeline)
|
|
||||||
await waveform_processor.flush()
|
|
||||||
self.logger.info("Waveform generated successfully")
|
|
||||||
|
|
||||||
speaker_transcripts: list[TranscriptType] = []
|
speaker_transcripts: list[TranscriptType] = []
|
||||||
for idx, padded_url in enumerate(padded_track_urls):
|
for idx, padded_url in enumerate(padded_track_urls):
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated, TypedDict
|
from typing import Annotated, TypedDict
|
||||||
|
|
||||||
@@ -16,6 +17,17 @@ class DiarizationSegment(TypedDict):
|
|||||||
|
|
||||||
|
|
||||||
PUNC_RE = re.compile(r"[.;:?!…]")
|
PUNC_RE = re.compile(r"[.;:?!…]")
|
||||||
|
SENTENCE_END_RE = re.compile(r"[.?!…]$")
|
||||||
|
|
||||||
|
# Max segment length for words_to_segments() - breaks on any punctuation (. ; : ? ! …)
|
||||||
|
# when segment exceeds this limit. Used for non-multitrack recordings.
|
||||||
|
MAX_SEGMENT_CHARS = 120
|
||||||
|
|
||||||
|
# Max segment length for words_to_segments_by_sentence() - only breaks on sentence-ending
|
||||||
|
# punctuation (. ? ! …) when segment exceeds this limit. Higher threshold allows complete
|
||||||
|
# sentences in multitrack recordings where speakers overlap.
|
||||||
|
# similar number to server/reflector/processors/transcript_liner.py
|
||||||
|
MAX_SENTENCE_SEGMENT_CHARS = 1000
|
||||||
|
|
||||||
|
|
||||||
class AudioFile(BaseModel):
|
class AudioFile(BaseModel):
|
||||||
@@ -76,7 +88,6 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
|
|||||||
# but separate if the speaker changes, or if the punctuation is a . , ; : ? !
|
# but separate if the speaker changes, or if the punctuation is a . , ; : ? !
|
||||||
segments = []
|
segments = []
|
||||||
current_segment = None
|
current_segment = None
|
||||||
MAX_SEGMENT_LENGTH = 120
|
|
||||||
|
|
||||||
for word in words:
|
for word in words:
|
||||||
if current_segment is None:
|
if current_segment is None:
|
||||||
@@ -106,7 +117,7 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
|
|||||||
current_segment.end = word.end
|
current_segment.end = word.end
|
||||||
|
|
||||||
have_punc = PUNC_RE.search(word.text)
|
have_punc = PUNC_RE.search(word.text)
|
||||||
if have_punc and (len(current_segment.text) > MAX_SEGMENT_LENGTH):
|
if have_punc and (len(current_segment.text) > MAX_SEGMENT_CHARS):
|
||||||
segments.append(current_segment)
|
segments.append(current_segment)
|
||||||
current_segment = None
|
current_segment = None
|
||||||
|
|
||||||
@@ -116,6 +127,70 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
|
|||||||
return segments
|
return segments
|
||||||
|
|
||||||
|
|
||||||
|
def words_to_segments_by_sentence(words: list[Word]) -> list[TranscriptSegment]:
|
||||||
|
"""Group words by speaker, then split into sentences.
|
||||||
|
|
||||||
|
For multitrack recordings where words from different speakers are interleaved
|
||||||
|
by timestamp, this function first groups all words by speaker, then creates
|
||||||
|
segments based on sentence boundaries within each speaker's words.
|
||||||
|
|
||||||
|
This produces cleaner output than words_to_segments() which breaks on every
|
||||||
|
speaker change, resulting in many tiny segments when speakers overlap.
|
||||||
|
"""
|
||||||
|
if not words:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Group words by speaker, preserving order within each speaker
|
||||||
|
by_speaker: dict[int, list[Word]] = defaultdict(list)
|
||||||
|
for w in words:
|
||||||
|
by_speaker[w.speaker].append(w)
|
||||||
|
|
||||||
|
segments: list[TranscriptSegment] = []
|
||||||
|
|
||||||
|
for speaker, speaker_words in by_speaker.items():
|
||||||
|
current_text = ""
|
||||||
|
current_start: float | None = None
|
||||||
|
current_end: float = 0.0
|
||||||
|
|
||||||
|
for word in speaker_words:
|
||||||
|
if current_start is None:
|
||||||
|
current_start = word.start
|
||||||
|
|
||||||
|
current_text += word.text
|
||||||
|
current_end = word.end
|
||||||
|
|
||||||
|
# Check for sentence end or max length
|
||||||
|
is_sentence_end = SENTENCE_END_RE.search(word.text.strip())
|
||||||
|
is_too_long = len(current_text) >= MAX_SENTENCE_SEGMENT_CHARS
|
||||||
|
|
||||||
|
if is_sentence_end or is_too_long:
|
||||||
|
segments.append(
|
||||||
|
TranscriptSegment(
|
||||||
|
text=current_text,
|
||||||
|
start=current_start,
|
||||||
|
end=current_end,
|
||||||
|
speaker=speaker,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
current_text = ""
|
||||||
|
current_start = None
|
||||||
|
|
||||||
|
# Flush remaining words for this speaker
|
||||||
|
if current_text and current_start is not None:
|
||||||
|
segments.append(
|
||||||
|
TranscriptSegment(
|
||||||
|
text=current_text,
|
||||||
|
start=current_start,
|
||||||
|
end=current_end,
|
||||||
|
speaker=speaker,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Sort segments by start time
|
||||||
|
segments.sort(key=lambda s: s.start)
|
||||||
|
return segments
|
||||||
|
|
||||||
|
|
||||||
class Transcript(BaseModel):
|
class Transcript(BaseModel):
|
||||||
translation: str | None = None
|
translation: str | None = None
|
||||||
words: list[Word] = []
|
words: list[Word] = []
|
||||||
@@ -154,7 +229,9 @@ class Transcript(BaseModel):
|
|||||||
word.start += offset
|
word.start += offset
|
||||||
word.end += offset
|
word.end += offset
|
||||||
|
|
||||||
def as_segments(self) -> list[TranscriptSegment]:
|
def as_segments(self, is_multitrack: bool = False) -> list[TranscriptSegment]:
|
||||||
|
if is_multitrack:
|
||||||
|
return words_to_segments_by_sentence(self.words)
|
||||||
return words_to_segments(self.words)
|
return words_to_segments(self.words)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -138,6 +138,14 @@ class Settings(BaseSettings):
|
|||||||
DAILY_WEBHOOK_UUID: str | None = (
|
DAILY_WEBHOOK_UUID: str | None = (
|
||||||
None # Webhook UUID for this environment. Not used by production code
|
None # Webhook UUID for this environment. Not used by production code
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Multitrack processing
|
||||||
|
# SKIP_MIXDOWN: When True, skips audio mixdown and waveform generation.
|
||||||
|
# Transcription still works using individual tracks. Useful for:
|
||||||
|
# - Diagnosing OOM issues in mixdown
|
||||||
|
# - Fast processing when audio playback is not needed
|
||||||
|
# Note: UI will have no audio playback or waveform when enabled.
|
||||||
|
SKIP_MIXDOWN: bool = True
|
||||||
# Platform Configuration
|
# Platform Configuration
|
||||||
DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM
|
DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM
|
||||||
|
|
||||||
|
|||||||
@@ -64,6 +64,11 @@ def recording_lock_key(recording_id: NonEmptyString) -> NonEmptyString:
|
|||||||
return f"recording:{recording_id}"
|
return f"recording:{recording_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def filter_cam_audio_tracks(track_keys: list[str]) -> list[str]:
|
||||||
|
"""Filter track keys to cam-audio tracks only (skip screen-audio, etc.)."""
|
||||||
|
return [k for k in track_keys if "cam-audio" in k]
|
||||||
|
|
||||||
|
|
||||||
def extract_base_room_name(daily_room_name: DailyRoomName) -> NonEmptyString:
|
def extract_base_room_name(daily_room_name: DailyRoomName) -> NonEmptyString:
|
||||||
"""
|
"""
|
||||||
Extract base room name from Daily.co timestamped room name.
|
Extract base room name from Daily.co timestamped room name.
|
||||||
|
|||||||
@@ -6,9 +6,6 @@ from reflector.db.transcripts import TranscriptParticipant, TranscriptTopic
|
|||||||
from reflector.processors.types import (
|
from reflector.processors.types import (
|
||||||
Transcript as ProcessorTranscript,
|
Transcript as ProcessorTranscript,
|
||||||
)
|
)
|
||||||
from reflector.processors.types import (
|
|
||||||
words_to_segments,
|
|
||||||
)
|
|
||||||
from reflector.schemas.transcript_formats import TranscriptSegment
|
from reflector.schemas.transcript_formats import TranscriptSegment
|
||||||
from reflector.utils.webvtt import seconds_to_timestamp
|
from reflector.utils.webvtt import seconds_to_timestamp
|
||||||
|
|
||||||
@@ -32,7 +29,9 @@ def format_timestamp_mmss(seconds: float | int) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def transcript_to_text(
|
def transcript_to_text(
|
||||||
topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
|
topics: list[TranscriptTopic],
|
||||||
|
participants: list[TranscriptParticipant] | None,
|
||||||
|
is_multitrack: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Convert transcript topics to plain text with speaker names."""
|
"""Convert transcript topics to plain text with speaker names."""
|
||||||
lines = []
|
lines = []
|
||||||
@@ -41,7 +40,7 @@ def transcript_to_text(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
transcript = ProcessorTranscript(words=topic.words)
|
transcript = ProcessorTranscript(words=topic.words)
|
||||||
segments = transcript.as_segments()
|
segments = transcript.as_segments(is_multitrack)
|
||||||
|
|
||||||
for segment in segments:
|
for segment in segments:
|
||||||
speaker_name = get_speaker_name(segment.speaker, participants)
|
speaker_name = get_speaker_name(segment.speaker, participants)
|
||||||
@@ -52,7 +51,9 @@ def transcript_to_text(
|
|||||||
|
|
||||||
|
|
||||||
def transcript_to_text_timestamped(
|
def transcript_to_text_timestamped(
|
||||||
topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
|
topics: list[TranscriptTopic],
|
||||||
|
participants: list[TranscriptParticipant] | None,
|
||||||
|
is_multitrack: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Convert transcript topics to timestamped text with speaker names."""
|
"""Convert transcript topics to timestamped text with speaker names."""
|
||||||
lines = []
|
lines = []
|
||||||
@@ -61,7 +62,7 @@ def transcript_to_text_timestamped(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
transcript = ProcessorTranscript(words=topic.words)
|
transcript = ProcessorTranscript(words=topic.words)
|
||||||
segments = transcript.as_segments()
|
segments = transcript.as_segments(is_multitrack)
|
||||||
|
|
||||||
for segment in segments:
|
for segment in segments:
|
||||||
speaker_name = get_speaker_name(segment.speaker, participants)
|
speaker_name = get_speaker_name(segment.speaker, participants)
|
||||||
@@ -73,7 +74,9 @@ def transcript_to_text_timestamped(
|
|||||||
|
|
||||||
|
|
||||||
def topics_to_webvtt_named(
|
def topics_to_webvtt_named(
|
||||||
topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
|
topics: list[TranscriptTopic],
|
||||||
|
participants: list[TranscriptParticipant] | None,
|
||||||
|
is_multitrack: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Convert transcript topics to WebVTT format with participant names."""
|
"""Convert transcript topics to WebVTT format with participant names."""
|
||||||
vtt = webvtt.WebVTT()
|
vtt = webvtt.WebVTT()
|
||||||
@@ -82,7 +85,8 @@ def topics_to_webvtt_named(
|
|||||||
if not topic.words:
|
if not topic.words:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
segments = words_to_segments(topic.words)
|
transcript = ProcessorTranscript(words=topic.words)
|
||||||
|
segments = transcript.as_segments(is_multitrack)
|
||||||
|
|
||||||
for segment in segments:
|
for segment in segments:
|
||||||
speaker_name = get_speaker_name(segment.speaker, participants)
|
speaker_name = get_speaker_name(segment.speaker, participants)
|
||||||
@@ -100,19 +104,23 @@ def topics_to_webvtt_named(
|
|||||||
|
|
||||||
|
|
||||||
def transcript_to_json_segments(
|
def transcript_to_json_segments(
|
||||||
topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None
|
topics: list[TranscriptTopic],
|
||||||
|
participants: list[TranscriptParticipant] | None,
|
||||||
|
is_multitrack: bool = False,
|
||||||
) -> list[TranscriptSegment]:
|
) -> list[TranscriptSegment]:
|
||||||
"""Convert transcript topics to a flat list of JSON segments."""
|
"""Convert transcript topics to a flat list of JSON segments."""
|
||||||
segments = []
|
result = []
|
||||||
|
|
||||||
for topic in topics:
|
for topic in topics:
|
||||||
if not topic.words:
|
if not topic.words:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
transcript = ProcessorTranscript(words=topic.words)
|
transcript = ProcessorTranscript(words=topic.words)
|
||||||
for segment in transcript.as_segments():
|
segments = transcript.as_segments(is_multitrack)
|
||||||
|
|
||||||
|
for segment in segments:
|
||||||
speaker_name = get_speaker_name(segment.speaker, participants)
|
speaker_name = get_speaker_name(segment.speaker, participants)
|
||||||
segments.append(
|
result.append(
|
||||||
TranscriptSegment(
|
TranscriptSegment(
|
||||||
speaker=segment.speaker,
|
speaker=segment.speaker,
|
||||||
speaker_name=speaker_name,
|
speaker_name=speaker_name,
|
||||||
@@ -122,4 +130,4 @@ def transcript_to_json_segments(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return segments
|
return result
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from pydantic import (
|
|||||||
|
|
||||||
import reflector.auth as auth
|
import reflector.auth as auth
|
||||||
from reflector.db import get_database
|
from reflector.db import get_database
|
||||||
|
from reflector.db.recordings import recordings_controller
|
||||||
from reflector.db.search import (
|
from reflector.db.search import (
|
||||||
DEFAULT_SEARCH_LIMIT,
|
DEFAULT_SEARCH_LIMIT,
|
||||||
SearchLimit,
|
SearchLimit,
|
||||||
@@ -60,6 +61,14 @@ ALGORITHM = "HS256"
|
|||||||
DOWNLOAD_EXPIRE_MINUTES = 60
|
DOWNLOAD_EXPIRE_MINUTES = 60
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_is_multitrack(transcript) -> bool:
|
||||||
|
"""Detect if transcript is from multitrack recording."""
|
||||||
|
if not transcript.recording_id:
|
||||||
|
return False
|
||||||
|
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||||
|
return recording is not None and recording.is_multitrack
|
||||||
|
|
||||||
|
|
||||||
def create_access_token(data: dict, expires_delta: timedelta):
|
def create_access_token(data: dict, expires_delta: timedelta):
|
||||||
to_encode = data.copy()
|
to_encode = data.copy()
|
||||||
expire = datetime.now(timezone.utc) + expires_delta
|
expire = datetime.now(timezone.utc) + expires_delta
|
||||||
@@ -360,7 +369,7 @@ class GetTranscriptTopic(BaseModel):
|
|||||||
segments: list[GetTranscriptSegmentTopic] = []
|
segments: list[GetTranscriptSegmentTopic] = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_transcript_topic(cls, topic: TranscriptTopic):
|
def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
|
||||||
if not topic.words:
|
if not topic.words:
|
||||||
# In previous version, words were missing
|
# In previous version, words were missing
|
||||||
# Just output a segment with speaker 0
|
# Just output a segment with speaker 0
|
||||||
@@ -384,7 +393,7 @@ class GetTranscriptTopic(BaseModel):
|
|||||||
start=segment.start,
|
start=segment.start,
|
||||||
speaker=segment.speaker,
|
speaker=segment.speaker,
|
||||||
)
|
)
|
||||||
for segment in transcript.as_segments()
|
for segment in transcript.as_segments(is_multitrack)
|
||||||
]
|
]
|
||||||
return cls(
|
return cls(
|
||||||
id=topic.id,
|
id=topic.id,
|
||||||
@@ -401,8 +410,8 @@ class GetTranscriptTopicWithWords(GetTranscriptTopic):
|
|||||||
words: list[Word] = []
|
words: list[Word] = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_transcript_topic(cls, topic: TranscriptTopic):
|
def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
|
||||||
instance = super().from_transcript_topic(topic)
|
instance = super().from_transcript_topic(topic, is_multitrack)
|
||||||
if topic.words:
|
if topic.words:
|
||||||
instance.words = topic.words
|
instance.words = topic.words
|
||||||
return instance
|
return instance
|
||||||
@@ -417,8 +426,8 @@ class GetTranscriptTopicWithWordsPerSpeaker(GetTranscriptTopic):
|
|||||||
words_per_speaker: list[SpeakerWords] = []
|
words_per_speaker: list[SpeakerWords] = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_transcript_topic(cls, topic: TranscriptTopic):
|
def from_transcript_topic(cls, topic: TranscriptTopic, is_multitrack: bool = False):
|
||||||
instance = super().from_transcript_topic(topic)
|
instance = super().from_transcript_topic(topic, is_multitrack)
|
||||||
if topic.words:
|
if topic.words:
|
||||||
words_per_speakers = []
|
words_per_speakers = []
|
||||||
# group words by speaker
|
# group words by speaker
|
||||||
@@ -457,6 +466,8 @@ async def transcript_get(
|
|||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
|
is_multitrack = await _get_is_multitrack(transcript)
|
||||||
|
|
||||||
base_data = {
|
base_data = {
|
||||||
"id": transcript.id,
|
"id": transcript.id,
|
||||||
"user_id": transcript.user_id,
|
"user_id": transcript.user_id,
|
||||||
@@ -483,14 +494,16 @@ async def transcript_get(
|
|||||||
return GetTranscriptWithText(
|
return GetTranscriptWithText(
|
||||||
**base_data,
|
**base_data,
|
||||||
transcript_format="text",
|
transcript_format="text",
|
||||||
transcript=transcript_to_text(transcript.topics, transcript.participants),
|
transcript=transcript_to_text(
|
||||||
|
transcript.topics, transcript.participants, is_multitrack
|
||||||
|
),
|
||||||
)
|
)
|
||||||
elif transcript_format == "text-timestamped":
|
elif transcript_format == "text-timestamped":
|
||||||
return GetTranscriptWithTextTimestamped(
|
return GetTranscriptWithTextTimestamped(
|
||||||
**base_data,
|
**base_data,
|
||||||
transcript_format="text-timestamped",
|
transcript_format="text-timestamped",
|
||||||
transcript=transcript_to_text_timestamped(
|
transcript=transcript_to_text_timestamped(
|
||||||
transcript.topics, transcript.participants
|
transcript.topics, transcript.participants, is_multitrack
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
elif transcript_format == "webvtt-named":
|
elif transcript_format == "webvtt-named":
|
||||||
@@ -498,7 +511,7 @@ async def transcript_get(
|
|||||||
**base_data,
|
**base_data,
|
||||||
transcript_format="webvtt-named",
|
transcript_format="webvtt-named",
|
||||||
transcript=topics_to_webvtt_named(
|
transcript=topics_to_webvtt_named(
|
||||||
transcript.topics, transcript.participants
|
transcript.topics, transcript.participants, is_multitrack
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
elif transcript_format == "json":
|
elif transcript_format == "json":
|
||||||
@@ -506,7 +519,7 @@ async def transcript_get(
|
|||||||
**base_data,
|
**base_data,
|
||||||
transcript_format="json",
|
transcript_format="json",
|
||||||
transcript=transcript_to_json_segments(
|
transcript=transcript_to_json_segments(
|
||||||
transcript.topics, transcript.participants
|
transcript.topics, transcript.participants, is_multitrack
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -565,9 +578,12 @@ async def transcript_get_topics(
|
|||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
|
is_multitrack = await _get_is_multitrack(transcript)
|
||||||
|
|
||||||
# convert to GetTranscriptTopic
|
# convert to GetTranscriptTopic
|
||||||
return [
|
return [
|
||||||
GetTranscriptTopic.from_transcript_topic(topic) for topic in transcript.topics
|
GetTranscriptTopic.from_transcript_topic(topic, is_multitrack)
|
||||||
|
for topic in transcript.topics
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -584,9 +600,11 @@ async def transcript_get_topics_with_words(
|
|||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
|
is_multitrack = await _get_is_multitrack(transcript)
|
||||||
|
|
||||||
# convert to GetTranscriptTopicWithWords
|
# convert to GetTranscriptTopicWithWords
|
||||||
return [
|
return [
|
||||||
GetTranscriptTopicWithWords.from_transcript_topic(topic)
|
GetTranscriptTopicWithWords.from_transcript_topic(topic, is_multitrack)
|
||||||
for topic in transcript.topics
|
for topic in transcript.topics
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -605,13 +623,17 @@ async def transcript_get_topics_with_words_per_speaker(
|
|||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
|
is_multitrack = await _get_is_multitrack(transcript)
|
||||||
|
|
||||||
# get the topic from the transcript
|
# get the topic from the transcript
|
||||||
topic = next((t for t in transcript.topics if t.id == topic_id), None)
|
topic = next((t for t in transcript.topics if t.id == topic_id), None)
|
||||||
if not topic:
|
if not topic:
|
||||||
raise HTTPException(status_code=404, detail="Topic not found")
|
raise HTTPException(status_code=404, detail="Topic not found")
|
||||||
|
|
||||||
# convert to GetTranscriptTopicWithWordsPerSpeaker
|
# convert to GetTranscriptTopicWithWordsPerSpeaker
|
||||||
return GetTranscriptTopicWithWordsPerSpeaker.from_transcript_topic(topic)
|
return GetTranscriptTopicWithWordsPerSpeaker.from_transcript_topic(
|
||||||
|
topic, is_multitrack
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/transcripts/{transcript_id}/zulip")
|
@router.post("/transcripts/{transcript_id}/zulip")
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import json
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from typing import List
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
import av
|
import av
|
||||||
@@ -11,7 +12,7 @@ from celery import shared_task
|
|||||||
from celery.utils.log import get_task_logger
|
from celery.utils.log import get_task_logger
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
|
|
||||||
from reflector.dailyco_api import MeetingParticipantsResponse
|
from reflector.dailyco_api import RecordingResponse
|
||||||
from reflector.db.daily_participant_sessions import (
|
from reflector.db.daily_participant_sessions import (
|
||||||
DailyParticipantSession,
|
DailyParticipantSession,
|
||||||
daily_participant_sessions_controller,
|
daily_participant_sessions_controller,
|
||||||
@@ -21,7 +22,6 @@ from reflector.db.recordings import Recording, recordings_controller
|
|||||||
from reflector.db.rooms import rooms_controller
|
from reflector.db.rooms import rooms_controller
|
||||||
from reflector.db.transcripts import (
|
from reflector.db.transcripts import (
|
||||||
SourceKind,
|
SourceKind,
|
||||||
TranscriptParticipant,
|
|
||||||
transcripts_controller,
|
transcripts_controller,
|
||||||
)
|
)
|
||||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||||
@@ -38,7 +38,7 @@ from reflector.storage import get_transcripts_storage
|
|||||||
from reflector.utils.daily import (
|
from reflector.utils.daily import (
|
||||||
DailyRoomName,
|
DailyRoomName,
|
||||||
extract_base_room_name,
|
extract_base_room_name,
|
||||||
parse_daily_recording_filename,
|
filter_cam_audio_tracks,
|
||||||
recording_lock_key,
|
recording_lock_key,
|
||||||
)
|
)
|
||||||
from reflector.video_platforms.factory import create_platform_client
|
from reflector.video_platforms.factory import create_platform_client
|
||||||
@@ -273,15 +273,7 @@ async def _process_multitrack_recording_inner(
|
|||||||
# else: Recording already exists; metadata set at creation time
|
# else: Recording already exists; metadata set at creation time
|
||||||
|
|
||||||
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
||||||
if transcript:
|
if not transcript:
|
||||||
await transcripts_controller.update(
|
|
||||||
transcript,
|
|
||||||
{
|
|
||||||
"topics": [],
|
|
||||||
"participants": [],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
transcript = await transcripts_controller.add(
|
transcript = await transcripts_controller.add(
|
||||||
"",
|
"",
|
||||||
source_kind=SourceKind.ROOM,
|
source_kind=SourceKind.ROOM,
|
||||||
@@ -294,79 +286,10 @@ async def _process_multitrack_recording_inner(
|
|||||||
room_id=room.id,
|
room_id=room.id,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
async with create_platform_client("daily") as daily_client:
|
|
||||||
id_to_name = {}
|
|
||||||
id_to_user_id = {}
|
|
||||||
|
|
||||||
try:
|
|
||||||
rec_details = await daily_client.get_recording(recording_id)
|
|
||||||
mtg_session_id = rec_details.mtgSessionId
|
|
||||||
if mtg_session_id:
|
|
||||||
try:
|
|
||||||
payload: MeetingParticipantsResponse = (
|
|
||||||
await daily_client.get_meeting_participants(mtg_session_id)
|
|
||||||
)
|
|
||||||
for p in payload.data:
|
|
||||||
pid = p.participant_id
|
|
||||||
assert (
|
|
||||||
pid is not None
|
|
||||||
), "panic! participant id cannot be None"
|
|
||||||
name = p.user_name
|
|
||||||
user_id = p.user_id
|
|
||||||
if name:
|
|
||||||
id_to_name[pid] = name
|
|
||||||
if user_id:
|
|
||||||
id_to_user_id[pid] = user_id
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(
|
|
||||||
"Failed to fetch Daily meeting participants",
|
|
||||||
error=str(e),
|
|
||||||
mtg_session_id=mtg_session_id,
|
|
||||||
exc_info=True,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"No mtgSessionId found for recording; participant names may be generic",
|
|
||||||
recording_id=recording_id,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(
|
|
||||||
"Failed to fetch Daily recording details",
|
|
||||||
error=str(e),
|
|
||||||
recording_id=recording_id,
|
|
||||||
exc_info=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
for idx, key in enumerate(track_keys):
|
|
||||||
try:
|
|
||||||
parsed = parse_daily_recording_filename(key)
|
|
||||||
participant_id = parsed.participant_id
|
|
||||||
except ValueError as e:
|
|
||||||
logger.error(
|
|
||||||
"Failed to parse Daily recording filename",
|
|
||||||
error=str(e),
|
|
||||||
key=key,
|
|
||||||
exc_info=True,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
default_name = f"Speaker {idx}"
|
|
||||||
name = id_to_name.get(participant_id, default_name)
|
|
||||||
user_id = id_to_user_id.get(participant_id)
|
|
||||||
|
|
||||||
participant = TranscriptParticipant(
|
|
||||||
id=participant_id, speaker=idx, name=name, user_id=user_id
|
|
||||||
)
|
|
||||||
await transcripts_controller.upsert_participant(transcript, participant)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Failed to map participant names", error=str(e), exc_info=True)
|
|
||||||
|
|
||||||
task_pipeline_multitrack_process.delay(
|
task_pipeline_multitrack_process.delay(
|
||||||
transcript_id=transcript.id,
|
transcript_id=transcript.id,
|
||||||
bucket_name=bucket_name,
|
bucket_name=bucket_name,
|
||||||
track_keys=track_keys,
|
track_keys=filter_cam_audio_tracks(track_keys),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -391,7 +314,7 @@ async def poll_daily_recordings():
|
|||||||
|
|
||||||
async with create_platform_client("daily") as daily_client:
|
async with create_platform_client("daily") as daily_client:
|
||||||
# latest 100. TODO cursor-based state
|
# latest 100. TODO cursor-based state
|
||||||
api_recordings = await daily_client.list_recordings()
|
api_recordings: List[RecordingResponse] = await daily_client.list_recordings()
|
||||||
|
|
||||||
if not api_recordings:
|
if not api_recordings:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@@ -422,17 +345,19 @@ async def poll_daily_recordings():
|
|||||||
|
|
||||||
for recording in missing_recordings:
|
for recording in missing_recordings:
|
||||||
if not recording.tracks:
|
if not recording.tracks:
|
||||||
assert recording.status != "finished", (
|
if recording.status == "finished":
|
||||||
f"Recording {recording.id} has status='finished' but no tracks. "
|
logger.warning(
|
||||||
f"Daily.co API guarantees finished recordings have tracks available. "
|
"Finished recording has no tracks (no audio captured)",
|
||||||
f"room_name={recording.room_name}"
|
recording_id=recording.id,
|
||||||
)
|
room_name=recording.room_name,
|
||||||
logger.debug(
|
)
|
||||||
"No tracks in recording yet",
|
else:
|
||||||
recording_id=recording.id,
|
logger.debug(
|
||||||
room_name=recording.room_name,
|
"No tracks in recording yet",
|
||||||
status=recording.status,
|
recording_id=recording.id,
|
||||||
)
|
room_name=recording.room_name,
|
||||||
|
status=recording.status,
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"]
|
track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"]
|
||||||
|
|||||||
@@ -159,3 +159,78 @@ def test_processor_transcript_segment():
|
|||||||
assert segments[3].start == 30.72
|
assert segments[3].start == 30.72
|
||||||
assert segments[4].start == 31.56
|
assert segments[4].start == 31.56
|
||||||
assert segments[5].start == 32.38
|
assert segments[5].start == 32.38
|
||||||
|
|
||||||
|
|
||||||
|
def test_processor_transcript_segment_multitrack_interleaved():
|
||||||
|
"""Test as_segments(is_multitrack=True) with interleaved speakers.
|
||||||
|
|
||||||
|
Multitrack recordings have words from different speakers sorted by start time,
|
||||||
|
causing frequent speaker alternation. The multitrack mode should group by
|
||||||
|
speaker first, then split into sentences.
|
||||||
|
"""
|
||||||
|
from reflector.processors.types import Transcript, Word
|
||||||
|
|
||||||
|
# Simulate real multitrack data: words sorted by start time, speakers interleave
|
||||||
|
# Speaker 0 says: "Hello there."
|
||||||
|
# Speaker 1 says: "I'm good."
|
||||||
|
# When sorted by time, words interleave
|
||||||
|
transcript = Transcript(
|
||||||
|
words=[
|
||||||
|
Word(text="Hello ", start=0.0, end=0.5, speaker=0),
|
||||||
|
Word(text="I'm ", start=0.5, end=0.8, speaker=1),
|
||||||
|
Word(text="there.", start=0.5, end=1.0, speaker=0),
|
||||||
|
Word(text="good.", start=1.0, end=1.5, speaker=1),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Default behavior (is_multitrack=False): breaks on every speaker change = 4 segments
|
||||||
|
segments_default = transcript.as_segments(is_multitrack=False)
|
||||||
|
assert len(segments_default) == 4
|
||||||
|
|
||||||
|
# Multitrack behavior: groups by speaker, then sentences = 2 segments
|
||||||
|
segments_multitrack = transcript.as_segments(is_multitrack=True)
|
||||||
|
assert len(segments_multitrack) == 2
|
||||||
|
|
||||||
|
# Check content - sorted by start time
|
||||||
|
assert segments_multitrack[0].speaker == 0
|
||||||
|
assert segments_multitrack[0].text == "Hello there."
|
||||||
|
assert segments_multitrack[0].start == 0.0
|
||||||
|
assert segments_multitrack[0].end == 1.0
|
||||||
|
|
||||||
|
assert segments_multitrack[1].speaker == 1
|
||||||
|
assert segments_multitrack[1].text == "I'm good."
|
||||||
|
assert segments_multitrack[1].start == 0.5
|
||||||
|
assert segments_multitrack[1].end == 1.5
|
||||||
|
|
||||||
|
|
||||||
|
def test_processor_transcript_segment_multitrack_overlapping_timestamps():
|
||||||
|
"""Test multitrack with exactly overlapping timestamps (real Daily.co data pattern)."""
|
||||||
|
from reflector.processors.types import Transcript, Word
|
||||||
|
|
||||||
|
# Real pattern from transcript 38d84d57: words with identical timestamps
|
||||||
|
transcript = Transcript(
|
||||||
|
words=[
|
||||||
|
Word(text="speaking ", start=6.71, end=7.11, speaker=0),
|
||||||
|
Word(text="Speaking ", start=6.71, end=7.11, speaker=1),
|
||||||
|
Word(text="at ", start=7.11, end=7.27, speaker=0),
|
||||||
|
Word(text="at ", start=7.11, end=7.27, speaker=1),
|
||||||
|
Word(text="the ", start=7.27, end=7.43, speaker=0),
|
||||||
|
Word(text="the ", start=7.27, end=7.43, speaker=1),
|
||||||
|
Word(text="same ", start=7.43, end=7.59, speaker=0),
|
||||||
|
Word(text="same ", start=7.43, end=7.59, speaker=1),
|
||||||
|
Word(text="time.", start=7.59, end=8.0, speaker=0),
|
||||||
|
Word(text="time.", start=7.59, end=8.0, speaker=1),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Default: 10 segments (one per speaker change)
|
||||||
|
segments_default = transcript.as_segments(is_multitrack=False)
|
||||||
|
assert len(segments_default) == 10
|
||||||
|
|
||||||
|
# Multitrack: 2 segments (one per speaker sentence)
|
||||||
|
segments_multitrack = transcript.as_segments(is_multitrack=True)
|
||||||
|
assert len(segments_multitrack) == 2
|
||||||
|
|
||||||
|
# Both should have complete sentences
|
||||||
|
assert "speaking at the same time." in segments_multitrack[0].text
|
||||||
|
assert "Speaking at the same time." in segments_multitrack[1].text
|
||||||
|
|||||||
@@ -273,8 +273,17 @@ async def test_transcript_formats_with_multiple_speakers():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_transcript_formats_with_overlapping_speakers():
|
async def test_transcript_formats_with_overlapping_speakers_multitrack():
|
||||||
"""Test format conversion when multiple speakers speak at the same time (overlapping timestamps)."""
|
"""Test format conversion for multitrack recordings with truly interleaved words.
|
||||||
|
|
||||||
|
Multitrack recordings have words from different speakers sorted by start time,
|
||||||
|
causing frequent speaker alternation. This tests the sentence-based segmentation
|
||||||
|
that groups each speaker's words into complete sentences.
|
||||||
|
"""
|
||||||
|
# Real multitrack data: words sorted by start time, speakers interleave
|
||||||
|
# Alice says: "Hello there." (0.0-1.0)
|
||||||
|
# Bob says: "I'm good." (0.5-1.5)
|
||||||
|
# When sorted by time, words interleave: Hello, I'm, there., good.
|
||||||
topics = [
|
topics = [
|
||||||
TranscriptTopic(
|
TranscriptTopic(
|
||||||
id="1",
|
id="1",
|
||||||
@@ -282,11 +291,10 @@ async def test_transcript_formats_with_overlapping_speakers():
|
|||||||
summary="Summary 1",
|
summary="Summary 1",
|
||||||
timestamp=0.0,
|
timestamp=0.0,
|
||||||
words=[
|
words=[
|
||||||
Word(text="Hello", start=0.0, end=0.5, speaker=0),
|
Word(text="Hello ", start=0.0, end=0.5, speaker=0),
|
||||||
Word(text=" there.", start=0.5, end=1.0, speaker=0),
|
Word(text="I'm ", start=0.5, end=0.8, speaker=1),
|
||||||
# Speaker 1 overlaps with speaker 0 at 0.5-1.0
|
Word(text="there.", start=0.5, end=1.0, speaker=0),
|
||||||
Word(text="I'm", start=0.5, end=1.0, speaker=1),
|
Word(text="good.", start=1.0, end=1.5, speaker=1),
|
||||||
Word(text=" good.", start=1.0, end=1.5, speaker=1),
|
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
@@ -296,20 +304,9 @@ async def test_transcript_formats_with_overlapping_speakers():
|
|||||||
TranscriptParticipant(id="2", speaker=1, name="Bob"),
|
TranscriptParticipant(id="2", speaker=1, name="Bob"),
|
||||||
]
|
]
|
||||||
|
|
||||||
text_result = transcript_to_text(topics, participants)
|
# With is_multitrack=True, should produce 2 segments (one per speaker sentence)
|
||||||
lines = text_result.split("\n")
|
# not 4 segments (one per speaker change)
|
||||||
assert len(lines) >= 2
|
webvtt_result = topics_to_webvtt_named(topics, participants, is_multitrack=True)
|
||||||
assert any("Alice:" in line for line in lines)
|
|
||||||
assert any("Bob:" in line for line in lines)
|
|
||||||
|
|
||||||
timestamped_result = transcript_to_text_timestamped(topics, participants)
|
|
||||||
timestamped_lines = timestamped_result.split("\n")
|
|
||||||
assert len(timestamped_lines) >= 2
|
|
||||||
assert any("Alice:" in line for line in timestamped_lines)
|
|
||||||
assert any("Bob:" in line for line in timestamped_lines)
|
|
||||||
assert any("[00:00]" in line for line in timestamped_lines)
|
|
||||||
|
|
||||||
webvtt_result = topics_to_webvtt_named(topics, participants)
|
|
||||||
expected_webvtt = """WEBVTT
|
expected_webvtt = """WEBVTT
|
||||||
|
|
||||||
00:00:00.000 --> 00:00:01.000
|
00:00:00.000 --> 00:00:01.000
|
||||||
@@ -320,23 +317,26 @@ async def test_transcript_formats_with_overlapping_speakers():
|
|||||||
"""
|
"""
|
||||||
assert webvtt_result == expected_webvtt
|
assert webvtt_result == expected_webvtt
|
||||||
|
|
||||||
segments = transcript_to_json_segments(topics, participants)
|
text_result = transcript_to_text(topics, participants, is_multitrack=True)
|
||||||
assert len(segments) >= 2
|
lines = text_result.split("\n")
|
||||||
speakers = {seg.speaker for seg in segments}
|
assert len(lines) == 2
|
||||||
assert 0 in speakers and 1 in speakers
|
assert "Alice: Hello there." in lines[0]
|
||||||
|
assert "Bob: I'm good." in lines[1]
|
||||||
|
|
||||||
alice_seg = next(seg for seg in segments if seg.speaker == 0)
|
timestamped_result = transcript_to_text_timestamped(
|
||||||
bob_seg = next(seg for seg in segments if seg.speaker == 1)
|
topics, participants, is_multitrack=True
|
||||||
|
)
|
||||||
|
timestamped_lines = timestamped_result.split("\n")
|
||||||
|
assert len(timestamped_lines) == 2
|
||||||
|
assert "[00:00] Alice: Hello there." in timestamped_lines[0]
|
||||||
|
assert "[00:00] Bob: I'm good." in timestamped_lines[1]
|
||||||
|
|
||||||
# Verify timestamps overlap: Alice (0.0-1.0) and Bob (0.5-1.5) overlap at 0.5-1.0
|
segments = transcript_to_json_segments(topics, participants, is_multitrack=True)
|
||||||
assert alice_seg.start < bob_seg.end, "Alice segment should start before Bob ends"
|
assert len(segments) == 2
|
||||||
assert bob_seg.start < alice_seg.end, "Bob segment should start before Alice ends"
|
assert segments[0].speaker_name == "Alice"
|
||||||
|
assert segments[0].text == "Hello there."
|
||||||
overlap_start = max(alice_seg.start, bob_seg.start)
|
assert segments[1].speaker_name == "Bob"
|
||||||
overlap_end = min(alice_seg.end, bob_seg.end)
|
assert segments[1].text == "I'm good."
|
||||||
assert (
|
|
||||||
overlap_start < overlap_end
|
|
||||||
), f"Segments should overlap between {overlap_start} and {overlap_end}"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -573,3 +573,207 @@ async def test_api_transcript_format_default_is_text(client):
|
|||||||
|
|
||||||
assert data["transcript_format"] == "text"
|
assert data["transcript_format"] == "text"
|
||||||
assert "transcript" in data
|
assert "transcript" in data
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_api_topics_endpoint_multitrack_segmentation(client):
|
||||||
|
"""Test GET /transcripts/{id}/topics uses sentence-based segmentation for multitrack.
|
||||||
|
|
||||||
|
This tests the fix for TASKS2.md - ensuring /topics endpoints correctly detect
|
||||||
|
multitrack recordings and use sentence-based segmentation instead of fragmenting
|
||||||
|
on every speaker change.
|
||||||
|
"""
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from reflector.db.recordings import Recording, recordings_controller
|
||||||
|
from reflector.db.transcripts import (
|
||||||
|
TranscriptParticipant,
|
||||||
|
TranscriptTopic,
|
||||||
|
transcripts_controller,
|
||||||
|
)
|
||||||
|
from reflector.processors.types import Word
|
||||||
|
|
||||||
|
# Create a multitrack recording (has track_keys)
|
||||||
|
recording = Recording(
|
||||||
|
bucket_name="test-bucket",
|
||||||
|
object_key="test-key",
|
||||||
|
recorded_at=datetime.now(timezone.utc),
|
||||||
|
track_keys=["track1.webm", "track2.webm"], # This makes it multitrack
|
||||||
|
)
|
||||||
|
await recordings_controller.create(recording)
|
||||||
|
|
||||||
|
# Create transcript linked to the recording
|
||||||
|
transcript = await transcripts_controller.add(
|
||||||
|
name="Multitrack Test",
|
||||||
|
source_kind="file",
|
||||||
|
recording_id=recording.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript,
|
||||||
|
{
|
||||||
|
"participants": [
|
||||||
|
TranscriptParticipant(id="1", speaker=0, name="Alice").model_dump(),
|
||||||
|
TranscriptParticipant(id="2", speaker=1, name="Bob").model_dump(),
|
||||||
|
]
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add interleaved words (as they appear in real multitrack data)
|
||||||
|
await transcripts_controller.upsert_topic(
|
||||||
|
transcript,
|
||||||
|
TranscriptTopic(
|
||||||
|
title="Topic 1",
|
||||||
|
summary="Summary 1",
|
||||||
|
timestamp=0,
|
||||||
|
words=[
|
||||||
|
Word(text="Hello ", start=0.0, end=0.5, speaker=0),
|
||||||
|
Word(text="I'm ", start=0.5, end=0.8, speaker=1),
|
||||||
|
Word(text="there.", start=0.5, end=1.0, speaker=0),
|
||||||
|
Word(text="good.", start=1.0, end=1.5, speaker=1),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test /topics endpoint
|
||||||
|
response = await client.get(f"/transcripts/{transcript.id}/topics")
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
assert len(data) == 1
|
||||||
|
topic = data[0]
|
||||||
|
|
||||||
|
# Key assertion: multitrack should produce 2 segments (one per speaker sentence)
|
||||||
|
# Not 4 segments (one per speaker change)
|
||||||
|
assert len(topic["segments"]) == 2
|
||||||
|
|
||||||
|
# Check content
|
||||||
|
segment_texts = [s["text"] for s in topic["segments"]]
|
||||||
|
assert "Hello there." in segment_texts
|
||||||
|
assert "I'm good." in segment_texts
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_api_topics_endpoint_non_multitrack_segmentation(client):
|
||||||
|
"""Test GET /transcripts/{id}/topics uses default segmentation for non-multitrack.
|
||||||
|
|
||||||
|
Ensures backward compatibility - transcripts without multitrack recordings
|
||||||
|
should continue using the default speaker-change-based segmentation.
|
||||||
|
"""
|
||||||
|
from reflector.db.transcripts import (
|
||||||
|
TranscriptParticipant,
|
||||||
|
TranscriptTopic,
|
||||||
|
transcripts_controller,
|
||||||
|
)
|
||||||
|
from reflector.processors.types import Word
|
||||||
|
|
||||||
|
# Create transcript WITHOUT recording (defaulted as not multitrack) TODO better heuristic
|
||||||
|
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||||
|
assert response.status_code == 200
|
||||||
|
tid = response.json()["id"]
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.get_by_id(tid)
|
||||||
|
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript,
|
||||||
|
{
|
||||||
|
"participants": [
|
||||||
|
TranscriptParticipant(id="1", speaker=0, name="Alice").model_dump(),
|
||||||
|
TranscriptParticipant(id="2", speaker=1, name="Bob").model_dump(),
|
||||||
|
]
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add interleaved words
|
||||||
|
await transcripts_controller.upsert_topic(
|
||||||
|
transcript,
|
||||||
|
TranscriptTopic(
|
||||||
|
title="Topic 1",
|
||||||
|
summary="Summary 1",
|
||||||
|
timestamp=0,
|
||||||
|
words=[
|
||||||
|
Word(text="Hello ", start=0.0, end=0.5, speaker=0),
|
||||||
|
Word(text="I'm ", start=0.5, end=0.8, speaker=1),
|
||||||
|
Word(text="there.", start=0.5, end=1.0, speaker=0),
|
||||||
|
Word(text="good.", start=1.0, end=1.5, speaker=1),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Test /topics endpoint
|
||||||
|
response = await client.get(f"/transcripts/{tid}/topics")
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
assert len(data) == 1
|
||||||
|
topic = data[0]
|
||||||
|
|
||||||
|
# Non-multitrack: should produce 4 segments (one per speaker change)
|
||||||
|
assert len(topic["segments"]) == 4
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_api_topics_with_words_endpoint_multitrack(client):
|
||||||
|
"""Test GET /transcripts/{id}/topics/with-words uses multitrack segmentation."""
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from reflector.db.recordings import Recording, recordings_controller
|
||||||
|
from reflector.db.transcripts import (
|
||||||
|
TranscriptParticipant,
|
||||||
|
TranscriptTopic,
|
||||||
|
transcripts_controller,
|
||||||
|
)
|
||||||
|
from reflector.processors.types import Word
|
||||||
|
|
||||||
|
# Create multitrack recording
|
||||||
|
recording = Recording(
|
||||||
|
bucket_name="test-bucket",
|
||||||
|
object_key="test-key-2",
|
||||||
|
recorded_at=datetime.now(timezone.utc),
|
||||||
|
track_keys=["track1.webm", "track2.webm"],
|
||||||
|
)
|
||||||
|
await recordings_controller.create(recording)
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.add(
|
||||||
|
name="Multitrack Test 2",
|
||||||
|
source_kind="file",
|
||||||
|
recording_id=recording.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript,
|
||||||
|
{
|
||||||
|
"participants": [
|
||||||
|
TranscriptParticipant(id="1", speaker=0, name="Alice").model_dump(),
|
||||||
|
TranscriptParticipant(id="2", speaker=1, name="Bob").model_dump(),
|
||||||
|
]
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
await transcripts_controller.upsert_topic(
|
||||||
|
transcript,
|
||||||
|
TranscriptTopic(
|
||||||
|
title="Topic 1",
|
||||||
|
summary="Summary 1",
|
||||||
|
timestamp=0,
|
||||||
|
words=[
|
||||||
|
Word(text="Hello ", start=0.0, end=0.5, speaker=0),
|
||||||
|
Word(text="I'm ", start=0.5, end=0.8, speaker=1),
|
||||||
|
Word(text="there.", start=0.5, end=1.0, speaker=0),
|
||||||
|
Word(text="good.", start=1.0, end=1.5, speaker=1),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
response = await client.get(f"/transcripts/{transcript.id}/topics/with-words")
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
assert len(data) == 1
|
||||||
|
topic = data[0]
|
||||||
|
|
||||||
|
# Should have 2 segments (multitrack sentence-based)
|
||||||
|
assert len(topic["segments"]) == 2
|
||||||
|
# Should also have words field
|
||||||
|
assert "words" in topic
|
||||||
|
assert len(topic["words"]) == 4
|
||||||
|
|||||||
@@ -117,15 +117,6 @@ export default function TranscriptDetails(details: TranscriptDetails) {
|
|||||||
return <Modal title="Loading" text={"Loading transcript..."} />;
|
return <Modal title="Loading" text={"Loading transcript..."} />;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mp3.error) {
|
|
||||||
return (
|
|
||||||
<Modal
|
|
||||||
title="Transcription error"
|
|
||||||
text={`There was an error loading the recording. Error: ${mp3.error}`}
|
|
||||||
/>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<Grid
|
<Grid
|
||||||
@@ -147,7 +138,12 @@ export default function TranscriptDetails(details: TranscriptDetails) {
|
|||||||
/>
|
/>
|
||||||
) : !mp3.loading && (waveform.error || mp3.error) ? (
|
) : !mp3.loading && (waveform.error || mp3.error) ? (
|
||||||
<Box p={4} bg="red.100" borderRadius="md">
|
<Box p={4} bg="red.100" borderRadius="md">
|
||||||
<Text>Error loading this recording</Text>
|
<Text>
|
||||||
|
Error loading{" "}
|
||||||
|
{[waveform.error && "waveform", mp3.error && "mp3"]
|
||||||
|
.filter(Boolean)
|
||||||
|
.join(" and ")}
|
||||||
|
</Text>
|
||||||
</Box>
|
</Box>
|
||||||
) : (
|
) : (
|
||||||
<Skeleton h={14} />
|
<Skeleton h={14} />
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import {
|
|||||||
recordingTypeRequiresConsent,
|
recordingTypeRequiresConsent,
|
||||||
} from "../../lib/consent";
|
} from "../../lib/consent";
|
||||||
import { useRoomJoinMeeting } from "../../lib/apiHooks";
|
import { useRoomJoinMeeting } from "../../lib/apiHooks";
|
||||||
|
import { assertExists } from "../../lib/utils";
|
||||||
|
|
||||||
type Meeting = components["schemas"]["Meeting"];
|
type Meeting = components["schemas"]["Meeting"];
|
||||||
|
|
||||||
@@ -22,16 +23,15 @@ export default function DailyRoom({ meeting }: DailyRoomProps) {
|
|||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const params = useParams();
|
const params = useParams();
|
||||||
const auth = useAuth();
|
const auth = useAuth();
|
||||||
const status = auth.status;
|
const authLastUserId = auth.lastUserId;
|
||||||
const containerRef = useRef<HTMLDivElement>(null);
|
const containerRef = useRef<HTMLDivElement>(null);
|
||||||
const joinMutation = useRoomJoinMeeting();
|
const joinMutation = useRoomJoinMeeting();
|
||||||
const [joinedMeeting, setJoinedMeeting] = useState<Meeting | null>(null);
|
const [joinedMeeting, setJoinedMeeting] = useState<Meeting | null>(null);
|
||||||
|
|
||||||
const roomName = params?.roomName as string;
|
const roomName = params?.roomName as string;
|
||||||
|
|
||||||
// Always call /join to get a fresh token with user_id
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (status === "loading" || !meeting?.id || !roomName) return;
|
if (authLastUserId === undefined || !meeting?.id || !roomName) return;
|
||||||
|
|
||||||
const join = async () => {
|
const join = async () => {
|
||||||
try {
|
try {
|
||||||
@@ -50,18 +50,17 @@ export default function DailyRoom({ meeting }: DailyRoomProps) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
join();
|
join();
|
||||||
}, [meeting?.id, roomName, status]);
|
}, [meeting?.id, roomName, authLastUserId]);
|
||||||
|
|
||||||
const roomUrl = joinedMeeting?.host_room_url || joinedMeeting?.room_url;
|
const roomUrl = joinedMeeting?.host_room_url || joinedMeeting?.room_url;
|
||||||
const isLoading =
|
|
||||||
status === "loading" || joinMutation.isPending || !joinedMeeting;
|
|
||||||
|
|
||||||
const handleLeave = useCallback(() => {
|
const handleLeave = useCallback(() => {
|
||||||
router.push("/browse");
|
router.push("/browse");
|
||||||
}, [router]);
|
}, [router]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (isLoading || !roomUrl || !containerRef.current) return;
|
if (authLastUserId === undefined || !roomUrl || !containerRef.current)
|
||||||
|
return;
|
||||||
|
|
||||||
let frame: DailyCall | null = null;
|
let frame: DailyCall | null = null;
|
||||||
let destroyed = false;
|
let destroyed = false;
|
||||||
@@ -90,9 +89,14 @@ export default function DailyRoom({ meeting }: DailyRoomProps) {
|
|||||||
|
|
||||||
frame.on("left-meeting", handleLeave);
|
frame.on("left-meeting", handleLeave);
|
||||||
|
|
||||||
|
// TODO this method must not ignore no-recording option
|
||||||
|
// TODO this method is here to make dev environments work in some cases (not examined which)
|
||||||
frame.on("joined-meeting", async () => {
|
frame.on("joined-meeting", async () => {
|
||||||
try {
|
try {
|
||||||
await frame.startRecording({ type: "raw-tracks" });
|
assertExists(
|
||||||
|
frame,
|
||||||
|
"frame object got lost somewhere after frame.on was called",
|
||||||
|
).startRecording({ type: "raw-tracks" });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Failed to start recording:", error);
|
console.error("Failed to start recording:", error);
|
||||||
}
|
}
|
||||||
@@ -104,7 +108,9 @@ export default function DailyRoom({ meeting }: DailyRoomProps) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
createAndJoin();
|
createAndJoin().catch((error) => {
|
||||||
|
console.error("Failed to create and join meeting:", error);
|
||||||
|
});
|
||||||
|
|
||||||
return () => {
|
return () => {
|
||||||
destroyed = true;
|
destroyed = true;
|
||||||
@@ -114,9 +120,9 @@ export default function DailyRoom({ meeting }: DailyRoomProps) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}, [roomUrl, isLoading, handleLeave]);
|
}, [roomUrl, authLastUserId, handleLeave]);
|
||||||
|
|
||||||
if (isLoading) {
|
if (authLastUserId === undefined) {
|
||||||
return (
|
return (
|
||||||
<Center width="100vw" height="100vh">
|
<Center width="100vw" height="100vh">
|
||||||
<Spinner size="xl" />
|
<Spinner size="xl" />
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { createContext, useContext } from "react";
|
import { createContext, useContext, useRef } from "react";
|
||||||
import { useSession as useNextAuthSession } from "next-auth/react";
|
import { useSession as useNextAuthSession } from "next-auth/react";
|
||||||
import { signOut, signIn } from "next-auth/react";
|
import { signOut, signIn } from "next-auth/react";
|
||||||
import { configureApiAuth } from "./apiClient";
|
import { configureApiAuth } from "./apiClient";
|
||||||
@@ -25,6 +25,9 @@ type AuthContextType = (
|
|||||||
update: () => Promise<Session | null>;
|
update: () => Promise<Session | null>;
|
||||||
signIn: typeof signIn;
|
signIn: typeof signIn;
|
||||||
signOut: typeof signOut;
|
signOut: typeof signOut;
|
||||||
|
// TODO probably rename isLoading to isReloading and make THIS field "isLoading"
|
||||||
|
// undefined is "not known", null is "is certainly logged out"
|
||||||
|
lastUserId: CustomSession["user"]["id"] | null | undefined;
|
||||||
};
|
};
|
||||||
|
|
||||||
const AuthContext = createContext<AuthContextType | undefined>(undefined);
|
const AuthContext = createContext<AuthContextType | undefined>(undefined);
|
||||||
@@ -41,10 +44,15 @@ const noopAuthContext: AuthContextType = {
|
|||||||
signOut: async () => {
|
signOut: async () => {
|
||||||
throw new Error("signOut not supposed to be called");
|
throw new Error("signOut not supposed to be called");
|
||||||
},
|
},
|
||||||
|
lastUserId: undefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
export function AuthProvider({ children }: { children: React.ReactNode }) {
|
export function AuthProvider({ children }: { children: React.ReactNode }) {
|
||||||
const { data: session, status, update } = useNextAuthSession();
|
const { data: session, status, update } = useNextAuthSession();
|
||||||
|
// referential comparison done in component, must be primitive /or cached
|
||||||
|
const lastUserId = useRef<CustomSession["user"]["id"] | null | undefined>(
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
|
||||||
const contextValue: AuthContextType = isAuthEnabled
|
const contextValue: AuthContextType = isAuthEnabled
|
||||||
? {
|
? {
|
||||||
@@ -73,11 +81,16 @@ export function AuthProvider({ children }: { children: React.ReactNode }) {
|
|||||||
case "authenticated": {
|
case "authenticated": {
|
||||||
const customSession = assertCustomSession(session);
|
const customSession = assertCustomSession(session);
|
||||||
if (customSession?.error === REFRESH_ACCESS_TOKEN_ERROR) {
|
if (customSession?.error === REFRESH_ACCESS_TOKEN_ERROR) {
|
||||||
|
// warning: call order-dependent
|
||||||
|
lastUserId.current = null;
|
||||||
// token had expired but next auth still returns "authenticated" so show user unauthenticated state
|
// token had expired but next auth still returns "authenticated" so show user unauthenticated state
|
||||||
return {
|
return {
|
||||||
status: "unauthenticated" as const,
|
status: "unauthenticated" as const,
|
||||||
};
|
};
|
||||||
} else if (customSession?.accessToken) {
|
} else if (customSession?.accessToken) {
|
||||||
|
// updates anyways with updated properties below
|
||||||
|
// warning! execution order conscience, must be ran before reading lastUserId.current below
|
||||||
|
lastUserId.current = customSession.user.id;
|
||||||
return {
|
return {
|
||||||
status,
|
status,
|
||||||
accessToken: customSession.accessToken,
|
accessToken: customSession.accessToken,
|
||||||
@@ -92,6 +105,8 @@ export function AuthProvider({ children }: { children: React.ReactNode }) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
case "unauthenticated": {
|
case "unauthenticated": {
|
||||||
|
// warning: call order-dependent
|
||||||
|
lastUserId.current = null;
|
||||||
return { status: "unauthenticated" as const };
|
return { status: "unauthenticated" as const };
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
@@ -103,6 +118,8 @@ export function AuthProvider({ children }: { children: React.ReactNode }) {
|
|||||||
update,
|
update,
|
||||||
signIn,
|
signIn,
|
||||||
signOut,
|
signOut,
|
||||||
|
// for optimistic cases when we assume "loading" doesn't immediately invalidate the user
|
||||||
|
lastUserId: lastUserId.current,
|
||||||
}
|
}
|
||||||
: noopAuthContext;
|
: noopAuthContext;
|
||||||
|
|
||||||
|
|||||||
@@ -148,7 +148,7 @@ export const authOptions = (): AuthOptions =>
|
|||||||
},
|
},
|
||||||
async session({ session, token }) {
|
async session({ session, token }) {
|
||||||
const extendedToken = token as JWTWithAccessToken;
|
const extendedToken = token as JWTWithAccessToken;
|
||||||
|
console.log("extendedToken", extendedToken);
|
||||||
const userId = await getUserId(extendedToken.accessToken);
|
const userId = await getUserId(extendedToken.accessToken);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
Reference in New Issue
Block a user