diff --git a/docs/transcript.md b/docs/transcript.md new file mode 100644 index 00000000..df091aa1 --- /dev/null +++ b/docs/transcript.md @@ -0,0 +1,241 @@ +# Transcript Formats + +The Reflector API provides multiple output formats for transcript data through the `transcript_format` query parameter on the GET `/v1/transcripts/{id}` endpoint. + +## Overview + +When retrieving a transcript, you can specify the desired format using the `transcript_format` query parameter. The API supports four formats optimized for different use cases: + +- **text** - Plain text with speaker names (default) +- **text-timestamped** - Timestamped text with speaker names +- **webvtt-named** - WebVTT subtitle format with participant names +- **json** - Structured JSON segments with full metadata + +All formats include participant information when available, resolving speaker IDs to actual names. + +## Query Parameter Usage + +``` +GET /v1/transcripts/{id}?transcript_format={format} +``` + +### Parameters + +- `transcript_format` (optional): The desired output format + - Type: `"text" | "text-timestamped" | "webvtt-named" | "json"` + - Default: `"text"` + +## Format Descriptions + +### Text Format (`text`) + +**Use case:** Simple, human-readable transcript for display or export. + +**Format:** Speaker names followed by their dialogue, one line per segment. + +**Example:** +``` +John Smith: Hello everyone +Jane Doe: Hi there +John Smith: How are you today? +``` + +**Request:** +```bash +GET /v1/transcripts/{id}?transcript_format=text +``` + +**Response:** +```json +{ + "id": "transcript_123", + "name": "Meeting Recording", + "transcript_format": "text", + "transcript": "John Smith: Hello everyone\nJane Doe: Hi there\nJohn Smith: How are you today?", + "participants": [ + {"id": "p1", "speaker": 0, "name": "John Smith"}, + {"id": "p2", "speaker": 1, "name": "Jane Doe"} + ], + ... +} +``` + +### Text Timestamped Format (`text-timestamped`) + +**Use case:** Transcript with timing information for navigation or reference. + +**Format:** `[MM:SS]` timestamp prefix before each speaker and dialogue. + +**Example:** +``` +[00:00] John Smith: Hello everyone +[00:05] Jane Doe: Hi there +[00:12] John Smith: How are you today? +``` + +**Request:** +```bash +GET /v1/transcripts/{id}?transcript_format=text-timestamped +``` + +**Response:** +```json +{ + "id": "transcript_123", + "name": "Meeting Recording", + "transcript_format": "text-timestamped", + "transcript": "[00:00] John Smith: Hello everyone\n[00:05] Jane Doe: Hi there\n[00:12] John Smith: How are you today?", + "participants": [ + {"id": "p1", "speaker": 0, "name": "John Smith"}, + {"id": "p2", "speaker": 1, "name": "Jane Doe"} + ], + ... +} +``` + +### WebVTT Named Format (`webvtt-named`) + +**Use case:** Subtitle files for video players, accessibility tools, or video editing. + +**Format:** Standard WebVTT subtitle format with voice tags using participant names. + +**Example:** +``` +WEBVTT + +00:00:00.000 --> 00:00:05.000 +Hello everyone + +00:00:05.000 --> 00:00:12.000 +Hi there + +00:00:12.000 --> 00:00:18.000 +How are you today? +``` + +**Request:** +```bash +GET /v1/transcripts/{id}?transcript_format=webvtt-named +``` + +**Response:** +```json +{ + "id": "transcript_123", + "name": "Meeting Recording", + "transcript_format": "webvtt-named", + "transcript": "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\nHello everyone\n\n...", + "participants": [ + {"id": "p1", "speaker": 0, "name": "John Smith"}, + {"id": "p2", "speaker": 1, "name": "Jane Doe"} + ], + ... 
+} +``` + +### JSON Format (`json`) + +**Use case:** Programmatic access with full timing and speaker metadata. + +**Format:** Array of segment objects with speaker information, text content, and precise timing. + +**Example:** +```json +[ + { + "speaker": 0, + "speaker_name": "John Smith", + "text": "Hello everyone", + "start": 0.0, + "end": 5.0 + }, + { + "speaker": 1, + "speaker_name": "Jane Doe", + "text": "Hi there", + "start": 5.0, + "end": 12.0 + }, + { + "speaker": 0, + "speaker_name": "John Smith", + "text": "How are you today?", + "start": 12.0, + "end": 18.0 + } +] +``` + +**Request:** +```bash +GET /v1/transcripts/{id}?transcript_format=json +``` + +**Response:** +```json +{ + "id": "transcript_123", + "name": "Meeting Recording", + "transcript_format": "json", + "transcript": [ + { + "speaker": 0, + "speaker_name": "John Smith", + "text": "Hello everyone", + "start": 0.0, + "end": 5.0 + }, + { + "speaker": 1, + "speaker_name": "Jane Doe", + "text": "Hi there", + "start": 5.0, + "end": 12.0 + } + ], + "participants": [ + {"id": "p1", "speaker": 0, "name": "John Smith"}, + {"id": "p2", "speaker": 1, "name": "Jane Doe"} + ], + ... +} +``` + +## Response Structure + +All formats return the same base transcript metadata with an additional `transcript_format` field and format-specific `transcript` field: + +### Common Fields + +- `id`: Transcript identifier +- `user_id`: Owner user ID (if authenticated) +- `name`: Transcript name +- `status`: Processing status +- `locked`: Whether transcript is locked for editing +- `duration`: Total duration in seconds +- `title`: Auto-generated or custom title +- `short_summary`: Brief summary +- `long_summary`: Detailed summary +- `created_at`: Creation timestamp +- `share_mode`: Access control setting +- `source_language`: Original audio language +- `target_language`: Translation target language +- `reviewed`: Whether transcript has been reviewed +- `meeting_id`: Associated meeting ID (if applicable) +- `source_kind`: Source type (live, file, room) +- `room_id`: Associated room ID (if applicable) +- `audio_deleted`: Whether audio has been deleted +- `participants`: Array of participant objects with speaker mappings + +### Format-Specific Fields + +- `transcript_format`: The format identifier (discriminator field) +- `transcript`: The formatted transcript content (string for text/webvtt formats, array for json format) + +## Speaker Name Resolution + +All formats resolve speaker IDs to participant names when available: + +- If a participant exists for the speaker ID, their name is used +- If no participant exists, a default name like "Speaker 0" is generated +- Speaker IDs are integers (0, 1, 2, etc.) 
assigned during diarization diff --git a/server/reflector/schemas/transcript_formats.py b/server/reflector/schemas/transcript_formats.py new file mode 100644 index 00000000..916e4a80 --- /dev/null +++ b/server/reflector/schemas/transcript_formats.py @@ -0,0 +1,17 @@ +"""Schema definitions for transcript format types and segments.""" + +from typing import Literal + +from pydantic import BaseModel + +TranscriptFormat = Literal["text", "text-timestamped", "webvtt-named", "json"] + + +class TranscriptSegment(BaseModel): + """A single transcript segment with speaker and timing information.""" + + speaker: int + speaker_name: str + text: str + start: float + end: float diff --git a/server/reflector/services/transcript_process.py b/server/reflector/services/transcript_process.py index bc48a4eb..746ca3ea 100644 --- a/server/reflector/services/transcript_process.py +++ b/server/reflector/services/transcript_process.py @@ -7,7 +7,7 @@ This module provides result-based error handling that works in both contexts: """ from dataclasses import dataclass -from typing import Literal, Union +from typing import Literal, Union, assert_never import celery from celery.result import AsyncResult @@ -18,7 +18,6 @@ from reflector.pipelines.main_file_pipeline import task_pipeline_file_process from reflector.pipelines.main_multitrack_pipeline import ( task_pipeline_multitrack_process, ) -from reflector.utils.match import absurd from reflector.utils.string import NonEmptyString @@ -155,7 +154,7 @@ def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult: elif isinstance(config, FileProcessingConfig): return task_pipeline_file_process.delay(transcript_id=config.transcript_id) else: - absurd(config) + assert_never(config) def task_is_scheduled_or_active(task_name: str, **kwargs): diff --git a/server/reflector/utils/match.py b/server/reflector/utils/match.py deleted file mode 100644 index e0f6bc53..00000000 --- a/server/reflector/utils/match.py +++ /dev/null @@ -1,10 +0,0 @@ -from typing import NoReturn - - -def assert_exhaustiveness(x: NoReturn) -> NoReturn: - """Provide an assertion at type-check time that this function is never called.""" - raise AssertionError(f"Invalid value: {x!r}") - - -def absurd(x: NoReturn) -> NoReturn: - return assert_exhaustiveness(x) diff --git a/server/reflector/utils/transcript_formats.py b/server/reflector/utils/transcript_formats.py new file mode 100644 index 00000000..4ccf8cce --- /dev/null +++ b/server/reflector/utils/transcript_formats.py @@ -0,0 +1,125 @@ +"""Utilities for converting transcript data to various output formats.""" + +import webvtt + +from reflector.db.transcripts import TranscriptParticipant, TranscriptTopic +from reflector.processors.types import ( + Transcript as ProcessorTranscript, +) +from reflector.processors.types import ( + words_to_segments, +) +from reflector.schemas.transcript_formats import TranscriptSegment +from reflector.utils.webvtt import seconds_to_timestamp + + +def get_speaker_name( + speaker: int, participants: list[TranscriptParticipant] | None +) -> str: + """Get participant name for speaker or default to 'Speaker N'.""" + if participants: + for participant in participants: + if participant.speaker == speaker: + return participant.name + return f"Speaker {speaker}" + + +def format_timestamp_mmss(seconds: float | int) -> str: + """Format seconds as MM:SS timestamp.""" + minutes = int(seconds // 60) + secs = int(seconds % 60) + return f"{minutes:02d}:{secs:02d}" + + +def transcript_to_text( + topics: list[TranscriptTopic], 
participants: list[TranscriptParticipant] | None +) -> str: + """Convert transcript topics to plain text with speaker names.""" + lines = [] + for topic in topics: + if not topic.words: + continue + + transcript = ProcessorTranscript(words=topic.words) + segments = transcript.as_segments() + + for segment in segments: + speaker_name = get_speaker_name(segment.speaker, participants) + text = segment.text.strip() + lines.append(f"{speaker_name}: {text}") + + return "\n".join(lines) + + +def transcript_to_text_timestamped( + topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None +) -> str: + """Convert transcript topics to timestamped text with speaker names.""" + lines = [] + for topic in topics: + if not topic.words: + continue + + transcript = ProcessorTranscript(words=topic.words) + segments = transcript.as_segments() + + for segment in segments: + speaker_name = get_speaker_name(segment.speaker, participants) + timestamp = format_timestamp_mmss(segment.start) + text = segment.text.strip() + lines.append(f"[{timestamp}] {speaker_name}: {text}") + + return "\n".join(lines) + + +def topics_to_webvtt_named( + topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None +) -> str: + """Convert transcript topics to WebVTT format with participant names.""" + vtt = webvtt.WebVTT() + + for topic in topics: + if not topic.words: + continue + + segments = words_to_segments(topic.words) + + for segment in segments: + speaker_name = get_speaker_name(segment.speaker, participants) + text = segment.text.strip() + text = f"{text}" + + caption = webvtt.Caption( + start=seconds_to_timestamp(segment.start), + end=seconds_to_timestamp(segment.end), + text=text, + ) + vtt.captions.append(caption) + + return vtt.content + + +def transcript_to_json_segments( + topics: list[TranscriptTopic], participants: list[TranscriptParticipant] | None +) -> list[TranscriptSegment]: + """Convert transcript topics to a flat list of JSON segments.""" + segments = [] + + for topic in topics: + if not topic.words: + continue + + transcript = ProcessorTranscript(words=topic.words) + for segment in transcript.as_segments(): + speaker_name = get_speaker_name(segment.speaker, participants) + segments.append( + TranscriptSegment( + speaker=segment.speaker, + speaker_name=speaker_name, + text=segment.text.strip(), + start=segment.start, + end=segment.end, + ) + ) + + return segments diff --git a/server/reflector/utils/webvtt.py b/server/reflector/utils/webvtt.py index efdbe948..9b3d16ef 100644 --- a/server/reflector/utils/webvtt.py +++ b/server/reflector/utils/webvtt.py @@ -13,7 +13,7 @@ VttTimestamp = Annotated[str, "vtt_timestamp"] WebVTTStr = Annotated[str, "webvtt_str"] -def _seconds_to_timestamp(seconds: Seconds) -> VttTimestamp: +def seconds_to_timestamp(seconds: Seconds) -> VttTimestamp: # lib doesn't do that hours = int(seconds // 3600) minutes = int((seconds % 3600) // 60) @@ -37,8 +37,8 @@ def words_to_webvtt(words: list[Word]) -> WebVTTStr: text = f"{text}" caption = webvtt.Caption( - start=_seconds_to_timestamp(segment.start), - end=_seconds_to_timestamp(segment.end), + start=seconds_to_timestamp(segment.start), + end=seconds_to_timestamp(segment.end), text=text, ) vtt.captions.append(caption) diff --git a/server/reflector/views/transcripts.py b/server/reflector/views/transcripts.py index 37e806cb..dc5ccdb7 100644 --- a/server/reflector/views/transcripts.py +++ b/server/reflector/views/transcripts.py @@ -1,11 +1,18 @@ from datetime import datetime, timedelta, timezone -from 
typing import Annotated, Literal, Optional +from typing import Annotated, Literal, Optional, assert_never from fastapi import APIRouter, Depends, HTTPException, Query from fastapi_pagination import Page from fastapi_pagination.ext.databases import apaginate from jose import jwt -from pydantic import AwareDatetime, BaseModel, Field, constr, field_serializer +from pydantic import ( + AwareDatetime, + BaseModel, + Discriminator, + Field, + constr, + field_serializer, +) import reflector.auth as auth from reflector.db import get_database @@ -31,7 +38,14 @@ from reflector.db.transcripts import ( ) from reflector.processors.types import Transcript as ProcessorTranscript from reflector.processors.types import Word +from reflector.schemas.transcript_formats import TranscriptFormat, TranscriptSegment from reflector.settings import settings +from reflector.utils.transcript_formats import ( + topics_to_webvtt_named, + transcript_to_json_segments, + transcript_to_text, + transcript_to_text_timestamped, +) from reflector.ws_manager import get_ws_manager from reflector.zulip import ( InvalidMessageError, @@ -88,10 +102,84 @@ class GetTranscriptMinimal(BaseModel): audio_deleted: bool | None = None -class GetTranscript(GetTranscriptMinimal): +class GetTranscriptWithParticipants(GetTranscriptMinimal): participants: list[TranscriptParticipant] | None +class GetTranscriptWithText(GetTranscriptWithParticipants): + """ + Transcript response with plain text format. + + Format: Speaker names followed by their dialogue, one line per segment. + Example: + John Smith: Hello everyone + Jane Doe: Hi there + """ + + transcript_format: Literal["text"] = "text" + transcript: str + + +class GetTranscriptWithTextTimestamped(GetTranscriptWithParticipants): + """ + Transcript response with timestamped text format. + + Format: [MM:SS] timestamp prefix before each speaker and dialogue. + Example: + [00:00] John Smith: Hello everyone + [00:05] Jane Doe: Hi there + """ + + transcript_format: Literal["text-timestamped"] = "text-timestamped" + transcript: str + + +class GetTranscriptWithWebVTTNamed(GetTranscriptWithParticipants): + """ + Transcript response in WebVTT subtitle format with participant names. + + Format: Standard WebVTT with voice tags using participant names. + Example: + WEBVTT + + 00:00:00.000 --> 00:00:05.000 + Hello everyone + """ + + transcript_format: Literal["webvtt-named"] = "webvtt-named" + transcript: str + + +class GetTranscriptWithJSON(GetTranscriptWithParticipants): + """ + Transcript response as structured JSON segments. + + Format: Array of segment objects with speaker info, text, and timing. 
+ Example: + [ + { + "speaker": 0, + "speaker_name": "John Smith", + "text": "Hello everyone", + "start": 0.0, + "end": 5.0 + } + ] + """ + + transcript_format: Literal["json"] = "json" + transcript: list[TranscriptSegment] + + +GetTranscript = Annotated[ + GetTranscriptWithText + | GetTranscriptWithTextTimestamped + | GetTranscriptWithWebVTTNamed + | GetTranscriptWithJSON, + Discriminator("transcript_format"), +] + + class CreateTranscript(BaseModel): name: str source_language: str = Field("en") @@ -228,7 +316,7 @@ async def transcripts_search( ) -@router.post("/transcripts", response_model=GetTranscript) +@router.post("/transcripts", response_model=GetTranscriptWithParticipants) async def transcripts_create( info: CreateTranscript, user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)], @@ -362,14 +450,72 @@ class GetTranscriptTopicWithWordsPerSpeaker(GetTranscriptTopic): async def transcript_get( transcript_id: str, user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)], + transcript_format: TranscriptFormat = "text", ): user_id = user["sub"] if user else None - return await transcripts_controller.get_by_id_for_http( + transcript = await transcripts_controller.get_by_id_for_http( transcript_id, user_id=user_id ) + base_data = { + "id": transcript.id, + "user_id": transcript.user_id, + "name": transcript.name, + "status": transcript.status, + "locked": transcript.locked, + "duration": transcript.duration, + "title": transcript.title, + "short_summary": transcript.short_summary, + "long_summary": transcript.long_summary, + "created_at": transcript.created_at, + "share_mode": transcript.share_mode, + "source_language": transcript.source_language, + "target_language": transcript.target_language, + "reviewed": transcript.reviewed, + "meeting_id": transcript.meeting_id, + "source_kind": transcript.source_kind, + "room_id": transcript.room_id, + "audio_deleted": transcript.audio_deleted, + "participants": transcript.participants, + } -@router.patch("/transcripts/{transcript_id}", response_model=GetTranscript) + if transcript_format == "text": + return GetTranscriptWithText( + **base_data, + transcript_format="text", + transcript=transcript_to_text(transcript.topics, transcript.participants), + ) + elif transcript_format == "text-timestamped": + return GetTranscriptWithTextTimestamped( + **base_data, + transcript_format="text-timestamped", + transcript=transcript_to_text_timestamped( + transcript.topics, transcript.participants + ), + ) + elif transcript_format == "webvtt-named": + return GetTranscriptWithWebVTTNamed( + **base_data, + transcript_format="webvtt-named", + transcript=topics_to_webvtt_named( + transcript.topics, transcript.participants + ), + ) + elif transcript_format == "json": + return GetTranscriptWithJSON( + **base_data, + transcript_format="json", + transcript=transcript_to_json_segments( + transcript.topics, transcript.participants + ), + ) + else: + assert_never(transcript_format) + + +@router.patch( + "/transcripts/{transcript_id}", response_model=GetTranscriptWithParticipants +) async def transcript_update( transcript_id: str, info: UpdateTranscript, diff --git a/server/reflector/views/transcripts_process.py b/server/reflector/views/transcripts_process.py index 88f11e71..927cc8a9 100644 --- a/server/reflector/views/transcripts_process.py +++ b/server/reflector/views/transcripts_process.py @@ -1,4 +1,4 @@ -from typing import Annotated, Optional +from typing import Annotated, Optional, assert_never from fastapi import APIRouter, 
Depends, HTTPException from pydantic import BaseModel @@ -15,7 +15,6 @@ from reflector.services.transcript_process import ( prepare_transcript_processing, validate_transcript_for_processing, ) -from reflector.utils.match import absurd router = APIRouter() @@ -44,7 +43,7 @@ async def transcript_process( elif isinstance(validation, ValidationOk): pass else: - absurd(validation) + assert_never(validation) config = await prepare_transcript_processing(validation) diff --git a/server/tests/test_transcript_formats.py b/server/tests/test_transcript_formats.py new file mode 100644 index 00000000..62e382fe --- /dev/null +++ b/server/tests/test_transcript_formats.py @@ -0,0 +1,575 @@ +"""Tests for transcript format conversion functionality.""" + +import pytest + +from reflector.db.transcripts import TranscriptParticipant, TranscriptTopic +from reflector.processors.types import Word +from reflector.utils.transcript_formats import ( + format_timestamp_mmss, + get_speaker_name, + topics_to_webvtt_named, + transcript_to_json_segments, + transcript_to_text, + transcript_to_text_timestamped, +) + + +@pytest.mark.asyncio +async def test_get_speaker_name_with_participants(): + """Test speaker name resolution with participants list.""" + participants = [ + TranscriptParticipant(id="1", speaker=0, name="John Smith"), + TranscriptParticipant(id="2", speaker=1, name="Jane Doe"), + ] + + assert get_speaker_name(0, participants) == "John Smith" + assert get_speaker_name(1, participants) == "Jane Doe" + assert get_speaker_name(2, participants) == "Speaker 2" + + +@pytest.mark.asyncio +async def test_get_speaker_name_without_participants(): + """Test speaker name resolution without participants list.""" + assert get_speaker_name(0, None) == "Speaker 0" + assert get_speaker_name(1, None) == "Speaker 1" + assert get_speaker_name(5, []) == "Speaker 5" + + +@pytest.mark.asyncio +async def test_format_timestamp_mmss(): + """Test timestamp formatting to MM:SS.""" + assert format_timestamp_mmss(0) == "00:00" + assert format_timestamp_mmss(5) == "00:05" + assert format_timestamp_mmss(65) == "01:05" + assert format_timestamp_mmss(125.7) == "02:05" + assert format_timestamp_mmss(3661) == "61:01" + + +@pytest.mark.asyncio +async def test_transcript_to_text(): + """Test plain text format conversion.""" + topics = [ + TranscriptTopic( + id="1", + title="Topic 1", + summary="Summary 1", + timestamp=0.0, + words=[ + Word(text="Hello", start=0.0, end=1.0, speaker=0), + Word(text=" world.", start=1.0, end=2.0, speaker=0), + ], + ), + TranscriptTopic( + id="2", + title="Topic 2", + summary="Summary 2", + timestamp=2.0, + words=[ + Word(text="How", start=2.0, end=3.0, speaker=1), + Word(text=" are", start=3.0, end=4.0, speaker=1), + Word(text=" you?", start=4.0, end=5.0, speaker=1), + ], + ), + ] + + participants = [ + TranscriptParticipant(id="1", speaker=0, name="John Smith"), + TranscriptParticipant(id="2", speaker=1, name="Jane Doe"), + ] + + result = transcript_to_text(topics, participants) + lines = result.split("\n") + + assert len(lines) == 2 + assert lines[0] == "John Smith: Hello world." + assert lines[1] == "Jane Doe: How are you?" 
+ + +@pytest.mark.asyncio +async def test_transcript_to_text_timestamped(): + """Test timestamped text format conversion.""" + topics = [ + TranscriptTopic( + id="1", + title="Topic 1", + summary="Summary 1", + timestamp=0.0, + words=[ + Word(text="Hello", start=0.0, end=1.0, speaker=0), + Word(text=" world.", start=1.0, end=2.0, speaker=0), + ], + ), + TranscriptTopic( + id="2", + title="Topic 2", + summary="Summary 2", + timestamp=65.0, + words=[ + Word(text="How", start=65.0, end=66.0, speaker=1), + Word(text=" are", start=66.0, end=67.0, speaker=1), + Word(text=" you?", start=67.0, end=68.0, speaker=1), + ], + ), + ] + + participants = [ + TranscriptParticipant(id="1", speaker=0, name="John Smith"), + TranscriptParticipant(id="2", speaker=1, name="Jane Doe"), + ] + + result = transcript_to_text_timestamped(topics, participants) + lines = result.split("\n") + + assert len(lines) == 2 + assert lines[0] == "[00:00] John Smith: Hello world." + assert lines[1] == "[01:05] Jane Doe: How are you?" + + +@pytest.mark.asyncio +async def test_topics_to_webvtt_named(): + """Test WebVTT format conversion with participant names.""" + topics = [ + TranscriptTopic( + id="1", + title="Topic 1", + summary="Summary 1", + timestamp=0.0, + words=[ + Word(text="Hello", start=0.0, end=1.0, speaker=0), + Word(text=" world.", start=1.0, end=2.0, speaker=0), + ], + ), + ] + + participants = [ + TranscriptParticipant(id="1", speaker=0, name="John Smith"), + ] + + result = topics_to_webvtt_named(topics, participants) + + assert result.startswith("WEBVTT") + assert "" in result + assert "00:00:00.000 --> 00:00:02.000" in result + assert "Hello world." in result + + +@pytest.mark.asyncio +async def test_transcript_to_json_segments(): + """Test JSON segments format conversion.""" + topics = [ + TranscriptTopic( + id="1", + title="Topic 1", + summary="Summary 1", + timestamp=0.0, + words=[ + Word(text="Hello", start=0.0, end=1.0, speaker=0), + Word(text=" world.", start=1.0, end=2.0, speaker=0), + ], + ), + TranscriptTopic( + id="2", + title="Topic 2", + summary="Summary 2", + timestamp=2.0, + words=[ + Word(text="How", start=2.0, end=3.0, speaker=1), + Word(text=" are", start=3.0, end=4.0, speaker=1), + Word(text=" you?", start=4.0, end=5.0, speaker=1), + ], + ), + ] + + participants = [ + TranscriptParticipant(id="1", speaker=0, name="John Smith"), + TranscriptParticipant(id="2", speaker=1, name="Jane Doe"), + ] + + result = transcript_to_json_segments(topics, participants) + + assert len(result) == 2 + assert result[0].speaker == 0 + assert result[0].speaker_name == "John Smith" + assert result[0].text == "Hello world." + assert result[0].start == 0.0 + assert result[0].end == 2.0 + + assert result[1].speaker == 1 + assert result[1].speaker_name == "Jane Doe" + assert result[1].text == "How are you?" 
+ assert result[1].start == 2.0 + assert result[1].end == 5.0 + + +@pytest.mark.asyncio +async def test_transcript_formats_with_empty_topics(): + """Test format conversion with empty topics list.""" + topics = [] + participants = [] + + assert transcript_to_text(topics, participants) == "" + assert transcript_to_text_timestamped(topics, participants) == "" + assert "WEBVTT" in topics_to_webvtt_named(topics, participants) + assert transcript_to_json_segments(topics, participants) == [] + + +@pytest.mark.asyncio +async def test_transcript_formats_with_empty_words(): + """Test format conversion with topics containing no words.""" + topics = [ + TranscriptTopic( + id="1", + title="Topic 1", + summary="Summary 1", + timestamp=0.0, + words=[], + ), + ] + participants = [] + + assert transcript_to_text(topics, participants) == "" + assert transcript_to_text_timestamped(topics, participants) == "" + assert "WEBVTT" in topics_to_webvtt_named(topics, participants) + assert transcript_to_json_segments(topics, participants) == [] + + +@pytest.mark.asyncio +async def test_transcript_formats_with_multiple_speakers(): + """Test format conversion with multiple speaker changes.""" + topics = [ + TranscriptTopic( + id="1", + title="Topic 1", + summary="Summary 1", + timestamp=0.0, + words=[ + Word(text="Hello", start=0.0, end=1.0, speaker=0), + Word(text=" there.", start=1.0, end=2.0, speaker=0), + Word(text="Hi", start=2.0, end=3.0, speaker=1), + Word(text=" back.", start=3.0, end=4.0, speaker=1), + Word(text="Good", start=4.0, end=5.0, speaker=0), + Word(text=" morning.", start=5.0, end=6.0, speaker=0), + ], + ), + ] + + participants = [ + TranscriptParticipant(id="1", speaker=0, name="Alice"), + TranscriptParticipant(id="2", speaker=1, name="Bob"), + ] + + text_result = transcript_to_text(topics, participants) + lines = text_result.split("\n") + assert len(lines) == 3 + assert "Alice: Hello there." in lines[0] + assert "Bob: Hi back." in lines[1] + assert "Alice: Good morning." 
in lines[2] + + json_result = transcript_to_json_segments(topics, participants) + assert len(json_result) == 3 + assert json_result[0].speaker_name == "Alice" + assert json_result[1].speaker_name == "Bob" + assert json_result[2].speaker_name == "Alice" + + +@pytest.mark.asyncio +async def test_transcript_formats_with_overlapping_speakers(): + """Test format conversion when multiple speakers speak at the same time (overlapping timestamps).""" + topics = [ + TranscriptTopic( + id="1", + title="Topic 1", + summary="Summary 1", + timestamp=0.0, + words=[ + Word(text="Hello", start=0.0, end=0.5, speaker=0), + Word(text=" there.", start=0.5, end=1.0, speaker=0), + # Speaker 1 overlaps with speaker 0 at 0.5-1.0 + Word(text="I'm", start=0.5, end=1.0, speaker=1), + Word(text=" good.", start=1.0, end=1.5, speaker=1), + ], + ), + ] + + participants = [ + TranscriptParticipant(id="1", speaker=0, name="Alice"), + TranscriptParticipant(id="2", speaker=1, name="Bob"), + ] + + text_result = transcript_to_text(topics, participants) + lines = text_result.split("\n") + assert len(lines) >= 2 + assert any("Alice:" in line for line in lines) + assert any("Bob:" in line for line in lines) + + timestamped_result = transcript_to_text_timestamped(topics, participants) + timestamped_lines = timestamped_result.split("\n") + assert len(timestamped_lines) >= 2 + assert any("Alice:" in line for line in timestamped_lines) + assert any("Bob:" in line for line in timestamped_lines) + assert any("[00:00]" in line for line in timestamped_lines) + + webvtt_result = topics_to_webvtt_named(topics, participants) + expected_webvtt = """WEBVTT + +00:00:00.000 --> 00:00:01.000 +Hello there. + +00:00:00.500 --> 00:00:01.500 +I'm good. +""" + assert webvtt_result == expected_webvtt + + segments = transcript_to_json_segments(topics, participants) + assert len(segments) >= 2 + speakers = {seg.speaker for seg in segments} + assert 0 in speakers and 1 in speakers + + alice_seg = next(seg for seg in segments if seg.speaker == 0) + bob_seg = next(seg for seg in segments if seg.speaker == 1) + + # Verify timestamps overlap: Alice (0.0-1.0) and Bob (0.5-1.5) overlap at 0.5-1.0 + assert alice_seg.start < bob_seg.end, "Alice segment should start before Bob ends" + assert bob_seg.start < alice_seg.end, "Bob segment should start before Alice ends" + + overlap_start = max(alice_seg.start, bob_seg.start) + overlap_end = min(alice_seg.end, bob_seg.end) + assert ( + overlap_start < overlap_end + ), f"Segments should overlap between {overlap_start} and {overlap_end}" + + +@pytest.mark.asyncio +async def test_api_transcript_format_text(client): + """Test GET /transcripts/{id} with transcript_format=text.""" + response = await client.post("/transcripts", json={"name": "Test transcript"}) + assert response.status_code == 200 + tid = response.json()["id"] + + from reflector.db.transcripts import ( + TranscriptParticipant, + TranscriptTopic, + transcripts_controller, + ) + from reflector.processors.types import Word + + transcript = await transcripts_controller.get_by_id(tid) + + await transcripts_controller.update( + transcript, + { + "participants": [ + TranscriptParticipant( + id="1", speaker=0, name="John Smith" + ).model_dump(), + TranscriptParticipant(id="2", speaker=1, name="Jane Doe").model_dump(), + ] + }, + ) + + await transcripts_controller.upsert_topic( + transcript, + TranscriptTopic( + title="Topic 1", + summary="Summary 1", + timestamp=0, + words=[ + Word(text="Hello", start=0, end=1, speaker=0), + Word(text=" world.", start=1, end=2, 
speaker=0), + ], + ), + ) + + response = await client.get(f"/transcripts/{tid}?transcript_format=text") + assert response.status_code == 200 + data = response.json() + + assert data["transcript_format"] == "text" + assert "transcript" in data + assert "John Smith: Hello world." in data["transcript"] + + +@pytest.mark.asyncio +async def test_api_transcript_format_text_timestamped(client): + """Test GET /transcripts/{id} with transcript_format=text-timestamped.""" + response = await client.post("/transcripts", json={"name": "Test transcript"}) + assert response.status_code == 200 + tid = response.json()["id"] + + from reflector.db.transcripts import ( + TranscriptParticipant, + TranscriptTopic, + transcripts_controller, + ) + from reflector.processors.types import Word + + transcript = await transcripts_controller.get_by_id(tid) + + await transcripts_controller.update( + transcript, + { + "participants": [ + TranscriptParticipant( + id="1", speaker=0, name="John Smith" + ).model_dump(), + ] + }, + ) + + await transcripts_controller.upsert_topic( + transcript, + TranscriptTopic( + title="Topic 1", + summary="Summary 1", + timestamp=0, + words=[ + Word(text="Hello", start=65, end=66, speaker=0), + Word(text=" world.", start=66, end=67, speaker=0), + ], + ), + ) + + response = await client.get( + f"/transcripts/{tid}?transcript_format=text-timestamped" + ) + assert response.status_code == 200 + data = response.json() + + assert data["transcript_format"] == "text-timestamped" + assert "transcript" in data + assert "[01:05] John Smith: Hello world." in data["transcript"] + + +@pytest.mark.asyncio +async def test_api_transcript_format_webvtt_named(client): + """Test GET /transcripts/{id} with transcript_format=webvtt-named.""" + response = await client.post("/transcripts", json={"name": "Test transcript"}) + assert response.status_code == 200 + tid = response.json()["id"] + + from reflector.db.transcripts import ( + TranscriptParticipant, + TranscriptTopic, + transcripts_controller, + ) + from reflector.processors.types import Word + + transcript = await transcripts_controller.get_by_id(tid) + + await transcripts_controller.update( + transcript, + { + "participants": [ + TranscriptParticipant( + id="1", speaker=0, name="John Smith" + ).model_dump(), + ] + }, + ) + + await transcripts_controller.upsert_topic( + transcript, + TranscriptTopic( + title="Topic 1", + summary="Summary 1", + timestamp=0, + words=[ + Word(text="Hello", start=0, end=1, speaker=0), + Word(text=" world.", start=1, end=2, speaker=0), + ], + ), + ) + + response = await client.get(f"/transcripts/{tid}?transcript_format=webvtt-named") + assert response.status_code == 200 + data = response.json() + + assert data["transcript_format"] == "webvtt-named" + assert "transcript" in data + assert "WEBVTT" in data["transcript"] + assert "" in data["transcript"] + + +@pytest.mark.asyncio +async def test_api_transcript_format_json(client): + """Test GET /transcripts/{id} with transcript_format=json.""" + response = await client.post("/transcripts", json={"name": "Test transcript"}) + assert response.status_code == 200 + tid = response.json()["id"] + + from reflector.db.transcripts import ( + TranscriptParticipant, + TranscriptTopic, + transcripts_controller, + ) + from reflector.processors.types import Word + + transcript = await transcripts_controller.get_by_id(tid) + + await transcripts_controller.update( + transcript, + { + "participants": [ + TranscriptParticipant( + id="1", speaker=0, name="John Smith" + ).model_dump(), + ] + }, + ) + + 
await transcripts_controller.upsert_topic( + transcript, + TranscriptTopic( + title="Topic 1", + summary="Summary 1", + timestamp=0, + words=[ + Word(text="Hello", start=0, end=1, speaker=0), + Word(text=" world.", start=1, end=2, speaker=0), + ], + ), + ) + + response = await client.get(f"/transcripts/{tid}?transcript_format=json") + assert response.status_code == 200 + data = response.json() + + assert data["transcript_format"] == "json" + assert "transcript" in data + assert isinstance(data["transcript"], list) + assert len(data["transcript"]) == 1 + assert data["transcript"][0]["speaker"] == 0 + assert data["transcript"][0]["speaker_name"] == "John Smith" + assert data["transcript"][0]["text"] == "Hello world." + + +@pytest.mark.asyncio +async def test_api_transcript_format_default_is_text(client): + """Test GET /transcripts/{id} defaults to text format.""" + response = await client.post("/transcripts", json={"name": "Test transcript"}) + assert response.status_code == 200 + tid = response.json()["id"] + + from reflector.db.transcripts import TranscriptTopic, transcripts_controller + from reflector.processors.types import Word + + transcript = await transcripts_controller.get_by_id(tid) + + await transcripts_controller.upsert_topic( + transcript, + TranscriptTopic( + title="Topic 1", + summary="Summary 1", + timestamp=0, + words=[ + Word(text="Hello", start=0, end=1, speaker=0), + ], + ), + ) + + response = await client.get(f"/transcripts/{tid}") + assert response.status_code == 200 + data = response.json() + + assert data["transcript_format"] == "text" + assert "transcript" in data diff --git a/www/app/(app)/transcripts/shareCopy.tsx b/www/app/(app)/transcripts/shareCopy.tsx index bdbff5f4..e18b5ab7 100644 --- a/www/app/(app)/transcripts/shareCopy.tsx +++ b/www/app/(app)/transcripts/shareCopy.tsx @@ -1,14 +1,16 @@ import { useState } from "react"; -import type { components } from "../../reflector-api"; -type GetTranscript = components["schemas"]["GetTranscript"]; +import type { components, operations } from "../../reflector-api"; +type GetTranscriptWithParticipants = + components["schemas"]["GetTranscriptWithParticipants"]; type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"]; -import { Button, BoxProps, Box } from "@chakra-ui/react"; -import { buildTranscriptWithTopics } from "./buildTranscriptWithTopics"; -import { useTranscriptParticipants } from "../../lib/apiHooks"; +import { Button, BoxProps, Box, Menu, Text } from "@chakra-ui/react"; +import { LuChevronDown } from "react-icons/lu"; +import { client } from "../../lib/apiClient"; +import { toaster } from "../../components/ui/toaster"; type ShareCopyProps = { finalSummaryElement: HTMLDivElement | null; - transcript: GetTranscript; + transcript: GetTranscriptWithParticipants; topics: GetTranscriptTopic[]; }; @@ -20,11 +22,33 @@ export default function ShareCopy({ }: ShareCopyProps & BoxProps) { const [isCopiedSummary, setIsCopiedSummary] = useState(false); const [isCopiedTranscript, setIsCopiedTranscript] = useState(false); - const participantsQuery = useTranscriptParticipants(transcript?.id || null); + const [isCopying, setIsCopying] = useState(false); + + type ApiTranscriptFormat = NonNullable< + operations["v1_transcript_get"]["parameters"]["query"] + >["transcript_format"]; + const TRANSCRIPT_FORMATS = [ + "text", + "text-timestamped", + "webvtt-named", + "json", + ] as const satisfies ApiTranscriptFormat[]; + type TranscriptFormat = (typeof TRANSCRIPT_FORMATS)[number]; + + const TRANSCRIPT_FORMAT_LABELS: { [k 
in TranscriptFormat]: string } = { + text: "Plain text", + "text-timestamped": "Text + timestamps", + "webvtt-named": "WebVTT (named)", + json: "JSON", + }; + + const formatOptions = TRANSCRIPT_FORMATS.map((f) => ({ + value: f, + label: TRANSCRIPT_FORMAT_LABELS[f], + })); const onCopySummaryClick = () => { const text_to_copy = finalSummaryElement?.innerText; - if (text_to_copy) { navigator.clipboard.writeText(text_to_copy).then(() => { setIsCopiedSummary(true); @@ -34,27 +58,91 @@ export default function ShareCopy({ } }; - const onCopyTranscriptClick = () => { - const text_to_copy = - buildTranscriptWithTopics( - topics || [], - participantsQuery?.data || null, - transcript?.title || null, - ) || ""; + const onCopyTranscriptFormatClick = async (format: TranscriptFormat) => { + try { + setIsCopying(true); + const { data, error } = await client.GET( + "/v1/transcripts/{transcript_id}", + { + params: { + path: { transcript_id: transcript.id }, + query: { transcript_format: format }, + }, + }, + ); + if (error) { + console.error("Failed to copy transcript:", error); + toaster.create({ + duration: 3000, + render: () => ( + + Error + Failed to fetch transcript + + ), + }); + return; + } - text_to_copy && - navigator.clipboard.writeText(text_to_copy).then(() => { + const copiedText = + format === "json" + ? JSON.stringify(data?.transcript ?? {}, null, 2) + : String(data?.transcript ?? ""); + + if (copiedText) { + await navigator.clipboard.writeText(copiedText); setIsCopiedTranscript(true); - // Reset the copied state after 2 seconds setTimeout(() => setIsCopiedTranscript(false), 2000); + } + } catch (e) { + console.error("Failed to copy transcript:", e); + toaster.create({ + duration: 3000, + render: () => ( + + Error + Failed to copy transcript + + ), }); + } finally { + setIsCopying(false); + } }; return ( - + + + + + + + {formatOptions.map((opt) => ( + onCopyTranscriptFormatClick(opt.value)} + > + {opt.label} + + ))} + + + diff --git a/www/app/lib/authBackend.ts b/www/app/lib/authBackend.ts index 7a8fa433..c28ee224 100644 --- a/www/app/lib/authBackend.ts +++ b/www/app/lib/authBackend.ts @@ -32,6 +32,11 @@ async function getUserId(accessToken: string): Promise { }); if (!response.ok) { + try { + console.error(await response.text()); + } catch (e) { + console.error("Failed to parse error response", e); + } return null; } diff --git a/www/app/reflector-api.d.ts b/www/app/reflector-api.d.ts index 9b9582ba..4aa6ee36 100644 --- a/www/app/reflector-api.d.ts +++ b/www/app/reflector-api.d.ts @@ -696,7 +696,7 @@ export interface paths { patch?: never; trace?: never; }; - "/v1/webhook": { + "/v1/daily/webhook": { parameters: { query?: never; header?: never; @@ -708,6 +708,27 @@ export interface paths { /** * Webhook * @description Handle Daily webhook events. + * + * Example webhook payload: + * { + * "version": "1.0.0", + * "type": "recording.ready-to-download", + * "id": "rec-rtd-c3df927c-f738-4471-a2b7-066fa7e95a6b-1692124192", + * "payload": { + * "recording_id": "08fa0b24-9220-44c5-846c-3f116cf8e738", + * "room_name": "Xcm97xRZ08b2dePKb78g", + * "start_ts": 1692124183, + * "status": "finished", + * "max_participants": 1, + * "duration": 9, + * "share_token": "ntDCL5k98Ulq", #gitleaks:allow + * "s3_key": "api-test-1j8fizhzd30c/Xcm97xRZ08b2dePKb78g/1692124183028" + * }, + * "event_ts": 1692124192 + * } + * + * Daily.co circuit-breaker: After 3+ failed responses (4xx/5xx), webhook + * state→FAILED, stops sending events. 
Reset: scripts/recreate_daily_webhook.py */ post: operations["v1_webhook"]; delete?: never; @@ -899,81 +920,11 @@ export interface components { target_language: string; source_kind?: components["schemas"]["SourceKind"] | null; }; - /** - * DailyWebhookEvent - * @description Daily webhook event structure. - */ - DailyWebhookEvent: { - /** Type */ - type: string; - /** Id */ - id: string; - /** Ts */ - ts: number; - /** Data */ - data: { - [key: string]: unknown; - }; - }; /** DeletionStatus */ DeletionStatus: { /** Status */ status: string; }; - /** GetTranscript */ - GetTranscript: { - /** Id */ - id: string; - /** User Id */ - user_id: string | null; - /** Name */ - name: string; - /** - * Status - * @enum {string} - */ - status: - | "idle" - | "uploaded" - | "recording" - | "processing" - | "error" - | "ended"; - /** Locked */ - locked: boolean; - /** Duration */ - duration: number; - /** Title */ - title: string | null; - /** Short Summary */ - short_summary: string | null; - /** Long Summary */ - long_summary: string | null; - /** Created At */ - created_at: string; - /** - * Share Mode - * @default private - */ - share_mode: string; - /** Source Language */ - source_language: string | null; - /** Target Language */ - target_language: string | null; - /** Reviewed */ - reviewed: boolean; - /** Meeting Id */ - meeting_id: string | null; - source_kind: components["schemas"]["SourceKind"]; - /** Room Id */ - room_id?: string | null; - /** Room Name */ - room_name?: string | null; - /** Audio Deleted */ - audio_deleted?: boolean | null; - /** Participants */ - participants: components["schemas"]["TranscriptParticipant"][] | null; - }; /** GetTranscriptMinimal */ GetTranscriptMinimal: { /** Id */ @@ -1105,6 +1056,345 @@ export interface components { */ words_per_speaker: components["schemas"]["SpeakerWords"][]; }; + /** + * GetTranscriptWithJSON + * @description Transcript response as structured JSON segments. + * + * Format: Array of segment objects with speaker info, text, and timing. 
+ * Example: + * [ + * { + * "speaker": 0, + * "speaker_name": "John Smith", + * "text": "Hello everyone", + * "start": 0.0, + * "end": 5.0 + * } + * ] + */ + GetTranscriptWithJSON: { + /** Id */ + id: string; + /** User Id */ + user_id: string | null; + /** Name */ + name: string; + /** + * Status + * @enum {string} + */ + status: + | "idle" + | "uploaded" + | "recording" + | "processing" + | "error" + | "ended"; + /** Locked */ + locked: boolean; + /** Duration */ + duration: number; + /** Title */ + title: string | null; + /** Short Summary */ + short_summary: string | null; + /** Long Summary */ + long_summary: string | null; + /** Created At */ + created_at: string; + /** + * Share Mode + * @default private + */ + share_mode: string; + /** Source Language */ + source_language: string | null; + /** Target Language */ + target_language: string | null; + /** Reviewed */ + reviewed: boolean; + /** Meeting Id */ + meeting_id: string | null; + source_kind: components["schemas"]["SourceKind"]; + /** Room Id */ + room_id?: string | null; + /** Room Name */ + room_name?: string | null; + /** Audio Deleted */ + audio_deleted?: boolean | null; + /** Participants */ + participants: components["schemas"]["TranscriptParticipant"][] | null; + /** + * @description discriminator enum property added by openapi-typescript + * @enum {string} + */ + transcript_format: "json"; + /** Transcript */ + transcript: components["schemas"]["TranscriptSegment"][]; + }; + /** GetTranscriptWithParticipants */ + GetTranscriptWithParticipants: { + /** Id */ + id: string; + /** User Id */ + user_id: string | null; + /** Name */ + name: string; + /** + * Status + * @enum {string} + */ + status: + | "idle" + | "uploaded" + | "recording" + | "processing" + | "error" + | "ended"; + /** Locked */ + locked: boolean; + /** Duration */ + duration: number; + /** Title */ + title: string | null; + /** Short Summary */ + short_summary: string | null; + /** Long Summary */ + long_summary: string | null; + /** Created At */ + created_at: string; + /** + * Share Mode + * @default private + */ + share_mode: string; + /** Source Language */ + source_language: string | null; + /** Target Language */ + target_language: string | null; + /** Reviewed */ + reviewed: boolean; + /** Meeting Id */ + meeting_id: string | null; + source_kind: components["schemas"]["SourceKind"]; + /** Room Id */ + room_id?: string | null; + /** Room Name */ + room_name?: string | null; + /** Audio Deleted */ + audio_deleted?: boolean | null; + /** Participants */ + participants: components["schemas"]["TranscriptParticipant"][] | null; + }; + /** + * GetTranscriptWithText + * @description Transcript response with plain text format. + * + * Format: Speaker names followed by their dialogue, one line per segment. 
+ * Example: + * John Smith: Hello everyone + * Jane Doe: Hi there + */ + GetTranscriptWithText: { + /** Id */ + id: string; + /** User Id */ + user_id: string | null; + /** Name */ + name: string; + /** + * Status + * @enum {string} + */ + status: + | "idle" + | "uploaded" + | "recording" + | "processing" + | "error" + | "ended"; + /** Locked */ + locked: boolean; + /** Duration */ + duration: number; + /** Title */ + title: string | null; + /** Short Summary */ + short_summary: string | null; + /** Long Summary */ + long_summary: string | null; + /** Created At */ + created_at: string; + /** + * Share Mode + * @default private + */ + share_mode: string; + /** Source Language */ + source_language: string | null; + /** Target Language */ + target_language: string | null; + /** Reviewed */ + reviewed: boolean; + /** Meeting Id */ + meeting_id: string | null; + source_kind: components["schemas"]["SourceKind"]; + /** Room Id */ + room_id?: string | null; + /** Room Name */ + room_name?: string | null; + /** Audio Deleted */ + audio_deleted?: boolean | null; + /** Participants */ + participants: components["schemas"]["TranscriptParticipant"][] | null; + /** + * @description discriminator enum property added by openapi-typescript + * @enum {string} + */ + transcript_format: "text"; + /** Transcript */ + transcript: string; + }; + /** + * GetTranscriptWithTextTimestamped + * @description Transcript response with timestamped text format. + * + * Format: [MM:SS] timestamp prefix before each speaker and dialogue. + * Example: + * [00:00] John Smith: Hello everyone + * [00:05] Jane Doe: Hi there + */ + GetTranscriptWithTextTimestamped: { + /** Id */ + id: string; + /** User Id */ + user_id: string | null; + /** Name */ + name: string; + /** + * Status + * @enum {string} + */ + status: + | "idle" + | "uploaded" + | "recording" + | "processing" + | "error" + | "ended"; + /** Locked */ + locked: boolean; + /** Duration */ + duration: number; + /** Title */ + title: string | null; + /** Short Summary */ + short_summary: string | null; + /** Long Summary */ + long_summary: string | null; + /** Created At */ + created_at: string; + /** + * Share Mode + * @default private + */ + share_mode: string; + /** Source Language */ + source_language: string | null; + /** Target Language */ + target_language: string | null; + /** Reviewed */ + reviewed: boolean; + /** Meeting Id */ + meeting_id: string | null; + source_kind: components["schemas"]["SourceKind"]; + /** Room Id */ + room_id?: string | null; + /** Room Name */ + room_name?: string | null; + /** Audio Deleted */ + audio_deleted?: boolean | null; + /** Participants */ + participants: components["schemas"]["TranscriptParticipant"][] | null; + /** + * @description discriminator enum property added by openapi-typescript + * @enum {string} + */ + transcript_format: "text-timestamped"; + /** Transcript */ + transcript: string; + }; + /** + * GetTranscriptWithWebVTTNamed + * @description Transcript response in WebVTT subtitle format with participant names. + * + * Format: Standard WebVTT with voice tags using participant names. 
+ * Example: + * WEBVTT + * + * 00:00:00.000 --> 00:00:05.000 + * Hello everyone + */ + GetTranscriptWithWebVTTNamed: { + /** Id */ + id: string; + /** User Id */ + user_id: string | null; + /** Name */ + name: string; + /** + * Status + * @enum {string} + */ + status: + | "idle" + | "uploaded" + | "recording" + | "processing" + | "error" + | "ended"; + /** Locked */ + locked: boolean; + /** Duration */ + duration: number; + /** Title */ + title: string | null; + /** Short Summary */ + short_summary: string | null; + /** Long Summary */ + long_summary: string | null; + /** Created At */ + created_at: string; + /** + * Share Mode + * @default private + */ + share_mode: string; + /** Source Language */ + source_language: string | null; + /** Target Language */ + target_language: string | null; + /** Reviewed */ + reviewed: boolean; + /** Meeting Id */ + meeting_id: string | null; + source_kind: components["schemas"]["SourceKind"]; + /** Room Id */ + room_id?: string | null; + /** Room Name */ + room_name?: string | null; + /** Audio Deleted */ + audio_deleted?: boolean | null; + /** Participants */ + participants: components["schemas"]["TranscriptParticipant"][] | null; + /** + * @description discriminator enum property added by openapi-typescript + * @enum {string} + */ + transcript_format: "webvtt-named"; + /** Transcript */ + transcript: string; + }; /** HTTPValidationError */ HTTPValidationError: { /** Detail */ @@ -1233,7 +1523,6 @@ export interface components { } | null; /** * Platform - * @default whereby * @enum {string} */ platform: "whereby" | "daily"; @@ -1325,7 +1614,6 @@ export interface components { ics_last_etag?: string | null; /** * Platform - * @default whereby * @enum {string} */ platform: "whereby" | "daily"; @@ -1377,7 +1665,6 @@ export interface components { ics_last_etag?: string | null; /** * Platform - * @default whereby * @enum {string} */ platform: "whereby" | "daily"; @@ -1523,6 +1810,24 @@ export interface components { speaker: number | null; /** Name */ name: string; + /** User Id */ + user_id?: string | null; + }; + /** + * TranscriptSegment + * @description A single transcript segment with speaker and timing information. 
+ */ + TranscriptSegment: { + /** Speaker */ + speaker: number; + /** Speaker Name */ + speaker_name: string; + /** Text */ + text: string; + /** Start */ + start: number; + /** End */ + end: number; }; /** UpdateParticipant */ UpdateParticipant: { @@ -2311,7 +2616,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["GetTranscript"]; + "application/json": components["schemas"]["GetTranscriptWithParticipants"]; }; }; /** @description Validation Error */ @@ -2369,7 +2674,13 @@ export interface operations { }; v1_transcript_get: { parameters: { - query?: never; + query?: { + transcript_format?: + | "text" + | "text-timestamped" + | "webvtt-named" + | "json"; + }; header?: never; path: { transcript_id: string; @@ -2384,7 +2695,11 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["GetTranscript"]; + "application/json": + | components["schemas"]["GetTranscriptWithText"] + | components["schemas"]["GetTranscriptWithTextTimestamped"] + | components["schemas"]["GetTranscriptWithWebVTTNamed"] + | components["schemas"]["GetTranscriptWithJSON"]; }; }; /** @description Validation Error */ @@ -2450,7 +2765,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["GetTranscript"]; + "application/json": components["schemas"]["GetTranscriptWithParticipants"]; }; }; /** @description Validation Error */ @@ -3256,11 +3571,7 @@ export interface operations { path?: never; cookie?: never; }; - requestBody: { - content: { - "application/json": components["schemas"]["DailyWebhookEvent"]; - }; - }; + requestBody?: never; responses: { /** @description Successful Response */ 200: { @@ -3271,15 +3582,6 @@ export interface operations { "application/json": unknown; }; }; - /** @description Validation Error */ - 422: { - headers: { - [name: string]: unknown; - }; - content: { - "application/json": components["schemas"]["HTTPValidationError"]; - }; - }; }; }; }
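
---

For reviewers, a minimal client-side sketch of how the new `transcript_format` query parameter and the discriminated response union might be consumed. This is not part of the diff: the base URL, transcript ID, and use of the `requests` library are illustrative assumptions; the endpoint path, parameter values, and response fields follow the `docs/transcript.md` and `views/transcripts.py` changes above.

```python
"""Illustrative client sketch for GET /v1/transcripts/{id}?transcript_format=...

Assumptions (not part of this PR): the server URL, the transcript ID, and the
use of `requests` -- any HTTP client would work the same way.
"""

import requests

BASE_URL = "https://reflector.example.com"  # hypothetical deployment URL
TRANSCRIPT_ID = "transcript_123"            # placeholder ID from the docs examples


def fetch_transcript(transcript_format: str = "text") -> dict:
    """Fetch a transcript in one of: text, text-timestamped, webvtt-named, json."""
    response = requests.get(
        f"{BASE_URL}/v1/transcripts/{TRANSCRIPT_ID}",
        params={"transcript_format": transcript_format},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    data = fetch_transcript("json")

    # The response is a discriminated union on `transcript_format`:
    # for "json" the `transcript` field is a list of segment objects,
    # for the three text-based formats it is a single string.
    if data["transcript_format"] == "json":
        for segment in data["transcript"]:
            print(
                f'[{segment["start"]:.1f}s] {segment["speaker_name"]}: {segment["text"]}'
            )
    else:
        print(data["transcript"])
```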