feat: add transcript format parameter to GET endpoint (#709)

* feat: add transcript format parameter to GET endpoint Add transcript_format query parameter to /v1/transcripts/{id} endpoint with support for multiple output formats using discriminated unions. Formats supported: - text: Plain speaker dialogue (default) - text-timestamped: Dialogue with [MM:SS] timestamps - webvtt-named: WebVTT subtitles with participant names - json: Structured segments with full metadata Response models use Pydantic discriminated unions with transcript_format as discriminator field. POST/PATCH endpoints return GetTranscriptWithParticipants for minimal responses. GET endpoint returns format-specific models. * Copy transcript format * Regenerate types * Fix transcript formats * Don't throw inside try * Remove any type * Toast share copy errors * transcript_format exhaustiveness and python idiomatic assert_never * format_timestamp_mmss clear type definition * Rename seconds_to_timestamp * Test transcript format with overlapping speakers * exact match for vtt multispeaker test --------- Co-authored-by: Sergey Mankovsky <sergey@monadical.com> Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-04-19 19:56:58 +00:00 · 2025-11-26 11:51:14 -06:00
parent 3aef926203
commit f6ca07505f
12 changed files with 1625 additions and 138 deletions
--- a/server/reflector/views/transcripts.py
+++ b/server/reflector/views/transcripts.py
@@ -1,11 +1,18 @@
 from datetime import datetime, timedelta, timezone
-from typing import Annotated, Literal, Optional
+from typing import Annotated, Literal, Optional, assert_never

 from fastapi import APIRouter, Depends, HTTPException, Query
 from fastapi_pagination import Page
 from fastapi_pagination.ext.databases import apaginate
 from jose import jwt
-from pydantic import AwareDatetime, BaseModel, Field, constr, field_serializer
+from pydantic import (
+    AwareDatetime,
+    BaseModel,
+    Discriminator,
+    Field,
+    constr,
+    field_serializer,
+)

 import reflector.auth as auth
 from reflector.db import get_database
@@ -31,7 +38,14 @@ from reflector.db.transcripts import (
 )
 from reflector.processors.types import Transcript as ProcessorTranscript
 from reflector.processors.types import Word
+from reflector.schemas.transcript_formats import TranscriptFormat, TranscriptSegment
 from reflector.settings import settings
+from reflector.utils.transcript_formats import (
+    topics_to_webvtt_named,
+    transcript_to_json_segments,
+    transcript_to_text,
+    transcript_to_text_timestamped,
+)
 from reflector.ws_manager import get_ws_manager
 from reflector.zulip import (
    InvalidMessageError,
@@ -88,10 +102,84 @@ class GetTranscriptMinimal(BaseModel):
    audio_deleted: bool | None = None


-class GetTranscript(GetTranscriptMinimal):
+class GetTranscriptWithParticipants(GetTranscriptMinimal):
    participants: list[TranscriptParticipant] | None


+class GetTranscriptWithText(GetTranscriptWithParticipants):
+    """
+    Transcript response with plain text format.
+
+    Format: Speaker names followed by their dialogue, one line per segment.
+    Example:
+        John Smith: Hello everyone
+        Jane Doe: Hi there
+    """
+
+    transcript_format: Literal["text"] = "text"
+    transcript: str
+
+
+class GetTranscriptWithTextTimestamped(GetTranscriptWithParticipants):
+    """
+    Transcript response with timestamped text format.
+
+    Format: [MM:SS] timestamp prefix before each speaker and dialogue.
+    Example:
+        [00:00] John Smith: Hello everyone
+        [00:05] Jane Doe: Hi there
+    """
+
+    transcript_format: Literal["text-timestamped"] = "text-timestamped"
+    transcript: str
+
+
+class GetTranscriptWithWebVTTNamed(GetTranscriptWithParticipants):
+    """
+    Transcript response in WebVTT subtitle format with participant names.
+
+    Format: Standard WebVTT with voice tags using participant names.
+    Example:
+        WEBVTT
+
+        00:00:00.000 --> 00:00:05.000
+        <v John Smith>Hello everyone
+    """
+
+    transcript_format: Literal["webvtt-named"] = "webvtt-named"
+    transcript: str
+
+
+class GetTranscriptWithJSON(GetTranscriptWithParticipants):
+    """
+    Transcript response as structured JSON segments.
+
+    Format: Array of segment objects with speaker info, text, and timing.
+    Example:
+        [
+            {
+                "speaker": 0,
+                "speaker_name": "John Smith",
+                "text": "Hello everyone",
+                "start": 0.0,
+                "end": 5.0
+            }
+        ]
+    """
+
+    transcript_format: Literal["json"] = "json"
+    transcript: list[TranscriptSegment]
+
+
+GetTranscript = Annotated[
+    GetTranscriptWithText
+    | GetTranscriptWithTextTimestamped
+    | GetTranscriptWithWebVTTNamed
+    | GetTranscriptWithJSON,
+    Discriminator("transcript_format"),
+]
+
+
 class CreateTranscript(BaseModel):
    name: str
    source_language: str = Field("en")
@@ -228,7 +316,7 @@ async def transcripts_search(
    )


-@router.post("/transcripts", response_model=GetTranscript)
+@router.post("/transcripts", response_model=GetTranscriptWithParticipants)
 async def transcripts_create(
    info: CreateTranscript,
    user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
@@ -362,14 +450,72 @@ class GetTranscriptTopicWithWordsPerSpeaker(GetTranscriptTopic):
 async def transcript_get(
    transcript_id: str,
    user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
+    transcript_format: TranscriptFormat = "text",
 ):
    user_id = user["sub"] if user else None
-    return await transcripts_controller.get_by_id_for_http(
+    transcript = await transcripts_controller.get_by_id_for_http(
        transcript_id, user_id=user_id
    )

+    base_data = {
+        "id": transcript.id,
+        "user_id": transcript.user_id,
+        "name": transcript.name,
+        "status": transcript.status,
+        "locked": transcript.locked,
+        "duration": transcript.duration,
+        "title": transcript.title,
+        "short_summary": transcript.short_summary,
+        "long_summary": transcript.long_summary,
+        "created_at": transcript.created_at,
+        "share_mode": transcript.share_mode,
+        "source_language": transcript.source_language,
+        "target_language": transcript.target_language,
+        "reviewed": transcript.reviewed,
+        "meeting_id": transcript.meeting_id,
+        "source_kind": transcript.source_kind,
+        "room_id": transcript.room_id,
+        "audio_deleted": transcript.audio_deleted,
+        "participants": transcript.participants,
+    }

-@router.patch("/transcripts/{transcript_id}", response_model=GetTranscript)
+    if transcript_format == "text":
+        return GetTranscriptWithText(
+            **base_data,
+            transcript_format="text",
+            transcript=transcript_to_text(transcript.topics, transcript.participants),
+        )
+    elif transcript_format == "text-timestamped":
+        return GetTranscriptWithTextTimestamped(
+            **base_data,
+            transcript_format="text-timestamped",
+            transcript=transcript_to_text_timestamped(
+                transcript.topics, transcript.participants
+            ),
+        )
+    elif transcript_format == "webvtt-named":
+        return GetTranscriptWithWebVTTNamed(
+            **base_data,
+            transcript_format="webvtt-named",
+            transcript=topics_to_webvtt_named(
+                transcript.topics, transcript.participants
+            ),
+        )
+    elif transcript_format == "json":
+        return GetTranscriptWithJSON(
+            **base_data,
+            transcript_format="json",
+            transcript=transcript_to_json_segments(
+                transcript.topics, transcript.participants
+            ),
+        )
+    else:
+        assert_never(transcript_format)
+
+
+@router.patch(
+    "/transcripts/{transcript_id}", response_model=GetTranscriptWithParticipants
+)
 async def transcript_update(
    transcript_id: str,
    info: UpdateTranscript,
--- a/server/reflector/views/transcripts_process.py
+++ b/server/reflector/views/transcripts_process.py
@@ -1,4 +1,4 @@
-from typing import Annotated, Optional
+from typing import Annotated, Optional, assert_never

 from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel
@@ -15,7 +15,6 @@ from reflector.services.transcript_process import (
    prepare_transcript_processing,
    validate_transcript_for_processing,
 )
-from reflector.utils.match import absurd

 router = APIRouter()

@@ -44,7 +43,7 @@ async def transcript_process(
    elif isinstance(validation, ValidationOk):
        pass
    else:
-        absurd(validation)
+        assert_never(validation)

    config = await prepare_transcript_processing(validation)