Files
reflector/server/reflector/views/transcripts_process.py
Mathieu Virbel f6ca07505f feat: add transcript format parameter to GET endpoint (#709)
* feat: add transcript format parameter to GET endpoint

Add transcript_format query parameter to /v1/transcripts/{id} endpoint
with support for multiple output formats using discriminated unions.

Formats supported:
- text: Plain speaker dialogue (default)
- text-timestamped: Dialogue with [MM:SS] timestamps
- webvtt-named: WebVTT subtitles with participant names
- json: Structured segments with full metadata

Response models use Pydantic discriminated unions with transcript_format
as discriminator field. POST/PATCH endpoints return GetTranscriptWithParticipants
for minimal responses. GET endpoint returns format-specific models.

* Copy transcript format

* Regenerate types

* Fix transcript formats

* Don't throw inside try

* Remove any type

* Toast share copy errors

* transcript_format exhaustiveness and python idiomatic assert_never

* format_timestamp_mmss clear type definition

* Rename seconds_to_timestamp

* Test transcript format with overlapping speakers

* exact match for vtt multispeaker test

---------

Co-authored-by: Sergey Mankovsky <sergey@monadical.com>
Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2025-11-26 18:51:14 +01:00

55 lines
1.7 KiB
Python

from typing import Annotated, Optional, assert_never
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
import reflector.auth as auth
from reflector.db.transcripts import transcripts_controller
from reflector.services.transcript_process import (
ProcessError,
ValidationAlreadyScheduled,
ValidationError,
ValidationLocked,
ValidationOk,
dispatch_transcript_processing,
prepare_transcript_processing,
validate_transcript_for_processing,
)
router = APIRouter()
class ProcessStatus(BaseModel):
status: str
@router.post("/transcripts/{transcript_id}/process")
async def transcript_process(
transcript_id: str,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
) -> ProcessStatus:
user_id = user["sub"] if user else None
transcript = await transcripts_controller.get_by_id_for_http(
transcript_id, user_id=user_id
)
validation = await validate_transcript_for_processing(transcript)
if isinstance(validation, ValidationLocked):
raise HTTPException(status_code=400, detail=validation.detail)
elif isinstance(validation, ValidationError):
raise HTTPException(status_code=400, detail=validation.detail)
elif isinstance(validation, ValidationAlreadyScheduled):
return ProcessStatus(status=validation.detail)
elif isinstance(validation, ValidationOk):
pass
else:
assert_never(validation)
config = await prepare_transcript_processing(validation)
if isinstance(config, ProcessError):
raise HTTPException(status_code=500, detail=config.detail)
else:
dispatch_transcript_processing(config)
return ProcessStatus(status="ok")