mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
* feat: add transcript format parameter to GET endpoint
Add transcript_format query parameter to /v1/transcripts/{id} endpoint
with support for multiple output formats using discriminated unions.
Formats supported:
- text: Plain speaker dialogue (default)
- text-timestamped: Dialogue with [MM:SS] timestamps
- webvtt-named: WebVTT subtitles with participant names
- json: Structured segments with full metadata
Response models use Pydantic discriminated unions with transcript_format
as discriminator field. POST/PATCH endpoints return GetTranscriptWithParticipants
for minimal responses. GET endpoint returns format-specific models.
* Copy transcript format
* Regenerate types
* Fix transcript formats
* Don't throw inside try
* Remove any type
* Toast share copy errors
* transcript_format exhaustiveness and python idiomatic assert_never
* format_timestamp_mmss clear type definition
* Rename seconds_to_timestamp
* Test transcript format with overlapping speakers
* exact match for vtt multispeaker test
---------
Co-authored-by: Sergey Mankovsky <sergey@monadical.com>
Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
64 lines
1.8 KiB
Python
64 lines
1.8 KiB
Python
"""WebVTT utilities for generating subtitle files from transcript data."""
|
|
|
|
from typing import TYPE_CHECKING, Annotated
|
|
|
|
import webvtt
|
|
|
|
from reflector.processors.types import Seconds, Word, words_to_segments
|
|
|
|
if TYPE_CHECKING:
|
|
from reflector.db.transcripts import TranscriptTopic
|
|
|
|
# A plain ``str`` tagged with the "vtt_timestamp" marker. Used in this module
# as the return type of ``seconds_to_timestamp`` (an ``HH:MM:SS.mmm`` string).
VttTimestamp = Annotated[str, "vtt_timestamp"]
|
|
# A plain ``str`` tagged with the "webvtt_str" marker. Used in this module as
# the return type of the functions that return a WebVTT document's content.
WebVTTStr = Annotated[str, "webvtt_str"]
|
|
|
|
|
|
def seconds_to_timestamp(seconds: Seconds) -> VttTimestamp:
    """Format an offset in seconds as a WebVTT timestamp (``HH:MM:SS.mmm``).

    The webvtt library does not provide this conversion, so it is done here.

    The value is rounded to the nearest millisecond once, up front, and then
    split with integer arithmetic. Deriving the millisecond part from
    ``int((seconds % 1) * 1000)`` instead drops a millisecond for many common
    inputs because of binary floating-point representation (for example
    ``12.34`` would render as ``00:00:12.339``).

    Args:
        seconds: Offset in seconds, possibly fractional. Expected to be
            non-negative; this is not validated.

    Returns:
        The timestamp string. Hours are zero-padded to two digits and grow
        beyond two digits for offsets of 100 hours or more.
    """
    # round() on a float returns an int, so everything below is exact.
    total_milliseconds = round(seconds * 1000)
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{milliseconds:03d}"
|
|
|
|
|
|
def words_to_webvtt(words: list[Word]) -> WebVTTStr:
    """Build a WebVTT document from a flat list of words.

    The words are grouped with ``words_to_segments``; every resulting segment
    becomes one caption whose text is prefixed with a ``<v Speaker...>`` voice
    tag built from the segment's speaker.

    Args:
        words: Words to render. An empty list produces a document without
            captions and skips segmentation entirely.

    Returns:
        The content of the assembled WebVTT document.
    """
    document = webvtt.WebVTT()

    for seg in words_to_segments(words) if words else ():
        # The webvtt library does not add voice tags, so build one by hand.
        voiced_text = f"<v Speaker{seg.speaker}>{seg.text.strip()}"
        document.captions.append(
            webvtt.Caption(
                start=seconds_to_timestamp(seg.start),
                end=seconds_to_timestamp(seg.end),
                text=voiced_text,
            )
        )

    return document.content
|
|
|
|
|
|
def topics_to_webvtt(topics: list["TranscriptTopic"]) -> WebVTTStr:
    """Render the words of all given topics as one WebVTT document.

    The ``words`` of each topic are concatenated in topic order and handed to
    ``words_to_webvtt``.

    Args:
        topics: Topics whose ``words`` are rendered. An empty list yields the
            content of an empty WebVTT document.

    Returns:
        The WebVTT content for the concatenated words.

    Raises:
        AssertionError: If a word starts earlier than the word before it
            (only when assertions are enabled).
    """
    if not topics:
        return webvtt.WebVTT().content

    all_words: list[Word] = [word for topic in topics for word in topic.words]

    # Sanity check: start times must be non-decreasing across the whole list.
    for i, word in enumerate(all_words[:-1]):
        assert (
            word.start <= all_words[i + 1].start
        ), f"Words are not in sequence: {all_words[i].text} and {all_words[i + 1].text} are not consecutive: {all_words[i].start} > {all_words[i + 1].start}"

    return words_to_webvtt(all_words)
|