mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-03-29 18:36:49 +00:00
feat: add auto-generated captions, speaker-colored progress bar with sync controls, and speaker tooltip to cloud video player (#926)
* feat: webvtt captions inside video with sync controls * feat: highlight speaker timestamp progress bar
This commit is contained in:
committed by
GitHub
parent
e2ba502697
commit
f19113a3cf
@@ -228,6 +228,8 @@ export default function TranscriptDetails(details: TranscriptDetails) {
|
||||
duration={transcript.data?.cloud_video_duration ?? null}
|
||||
expanded={videoExpanded}
|
||||
onClose={() => setVideoExpanded(false)}
|
||||
sourceLanguage={transcript.data?.source_language ?? null}
|
||||
participants={transcript.data?.participants ?? null}
|
||||
/>
|
||||
</GridItem>
|
||||
)}
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
import { useEffect, useState } from "react";
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
import { Box, Flex, Skeleton, Text } from "@chakra-ui/react";
|
||||
import { LuVideo, LuX } from "react-icons/lu";
|
||||
import { LuMinus, LuPlus, LuVideo, LuX } from "react-icons/lu";
|
||||
import { useAuth } from "../../lib/AuthProvider";
|
||||
import { API_URL } from "../../lib/apiClient";
|
||||
import { generateHighContrastColor } from "../../lib/utils";
|
||||
|
||||
type SpeakerInfo = { speaker: number | null; name: string };
|
||||
|
||||
type VideoPlayerProps = {
|
||||
transcriptId: string;
|
||||
duration: number | null;
|
||||
expanded: boolean;
|
||||
onClose: () => void;
|
||||
sourceLanguage?: string | null;
|
||||
participants?: SpeakerInfo[] | null;
|
||||
};
|
||||
|
||||
function formatDuration(seconds: number): string {
|
||||
@@ -20,15 +25,203 @@ function formatDuration(seconds: number): string {
|
||||
return `${m}:${String(s).padStart(2, "0")}`;
|
||||
}
|
||||
|
||||
const VTT_TIMESTAMP_RE =
|
||||
/(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})/g;
|
||||
|
||||
function parseVttTimestamp(ts: string): number {
|
||||
const [h, m, rest] = ts.split(":");
|
||||
const [s, ms] = rest.split(".");
|
||||
return Number(h) * 3600 + Number(m) * 60 + Number(s) + Number(ms) / 1000;
|
||||
}
|
||||
|
||||
function formatVttTimestamp(totalSeconds: number): string {
|
||||
const clamped = Math.max(0, totalSeconds);
|
||||
const h = Math.floor(clamped / 3600);
|
||||
const m = Math.floor((clamped % 3600) / 60);
|
||||
const s = Math.floor(clamped % 60);
|
||||
const ms = Math.round((clamped % 1) * 1000);
|
||||
return `${String(h).padStart(2, "0")}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(ms).padStart(3, "0")}`;
|
||||
}
|
||||
|
||||
function shiftVttTimestamps(vttContent: string, offsetSeconds: number): string {
|
||||
if (offsetSeconds === 0) return vttContent;
|
||||
return vttContent.replace(
|
||||
VTT_TIMESTAMP_RE,
|
||||
(_match, start: string, end: string) => {
|
||||
const newStart = formatVttTimestamp(
|
||||
parseVttTimestamp(start) + offsetSeconds,
|
||||
);
|
||||
const newEnd = formatVttTimestamp(parseVttTimestamp(end) + offsetSeconds);
|
||||
return `${newStart} --> ${newEnd}`;
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
type VttSegment = { start: number; end: number; speaker: string };
|
||||
|
||||
const VTT_CUE_RE =
|
||||
/(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})\n<v ([^>]+)>/g;
|
||||
|
||||
function parseVttSegments(vttContent: string): VttSegment[] {
|
||||
const segments: VttSegment[] = [];
|
||||
let match;
|
||||
while ((match = VTT_CUE_RE.exec(vttContent)) !== null) {
|
||||
segments.push({
|
||||
start: parseVttTimestamp(match[1]),
|
||||
end: parseVttTimestamp(match[2]),
|
||||
speaker: match[3],
|
||||
});
|
||||
}
|
||||
return segments;
|
||||
}
|
||||
|
||||
// Same background as TopicSegment so speaker colors match the transcript UI
|
||||
const SPEAKER_COLOR_BG: [number, number, number] = [96, 165, 250];
|
||||
|
||||
function SpeakerProgressBar({
|
||||
segments,
|
||||
videoDuration,
|
||||
currentTime,
|
||||
captionOffset,
|
||||
onSeek,
|
||||
participants,
|
||||
}: {
|
||||
segments: VttSegment[];
|
||||
videoDuration: number;
|
||||
currentTime: number;
|
||||
captionOffset: number;
|
||||
onSeek: (time: number) => void;
|
||||
participants?: SpeakerInfo[] | null;
|
||||
}) {
|
||||
const barRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Build a name→"Speaker N" reverse lookup so colors match TopicSegment
|
||||
const speakerColors = useMemo(() => {
|
||||
const nameToColorKey: Record<string, string> = {};
|
||||
if (participants) {
|
||||
for (const p of participants) {
|
||||
if (p.speaker != null) {
|
||||
nameToColorKey[p.name] = `Speaker ${p.speaker}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
const map: Record<string, string | undefined> = {};
|
||||
for (const seg of segments) {
|
||||
if (!map[seg.speaker]) {
|
||||
const colorKey = nameToColorKey[seg.speaker] ?? seg.speaker;
|
||||
map[seg.speaker] = generateHighContrastColor(
|
||||
colorKey,
|
||||
SPEAKER_COLOR_BG,
|
||||
);
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}, [segments, participants]);
|
||||
|
||||
const activeSpeaker = useMemo(() => {
|
||||
for (const seg of segments) {
|
||||
const adjStart = seg.start + captionOffset;
|
||||
const adjEnd = seg.end + captionOffset;
|
||||
if (currentTime >= adjStart && currentTime < adjEnd) {
|
||||
return seg.speaker;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}, [segments, currentTime, captionOffset]);
|
||||
|
||||
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
|
||||
if (!barRef.current || !videoDuration) return;
|
||||
const rect = barRef.current.getBoundingClientRect();
|
||||
const fraction = Math.max(
|
||||
0,
|
||||
Math.min(1, (e.clientX - rect.left) / rect.width),
|
||||
);
|
||||
onSeek(fraction * videoDuration);
|
||||
};
|
||||
|
||||
const progressPct =
|
||||
videoDuration > 0 ? (currentTime / videoDuration) * 100 : 0;
|
||||
|
||||
return (
|
||||
<Box position="relative" mb={4}>
|
||||
<Box
|
||||
ref={barRef}
|
||||
position="relative"
|
||||
h="8px"
|
||||
bg="gray.700"
|
||||
cursor="pointer"
|
||||
onClick={handleClick}
|
||||
borderBottomRadius="md"
|
||||
overflow="hidden"
|
||||
>
|
||||
{segments.map((seg, i) => {
|
||||
const adjStart = Math.max(0, seg.start + captionOffset);
|
||||
const adjEnd = Math.max(0, seg.end + captionOffset);
|
||||
if (adjEnd <= 0 || adjStart >= videoDuration) return null;
|
||||
const leftPct = (adjStart / videoDuration) * 100;
|
||||
const widthPct = ((adjEnd - adjStart) / videoDuration) * 100;
|
||||
return (
|
||||
<Box
|
||||
key={i}
|
||||
position="absolute"
|
||||
top={0}
|
||||
bottom={0}
|
||||
left={`${leftPct}%`}
|
||||
width={`${widthPct}%`}
|
||||
bg={speakerColors[seg.speaker]}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
{/* Playhead */}
|
||||
<Box
|
||||
position="absolute"
|
||||
top={0}
|
||||
bottom={0}
|
||||
left={`${progressPct}%`}
|
||||
w="2px"
|
||||
bg="white"
|
||||
zIndex={1}
|
||||
pointerEvents="none"
|
||||
/>
|
||||
</Box>
|
||||
{/* Speaker tooltip below the bar */}
|
||||
{activeSpeaker && (
|
||||
<Text
|
||||
position="absolute"
|
||||
top="10px"
|
||||
left={`${progressPct}%`}
|
||||
transform="translateX(-50%)"
|
||||
fontSize="2xs"
|
||||
color={speakerColors[activeSpeaker]}
|
||||
fontWeight="semibold"
|
||||
whiteSpace="nowrap"
|
||||
pointerEvents="none"
|
||||
>
|
||||
{activeSpeaker}
|
||||
</Text>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
export default function VideoPlayer({
|
||||
transcriptId,
|
||||
duration,
|
||||
expanded,
|
||||
onClose,
|
||||
sourceLanguage,
|
||||
participants,
|
||||
}: VideoPlayerProps) {
|
||||
const [videoUrl, setVideoUrl] = useState<string | null>(null);
|
||||
const [rawVtt, setRawVtt] = useState<string | null>(null);
|
||||
const [captionsUrl, setCaptionsUrl] = useState<string | null>(null);
|
||||
const [captionOffset, setCaptionOffset] = useState(0);
|
||||
const [currentTime, setCurrentTime] = useState(0);
|
||||
const [videoDuration, setVideoDuration] = useState(0);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const prevBlobUrl = useRef<string | null>(null);
|
||||
const videoRef = useRef<HTMLVideoElement>(null);
|
||||
const auth = useAuth();
|
||||
const accessToken = auth.status === "authenticated" ? auth.accessToken : null;
|
||||
|
||||
@@ -63,6 +256,99 @@ export default function VideoPlayer({
|
||||
fetchVideoUrl();
|
||||
}, [expanded, transcriptId, accessToken, videoUrl]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!videoUrl || !transcriptId) return;
|
||||
|
||||
let cancelled = false;
|
||||
|
||||
const fetchCaptions = async () => {
|
||||
try {
|
||||
const url = `${API_URL}/v1/transcripts/${transcriptId}?transcript_format=webvtt-named`;
|
||||
const headers: Record<string, string> = {};
|
||||
if (accessToken) {
|
||||
headers["Authorization"] = `Bearer ${accessToken}`;
|
||||
}
|
||||
const resp = await fetch(url, { headers });
|
||||
if (!resp.ok) return;
|
||||
const data = await resp.json();
|
||||
const vttContent = data?.transcript;
|
||||
if (!vttContent || cancelled) return;
|
||||
setRawVtt(vttContent);
|
||||
} catch {
|
||||
// Captions are non-critical — fail silently
|
||||
}
|
||||
};
|
||||
|
||||
fetchCaptions();
|
||||
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [videoUrl, transcriptId, accessToken]);
|
||||
|
||||
// Rebuild blob URL whenever rawVtt or captionOffset changes
|
||||
useEffect(() => {
|
||||
if (!rawVtt) return;
|
||||
|
||||
const shifted = shiftVttTimestamps(rawVtt, captionOffset);
|
||||
const blob = new Blob([shifted], { type: "text/vtt" });
|
||||
const blobUrl = URL.createObjectURL(blob);
|
||||
|
||||
if (prevBlobUrl.current) {
|
||||
URL.revokeObjectURL(prevBlobUrl.current);
|
||||
}
|
||||
prevBlobUrl.current = blobUrl;
|
||||
setCaptionsUrl(blobUrl);
|
||||
|
||||
return () => {
|
||||
URL.revokeObjectURL(blobUrl);
|
||||
prevBlobUrl.current = null;
|
||||
};
|
||||
}, [rawVtt, captionOffset]);
|
||||
|
||||
const adjustOffset = useCallback((delta: number) => {
|
||||
setCaptionOffset((prev) => Math.round((prev + delta) * 10) / 10);
|
||||
}, []);
|
||||
|
||||
const formattedOffset = useMemo(() => {
|
||||
const sign = captionOffset >= 0 ? "+" : "";
|
||||
return `${sign}${captionOffset.toFixed(1)}s`;
|
||||
}, [captionOffset]);
|
||||
|
||||
const segments = useMemo(
|
||||
() => (rawVtt ? parseVttSegments(rawVtt) : []),
|
||||
[rawVtt],
|
||||
);
|
||||
|
||||
// Track video currentTime and duration
|
||||
useEffect(() => {
|
||||
const video = videoRef.current;
|
||||
if (!video) return;
|
||||
|
||||
const onTimeUpdate = () => setCurrentTime(video.currentTime);
|
||||
const onDurationChange = () => {
|
||||
if (video.duration && isFinite(video.duration)) {
|
||||
setVideoDuration(video.duration);
|
||||
}
|
||||
};
|
||||
|
||||
video.addEventListener("timeupdate", onTimeUpdate);
|
||||
video.addEventListener("loadedmetadata", onDurationChange);
|
||||
video.addEventListener("durationchange", onDurationChange);
|
||||
|
||||
return () => {
|
||||
video.removeEventListener("timeupdate", onTimeUpdate);
|
||||
video.removeEventListener("loadedmetadata", onDurationChange);
|
||||
video.removeEventListener("durationchange", onDurationChange);
|
||||
};
|
||||
}, [videoUrl]);
|
||||
|
||||
const handleSeek = useCallback((time: number) => {
|
||||
if (videoRef.current) {
|
||||
videoRef.current.currentTime = time;
|
||||
}
|
||||
}, []);
|
||||
|
||||
if (!expanded) return null;
|
||||
|
||||
if (loading) {
|
||||
@@ -117,22 +403,64 @@ export default function VideoPlayer({
|
||||
</Text>
|
||||
)}
|
||||
</Flex>
|
||||
<Flex
|
||||
align="center"
|
||||
justify="center"
|
||||
borderRadius="full"
|
||||
p={1}
|
||||
cursor="pointer"
|
||||
onClick={onClose}
|
||||
_hover={{ bg: "whiteAlpha.300" }}
|
||||
transition="background 0.15s"
|
||||
>
|
||||
<LuX size={14} color="white" />
|
||||
<Flex align="center" gap={3}>
|
||||
{rawVtt && (
|
||||
<Flex align="center" gap={1}>
|
||||
<Text fontSize="2xs" color="gray.400">
|
||||
CC sync
|
||||
</Text>
|
||||
<Flex
|
||||
align="center"
|
||||
justify="center"
|
||||
borderRadius="sm"
|
||||
p={0.5}
|
||||
cursor="pointer"
|
||||
onClick={() => adjustOffset(-0.5)}
|
||||
_hover={{ bg: "whiteAlpha.300" }}
|
||||
transition="background 0.15s"
|
||||
>
|
||||
<LuMinus size={12} color="white" />
|
||||
</Flex>
|
||||
<Text
|
||||
fontSize="2xs"
|
||||
color="gray.300"
|
||||
fontFamily="mono"
|
||||
minW="3.5em"
|
||||
textAlign="center"
|
||||
>
|
||||
{formattedOffset}
|
||||
</Text>
|
||||
<Flex
|
||||
align="center"
|
||||
justify="center"
|
||||
borderRadius="sm"
|
||||
p={0.5}
|
||||
cursor="pointer"
|
||||
onClick={() => adjustOffset(0.5)}
|
||||
_hover={{ bg: "whiteAlpha.300" }}
|
||||
transition="background 0.15s"
|
||||
>
|
||||
<LuPlus size={12} color="white" />
|
||||
</Flex>
|
||||
</Flex>
|
||||
)}
|
||||
<Flex
|
||||
align="center"
|
||||
justify="center"
|
||||
borderRadius="full"
|
||||
p={1}
|
||||
cursor="pointer"
|
||||
onClick={onClose}
|
||||
_hover={{ bg: "whiteAlpha.300" }}
|
||||
transition="background 0.15s"
|
||||
>
|
||||
<LuX size={14} color="white" />
|
||||
</Flex>
|
||||
</Flex>
|
||||
</Flex>
|
||||
{/* Video element with visible controls */}
|
||||
{/* eslint-disable-next-line jsx-a11y/media-has-caption */}
|
||||
<video
|
||||
ref={videoRef}
|
||||
src={videoUrl}
|
||||
controls
|
||||
autoPlay
|
||||
@@ -147,10 +475,34 @@ export default function VideoPlayer({
|
||||
minHeight: "180px",
|
||||
objectFit: "contain",
|
||||
background: "black",
|
||||
borderBottomLeftRadius: "0.375rem",
|
||||
borderBottomRightRadius: "0.375rem",
|
||||
...(segments.length === 0
|
||||
? {
|
||||
borderBottomLeftRadius: "0.375rem",
|
||||
borderBottomRightRadius: "0.375rem",
|
||||
}
|
||||
: {}),
|
||||
}}
|
||||
/>
|
||||
>
|
||||
{captionsUrl && (
|
||||
<track
|
||||
kind="captions"
|
||||
src={captionsUrl}
|
||||
srcLang={sourceLanguage || "en"}
|
||||
label="Auto-generated captions"
|
||||
default
|
||||
/>
|
||||
)}
|
||||
</video>
|
||||
{segments.length > 0 && videoDuration > 0 && (
|
||||
<SpeakerProgressBar
|
||||
segments={segments}
|
||||
videoDuration={videoDuration}
|
||||
currentTime={currentTime}
|
||||
captionOffset={captionOffset}
|
||||
onSeek={handleSeek}
|
||||
participants={participants}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user