mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-24 06:05:19 +00:00
feat: add auto-generated captions, speaker-colored progress bar with sync controls, and speaker tooltip to cloud video player (#926)
* feat: webvtt captions inside video with sync controls * feat: highlight speaker timestamp progress bar
This commit is contained in:
committed by
GitHub
parent
e2ba502697
commit
f19113a3cf
@@ -228,6 +228,8 @@ export default function TranscriptDetails(details: TranscriptDetails) {
|
|||||||
duration={transcript.data?.cloud_video_duration ?? null}
|
duration={transcript.data?.cloud_video_duration ?? null}
|
||||||
expanded={videoExpanded}
|
expanded={videoExpanded}
|
||||||
onClose={() => setVideoExpanded(false)}
|
onClose={() => setVideoExpanded(false)}
|
||||||
|
sourceLanguage={transcript.data?.source_language ?? null}
|
||||||
|
participants={transcript.data?.participants ?? null}
|
||||||
/>
|
/>
|
||||||
</GridItem>
|
</GridItem>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@@ -1,14 +1,19 @@
|
|||||||
import { useEffect, useState } from "react";
|
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||||
import { Box, Flex, Skeleton, Text } from "@chakra-ui/react";
|
import { Box, Flex, Skeleton, Text } from "@chakra-ui/react";
|
||||||
import { LuVideo, LuX } from "react-icons/lu";
|
import { LuMinus, LuPlus, LuVideo, LuX } from "react-icons/lu";
|
||||||
import { useAuth } from "../../lib/AuthProvider";
|
import { useAuth } from "../../lib/AuthProvider";
|
||||||
import { API_URL } from "../../lib/apiClient";
|
import { API_URL } from "../../lib/apiClient";
|
||||||
|
import { generateHighContrastColor } from "../../lib/utils";
|
||||||
|
|
||||||
|
type SpeakerInfo = { speaker: number | null; name: string };
|
||||||
|
|
||||||
type VideoPlayerProps = {
|
type VideoPlayerProps = {
|
||||||
transcriptId: string;
|
transcriptId: string;
|
||||||
duration: number | null;
|
duration: number | null;
|
||||||
expanded: boolean;
|
expanded: boolean;
|
||||||
onClose: () => void;
|
onClose: () => void;
|
||||||
|
sourceLanguage?: string | null;
|
||||||
|
participants?: SpeakerInfo[] | null;
|
||||||
};
|
};
|
||||||
|
|
||||||
function formatDuration(seconds: number): string {
|
function formatDuration(seconds: number): string {
|
||||||
@@ -20,15 +25,203 @@ function formatDuration(seconds: number): string {
|
|||||||
return `${m}:${String(s).padStart(2, "0")}`;
|
return `${m}:${String(s).padStart(2, "0")}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const VTT_TIMESTAMP_RE =
|
||||||
|
/(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})/g;
|
||||||
|
|
||||||
|
/**
 * Converts a WebVTT timestamp written as `HH:MM:SS.mmm` into seconds.
 *
 * @param ts - Timestamp text with colon-separated hours, minutes and seconds,
 *   where the seconds field carries a dot-separated millisecond fraction.
 * @returns The timestamp expressed as a (possibly fractional) number of seconds.
 */
function parseVttTimestamp(ts: string): number {
  const colonParts = ts.split(":");
  const hours = Number(colonParts[0]);
  const minutes = Number(colonParts[1]);

  // The last colon-separated field holds "SS.mmm".
  const [wholeSeconds, millis] = colonParts[2].split(".");

  // Sum the integral parts first, then add the fractional part last.
  const integralSeconds = hours * 3600 + minutes * 60 + Number(wholeSeconds);
  return integralSeconds + Number(millis) / 1000;
}
|
||||||
|
|
||||||
|
/**
 * Formats a number of seconds as a WebVTT timestamp (`HH:MM:SS.mmm`).
 *
 * Negative inputs are clamped to zero. The value is rounded to whole
 * milliseconds once, up front, and every field is derived from that integer.
 * Rounding only the fractional part could produce `1000` for the millisecond
 * field (e.g. 0.9996 s -> "00:00:00.1000") without carrying into the seconds.
 *
 * @param totalSeconds - Time in seconds; may be fractional or negative.
 * @returns The timestamp with two-digit (or wider) hours, two-digit minutes
 *   and seconds, and a three-digit millisecond fraction.
 */
function formatVttTimestamp(totalSeconds: number): string {
  const totalMs = Math.round(Math.max(0, totalSeconds) * 1000);

  const h = Math.floor(totalMs / 3600000);
  const m = Math.floor((totalMs % 3600000) / 60000);
  const s = Math.floor((totalMs % 60000) / 1000);
  const ms = totalMs % 1000;

  const pad = (value: number, width: number): string =>
    String(value).padStart(width, "0");

  return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
}
|
||||||
|
|
||||||
|
/**
 * Returns `vttContent` with every `start --> end` timing pair moved by
 * `offsetSeconds`.
 *
 * Each timestamp matched by `VTT_TIMESTAMP_RE` is parsed, offset, and
 * re-formatted; all other text is left as it was. A zero offset returns the
 * input string unchanged.
 *
 * @param vttContent - WebVTT document text.
 * @param offsetSeconds - Seconds to add to each cue time (negative shifts earlier).
 * @returns The document with rewritten timing lines.
 */
function shiftVttTimestamps(vttContent: string, offsetSeconds: number): string {
  // Nothing to shift: hand the input back untouched.
  if (offsetSeconds === 0) {
    return vttContent;
  }

  const shift = (timestamp: string): string =>
    formatVttTimestamp(parseVttTimestamp(timestamp) + offsetSeconds);

  const rewriteTimingLine = (
    _whole: string,
    cueStart: string,
    cueEnd: string,
  ): string => {
    const shiftedStart = shift(cueStart);
    const shiftedEnd = shift(cueEnd);
    return `${shiftedStart} --> ${shiftedEnd}`;
  };

  return vttContent.replace(VTT_TIMESTAMP_RE, rewriteTimingLine);
}
|
||||||
|
|
||||||
|
type VttSegment = { start: number; end: number; speaker: string };
|
||||||
|
|
||||||
|
/**
 * Matches a cue timing line (`HH:MM:SS.mmm --> HH:MM:SS.mmm`) that is
 * immediately followed by a payload line opening with a voice tag
 * (`<v Speaker Name>`). Captures: 1 = start, 2 = end, 3 = speaker.
 *
 * The line break between the two lines may be LF, CRLF or a lone CR — all
 * three are valid WebVTT line terminators — so CRLF-encoded captions still
 * yield segments.
 */
const VTT_CUE_RE =
  /(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})(?:\r\n?|\n)<v ([^>]+)>/g;

/**
 * Extracts one segment per voiced cue found in a WebVTT document.
 *
 * Cues whose payload does not start with a `<v ...>` tag are not matched and
 * therefore produce no segment.
 *
 * @param vttContent - WebVTT document text.
 * @returns Segments in document order, with times in seconds and the speaker
 *   label exactly as written inside the voice tag.
 */
function parseVttSegments(vttContent: string): VttSegment[] {
  // matchAll iterates over a private copy of the regex, so the shared
  // module-level pattern's lastIndex is never mutated by this call.
  return Array.from(vttContent.matchAll(VTT_CUE_RE), (match) => ({
    start: parseVttTimestamp(match[1]),
    end: parseVttTimestamp(match[2]),
    speaker: match[3],
  }));
}
|
||||||
|
|
||||||
|
// Same background as TopicSegment so speaker colors match the transcript UI
|
||||||
|
const SPEAKER_COLOR_BG: [number, number, number] = [96, 165, 250];
|
||||||
|
|
||||||
|
/**
 * Clickable progress bar drawn under the video: one coloured strip per caption
 * segment (coloured by speaker), a white playhead, and a label below the
 * playhead naming the speaker of the segment that contains `currentTime`.
 *
 * Props:
 * - `segments`: caption segments (times in seconds, un-shifted).
 * - `videoDuration`: duration used to convert times to percentages.
 * - `currentTime`: current playback position in seconds.
 * - `captionOffset`: seconds added to every segment time before use.
 * - `onSeek`: called with the target time (seconds) when the bar is clicked.
 * - `participants`: optional list used to translate a participant name into a
 *   `Speaker N` colour key.
 */
function SpeakerProgressBar({
  segments,
  videoDuration,
  currentTime,
  captionOffset,
  onSeek,
  participants,
}: {
  segments: VttSegment[];
  videoDuration: number;
  currentTime: number;
  captionOffset: number;
  onSeek: (time: number) => void;
  participants?: SpeakerInfo[] | null;
}) {
  const barRef = useRef<HTMLDivElement>(null);

  // Build a name→"Speaker N" reverse lookup so colors match TopicSegment.
  // Maps (not plain objects) are used because speaker labels are free text:
  // a label such as "constructor" or "toString" would otherwise resolve to an
  // inherited Object.prototype member instead of a colour.
  const speakerColors = useMemo(() => {
    const colorKeyByName = new Map<string, string>();
    for (const p of participants ?? []) {
      if (p.speaker != null) {
        colorKeyByName.set(p.name, `Speaker ${p.speaker}`);
      }
    }

    const colors = new Map<string, string | undefined>();
    for (const seg of segments) {
      // Only compute a colour the first time a speaker is seen.
      if (colors.get(seg.speaker)) continue;
      const colorKey = colorKeyByName.get(seg.speaker) ?? seg.speaker;
      colors.set(
        seg.speaker,
        generateHighContrastColor(colorKey, SPEAKER_COLOR_BG),
      );
    }
    return colors;
  }, [segments, participants]);

  // Speaker of the first segment whose shifted [start, end) contains the
  // playback position, or null when no segment does.
  const activeSpeaker = useMemo(() => {
    for (const seg of segments) {
      const adjStart = seg.start + captionOffset;
      const adjEnd = seg.end + captionOffset;
      if (currentTime >= adjStart && currentTime < adjEnd) {
        return seg.speaker;
      }
    }
    return null;
  }, [segments, currentTime, captionOffset]);

  const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
    if (!barRef.current || !videoDuration) return;
    const rect = barRef.current.getBoundingClientRect();
    // A zero-width bar would make the fraction NaN, and a NaN seek target
    // must never reach the caller.
    if (rect.width <= 0) return;
    const fraction = Math.max(
      0,
      Math.min(1, (e.clientX - rect.left) / rect.width),
    );
    onSeek(fraction * videoDuration);
  };

  const progressPct =
    videoDuration > 0 ? (currentTime / videoDuration) * 100 : 0;

  return (
    <Box position="relative" mb={4}>
      <Box
        ref={barRef}
        position="relative"
        h="8px"
        bg="gray.700"
        cursor="pointer"
        onClick={handleClick}
        borderBottomRadius="md"
        overflow="hidden"
      >
        {segments.map((seg, i) => {
          // Shift by the caption offset and keep the strip from starting
          // before the beginning of the bar.
          const adjStart = Math.max(0, seg.start + captionOffset);
          const adjEnd = Math.max(0, seg.end + captionOffset);
          // Entirely before the start or after the end of the video.
          if (adjEnd <= 0 || adjStart >= videoDuration) return null;
          const leftPct = (adjStart / videoDuration) * 100;
          const widthPct = ((adjEnd - adjStart) / videoDuration) * 100;
          return (
            <Box
              key={i}
              position="absolute"
              top={0}
              bottom={0}
              left={`${leftPct}%`}
              width={`${widthPct}%`}
              bg={speakerColors.get(seg.speaker)}
            />
          );
        })}
        {/* Playhead */}
        <Box
          position="absolute"
          top={0}
          bottom={0}
          left={`${progressPct}%`}
          w="2px"
          bg="white"
          zIndex={1}
          pointerEvents="none"
        />
      </Box>
      {/* Speaker tooltip below the bar */}
      {activeSpeaker && (
        <Text
          position="absolute"
          top="10px"
          left={`${progressPct}%`}
          transform="translateX(-50%)"
          fontSize="2xs"
          color={speakerColors.get(activeSpeaker)}
          fontWeight="semibold"
          whiteSpace="nowrap"
          pointerEvents="none"
        >
          {activeSpeaker}
        </Text>
      )}
    </Box>
  );
}
|
||||||
|
|
||||||
export default function VideoPlayer({
|
export default function VideoPlayer({
|
||||||
transcriptId,
|
transcriptId,
|
||||||
duration,
|
duration,
|
||||||
expanded,
|
expanded,
|
||||||
onClose,
|
onClose,
|
||||||
|
sourceLanguage,
|
||||||
|
participants,
|
||||||
}: VideoPlayerProps) {
|
}: VideoPlayerProps) {
|
||||||
const [videoUrl, setVideoUrl] = useState<string | null>(null);
|
const [videoUrl, setVideoUrl] = useState<string | null>(null);
|
||||||
|
const [rawVtt, setRawVtt] = useState<string | null>(null);
|
||||||
|
const [captionsUrl, setCaptionsUrl] = useState<string | null>(null);
|
||||||
|
const [captionOffset, setCaptionOffset] = useState(0);
|
||||||
|
const [currentTime, setCurrentTime] = useState(0);
|
||||||
|
const [videoDuration, setVideoDuration] = useState(0);
|
||||||
const [loading, setLoading] = useState(false);
|
const [loading, setLoading] = useState(false);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const prevBlobUrl = useRef<string | null>(null);
|
||||||
|
const videoRef = useRef<HTMLVideoElement>(null);
|
||||||
const auth = useAuth();
|
const auth = useAuth();
|
||||||
const accessToken = auth.status === "authenticated" ? auth.accessToken : null;
|
const accessToken = auth.status === "authenticated" ? auth.accessToken : null;
|
||||||
|
|
||||||
@@ -63,6 +256,99 @@ export default function VideoPlayer({
|
|||||||
fetchVideoUrl();
|
fetchVideoUrl();
|
||||||
}, [expanded, transcriptId, accessToken, videoUrl]);
|
}, [expanded, transcriptId, accessToken, videoUrl]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!videoUrl || !transcriptId) return;
|
||||||
|
|
||||||
|
let cancelled = false;
|
||||||
|
|
||||||
|
const fetchCaptions = async () => {
|
||||||
|
try {
|
||||||
|
const url = `${API_URL}/v1/transcripts/${transcriptId}?transcript_format=webvtt-named`;
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
if (accessToken) {
|
||||||
|
headers["Authorization"] = `Bearer ${accessToken}`;
|
||||||
|
}
|
||||||
|
const resp = await fetch(url, { headers });
|
||||||
|
if (!resp.ok) return;
|
||||||
|
const data = await resp.json();
|
||||||
|
const vttContent = data?.transcript;
|
||||||
|
if (!vttContent || cancelled) return;
|
||||||
|
setRawVtt(vttContent);
|
||||||
|
} catch {
|
||||||
|
// Captions are non-critical — fail silently
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
fetchCaptions();
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
cancelled = true;
|
||||||
|
};
|
||||||
|
}, [videoUrl, transcriptId, accessToken]);
|
||||||
|
|
||||||
|
// Rebuild blob URL whenever rawVtt or captionOffset changes
|
||||||
|
useEffect(() => {
|
||||||
|
if (!rawVtt) return;
|
||||||
|
|
||||||
|
const shifted = shiftVttTimestamps(rawVtt, captionOffset);
|
||||||
|
const blob = new Blob([shifted], { type: "text/vtt" });
|
||||||
|
const blobUrl = URL.createObjectURL(blob);
|
||||||
|
|
||||||
|
if (prevBlobUrl.current) {
|
||||||
|
URL.revokeObjectURL(prevBlobUrl.current);
|
||||||
|
}
|
||||||
|
prevBlobUrl.current = blobUrl;
|
||||||
|
setCaptionsUrl(blobUrl);
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
URL.revokeObjectURL(blobUrl);
|
||||||
|
prevBlobUrl.current = null;
|
||||||
|
};
|
||||||
|
}, [rawVtt, captionOffset]);
|
||||||
|
|
||||||
|
const adjustOffset = useCallback((delta: number) => {
|
||||||
|
setCaptionOffset((prev) => Math.round((prev + delta) * 10) / 10);
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const formattedOffset = useMemo(() => {
|
||||||
|
const sign = captionOffset >= 0 ? "+" : "";
|
||||||
|
return `${sign}${captionOffset.toFixed(1)}s`;
|
||||||
|
}, [captionOffset]);
|
||||||
|
|
||||||
|
const segments = useMemo(
|
||||||
|
() => (rawVtt ? parseVttSegments(rawVtt) : []),
|
||||||
|
[rawVtt],
|
||||||
|
);
|
||||||
|
|
||||||
|
// Track video currentTime and duration
|
||||||
|
useEffect(() => {
|
||||||
|
const video = videoRef.current;
|
||||||
|
if (!video) return;
|
||||||
|
|
||||||
|
const onTimeUpdate = () => setCurrentTime(video.currentTime);
|
||||||
|
const onDurationChange = () => {
|
||||||
|
if (video.duration && isFinite(video.duration)) {
|
||||||
|
setVideoDuration(video.duration);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
video.addEventListener("timeupdate", onTimeUpdate);
|
||||||
|
video.addEventListener("loadedmetadata", onDurationChange);
|
||||||
|
video.addEventListener("durationchange", onDurationChange);
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
video.removeEventListener("timeupdate", onTimeUpdate);
|
||||||
|
video.removeEventListener("loadedmetadata", onDurationChange);
|
||||||
|
video.removeEventListener("durationchange", onDurationChange);
|
||||||
|
};
|
||||||
|
}, [videoUrl]);
|
||||||
|
|
||||||
|
const handleSeek = useCallback((time: number) => {
|
||||||
|
if (videoRef.current) {
|
||||||
|
videoRef.current.currentTime = time;
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
if (!expanded) return null;
|
if (!expanded) return null;
|
||||||
|
|
||||||
if (loading) {
|
if (loading) {
|
||||||
@@ -117,22 +403,64 @@ export default function VideoPlayer({
|
|||||||
</Text>
|
</Text>
|
||||||
)}
|
)}
|
||||||
</Flex>
|
</Flex>
|
||||||
<Flex
|
<Flex align="center" gap={3}>
|
||||||
align="center"
|
{rawVtt && (
|
||||||
justify="center"
|
<Flex align="center" gap={1}>
|
||||||
borderRadius="full"
|
<Text fontSize="2xs" color="gray.400">
|
||||||
p={1}
|
CC sync
|
||||||
cursor="pointer"
|
</Text>
|
||||||
onClick={onClose}
|
<Flex
|
||||||
_hover={{ bg: "whiteAlpha.300" }}
|
align="center"
|
||||||
transition="background 0.15s"
|
justify="center"
|
||||||
>
|
borderRadius="sm"
|
||||||
<LuX size={14} color="white" />
|
p={0.5}
|
||||||
|
cursor="pointer"
|
||||||
|
onClick={() => adjustOffset(-0.5)}
|
||||||
|
_hover={{ bg: "whiteAlpha.300" }}
|
||||||
|
transition="background 0.15s"
|
||||||
|
>
|
||||||
|
<LuMinus size={12} color="white" />
|
||||||
|
</Flex>
|
||||||
|
<Text
|
||||||
|
fontSize="2xs"
|
||||||
|
color="gray.300"
|
||||||
|
fontFamily="mono"
|
||||||
|
minW="3.5em"
|
||||||
|
textAlign="center"
|
||||||
|
>
|
||||||
|
{formattedOffset}
|
||||||
|
</Text>
|
||||||
|
<Flex
|
||||||
|
align="center"
|
||||||
|
justify="center"
|
||||||
|
borderRadius="sm"
|
||||||
|
p={0.5}
|
||||||
|
cursor="pointer"
|
||||||
|
onClick={() => adjustOffset(0.5)}
|
||||||
|
_hover={{ bg: "whiteAlpha.300" }}
|
||||||
|
transition="background 0.15s"
|
||||||
|
>
|
||||||
|
<LuPlus size={12} color="white" />
|
||||||
|
</Flex>
|
||||||
|
</Flex>
|
||||||
|
)}
|
||||||
|
<Flex
|
||||||
|
align="center"
|
||||||
|
justify="center"
|
||||||
|
borderRadius="full"
|
||||||
|
p={1}
|
||||||
|
cursor="pointer"
|
||||||
|
onClick={onClose}
|
||||||
|
_hover={{ bg: "whiteAlpha.300" }}
|
||||||
|
transition="background 0.15s"
|
||||||
|
>
|
||||||
|
<LuX size={14} color="white" />
|
||||||
|
</Flex>
|
||||||
</Flex>
|
</Flex>
|
||||||
</Flex>
|
</Flex>
|
||||||
{/* Video element with visible controls */}
|
{/* Video element with visible controls */}
|
||||||
{/* eslint-disable-next-line jsx-a11y/media-has-caption */}
|
|
||||||
<video
|
<video
|
||||||
|
ref={videoRef}
|
||||||
src={videoUrl}
|
src={videoUrl}
|
||||||
controls
|
controls
|
||||||
autoPlay
|
autoPlay
|
||||||
@@ -147,10 +475,34 @@ export default function VideoPlayer({
|
|||||||
minHeight: "180px",
|
minHeight: "180px",
|
||||||
objectFit: "contain",
|
objectFit: "contain",
|
||||||
background: "black",
|
background: "black",
|
||||||
borderBottomLeftRadius: "0.375rem",
|
...(segments.length === 0
|
||||||
borderBottomRightRadius: "0.375rem",
|
? {
|
||||||
|
borderBottomLeftRadius: "0.375rem",
|
||||||
|
borderBottomRightRadius: "0.375rem",
|
||||||
|
}
|
||||||
|
: {}),
|
||||||
}}
|
}}
|
||||||
/>
|
>
|
||||||
|
{captionsUrl && (
|
||||||
|
<track
|
||||||
|
kind="captions"
|
||||||
|
src={captionsUrl}
|
||||||
|
srcLang={sourceLanguage || "en"}
|
||||||
|
label="Auto-generated captions"
|
||||||
|
default
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</video>
|
||||||
|
{segments.length > 0 && videoDuration > 0 && (
|
||||||
|
<SpeakerProgressBar
|
||||||
|
segments={segments}
|
||||||
|
videoDuration={videoDuration}
|
||||||
|
currentTime={currentTime}
|
||||||
|
captionOffset={captionOffset}
|
||||||
|
onSeek={handleSeek}
|
||||||
|
participants={participants}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user