Add transcript source kind

This commit is contained in:
2024-10-04 16:38:29 +02:00
parent ebb32ee613
commit 39d02ab265
9 changed files with 159 additions and 13 deletions

View File

@@ -0,0 +1,48 @@
"""Add transcript source kind
Revision ID: 74b2b0236931
Revises: 0925da921477
Create Date: 2024-10-04 14:19:23.625447
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
# `revision` is this migration's id and `down_revision` is the id of the
# migration it applies on top of (both also appear in the module docstring).
revision: str = "74b2b0236931"
down_revision: Union[str, None] = "0925da921477"
# No branch labels and no cross-revision dependencies are declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the required ``source_kind`` column to the ``transcript`` table.

    The column is first added as nullable, every existing row is then
    backfilled, and only afterwards is NOT NULL applied, so a table that
    already contains rows can be migrated.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        "transcript",
        sa.Column(
            "source_kind",
            # Labels are lowercase so that they match the literal values the
            # backfill statements below write ('room' / 'live'); a backend
            # that enforces enum membership would otherwise reject them.
            sa.Enum("room", "live", "file", name="sourcekind"),
            nullable=True,
        ),
    )
    # Backfill: rows that reference a meeting are labelled 'room'.
    op.execute(
        "UPDATE transcript SET source_kind = 'room' WHERE meeting_id IS NOT NULL"
    )
    # All remaining rows are labelled 'live'.
    # NOTE(review): no existing row is ever backfilled as 'file' -- confirm
    # that pre-existing uploads, if any, are acceptable as 'live'.
    op.execute("UPDATE transcript SET source_kind = 'live' WHERE meeting_id IS NULL")
    # Every row now has a value, so the column can be made NOT NULL. Batch
    # mode is used for the ALTER; presumably for backends with limited
    # ALTER TABLE support such as SQLite -- verify against the deployment.
    with op.batch_alter_table("transcript", schema=None) as batch_op:
        batch_op.alter_column("source_kind", nullable=False)
    # ### end Alembic commands ###
def downgrade() -> None:
    """Revert this migration by dropping ``source_kind`` from ``transcript``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name="transcript", column_name="source_kind")
    # ### end Alembic commands ###

View File

@@ -1,3 +1,4 @@
import enum
import json import json
import os import os
import shutil import shutil
@@ -14,8 +15,16 @@ from reflector.db import database, metadata
from reflector.processors.types import Word as ProcessorWord from reflector.processors.types import Word as ProcessorWord
from reflector.settings import settings from reflector.settings import settings
from reflector.storage import Storage from reflector.storage import Storage
from sqlalchemy import Enum
from sqlalchemy.sql import false from sqlalchemy.sql import false
class SourceKind(enum.StrEnum):
ROOM = enum.auto()
LIVE = enum.auto()
FILE = enum.auto()
transcripts = sqlalchemy.Table( transcripts = sqlalchemy.Table(
"transcript", "transcript",
metadata, metadata,
@@ -55,6 +64,11 @@ transcripts = sqlalchemy.Table(
sqlalchemy.String, sqlalchemy.String,
), ),
sqlalchemy.Column("zulip_message_id", sqlalchemy.Integer, nullable=True), sqlalchemy.Column("zulip_message_id", sqlalchemy.Integer, nullable=True),
sqlalchemy.Column(
"source_kind",
Enum(SourceKind, values_callable=lambda obj: [e.value for e in obj]),
nullable=False,
),
) )
@@ -152,6 +166,7 @@ class Transcript(BaseModel):
reviewed: bool = False reviewed: bool = False
meeting_id: str | None = None meeting_id: str | None = None
zulip_message_id: int | None = None zulip_message_id: int | None = None
source_kind: SourceKind
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent: def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
ev = TranscriptEvent(event=event, data=data.model_dump()) ev = TranscriptEvent(event=event, data=data.model_dump())
@@ -291,6 +306,7 @@ class TranscriptController:
order_by: str | None = None, order_by: str | None = None,
filter_empty: bool | None = False, filter_empty: bool | None = False,
filter_recording: bool | None = False, filter_recording: bool | None = False,
source_kind: SourceKind | None = None,
room_id: str | None = None, room_id: str | None = None,
search_term: str | None = None, search_term: str | None = None,
return_query: bool = False, return_query: bool = False,
@@ -320,6 +336,9 @@ class TranscriptController:
if user_id: if user_id:
query = query.where(transcripts.c.user_id == user_id) query = query.where(transcripts.c.user_id == user_id)
if source_kind:
query = query.where(transcripts.c.source_kind == source_kind)
if room_id: if room_id:
query = query.where(rooms.c.id == room_id) query = query.where(rooms.c.id == room_id)
@@ -422,6 +441,7 @@ class TranscriptController:
async def add( async def add(
self, self,
name: str, name: str,
source_kind: SourceKind,
source_language: str = "en", source_language: str = "en",
target_language: str = "en", target_language: str = "en",
user_id: str | None = None, user_id: str | None = None,
@@ -433,6 +453,7 @@ class TranscriptController:
""" """
transcript = Transcript( transcript = Transcript(
name=name, name=name,
source_kind=source_kind,
source_language=source_language, source_language=source_language,
target_language=target_language, target_language=target_language,
user_id=user_id, user_id=user_id,

View File

@@ -9,6 +9,7 @@ from jose import jwt
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from reflector.db.migrate_user import migrate_user from reflector.db.migrate_user import migrate_user
from reflector.db.transcripts import ( from reflector.db.transcripts import (
SourceKind,
TranscriptParticipant, TranscriptParticipant,
TranscriptTopic, TranscriptTopic,
transcripts_controller, transcripts_controller,
@@ -61,6 +62,7 @@ class GetTranscript(BaseModel):
meeting_id: str | None meeting_id: str | None
room_id: str | None room_id: str | None
room_name: str | None room_name: str | None
source_kind: SourceKind
class CreateTranscript(BaseModel): class CreateTranscript(BaseModel):
@@ -89,6 +91,7 @@ async def transcripts_list(
room_id: str | None, room_id: str | None,
search_term: str | None, search_term: str | None,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)], user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
source_kind: SourceKind | None = None,
): ):
from reflector.db import database from reflector.db import database
@@ -105,6 +108,7 @@ async def transcripts_list(
database, database,
await transcripts_controller.get_all( await transcripts_controller.get_all(
user_id=user_id, user_id=user_id,
source_kind=SourceKind(source_kind) if source_kind else None,
room_id=room_id, room_id=room_id,
search_term=search_term, search_term=search_term,
order_by="-created_at", order_by="-created_at",
@@ -121,6 +125,7 @@ async def transcripts_create(
user_id = user["sub"] if user else None user_id = user["sub"] if user else None
return await transcripts_controller.add( return await transcripts_controller.add(
info.name, info.name,
source_kind=SourceKind.LIVE,
source_language=info.source_language, source_language=info.source_language,
target_language=info.target_language, target_language=info.target_language,
user_id=user_id, user_id=user_id,

View File

@@ -8,7 +8,7 @@ import structlog
from celery import shared_task from celery import shared_task
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from reflector.db.meetings import meetings_controller from reflector.db.meetings import meetings_controller
from reflector.db.transcripts import transcripts_controller from reflector.db.transcripts import SourceKind, transcripts_controller
from reflector.pipelines.main_live_pipeline import asynctask, task_pipeline_process from reflector.pipelines.main_live_pipeline import asynctask, task_pipeline_process
from reflector.settings import settings from reflector.settings import settings
@@ -66,6 +66,7 @@ async def process_recording(bucket_name: str, object_key: str):
meeting = await meetings_controller.get_by_room_name(room_name) meeting = await meetings_controller.get_by_room_name(room_name)
transcript = await transcripts_controller.add( transcript = await transcripts_controller.add(
"", "",
source_kind=SourceKind.ROOM
source_language="en", source_language="en",
target_language="en", target_language="en",
user_id=meeting.user_id, user_id=meeting.user_id,

View File

@@ -49,8 +49,11 @@ import Pagination from "./pagination";
import { formatTimeMs } from "../../lib/time"; import { formatTimeMs } from "../../lib/time";
import useApi from "../../lib/useApi"; import useApi from "../../lib/useApi";
import { useError } from "../../(errors)/errorContext"; import { useError } from "../../(errors)/errorContext";
import { SourceKind } from "../../api";
export default function TranscriptBrowser() { export default function TranscriptBrowser() {
const [selectedSourceKind, setSelectedSourceKind] =
useState<SourceKind | null>(null);
const [selectedRoomId, setSelectedRoomId] = useState(""); const [selectedRoomId, setSelectedRoomId] = useState("");
const [rooms, setRooms] = useState<Room[]>([]); const [rooms, setRooms] = useState<Room[]>([]);
const [page, setPage] = useState(1); const [page, setPage] = useState(1);
@@ -58,6 +61,7 @@ export default function TranscriptBrowser() {
const [searchInputValue, setSearchInputValue] = useState(""); const [searchInputValue, setSearchInputValue] = useState("");
const { loading, response, refetch } = useTranscriptList( const { loading, response, refetch } = useTranscriptList(
page, page,
selectedSourceKind,
selectedRoomId, selectedRoomId,
searchTerm, searchTerm,
); );
@@ -86,7 +90,11 @@ export default function TranscriptBrowser() {
.catch((err) => setError(err, "There was an error fetching the rooms")); .catch((err) => setError(err, "There was an error fetching the rooms"));
}, [api]); }, [api]);
const handleFilterTranscripts = (roomId: string) => { const handleFilterTranscripts = (
sourceKind: SourceKind | null,
roomId: string,
) => {
setSelectedSourceKind(sourceKind);
setSelectedRoomId(roomId); setSelectedRoomId(roomId);
setPage(1); setPage(1);
}; };
@@ -187,10 +195,10 @@ export default function TranscriptBrowser() {
<Link <Link
as={NextLink} as={NextLink}
href="#" href="#"
onClick={() => handleFilterTranscripts("")} onClick={() => handleFilterTranscripts(null, "")}
color={selectedRoomId === "" ? "blue.500" : "gray.600"} color={selectedSourceKind === null ? "blue.500" : "gray.600"}
_hover={{ color: "blue.300" }} _hover={{ color: "blue.300" }}
fontWeight={selectedRoomId === "" ? "bold" : "normal"} fontWeight={selectedSourceKind === null ? "bold" : "normal"}
> >
All Transcripts All Transcripts
</Link> </Link>
@@ -208,10 +216,20 @@ export default function TranscriptBrowser() {
key={room.id} key={room.id}
as={NextLink} as={NextLink}
href="#" href="#"
onClick={() => handleFilterTranscripts(room.id)} onClick={() => handleFilterTranscripts("room", room.id)}
color={selectedRoomId === room.id ? "blue.500" : "gray.600"} color={
selectedSourceKind === "room" &&
selectedRoomId === room.id
? "blue.500"
: "gray.600"
}
_hover={{ color: "blue.300" }} _hover={{ color: "blue.300" }}
fontWeight={selectedRoomId === room.id ? "bold" : "normal"} fontWeight={
selectedSourceKind === "room" &&
selectedRoomId === room.id
? "bold"
: "normal"
}
ml={4} ml={4}
> >
{room.name} {room.name}
@@ -219,6 +237,28 @@ export default function TranscriptBrowser() {
))} ))}
</> </>
)} )}
<Divider />
<Link
as={NextLink}
href="#"
onClick={() => handleFilterTranscripts("live", "")}
color={selectedSourceKind === "live" ? "blue.500" : "gray.600"}
_hover={{ color: "blue.300" }}
fontWeight={selectedSourceKind === "live" ? "bold" : "normal"}
>
Live Transcripts
</Link>
<Link
as={NextLink}
href="#"
onClick={() => handleFilterTranscripts("file", "")}
color={selectedSourceKind === "file" ? "blue.500" : "gray.600"}
_hover={{ color: "blue.300" }}
fontWeight={selectedSourceKind === "file" ? "bold" : "normal"}
>
Uploaded Files
</Link>
</Stack> </Stack>
</Box> </Box>
@@ -241,7 +281,7 @@ export default function TranscriptBrowser() {
<Th pl={12} width="400px"> <Th pl={12} width="400px">
Transcription Title Transcription Title
</Th> </Th>
<Th width="150px">Room</Th> <Th width="150px">Source</Th>
<Th width="200px">Date</Th> <Th width="200px">Date</Th>
<Th width="100px">Duration</Th> <Th width="100px">Duration</Th>
<Th width="50px"></Th> <Th width="50px"></Th>
@@ -296,7 +336,11 @@ export default function TranscriptBrowser() {
</Link> </Link>
</Flex> </Flex>
</Td> </Td>
<Td>{item.room_name}</Td> <Td>
{item.source_kind === "room"
? item.room_name
: item.source_kind}
</Td>
<Td> <Td>
{new Date(item.created_at).toLocaleString("en-US", { {new Date(item.created_at).toLocaleString("en-US", {
year: "numeric", year: "numeric",
@@ -376,7 +420,12 @@ export default function TranscriptBrowser() {
<Text fontWeight="bold"> <Text fontWeight="bold">
{item.title || "Unnamed Transcript"} {item.title || "Unnamed Transcript"}
</Text> </Text>
<Text>Room: {item.room_name}</Text> <Text>
Source:{" "}
{item.source_kind === "room"
? item.room_name
: item.source_kind}
</Text>
<Text> <Text>
Date: {new Date(item.created_at).toLocaleString()} Date: {new Date(item.created_at).toLocaleString()}
</Text> </Text>

View File

@@ -1,7 +1,7 @@
import { useEffect, useState } from "react"; import { useEffect, useState } from "react";
import { useError } from "../../(errors)/errorContext"; import { useError } from "../../(errors)/errorContext";
import useApi from "../../lib/useApi"; import useApi from "../../lib/useApi";
import { Page_GetTranscript_ } from "../../api"; import { Page_GetTranscript_, SourceKind } from "../../api";
type TranscriptList = { type TranscriptList = {
response: Page_GetTranscript_ | null; response: Page_GetTranscript_ | null;
@@ -12,6 +12,7 @@ type TranscriptList = {
const useTranscriptList = ( const useTranscriptList = (
page: number, page: number,
sourceKind: SourceKind | null,
roomId: string | null, roomId: string | null,
searchTerm: string | null, searchTerm: string | null,
): TranscriptList => { ): TranscriptList => {
@@ -31,7 +32,12 @@ const useTranscriptList = (
if (!api) return; if (!api) return;
setLoading(true); setLoading(true);
api api
.v1TranscriptsList({ page, roomId, searchTerm }) .v1TranscriptsList({
page,
sourceKind,
roomId,
searchTerm,
})
.then((response) => { .then((response) => {
setResponse(response); setResponse(response);
setLoading(false); setLoading(false);

View File

@@ -285,6 +285,9 @@ export const $GetTranscript = {
], ],
title: "Room Name", title: "Room Name",
}, },
source_kind: {
$ref: "#/components/schemas/SourceKind",
},
}, },
type: "object", type: "object",
required: [ required: [
@@ -305,6 +308,7 @@ export const $GetTranscript = {
"meeting_id", "meeting_id",
"room_id", "room_id",
"room_name", "room_name",
"source_kind",
], ],
title: "GetTranscript", title: "GetTranscript",
} as const; } as const;
@@ -766,6 +770,12 @@ export const $RtcOffer = {
title: "RtcOffer", title: "RtcOffer",
} as const; } as const;
export const $SourceKind = {
type: "string",
enum: ["room", "live", "file"],
title: "SourceKind",
} as const;
export const $SpeakerAssignment = { export const $SpeakerAssignment = {
properties: { properties: {
speaker: { speaker: {

View File

@@ -201,6 +201,7 @@ export class DefaultService {
* @param data The data for the request. * @param data The data for the request.
* @param data.roomId * @param data.roomId
* @param data.searchTerm * @param data.searchTerm
* @param data.sourceKind
* @param data.page Page number * @param data.page Page number
* @param data.size Page size * @param data.size Page size
* @returns Page_GetTranscript_ Successful Response * @returns Page_GetTranscript_ Successful Response
@@ -215,6 +216,7 @@ export class DefaultService {
query: { query: {
room_id: data.roomId, room_id: data.roomId,
search_term: data.searchTerm, search_term: data.searchTerm,
source_kind: data.sourceKind,
page: data.page, page: data.page,
size: data.size, size: data.size,
}, },

View File

@@ -54,6 +54,7 @@ export type GetTranscript = {
meeting_id: string | null; meeting_id: string | null;
room_id: string | null; room_id: string | null;
room_name: string | null; room_name: string | null;
source_kind: SourceKind;
}; };
export type GetTranscriptSegmentTopic = { export type GetTranscriptSegmentTopic = {
@@ -149,6 +150,8 @@ export type RtcOffer = {
type: string; type: string;
}; };
export type SourceKind = "room" | "live" | "file";
export type SpeakerAssignment = { export type SpeakerAssignment = {
speaker?: number | null; speaker?: number | null;
participant?: string | null; participant?: string | null;
@@ -282,6 +285,7 @@ export type V1TranscriptsListData = {
* Page size * Page size
*/ */
size?: number; size?: number;
sourceKind?: SourceKind | null;
}; };
export type V1TranscriptsListResponse = Page_GetTranscript_; export type V1TranscriptsListResponse = Page_GetTranscript_;