refactor: improve transcript list performance (#480)

* refactor: improve transcript list performance

* fix: sync openapi

* fix: frontend types

* fix: remove drop table _alembic_tmp_meeting

* fix: remove create table too

* fix: remove uq_recording_object_key
This commit is contained in:
2025-07-15 15:10:05 -06:00
committed by GitHub
parent 3d370336cc
commit 9deb717e5b
21 changed files with 470 additions and 126 deletions

View File

@@ -40,6 +40,7 @@ meetings = sa.Table(
nullable=False,
server_default=sa.true(),
),
sa.Index("idx_meeting_room_id", "room_id"),
)
meeting_consent = sa.Table(

View File

@@ -20,6 +20,7 @@ recordings = sa.Table(
server_default="pending",
),
sa.Column("meeting_id", sa.String),
sa.Index("idx_recording_meeting_id", "meeting_id"),
)

View File

@@ -39,6 +39,7 @@ rooms = sqlalchemy.Table(
sqlalchemy.Column(
"is_shared", sqlalchemy.Boolean, nullable=False, server_default=false()
),
sqlalchemy.Index("idx_room_is_shared", "is_shared"),
)

View File

@@ -6,7 +6,6 @@ from contextlib import asynccontextmanager
from datetime import datetime
from pathlib import Path
from typing import Any, Literal
from reflector.utils import generate_uuid4
import sqlalchemy
from fastapi import HTTPException
@@ -15,6 +14,7 @@ from reflector.db import database, metadata
from reflector.processors.types import Word as ProcessorWord
from reflector.settings import settings
from reflector.storage import get_transcripts_storage
from reflector.utils import generate_uuid4
from sqlalchemy import Enum
from sqlalchemy.sql import false, or_
@@ -74,6 +74,9 @@ transcripts = sqlalchemy.Table(
# the main "audio deleted" is the presence of the audio itself / consents not-given
# same field could've been in recording/meeting, and it's maybe even ok to dupe it at need
sqlalchemy.Column("audio_deleted", sqlalchemy.Boolean, nullable=True),
sqlalchemy.Index("idx_transcript_recording_id", "recording_id"),
sqlalchemy.Index("idx_transcript_user_id", "user_id"),
sqlalchemy.Index("idx_transcript_created_at", "created_at"),
)
@@ -306,6 +309,7 @@ class TranscriptController:
room_id: str | None = None,
search_term: str | None = None,
return_query: bool = False,
exclude_columns: list[str] = ["topics", "events", "participants"],
) -> list[Transcript]:
"""
Get all transcripts
@@ -348,9 +352,14 @@ class TranscriptController:
if search_term:
query = query.where(transcripts.c.title.ilike(f"%{search_term}%"))
# Exclude heavy JSON columns from list queries
transcript_columns = [
col for col in transcripts.c if col.name not in exclude_columns
]
query = query.with_only_columns(
[
transcripts,
transcript_columns
+ [
rooms.c.id.label("room_id"),
rooms.c.name.label("room_name"),
]

View File

@@ -45,7 +45,7 @@ def create_access_token(data: dict, expires_delta: timedelta):
# ==============================================================
class GetTranscript(BaseModel):
class GetTranscriptMinimal(BaseModel):
id: str
user_id: str | None
name: str
@@ -59,7 +59,6 @@ class GetTranscript(BaseModel):
share_mode: str = Field("private")
source_language: str | None
target_language: str | None
participants: list[TranscriptParticipant] | None
reviewed: bool
meeting_id: str | None
source_kind: SourceKind
@@ -68,6 +67,10 @@ class GetTranscript(BaseModel):
audio_deleted: bool | None = None
class GetTranscript(GetTranscriptMinimal):
participants: list[TranscriptParticipant] | None
class CreateTranscript(BaseModel):
name: str
source_language: str = Field("en")
@@ -90,7 +93,7 @@ class DeletionStatus(BaseModel):
status: str
@router.get("/transcripts", response_model=Page[GetTranscript])
@router.get("/transcripts", response_model=Page[GetTranscriptMinimal])
async def transcripts_list(
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
source_kind: SourceKind | None = None,