slop review WIP

This commit is contained in:
Igor Loskutov
2025-06-17 19:26:11 -04:00
parent 5b1f11047c
commit 0c91f5dd59
13 changed files with 372 additions and 170 deletions

View File

@@ -12,7 +12,7 @@ from alembic import op
# revision identifiers, used by Alembic.
revision: str = "20250617140003"
down_revision: Union[str, None] = "f819277e5169"
down_revision: Union[str, None] = "d3ff3a39297f"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
@@ -23,10 +23,9 @@ def upgrade() -> None:
'meeting_consent',
sa.Column('id', sa.String(), nullable=False),
sa.Column('meeting_id', sa.String(), nullable=False),
sa.Column('user_identifier', sa.String(), nullable=False),
sa.Column('user_id', sa.String(), nullable=True),
sa.Column('consent_given', sa.Boolean(), nullable=False),
sa.Column('consent_timestamp', sa.DateTime(), nullable=False),
sa.Column('user_agent', sa.String(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.ForeignKeyConstraint(['meeting_id'], ['meeting.id']),
)

View File

@@ -1,32 +0,0 @@
"""make user_identifier optional in meeting_consent
Revision ID: 38e116c82385
Revises: 20250617140003
Create Date: 2025-06-17 15:23:41.346980
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '38e116c82385'
down_revision: Union[str, None] = '20250617140003'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Make user_identifier column nullable
op.alter_column('meeting_consent', 'user_identifier',
existing_type=sa.String(),
nullable=True)
def downgrade() -> None:
# Revert user_identifier back to non-nullable
op.alter_column('meeting_consent', 'user_identifier',
existing_type=sa.String(),
nullable=False)

View File

@@ -1,32 +0,0 @@
"""add source and target language
Revision ID: b3df9681cae9
Revises: 543ed284d69a
Create Date: 2023-08-29 10:55:37.690469
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'b3df9681cae9'
down_revision: Union[str, None] = '543ed284d69a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('transcript', sa.Column('source_language', sa.String(), nullable=True))
op.add_column('transcript', sa.Column('target_language', sa.String(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('transcript', 'target_language')
op.drop_column('transcript', 'source_language')
# ### end Alembic commands ###

View File

@@ -47,20 +47,18 @@ meeting_consent = sa.Table(
metadata,
sa.Column("id", sa.String, primary_key=True),
sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id")),
sa.Column("user_identifier", sa.String, nullable=True),
sa.Column("user_id", sa.String, nullable=True),
sa.Column("consent_given", sa.Boolean),
sa.Column("consent_timestamp", sa.DateTime),
sa.Column("user_agent", sa.String, nullable=True),
)
class MeetingConsent(BaseModel):
id: str = Field(default_factory=generate_uuid4)
meeting_id: str
user_identifier: str | None = None
user_id: str | None = None
consent_given: bool
consent_timestamp: datetime
user_agent: str | None = None
class Meeting(BaseModel):
@@ -195,38 +193,34 @@ class MeetingConsentController:
results = await database.fetch_all(query)
return [MeetingConsent(**result) for result in results]
async def get_by_meeting_and_user(self, meeting_id: str, user_identifier: str) -> MeetingConsent | None:
async def get_by_meeting_and_user(self, meeting_id: str, user_id: str) -> MeetingConsent | None:
"""Get existing consent for a specific user and meeting"""
query = meeting_consent.select().where(
meeting_consent.c.meeting_id == meeting_id,
meeting_consent.c.user_identifier == user_identifier
meeting_consent.c.user_id == user_id
)
result = await database.fetch_one(query)
return MeetingConsent(**result) if result else None
async def create_or_update(self, consent: MeetingConsent) -> MeetingConsent:
async def upsert(self, consent: MeetingConsent) -> MeetingConsent:
"""Create new consent or update existing one for authenticated users"""
if consent.user_identifier:
if consent.user_id:
# For authenticated users, check if consent already exists
existing = await self.get_by_meeting_and_user(consent.meeting_id, consent.user_identifier)
# not transactional but we're ok with that; the consents ain't deleted anyways
existing = await self.get_by_meeting_and_user(consent.meeting_id, consent.user_id)
if existing:
# Update existing consent
query = meeting_consent.update().where(
meeting_consent.c.id == existing.id
).values(
consent_given=consent.consent_given,
consent_timestamp=consent.consent_timestamp,
user_agent=consent.user_agent
)
await database.execute(query)
# Return updated consent object
existing.consent_given = consent.consent_given
existing.consent_timestamp = consent.consent_timestamp
existing.user_agent = consent.user_agent
return existing
# For anonymous users or first-time authenticated users, create new record
query = meeting_consent.insert().values(**consent.model_dump())
await database.execute(query)
return consent

View File

@@ -5,6 +5,7 @@ from uuid import uuid4
import sqlalchemy as sa
from pydantic import BaseModel, Field
from reflector.db import database, metadata
from reflector.utils import generate_uuid4
recordings = sa.Table(
"recording",
@@ -22,11 +23,6 @@ recordings = sa.Table(
sa.Column("meeting_id", sa.String),
)
def generate_uuid4() -> str:
return str(uuid4())
class Recording(BaseModel):
id: str = Field(default_factory=generate_uuid4)
bucket_name: str

View File

@@ -6,7 +6,7 @@ import sqlalchemy
from fastapi import HTTPException
from pydantic import BaseModel, Field
from reflector.db import database, metadata
from reflector.db.transcripts import generate_uuid4
from reflector.utils import generate_uuid4
from sqlalchemy.sql import false, or_
rooms = sqlalchemy.Table(

View File

@@ -6,7 +6,7 @@ from contextlib import asynccontextmanager
from datetime import datetime
from pathlib import Path
from typing import Any, Literal
from uuid import uuid4
from reflector.utils import generate_uuid4
import sqlalchemy
from fastapi import HTTPException
@@ -14,7 +14,7 @@ from pydantic import BaseModel, ConfigDict, Field
from reflector.db import database, metadata
from reflector.processors.types import Word as ProcessorWord
from reflector.settings import settings
from reflector.storage import Storage
from reflector.storage import Storage, get_transcripts_storage
from sqlalchemy import Enum
from sqlalchemy.sql import false, or_
@@ -72,23 +72,11 @@ transcripts = sqlalchemy.Table(
),
)
def generate_uuid4() -> str:
return str(uuid4())
def generate_transcript_name() -> str:
now = datetime.utcnow()
return f"Transcript {now.strftime('%Y-%m-%d %H:%M:%S')}"
def get_storage() -> Storage:
return Storage.get_instance(
name=settings.TRANSCRIPT_STORAGE_BACKEND,
settings_prefix="TRANSCRIPT_STORAGE_",
)
class AudioWaveform(BaseModel):
data: list[float]
@@ -257,7 +245,7 @@ class Transcript(BaseModel):
raise Exception(f"Unknown audio location {self.audio_location}")
async def _generate_storage_audio_link(self) -> str:
return await get_storage().get_file_url(self.storage_audio_path)
return await get_transcripts_storage().get_file_url(self.storage_audio_path)
def _generate_local_audio_link(self) -> str:
# we need to create an url to be used for diarization
@@ -558,7 +546,7 @@ class TranscriptController:
if transcript.audio_location == "local":
# store the audio on external storage if it's not already there
await get_storage().put_file(
await get_transcripts_storage().put_file(
transcript.storage_audio_path,
transcript.audio_mp3_filename.read_bytes(),
)
@@ -574,7 +562,7 @@ class TranscriptController:
Download audio from storage
"""
transcript.audio_mp3_filename.write_bytes(
await get_storage().get_file(
await get_transcripts_storage().get_file(
transcript.storage_audio_path,
)
)

View File

@@ -1,7 +1,8 @@
from .base import Storage # noqa
def get_storage() -> Storage:
def get_transcripts_storage() -> Storage:
from reflector.settings import settings
return Storage.get_instance(
name=settings.TRANSCRIPT_STORAGE_BACKEND,
settings_prefix="TRANSCRIPT_STORAGE_",
)

View File

@@ -1,6 +1,8 @@
from datetime import datetime
from typing import Annotated, Optional
from fastapi import APIRouter, HTTPException, Request
import reflector.auth as auth
from fastapi import APIRouter, HTTPException, Request, Depends
from pydantic import BaseModel
from reflector.db.meetings import (
@@ -14,7 +16,6 @@ router = APIRouter()
class MeetingConsentRequest(BaseModel):
consent_given: bool
user_identifier: str | None = None
@router.post("/meetings/{meeting_id}/consent")
@@ -22,21 +23,21 @@ async def meeting_audio_consent(
meeting_id: str,
request: MeetingConsentRequest,
user_request: Request,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
meeting = await meetings_controller.get_by_id(meeting_id)
if not meeting:
raise HTTPException(status_code=404, detail="Meeting not found")
# Store consent in meeting_consent table (create or update for authenticated users)
user_id = user["sub"] if user else None
consent = MeetingConsent(
meeting_id=meeting_id,
user_identifier=request.user_identifier,
user_id=user_id,
consent_given=request.consent_given,
consent_timestamp=datetime.utcnow(),
user_agent=user_request.headers.get("user-agent")
)
# Use create_or_update to handle consent overrides for authenticated users
updated_consent = await meeting_consent_controller.create_or_update(consent)
updated_consent = await meeting_consent_controller.upsert(consent)
return {"status": "success", "consent_id": updated_consent.id}

View File

@@ -13,7 +13,7 @@ from reflector.db.meetings import meeting_consent_controller, meetings_controlle
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import SourceKind, transcripts_controller
from reflector.storage import get_storage
from reflector.storage import get_transcripts_storage
from reflector.pipelines.main_live_pipeline import asynctask, task_pipeline_process
from reflector.settings import settings
from reflector.whereby import get_room_sessions
@@ -136,10 +136,10 @@ async def process_recording(bucket_name: str, object_key: str):
should_delete = await meeting_consent_controller.has_any_denial(meeting.id)
if should_delete:
logger.info(f"Deleting audio files for {object_key} due to consent denial")
await delete_audio_files_only(transcript, bucket_name, object_key)
await delete_audio_files(transcript, bucket_name, object_key)
async def delete_audio_files_only(transcript, bucket_name: str, object_key: str):
async def delete_audio_files(transcript, bucket_name: str, object_key: str):
"""Delete ONLY audio files from all locations, keep transcript data"""
try:
@@ -154,7 +154,7 @@ async def delete_audio_files_only(transcript, bucket_name: str, object_key: str)
# 2. Delete processed audio from transcript storage S3 bucket
if transcript.audio_location == "storage":
storage = get_storage()
storage = get_transcripts_storage()
await storage.delete_file(transcript.storage_audio_path)
logger.info(f"Deleted processed audio from storage: {transcript.storage_audio_path}")