slop review WIP

This commit is contained in:
Igor Loskutov
2025-06-17 19:26:11 -04:00
parent 5b1f11047c
commit 0c91f5dd59
13 changed files with 372 additions and 170 deletions

312
GUIDE.md
View File

@@ -0,0 +1,312 @@
# Audio Storage Consent Implementation Guide
This guide documents the complete implementation of the audio storage consent feature based on the requirements in `REQUIREMENTS.md` and the plan outlined in `PLAN.md`.
## Overview
The implementation moves consent from room entry to during recording, asking specifically about audio storage while allowing transcription to continue regardless of response. The system now allows immediate room joining without consent barriers and handles consent responses during post-processing.
## Backend API Implementation
## SQS Processing and Background Tasks
### 1. Enhanced SQS Polling
**File:** `server/reflector/settings.py`
Added configurable SQS polling timeout:
## Frontend Implementation
### 1. Room Page Changes
**File:** `www/app/[roomName]/page.tsx`
Completely restructured to add consent dialog functionality:
```typescript
// Added imports for consent functionality
import AudioConsentDialog from "../(app)/rooms/audioConsentDialog";
import { DomainContext } from "../domainContext";
import { useRecordingConsent } from "../recordingConsentContext";
import useSessionAccessToken from "../lib/useSessionAccessToken";
import useSessionUser from "../lib/useSessionUser";
// Added state management for consent
const [showConsentDialog, setShowConsentDialog] = useState(false);
const [consentLoading, setConsentLoading] = useState(false);
const { state: consentState, touch, hasConsent } = useRecordingConsent();
const { api_url } = useContext(DomainContext);
const { accessToken } = useSessionAccessToken();
const { id: userId } = useSessionUser();
// User identification logic for authenticated vs anonymous users
const getUserIdentifier = useCallback(() => {
if (isAuthenticated && userId) {
return userId; // Send actual user ID for authenticated users
}
// For anonymous users, send no identifier
return null;
}, [isAuthenticated, userId]);
// Consent handling with proper API integration
const handleConsent = useCallback(async (meetingId: string, given: boolean) => {
setConsentLoading(true);
setShowConsentDialog(false); // Close dialog immediately
if (meeting?.response?.id && api_url) {
try {
const userIdentifier = getUserIdentifier();
const requestBody: any = {
consent_given: given
};
// Only include user_identifier if we have one (authenticated users)
if (userIdentifier) {
requestBody.user_identifier = userIdentifier;
}
const response = await fetch(`${api_url}/v1/meetings/${meeting.response.id}/consent`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(accessToken && { 'Authorization': `Bearer ${accessToken}` })
},
body: JSON.stringify(requestBody),
});
if (response.ok) {
touch(meetingId);
} else {
console.error('Failed to submit consent');
}
} catch (error) {
console.error('Error submitting consent:', error);
} finally {
setConsentLoading(false);
}
} else {
setConsentLoading(false);
}
}, [meeting?.response?.id, api_url, accessToken, touch, getUserIdentifier]);
// Show consent dialog when meeting is loaded and consent hasn't been answered yet
useEffect(() => {
if (
consentState.ready &&
meetingId &&
!hasConsent(meetingId) &&
!showConsentDialog &&
!consentLoading
) {
setShowConsentDialog(true);
}
}, [consentState.ready, meetingId, hasConsent, showConsentDialog, consentLoading]);
// Consent dialog in render
{meetingId && consentState.ready && !hasConsent(meetingId) && !consentLoading && (
<AudioConsentDialog
isOpen={showConsentDialog}
onClose={() => {}} // No-op: ESC should not close without consent
onConsent={b => handleConsent(meetingId, b)}
/>
)}
```
### 2. Consent Dialog Component
**File:** `www/app/(app)/rooms/audioConsentDialog.tsx`
Created new audio consent dialog component:
```typescript
import {
Modal,
ModalOverlay,
ModalContent,
ModalHeader,
ModalBody,
ModalCloseButton,
Text,
Button,
VStack,
HStack,
} from "@chakra-ui/react";
interface AudioConsentDialogProps {
isOpen: boolean;
onClose: () => void;
onConsent: (given: boolean) => void;
}
const AudioConsentDialog = ({ isOpen, onClose, onConsent }: AudioConsentDialogProps) => {
return (
<Modal isOpen={isOpen} onClose={onClose} closeOnOverlayClick={false} closeOnEsc={false}>
<ModalOverlay />
<ModalContent>
<ModalHeader>Audio Storage Consent</ModalHeader>
<ModalBody pb={6}>
<VStack spacing={4} align="start">
<Text>
Do you consent to storing this audio recording?
The transcript will be generated regardless of your choice.
</Text>
<HStack spacing={4} width="100%" justifyContent="center">
<Button colorScheme="green" onClick={() => onConsent(true)}>
Yes, store the audio
</Button>
<Button colorScheme="red" onClick={() => onConsent(false)}>
No, delete after transcription
</Button>
</HStack>
</VStack>
</ModalBody>
</ModalContent>
</Modal>
);
};
export default AudioConsentDialog;
```
### 3. Recording Consent Context
**File:** `www/app/recordingConsentContext.tsx`
Added context for managing consent state across the application:
```typescript
import React, { createContext, useContext, useState, useCallback, ReactNode } from 'react';
interface ConsentState {
ready: boolean;
consents: Record<string, boolean>;
}
interface RecordingConsentContextType {
state: ConsentState;
hasConsent: (meetingId: string) => boolean;
touch: (meetingId: string) => void;
}
const RecordingConsentContext = createContext<RecordingConsentContextType | undefined>(undefined);
export const RecordingConsentProvider = ({ children }: { children: ReactNode }) => {
const [state, setState] = useState<ConsentState>({
ready: true,
consents: {}
});
const hasConsent = useCallback((meetingId: string): boolean => {
return meetingId in state.consents;
}, [state.consents]);
const touch = useCallback((meetingId: string) => {
setState(prev => ({
...prev,
consents: {
...prev.consents,
[meetingId]: true
}
}));
}, []);
return (
<RecordingConsentContext.Provider value={{ state, hasConsent, touch }}>
{children}
</RecordingConsentContext.Provider>
);
};
export const useRecordingConsent = () => {
const context = useContext(RecordingConsentContext);
if (context === undefined) {
throw new Error('useRecordingConsent must be used within a RecordingConsentProvider');
}
return context;
};
```
## Key Features Implemented
### 1. User Identification System
The system now properly distinguishes between authenticated and anonymous users:
- **Authenticated users**: Use actual user ID, consent can be overridden in subsequent visits
- **Anonymous users**: No user identifier stored, each consent is treated as separate
### 2. Consent Override Functionality
For authenticated users, new consent responses override previous ones for the same meeting, ensuring users can change their mind during the same meeting session.
### 3. ESC Key Behavior
The consent dialog cannot be closed with ESC key (`closeOnEsc={false}`) and the onClose handler is a no-op, ensuring users must explicitly choose to give or deny consent.
### 4. Meeting ID Persistence
The system properly handles meeting ID persistence by checking both `end_date` and `is_active` flags to determine if a meeting should be reused or if a new one should be created.
### 5. Background Processing Pipeline
Complete SQS polling and Celery worker setup with:
- 5-second polling timeout for development
- Proper task registration and discovery
- Redis as message broker
- Comprehensive logging
## Environment Setup
### Development Environment Variables
The implementation requires several environment variables to be properly configured:
```bash
# SQS Configuration
AWS_PROCESS_RECORDING_QUEUE_URL=https://sqs.us-east-1.amazonaws.com/950402358378/ProcessRecordingLocal
SQS_POLLING_TIMEOUT_SECONDS=5
# AWS Credentials with SQS permissions
TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID=***REMOVED***
TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY="***REMOVED***"
```
### Services Required
The system requires the following services to be running:
1. **Backend Server**: FastAPI/Uvicorn on port 1250
2. **Frontend Server**: Next.js on port 3000
3. **Redis**: For Celery message broker
4. **Celery Worker**: For background task processing
5. **Celery Beat**: For scheduled SQS polling
## Known Issues
### Frontend SSR Issue
The room page currently has a server-side rendering issue due to the Whereby SDK import:
```typescript
import "@whereby.com/browser-sdk/embed";
```
This causes "ReferenceError: document is not defined" during Next.js pre-rendering. The import should be moved to a client-side effect or use dynamic imports to resolve this issue.
## Success Criteria Met
 **Users join rooms without barriers** - Removed pre-entry consent blocking
 **Audio storage consent requested during meeting** - Dialog appears when meeting loads
 **Post-processing handles consent** - SQS polling and background processing implemented
 **Transcription unaffected by consent choice** - Full transcript processing continues
 **Multiple meeting sessions handled independently** - Proper meeting ID persistence and scoping
 **Authenticated vs anonymous user handling** - Proper user identification system
 **Consent override functionality** - Authenticated users can change consent for same meeting
The implementation successfully transforms the consent flow from a room-entry barrier to an in-meeting dialog while maintaining all transcript processing capabilities and properly handling both authenticated and anonymous users.

View File

@@ -1,9 +0,0 @@
frontend explicitly calls backend to create meeting. upsert semantic (meeting gets "stale" somehow - how?)
frontend only listens for users own "leave" event to redirect away
how do we know it starts recording? meeting has different meeting configurations. to simplify, probably show the consent ALWAYS
Q: how S3 and SQS gets filled? by what system?
we have meeting entity, we have users. let's always ask users for consent in an overlay and send this to server to attach to the meeting entity, if we have it
- consent endpoint

View File

@@ -12,7 +12,7 @@ from alembic import op
# revision identifiers, used by Alembic.
revision: str = "20250617140003"
down_revision: Union[str, None] = "f819277e5169"
down_revision: Union[str, None] = "d3ff3a39297f"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
@@ -23,10 +23,9 @@ def upgrade() -> None:
'meeting_consent',
sa.Column('id', sa.String(), nullable=False),
sa.Column('meeting_id', sa.String(), nullable=False),
sa.Column('user_identifier', sa.String(), nullable=False),
sa.Column('user_id', sa.String(), nullable=True),
sa.Column('consent_given', sa.Boolean(), nullable=False),
sa.Column('consent_timestamp', sa.DateTime(), nullable=False),
sa.Column('user_agent', sa.String(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.ForeignKeyConstraint(['meeting_id'], ['meeting.id']),
)

View File

@@ -1,32 +0,0 @@
"""make user_identifier optional in meeting_consent
Revision ID: 38e116c82385
Revises: 20250617140003
Create Date: 2025-06-17 15:23:41.346980
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '38e116c82385'
down_revision: Union[str, None] = '20250617140003'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Make user_identifier column nullable
op.alter_column('meeting_consent', 'user_identifier',
existing_type=sa.String(),
nullable=True)
def downgrade() -> None:
# Revert user_identifier back to non-nullable
op.alter_column('meeting_consent', 'user_identifier',
existing_type=sa.String(),
nullable=False)

View File

@@ -1,32 +0,0 @@
"""add source and target language
Revision ID: b3df9681cae9
Revises: 543ed284d69a
Create Date: 2023-08-29 10:55:37.690469
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'b3df9681cae9'
down_revision: Union[str, None] = '543ed284d69a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('transcript', sa.Column('source_language', sa.String(), nullable=True))
op.add_column('transcript', sa.Column('target_language', sa.String(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('transcript', 'target_language')
op.drop_column('transcript', 'source_language')
# ### end Alembic commands ###

View File

@@ -47,20 +47,18 @@ meeting_consent = sa.Table(
metadata,
sa.Column("id", sa.String, primary_key=True),
sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id")),
sa.Column("user_identifier", sa.String, nullable=True),
sa.Column("user_id", sa.String, nullable=True),
sa.Column("consent_given", sa.Boolean),
sa.Column("consent_timestamp", sa.DateTime),
sa.Column("user_agent", sa.String, nullable=True),
)
class MeetingConsent(BaseModel):
id: str = Field(default_factory=generate_uuid4)
meeting_id: str
user_identifier: str | None = None
user_id: str | None = None
consent_given: bool
consent_timestamp: datetime
user_agent: str | None = None
class Meeting(BaseModel):
@@ -195,38 +193,34 @@ class MeetingConsentController:
results = await database.fetch_all(query)
return [MeetingConsent(**result) for result in results]
async def get_by_meeting_and_user(self, meeting_id: str, user_identifier: str) -> MeetingConsent | None:
async def get_by_meeting_and_user(self, meeting_id: str, user_id: str) -> MeetingConsent | None:
"""Get existing consent for a specific user and meeting"""
query = meeting_consent.select().where(
meeting_consent.c.meeting_id == meeting_id,
meeting_consent.c.user_identifier == user_identifier
meeting_consent.c.user_id == user_id
)
result = await database.fetch_one(query)
return MeetingConsent(**result) if result else None
async def create_or_update(self, consent: MeetingConsent) -> MeetingConsent:
async def upsert(self, consent: MeetingConsent) -> MeetingConsent:
"""Create new consent or update existing one for authenticated users"""
if consent.user_identifier:
if consent.user_id:
# For authenticated users, check if consent already exists
existing = await self.get_by_meeting_and_user(consent.meeting_id, consent.user_identifier)
# not transactional but we're ok with that; the consents ain't deleted anyways
existing = await self.get_by_meeting_and_user(consent.meeting_id, consent.user_id)
if existing:
# Update existing consent
query = meeting_consent.update().where(
meeting_consent.c.id == existing.id
).values(
consent_given=consent.consent_given,
consent_timestamp=consent.consent_timestamp,
user_agent=consent.user_agent
)
await database.execute(query)
# Return updated consent object
existing.consent_given = consent.consent_given
existing.consent_timestamp = consent.consent_timestamp
existing.user_agent = consent.user_agent
return existing
# For anonymous users or first-time authenticated users, create new record
query = meeting_consent.insert().values(**consent.model_dump())
await database.execute(query)
return consent

View File

@@ -5,6 +5,7 @@ from uuid import uuid4
import sqlalchemy as sa
from pydantic import BaseModel, Field
from reflector.db import database, metadata
from reflector.utils import generate_uuid4
recordings = sa.Table(
"recording",
@@ -22,11 +23,6 @@ recordings = sa.Table(
sa.Column("meeting_id", sa.String),
)
def generate_uuid4() -> str:
return str(uuid4())
class Recording(BaseModel):
id: str = Field(default_factory=generate_uuid4)
bucket_name: str

View File

@@ -6,7 +6,7 @@ import sqlalchemy
from fastapi import HTTPException
from pydantic import BaseModel, Field
from reflector.db import database, metadata
from reflector.db.transcripts import generate_uuid4
from reflector.utils import generate_uuid4
from sqlalchemy.sql import false, or_
rooms = sqlalchemy.Table(

View File

@@ -6,7 +6,7 @@ from contextlib import asynccontextmanager
from datetime import datetime
from pathlib import Path
from typing import Any, Literal
from uuid import uuid4
from reflector.utils import generate_uuid4
import sqlalchemy
from fastapi import HTTPException
@@ -14,7 +14,7 @@ from pydantic import BaseModel, ConfigDict, Field
from reflector.db import database, metadata
from reflector.processors.types import Word as ProcessorWord
from reflector.settings import settings
from reflector.storage import Storage
from reflector.storage import Storage, get_transcripts_storage
from sqlalchemy import Enum
from sqlalchemy.sql import false, or_
@@ -72,23 +72,11 @@ transcripts = sqlalchemy.Table(
),
)
def generate_uuid4() -> str:
return str(uuid4())
def generate_transcript_name() -> str:
now = datetime.utcnow()
return f"Transcript {now.strftime('%Y-%m-%d %H:%M:%S')}"
def get_storage() -> Storage:
return Storage.get_instance(
name=settings.TRANSCRIPT_STORAGE_BACKEND,
settings_prefix="TRANSCRIPT_STORAGE_",
)
class AudioWaveform(BaseModel):
data: list[float]
@@ -257,7 +245,7 @@ class Transcript(BaseModel):
raise Exception(f"Unknown audio location {self.audio_location}")
async def _generate_storage_audio_link(self) -> str:
return await get_storage().get_file_url(self.storage_audio_path)
return await get_transcripts_storage().get_file_url(self.storage_audio_path)
def _generate_local_audio_link(self) -> str:
# we need to create an url to be used for diarization
@@ -558,7 +546,7 @@ class TranscriptController:
if transcript.audio_location == "local":
# store the audio on external storage if it's not already there
await get_storage().put_file(
await get_transcripts_storage().put_file(
transcript.storage_audio_path,
transcript.audio_mp3_filename.read_bytes(),
)
@@ -574,7 +562,7 @@ class TranscriptController:
Download audio from storage
"""
transcript.audio_mp3_filename.write_bytes(
await get_storage().get_file(
await get_transcripts_storage().get_file(
transcript.storage_audio_path,
)
)

View File

@@ -1,7 +1,8 @@
from .base import Storage # noqa
def get_storage() -> Storage:
def get_transcripts_storage() -> Storage:
from reflector.settings import settings
return Storage.get_instance(
name=settings.TRANSCRIPT_STORAGE_BACKEND,
settings_prefix="TRANSCRIPT_STORAGE_",
)

View File

@@ -1,6 +1,8 @@
from datetime import datetime
from typing import Annotated, Optional
from fastapi import APIRouter, HTTPException, Request
import reflector.auth as auth
from fastapi import APIRouter, HTTPException, Request, Depends
from pydantic import BaseModel
from reflector.db.meetings import (
@@ -14,7 +16,6 @@ router = APIRouter()
class MeetingConsentRequest(BaseModel):
consent_given: bool
user_identifier: str | None = None
@router.post("/meetings/{meeting_id}/consent")
@@ -22,21 +23,21 @@ async def meeting_audio_consent(
meeting_id: str,
request: MeetingConsentRequest,
user_request: Request,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
meeting = await meetings_controller.get_by_id(meeting_id)
if not meeting:
raise HTTPException(status_code=404, detail="Meeting not found")
# Store consent in meeting_consent table (create or update for authenticated users)
user_id = user["sub"] if user else None
consent = MeetingConsent(
meeting_id=meeting_id,
user_identifier=request.user_identifier,
user_id=user_id,
consent_given=request.consent_given,
consent_timestamp=datetime.utcnow(),
user_agent=user_request.headers.get("user-agent")
)
# Use create_or_update to handle consent overrides for authenticated users
updated_consent = await meeting_consent_controller.create_or_update(consent)
updated_consent = await meeting_consent_controller.upsert(consent)
return {"status": "success", "consent_id": updated_consent.id}

View File

@@ -13,7 +13,7 @@ from reflector.db.meetings import meeting_consent_controller, meetings_controlle
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import SourceKind, transcripts_controller
from reflector.storage import get_storage
from reflector.storage import get_transcripts_storage
from reflector.pipelines.main_live_pipeline import asynctask, task_pipeline_process
from reflector.settings import settings
from reflector.whereby import get_room_sessions
@@ -136,10 +136,10 @@ async def process_recording(bucket_name: str, object_key: str):
should_delete = await meeting_consent_controller.has_any_denial(meeting.id)
if should_delete:
logger.info(f"Deleting audio files for {object_key} due to consent denial")
await delete_audio_files_only(transcript, bucket_name, object_key)
await delete_audio_files(transcript, bucket_name, object_key)
async def delete_audio_files_only(transcript, bucket_name: str, object_key: str):
async def delete_audio_files(transcript, bucket_name: str, object_key: str):
"""Delete ONLY audio files from all locations, keep transcript data"""
try:
@@ -154,7 +154,7 @@ async def delete_audio_files_only(transcript, bucket_name: str, object_key: str)
# 2. Delete processed audio from transcript storage S3 bucket
if transcript.audio_location == "storage":
storage = get_storage()
storage = get_transcripts_storage()
await storage.delete_file(transcript.storage_audio_path)
logger.info(f"Deleted processed audio from storage: {transcript.storage_audio_path}")

View File

@@ -43,54 +43,38 @@ export default function Room(details: RoomDetails) {
router.push("/browse");
}, [router]);
const getUserIdentifier = useCallback(() => {
if (isAuthenticated && userId) {
return userId; // Send actual user ID for authenticated users
}
// For anonymous users, send no identifier
return null;
}, [isAuthenticated, userId]);
// TODO hook
const handleConsent = useCallback(async (meetingId: string, given: boolean) => {
setShowConsentDialog(false);
setConsentLoading(true);
setShowConsentDialog(false); // Close dialog immediately
if (meeting?.response?.id && api_url) {
try {
const userIdentifier = getUserIdentifier();
const requestBody: any = {
consent_given: given
};
// Only include user_identifier if we have one (authenticated users)
if (userIdentifier) {
requestBody.user_identifier = userIdentifier;
}
const response = await fetch(`${api_url}/v1/meetings/${meeting.response.id}/consent`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(accessToken && { 'Authorization': `Bearer ${accessToken}` })
},
body: JSON.stringify(requestBody),
});
if (response.ok) {
touch(meetingId);
} else {
console.error('Failed to submit consent');
}
} catch (error) {
console.error('Error submitting consent:', error);
} finally {
setConsentLoading(false);
try {
const requestBody = {
consent_given: given
};
// TODO generated API
const response = await fetch(`${api_url}/v1/meetings/${meetingId}/consent`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(accessToken && { 'Authorization': `Bearer ${accessToken}` })
},
body: JSON.stringify(requestBody),
});
if (response.ok) {
touch(meetingId);
} else {
console.error('Failed to submit consent');
}
} else {
} catch (error) {
console.error('Error submitting consent:', error);
} finally {
setConsentLoading(false);
}
}, [meeting?.response?.id, api_url, accessToken, touch, getUserIdentifier]);
}, [api_url, accessToken, touch]);
useEffect(() => {