diff --git a/server/migrations/versions/20250618140000_add_audio_deleted_field_to_transcript.py b/server/migrations/versions/20250618140000_add_audio_deleted_field_to_transcript.py new file mode 100644 index 00000000..928e8183 --- /dev/null +++ b/server/migrations/versions/20250618140000_add_audio_deleted_field_to_transcript.py @@ -0,0 +1,25 @@ +"""add audio_deleted field to transcript + +Revision ID: 20250618140000 +Revises: 20250617140003 +Create Date: 2025-06-18 14:00:00.000000 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "20250618140000" +down_revision: Union[str, None] = "20250617140003" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column("transcript", sa.Column("audio_deleted", sa.Boolean(), nullable=True)) + + +def downgrade() -> None: + op.drop_column("transcript", "audio_deleted") \ No newline at end of file diff --git a/server/reflector/db/transcripts.py b/server/reflector/db/transcripts.py index b030cf0e..9157a742 100644 --- a/server/reflector/db/transcripts.py +++ b/server/reflector/db/transcripts.py @@ -70,6 +70,7 @@ transcripts = sqlalchemy.Table( Enum(SourceKind, values_callable=lambda obj: [e.value for e in obj]), nullable=False, ), + sqlalchemy.Column("audio_deleted", sqlalchemy.Boolean, nullable=True), ) def generate_transcript_name() -> str: @@ -157,6 +158,7 @@ class Transcript(BaseModel): recording_id: str | None = None zulip_message_id: int | None = None source_kind: SourceKind + audio_deleted: bool | None = None def add_event(self, event: str, data: BaseModel) -> TranscriptEvent: ev = TranscriptEvent(event=event, data=data.model_dump()) @@ -544,8 +546,14 @@ class TranscriptController: Move mp3 file to storage """ + if transcript.audio_deleted: + raise FileNotFoundError(f"Invalid state of transcript {transcript.id}: audio_deleted mark is 
set to true") + if transcript.audio_location == "local": # store the audio on external storage if it's not already there + if not transcript.audio_mp3_filename.exists(): + raise FileNotFoundError(f"Audio file not found: {transcript.audio_mp3_filename}") + await get_transcripts_storage().put_file( transcript.storage_audio_path, transcript.audio_mp3_filename.read_bytes(), diff --git a/server/reflector/pipelines/main_live_pipeline.py b/server/reflector/pipelines/main_live_pipeline.py index b42bcbad..1896e199 100644 --- a/server/reflector/pipelines/main_live_pipeline.py +++ b/server/reflector/pipelines/main_live_pipeline.py @@ -59,6 +59,13 @@ from reflector.zulip import ( send_message_to_zulip, update_zulip_message, ) + +from reflector.db.meetings import meetings_controller, meeting_consent_controller +from reflector.db.recordings import recordings_controller +from reflector.storage import get_transcripts_storage + +import boto3 + from structlog import BoundLogger as Logger @@ -470,6 +477,7 @@ class PipelineMainWaveform(PipelineMainFromTopics): @get_transcript async def pipeline_remove_upload(transcript: Transcript, logger: Logger): + # for future changes: note that a consent process also happens beforehand, and users may not consent to keeping their files. 
Currently we delete uploads regardless, so no consent check is needed here. logger.info("Starting remove upload") uploads = transcript.data_path.glob("upload.*") for upload in uploads: @@ -520,6 +528,10 @@ async def pipeline_upload_mp3(transcript: Transcript, logger: Logger): logger.info("No storage backend configured, skipping mp3 upload") return + if transcript.audio_deleted: + logger.info("Skipping MP3 upload - audio marked as deleted") + return + logger.info("Starting upload mp3") # If the audio mp3 is not available, just skip @@ -558,6 +570,67 @@ async def pipeline_summaries(transcript: Transcript, logger: Logger): logger.info("Summaries done") +@get_transcript +async def cleanup_consent(transcript: Transcript, logger: Logger): + logger.info("Starting consent cleanup") + + consent_denied = False + recording = None + try: + if transcript.recording_id: + recording = await recordings_controller.get_by_id(transcript.recording_id) + if recording and recording.meeting_id: + meeting = await meetings_controller.get_by_id(recording.meeting_id) + if meeting: + consent_denied = await meeting_consent_controller.has_any_denial(meeting.id) + except Exception as e: + logger.error(f"Failed to fetch consent: {e}") + consent_denied = True + + if not consent_denied: + logger.info("Consent approved, keeping all files") + return + + logger.info("Consent denied, cleaning up all related audio files") + + # 1. 
Delete original Whereby recording from S3 + if recording and recording.s3_bucket and recording.s3_key: + + s3_whereby = boto3.client( + "s3", + aws_access_key_id=settings.AWS_WHEREBY_ACCESS_KEY_ID, + aws_secret_access_key=settings.AWS_WHEREBY_ACCESS_KEY_SECRET, + ) + try: + s3_whereby.delete_object(Bucket=recording.s3_bucket, Key=recording.s3_key) + logger.info(f"Deleted original Whereby recording: {recording.s3_bucket}/{recording.s3_key}") + except Exception as e: + logger.error(f"Failed to delete Whereby recording: {e}") + + # non-transactional: the audio_deleted flag may be set here even though some of the deletions below fail + await transcripts_controller.update(transcript, {"audio_deleted": True}) + # 2. Delete processed audio from transcript storage S3 bucket + if transcript.audio_location == "storage": + + storage = get_transcripts_storage() + try: + await storage.delete_file(transcript.storage_audio_path) + logger.info(f"Deleted processed audio from storage: {transcript.storage_audio_path}") + except Exception as e: + logger.error(f"Failed to delete processed audio: {e}") + + # 3. 
Delete local audio files + try: + if hasattr(transcript, 'audio_mp3_filename') and transcript.audio_mp3_filename: + transcript.audio_mp3_filename.unlink(missing_ok=True) + if hasattr(transcript, 'audio_wav_filename') and transcript.audio_wav_filename: + transcript.audio_wav_filename.unlink(missing_ok=True) + except Exception as e: + logger.error(f"Failed to delete local audio files: {e}") + + logger.info("Consent cleanup done") + + @get_transcript async def pipeline_post_to_zulip(transcript: Transcript, logger: Logger): logger.info("Starting post to zulip") @@ -659,6 +732,12 @@ async def task_pipeline_final_summaries(*, transcript_id: str): await pipeline_summaries(transcript_id=transcript_id) +@shared_task +@asynctask +async def task_cleanup_consent(*, transcript_id: str): + await cleanup_consent(transcript_id=transcript_id) + + @shared_task @asynctask async def task_pipeline_post_to_zulip(*, transcript_id: str): @@ -675,6 +754,7 @@ def pipeline_post(*, transcript_id: str): | task_pipeline_upload_mp3.si(transcript_id=transcript_id) | task_pipeline_remove_upload.si(transcript_id=transcript_id) | task_pipeline_diarization.si(transcript_id=transcript_id) + | task_cleanup_consent.si(transcript_id=transcript_id) ) chain_title_preview = task_pipeline_title.si(transcript_id=transcript_id) chain_final_summaries = task_pipeline_final_summaries.si( diff --git a/server/reflector/views/_range_requests_response.py b/server/reflector/views/_range_requests_response.py index 2fac632d..f74529a0 100644 --- a/server/reflector/views/_range_requests_response.py +++ b/server/reflector/views/_range_requests_response.py @@ -43,6 +43,10 @@ def range_requests_response( ): """Returns StreamingResponse using Range Requests of a given file""" + if not os.path.exists(file_path): + from fastapi import HTTPException + raise HTTPException(status_code=404, detail="File not found") + file_size = os.stat(file_path).st_size range_header = request.headers.get("range") diff --git 
a/server/reflector/views/transcripts.py b/server/reflector/views/transcripts.py index b6e56c44..27d6188e 100644 --- a/server/reflector/views/transcripts.py +++ b/server/reflector/views/transcripts.py @@ -65,6 +65,7 @@ class GetTranscript(BaseModel): source_kind: SourceKind room_id: str | None = None room_name: str | None = None + audio_deleted: bool | None = None class CreateTranscript(BaseModel): @@ -82,6 +83,7 @@ class UpdateTranscript(BaseModel): share_mode: Optional[Literal["public", "semi-private", "private"]] = Field(None) participants: Optional[list[TranscriptParticipant]] = Field(None) reviewed: Optional[bool] = Field(None) + audio_deleted: Optional[bool] = Field(None) class DeletionStatus(BaseModel): diff --git a/server/reflector/views/transcripts_audio.py b/server/reflector/views/transcripts_audio.py index 45d4eccc..0c177163 100644 --- a/server/reflector/views/transcripts_audio.py +++ b/server/reflector/views/transcripts_audio.py @@ -86,8 +86,11 @@ async def transcript_get_audio_mp3( headers=resp.headers, ) + if transcript.audio_deleted: + raise HTTPException(status_code=404, detail="Audio unavailable due to privacy settings") + if not transcript.audio_mp3_filename.exists(): - raise HTTPException(status_code=500, detail="Audio not found") + raise HTTPException(status_code=404, detail="Audio file not found") truncated_id = str(transcript.id).split("-")[0] filename = f"recording_{truncated_id}.mp3" diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py index c61a4f35..5697f075 100644 --- a/server/reflector/worker/process.py +++ b/server/reflector/worker/process.py @@ -9,7 +9,7 @@ import structlog from celery import shared_task from celery.utils.log import get_task_logger from pydantic import ValidationError -from reflector.db.meetings import meeting_consent_controller, meetings_controller +from reflector.db.meetings import meetings_controller from reflector.db.recordings import Recording, recordings_controller from 
reflector.db.rooms import rooms_controller from reflector.db.transcripts import SourceKind, transcripts_controller @@ -131,52 +131,6 @@ async def process_recording(bucket_name: str, object_key: str): await transcripts_controller.update(transcript, {"status": "uploaded"}) task_pipeline_process.delay(transcript_id=transcript.id) - - # Check if any participant denied consent after transcript processing is complete - should_delete = await meeting_consent_controller.has_any_denial(meeting.id) - if should_delete: - logger.info(f"Deleting audio files for {object_key} due to consent denial") - await delete_audio_files(transcript, bucket_name, object_key) - - -async def delete_audio_files(transcript, bucket_name: str, object_key: str): - """Delete ONLY audio files from all locations, keep transcript data""" - - try: - # 1. Delete original Whereby recording from S3 - s3_whereby = boto3.client( - "s3", - aws_access_key_id=settings.AWS_WHEREBY_ACCESS_KEY_ID, - aws_secret_access_key=settings.AWS_WHEREBY_ACCESS_KEY_SECRET, - ) - s3_whereby.delete_object(Bucket=bucket_name, Key=object_key) - logger.info(f"Deleted original Whereby recording: {bucket_name}/{object_key}") - - # 2. Delete processed audio from transcript storage S3 bucket - if transcript.audio_location == "storage": - storage = get_transcripts_storage() - await storage.delete_file(transcript.storage_audio_path) - logger.info(f"Deleted processed audio from storage: {transcript.storage_audio_path}") - - # 3. Delete local audio files (if any remain) - if hasattr(transcript, 'audio_mp3_filename') and transcript.audio_mp3_filename: - transcript.audio_mp3_filename.unlink(missing_ok=True) - if hasattr(transcript, 'audio_wav_filename') and transcript.audio_wav_filename: - transcript.audio_wav_filename.unlink(missing_ok=True) - - upload_path = transcript.data_path / f"upload{os.path.splitext(object_key)[1]}" - upload_path.unlink(missing_ok=True) - - # 4. 
Update transcript to reflect audio deletion (keep all other data) - await transcripts_controller.update(transcript, { - 'audio_location_deleted': True - }) - - logger.info(f"Deleted all audio files for transcript {transcript.id}, kept transcript data") - - except Exception as e: - logger.error(f"Failed to delete audio files for {object_key}: {str(e)}") - @shared_task @asynctask diff --git a/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx b/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx index bdd9e5ea..830a7c69 100644 --- a/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx +++ b/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx @@ -183,7 +183,18 @@ const TopicPlayer = ({ setIsPlaying(false); }; - const isLoaded = !!(mp3.media && topicTime); + const isLoaded = !!(mp3.media && !mp3.loading && topicTime); + const error = mp3.error; + if (error !== null) { + return + Loading error: {error} + + } + if (mp3.audioDeleted) { + return + The audio for this topic has been deleted. 
+ + } return ( + ); + } + if (transcript?.loading || topics?.loading) { return ; } + if (mp3.error) { + return ( + + ); + } + + + return ( <> { const webSockets = useWebSockets(details.params.transcriptId); - let mp3 = useMp3(details.params.transcriptId, true); + const mp3 = useMp3(details.params.transcriptId, true); const router = useRouter(); diff --git a/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx b/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx index c6e9eb69..bc9ff77a 100644 --- a/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx +++ b/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx @@ -21,7 +21,7 @@ const TranscriptUpload = (details: TranscriptUpload) => { const webSockets = useWebSockets(details.params.transcriptId); - let mp3 = useMp3(details.params.transcriptId, true); + const mp3 = useMp3(details.params.transcriptId, true); const router = useRouter(); diff --git a/www/app/(app)/transcripts/useMp3.ts b/www/app/(app)/transcripts/useMp3.ts index 178c5089..2a6a8faa 100644 --- a/www/app/(app)/transcripts/useMp3.ts +++ b/www/app/(app)/transcripts/useMp3.ts @@ -5,13 +5,19 @@ import getApi from "../../lib/useApi"; export type Mp3Response = { media: HTMLMediaElement | null; loading: boolean; + error: string | null; getNow: () => void; + audioDeleted: boolean | null; }; -const useMp3 = (id: string, waiting?: boolean): Mp3Response => { +const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => { const [media, setMedia] = useState(null); const [later, setLater] = useState(waiting); - const [loading, setLoading] = useState(false); + const [audioLoading, setAudioLoading] = useState(true); + const [audioLoadingError, setAudioLoadingError] = useState(null); + const [transcriptMetadataLoading, setTranscriptMetadataLoading] = useState(true); + const [transcriptMetadataLoadingError, setTranscriptMetadataLoadingError] = useState(null); + const [audioDeleted, setAudioDeleted] = useState(null); const api = getApi(); const { 
api_url } = useContext(DomainContext); const accessTokenInfo = api?.httpRequest?.config?.TOKEN; @@ -42,23 +48,69 @@ const useMp3 = (id: string, waiting?: boolean): Mp3Response => { }, [navigator.serviceWorker, !serviceWorker, accessTokenInfo]); useEffect(() => { - if (!id || !api || later) return; + if (!transcriptId || !api || later) return; - // createa a audio element and set the source - setLoading(true); + + setTranscriptMetadataLoading(true); + const audioElement = document.createElement("audio"); - audioElement.src = `${api_url}/v1/transcripts/${id}/audio/mp3`; + audioElement.src = `${api_url}/v1/transcripts/${transcriptId}/audio/mp3`; audioElement.crossOrigin = "anonymous"; audioElement.preload = "auto"; + + const handleCanPlay = () => { + setAudioLoading(false); + setAudioLoadingError(null); + }; + + const handleError = () => { + setAudioLoading(false); + setAudioLoadingError("Failed to load audio"); + }; + + audioElement.addEventListener('canplay', handleCanPlay); + audioElement.addEventListener('error', handleError); + setMedia(audioElement); - setLoading(false); - }, [id, !api, later]); + + + setAudioLoading(true); + + let stopped = false; + // Fetch transcript info in parallel + api.v1TranscriptGet({ transcriptId }) + .then((transcript) => { + if (stopped) return; + setAudioDeleted(transcript.audio_deleted || false); + setTranscriptMetadataLoadingError(null); + }) + .catch((error) => { + if (stopped) return; + console.error("Failed to fetch transcript:", error); + setAudioDeleted(null); + setTranscriptMetadataLoadingError(error.message); + }) + .finally(() => { + if (stopped) return; + setTranscriptMetadataLoading(false); + }) + + // Cleanup + return () => { + stopped = true; + audioElement.removeEventListener('canplay', handleCanPlay); + audioElement.removeEventListener('error', handleError); + }; + }, [transcriptId, !api, later, api_url]); const getNow = () => { setLater(false); }; - return { media, loading, getNow }; + const loading = audioLoading 
|| transcriptMetadataLoading; + const error = audioLoadingError || transcriptMetadataLoadingError; + + return { media, loading, error, getNow, audioDeleted }; }; export default useMp3; diff --git a/www/app/[roomName]/page.tsx b/www/app/[roomName]/page.tsx index 4ca65107..30b4138c 100644 --- a/www/app/[roomName]/page.tsx +++ b/www/app/[roomName]/page.tsx @@ -2,12 +2,11 @@ import "@whereby.com/browser-sdk/embed"; import { useCallback, useEffect, useRef, useState, useContext } from "react"; -import { Box, Button, Text, VStack, HStack, Spinner } from "@chakra-ui/react"; +import { Box, Button, Text, VStack, HStack, Spinner, useToast } from "@chakra-ui/react"; import useRoomMeeting from "./useRoomMeeting"; import { useRouter } from "next/navigation"; import { notFound } from "next/navigation"; import useSessionStatus from "../lib/useSessionStatus"; -import AudioConsentDialog from "../(app)/rooms/audioConsentDialog"; import { DomainContext } from "../domainContext"; import { useRecordingConsent } from "../recordingConsentContext"; import useSessionAccessToken from "../lib/useSessionAccessToken"; @@ -26,13 +25,13 @@ export default function Room(details: RoomDetails) { const meeting = useRoomMeeting(roomName); const router = useRouter(); const { isLoading, isAuthenticated } = useSessionStatus(); - const [showConsentDialog, setShowConsentDialog] = useState(false); const [consentLoading, setConsentLoading] = useState(false); const { state: consentState, touch, hasConsent } = useRecordingConsent(); const { api_url } = useContext(DomainContext); const { accessToken } = useSessionAccessToken(); const { id: userId } = useSessionUser(); const api = useApi(); + const toast = useToast(); const roomUrl = meeting?.response?.host_room_url @@ -45,10 +44,10 @@ export default function Room(details: RoomDetails) { router.push("/browse"); }, [router]); - const handleConsent = useCallback(async (meetingId: string, given: boolean) => { + const handleConsent = useCallback(async (meetingId: 
string, given: boolean, onClose?: () => void) => { if (!api) return; - setShowConsentDialog(false); + if (onClose) onClose(); setConsentLoading(true); try { @@ -77,18 +76,49 @@ export default function Room(details: RoomDetails) { } }, [isLoading, meeting?.error]); - // Show consent dialog when meeting is loaded and consent hasn't been answered yet + // Show consent toast when meeting is loaded and consent hasn't been answered yet useEffect(() => { if ( consentState.ready && meetingId && !hasConsent(meetingId) && - !showConsentDialog && !consentLoading ) { - setShowConsentDialog(true); + const toastId = toast({ + position: "top", + duration: null, + render: ({ onClose }) => ( + + + + Can we have your permission to store this meeting's audio recording on our servers? + + + + + + + + ), + }); + + return () => { + toast.close(toastId); + }; } - }, [consentState.ready, meetingId, hasConsent, showConsentDialog, consentLoading]); + }, [consentState.ready, meetingId, hasConsent, consentLoading, toast, handleConsent]); useEffect(() => { if (isLoading || !isAuthenticated || !roomUrl) return; @@ -131,13 +161,6 @@ export default function Room(details: RoomDetails) { style={{ width: "100vw", height: "100vh" }} /> )} - {meetingId && consentState.ready && !hasConsent(meetingId) && !consentLoading && ( - {}} // No-op: ESC should not close without consent - onConsent={b => handleConsent(meetingId, b)} - /> - )} ); } diff --git a/www/app/api/schemas.gen.ts b/www/app/api/schemas.gen.ts index 359b6922..fb0e65a3 100644 --- a/www/app/api/schemas.gen.ts +++ b/www/app/api/schemas.gen.ts @@ -293,6 +293,17 @@ export const $GetTranscript = { ], title: "Room Name", }, + audio_deleted: { + anyOf: [ + { + type: "boolean", + }, + { + type: "null", + }, + ], + title: "Audio Deleted", + }, }, type: "object", required: [ @@ -1109,6 +1120,17 @@ export const $UpdateTranscript = { ], title: "Reviewed", }, + audio_deleted: { + anyOf: [ + { + type: "boolean", + }, + { + type: "null", + }, + ], + title: 
"Audio Deleted", + }, }, type: "object", title: "UpdateTranscript", diff --git a/www/app/api/types.gen.ts b/www/app/api/types.gen.ts index ef9ec43d..c47eef74 100644 --- a/www/app/api/types.gen.ts +++ b/www/app/api/types.gen.ts @@ -56,6 +56,7 @@ export type GetTranscript = { source_kind: SourceKind; room_id?: string | null; room_name?: string | null; + audio_deleted?: boolean | null; }; export type GetTranscriptSegmentTopic = { @@ -219,6 +220,7 @@ export type UpdateTranscript = { share_mode?: "public" | "semi-private" | "private" | null; participants?: Array | null; reviewed?: boolean | null; + audio_deleted?: boolean | null; }; export type UserInfo = {