diff --git a/server/migrations/versions/20250618140000_add_audio_deleted_field_to_transcript.py b/server/migrations/versions/20250618140000_add_audio_deleted_field_to_transcript.py
new file mode 100644
index 00000000..928e8183
--- /dev/null
+++ b/server/migrations/versions/20250618140000_add_audio_deleted_field_to_transcript.py
@@ -0,0 +1,25 @@
+"""add audio_deleted field to transcript
+
+Revision ID: 20250618140000
+Revises: 20250617140003
+Create Date: 2025-06-18 14:00:00.000000
+
+"""
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "20250618140000"
+down_revision: Union[str, None] = "20250617140003"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+ op.add_column("transcript", sa.Column("audio_deleted", sa.Boolean(), nullable=True))
+
+
+def downgrade() -> None:
+ op.drop_column("transcript", "audio_deleted")
\ No newline at end of file
diff --git a/server/reflector/db/transcripts.py b/server/reflector/db/transcripts.py
index b030cf0e..9157a742 100644
--- a/server/reflector/db/transcripts.py
+++ b/server/reflector/db/transcripts.py
@@ -70,6 +70,7 @@ transcripts = sqlalchemy.Table(
Enum(SourceKind, values_callable=lambda obj: [e.value for e in obj]),
nullable=False,
),
+ sqlalchemy.Column("audio_deleted", sqlalchemy.Boolean, nullable=True),
)
def generate_transcript_name() -> str:
@@ -157,6 +158,7 @@ class Transcript(BaseModel):
recording_id: str | None = None
zulip_message_id: int | None = None
source_kind: SourceKind
+ audio_deleted: bool | None = None
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
ev = TranscriptEvent(event=event, data=data.model_dump())
@@ -544,8 +546,14 @@ class TranscriptController:
Move mp3 file to storage
"""
+ if transcript.audio_deleted:
+ raise FileNotFoundError(f"Invalid state of transcript {transcript.id}: audio_deleted mark is set true")
+
if transcript.audio_location == "local":
# store the audio on external storage if it's not already there
+ if not transcript.audio_mp3_filename.exists():
+ raise FileNotFoundError(f"Audio file not found: {transcript.audio_mp3_filename}")
+
await get_transcripts_storage().put_file(
transcript.storage_audio_path,
transcript.audio_mp3_filename.read_bytes(),
diff --git a/server/reflector/pipelines/main_live_pipeline.py b/server/reflector/pipelines/main_live_pipeline.py
index b42bcbad..1896e199 100644
--- a/server/reflector/pipelines/main_live_pipeline.py
+++ b/server/reflector/pipelines/main_live_pipeline.py
@@ -59,6 +59,13 @@ from reflector.zulip import (
send_message_to_zulip,
update_zulip_message,
)
+
+from reflector.db.meetings import meetings_controller, meeting_consent_controller
+from reflector.db.recordings import recordings_controller
+from reflector.storage import get_transcripts_storage
+
+import boto3
+
from structlog import BoundLogger as Logger
@@ -470,6 +477,7 @@ class PipelineMainWaveform(PipelineMainFromTopics):
@get_transcript
async def pipeline_remove_upload(transcript: Transcript, logger: Logger):
+ # for future changes: note that a consent process also happens beforehand, and users may not consent to keeping their files. We currently delete regardless of consent outcome, so there is no need to check it here.
logger.info("Starting remove upload")
uploads = transcript.data_path.glob("upload.*")
for upload in uploads:
@@ -520,6 +528,10 @@ async def pipeline_upload_mp3(transcript: Transcript, logger: Logger):
logger.info("No storage backend configured, skipping mp3 upload")
return
+ if transcript.audio_deleted:
+ logger.info("Skipping MP3 upload - audio marked as deleted")
+ return
+
logger.info("Starting upload mp3")
# If the audio mp3 is not available, just skip
@@ -558,6 +570,67 @@ async def pipeline_summaries(transcript: Transcript, logger: Logger):
logger.info("Summaries done")
+@get_transcript
+async def cleanup_consent(transcript: Transcript, logger: Logger):
+ logger.info("Starting consent cleanup")
+
+ consent_denied = False
+ recording = None
+ try:
+ if transcript.recording_id:
+ recording = await recordings_controller.get_by_id(transcript.recording_id)
+ if recording and recording.meeting_id:
+ meeting = await meetings_controller.get_by_id(recording.meeting_id)
+ if meeting:
+ consent_denied = await meeting_consent_controller.has_any_denial(meeting.id)
+ except Exception as e:
+ logger.error(f"Failed to fetch consent: {e}")
+ consent_denied = True
+
+ if not consent_denied:
+ logger.info("Consent approved, keeping all files")
+ return
+
+ logger.info("Consent denied, cleaning up all related audio files")
+
+ # 1. Delete original Whereby recording from S3
+ if recording and recording.s3_bucket and recording.s3_key:
+
+ s3_whereby = boto3.client(
+ "s3",
+ aws_access_key_id=settings.AWS_WHEREBY_ACCESS_KEY_ID,
+ aws_secret_access_key=settings.AWS_WHEREBY_ACCESS_KEY_SECRET,
+ )
+ try:
+ s3_whereby.delete_object(Bucket=recording.s3_bucket, Key=recording.s3_key)
+ logger.info(f"Deleted original Whereby recording: {recording.s3_bucket}/{recording.s3_key}")
+ except Exception as e:
+ logger.error(f"Failed to delete Whereby recording: {e}")
+
+ # non-transactional: the transcript may end up marked audio_deleted even if a later deletion step below fails
+ await transcripts_controller.update(transcript, {"audio_deleted": True})
+ # 2. Delete processed audio from transcript storage S3 bucket
+ if transcript.audio_location == "storage":
+
+ storage = get_transcripts_storage()
+ try:
+ await storage.delete_file(transcript.storage_audio_path)
+ logger.info(f"Deleted processed audio from storage: {transcript.storage_audio_path}")
+ except Exception as e:
+ logger.error(f"Failed to delete processed audio: {e}")
+
+ # 3. Delete local audio files
+ try:
+ if hasattr(transcript, 'audio_mp3_filename') and transcript.audio_mp3_filename:
+ transcript.audio_mp3_filename.unlink(missing_ok=True)
+ if hasattr(transcript, 'audio_wav_filename') and transcript.audio_wav_filename:
+ transcript.audio_wav_filename.unlink(missing_ok=True)
+ except Exception as e:
+ logger.error(f"Failed to delete local audio files: {e}")
+
+ logger.info("Consent cleanup done")
+
+
@get_transcript
async def pipeline_post_to_zulip(transcript: Transcript, logger: Logger):
logger.info("Starting post to zulip")
@@ -659,6 +732,12 @@ async def task_pipeline_final_summaries(*, transcript_id: str):
await pipeline_summaries(transcript_id=transcript_id)
+@shared_task
+@asynctask
+async def task_cleanup_consent(*, transcript_id: str):
+ await cleanup_consent(transcript_id=transcript_id)
+
+
@shared_task
@asynctask
async def task_pipeline_post_to_zulip(*, transcript_id: str):
@@ -675,6 +754,7 @@ def pipeline_post(*, transcript_id: str):
| task_pipeline_upload_mp3.si(transcript_id=transcript_id)
| task_pipeline_remove_upload.si(transcript_id=transcript_id)
| task_pipeline_diarization.si(transcript_id=transcript_id)
+ | task_cleanup_consent.si(transcript_id=transcript_id)
)
chain_title_preview = task_pipeline_title.si(transcript_id=transcript_id)
chain_final_summaries = task_pipeline_final_summaries.si(
diff --git a/server/reflector/views/_range_requests_response.py b/server/reflector/views/_range_requests_response.py
index 2fac632d..f74529a0 100644
--- a/server/reflector/views/_range_requests_response.py
+++ b/server/reflector/views/_range_requests_response.py
@@ -43,6 +43,10 @@ def range_requests_response(
):
"""Returns StreamingResponse using Range Requests of a given file"""
+ if not os.path.exists(file_path):
+ from fastapi import HTTPException
+ raise HTTPException(status_code=404, detail="File not found")
+
file_size = os.stat(file_path).st_size
range_header = request.headers.get("range")
diff --git a/server/reflector/views/transcripts.py b/server/reflector/views/transcripts.py
index b6e56c44..27d6188e 100644
--- a/server/reflector/views/transcripts.py
+++ b/server/reflector/views/transcripts.py
@@ -65,6 +65,7 @@ class GetTranscript(BaseModel):
source_kind: SourceKind
room_id: str | None = None
room_name: str | None = None
+ audio_deleted: bool | None = None
class CreateTranscript(BaseModel):
@@ -82,6 +83,7 @@ class UpdateTranscript(BaseModel):
share_mode: Optional[Literal["public", "semi-private", "private"]] = Field(None)
participants: Optional[list[TranscriptParticipant]] = Field(None)
reviewed: Optional[bool] = Field(None)
+ audio_deleted: Optional[bool] = Field(None)
class DeletionStatus(BaseModel):
diff --git a/server/reflector/views/transcripts_audio.py b/server/reflector/views/transcripts_audio.py
index 45d4eccc..0c177163 100644
--- a/server/reflector/views/transcripts_audio.py
+++ b/server/reflector/views/transcripts_audio.py
@@ -86,8 +86,11 @@ async def transcript_get_audio_mp3(
headers=resp.headers,
)
+ if transcript.audio_deleted:
+ raise HTTPException(status_code=404, detail="Audio unavailable due to privacy settings")
+
if not transcript.audio_mp3_filename.exists():
- raise HTTPException(status_code=500, detail="Audio not found")
+ raise HTTPException(status_code=404, detail="Audio file not found")
truncated_id = str(transcript.id).split("-")[0]
filename = f"recording_{truncated_id}.mp3"
diff --git a/server/reflector/worker/process.py b/server/reflector/worker/process.py
index c61a4f35..5697f075 100644
--- a/server/reflector/worker/process.py
+++ b/server/reflector/worker/process.py
@@ -9,7 +9,7 @@ import structlog
from celery import shared_task
from celery.utils.log import get_task_logger
from pydantic import ValidationError
-from reflector.db.meetings import meeting_consent_controller, meetings_controller
+from reflector.db.meetings import meetings_controller
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import SourceKind, transcripts_controller
@@ -131,52 +131,6 @@ async def process_recording(bucket_name: str, object_key: str):
await transcripts_controller.update(transcript, {"status": "uploaded"})
task_pipeline_process.delay(transcript_id=transcript.id)
-
- # Check if any participant denied consent after transcript processing is complete
- should_delete = await meeting_consent_controller.has_any_denial(meeting.id)
- if should_delete:
- logger.info(f"Deleting audio files for {object_key} due to consent denial")
- await delete_audio_files(transcript, bucket_name, object_key)
-
-
-async def delete_audio_files(transcript, bucket_name: str, object_key: str):
- """Delete ONLY audio files from all locations, keep transcript data"""
-
- try:
- # 1. Delete original Whereby recording from S3
- s3_whereby = boto3.client(
- "s3",
- aws_access_key_id=settings.AWS_WHEREBY_ACCESS_KEY_ID,
- aws_secret_access_key=settings.AWS_WHEREBY_ACCESS_KEY_SECRET,
- )
- s3_whereby.delete_object(Bucket=bucket_name, Key=object_key)
- logger.info(f"Deleted original Whereby recording: {bucket_name}/{object_key}")
-
- # 2. Delete processed audio from transcript storage S3 bucket
- if transcript.audio_location == "storage":
- storage = get_transcripts_storage()
- await storage.delete_file(transcript.storage_audio_path)
- logger.info(f"Deleted processed audio from storage: {transcript.storage_audio_path}")
-
- # 3. Delete local audio files (if any remain)
- if hasattr(transcript, 'audio_mp3_filename') and transcript.audio_mp3_filename:
- transcript.audio_mp3_filename.unlink(missing_ok=True)
- if hasattr(transcript, 'audio_wav_filename') and transcript.audio_wav_filename:
- transcript.audio_wav_filename.unlink(missing_ok=True)
-
- upload_path = transcript.data_path / f"upload{os.path.splitext(object_key)[1]}"
- upload_path.unlink(missing_ok=True)
-
- # 4. Update transcript to reflect audio deletion (keep all other data)
- await transcripts_controller.update(transcript, {
- 'audio_location_deleted': True
- })
-
- logger.info(f"Deleted all audio files for transcript {transcript.id}, kept transcript data")
-
- except Exception as e:
- logger.error(f"Failed to delete audio files for {object_key}: {str(e)}")
-
@shared_task
@asynctask
diff --git a/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx b/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx
index bdd9e5ea..830a7c69 100644
--- a/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx
+++ b/www/app/(app)/transcripts/[transcriptId]/correct/topicPlayer.tsx
@@ -183,7 +183,18 @@ const TopicPlayer = ({
setIsPlaying(false);
};
- const isLoaded = !!(mp3.media && topicTime);
+ const isLoaded = !!(mp3.media && !mp3.loading && topicTime);
+ const error = mp3.error;
+ if (error !== null) {
+ return
+ Loading error: {error}
+
+ }
+ if (mp3.audioDeleted) {
+ return
+ This topic file has been deleted.
+
+ }
return (
+ );
+ }
+
if (transcript?.loading || topics?.loading) {
return ;
}
+ if (mp3.error) {
+ return (
+
+ );
+ }
+
+
+
return (
<>
{
const webSockets = useWebSockets(details.params.transcriptId);
- let mp3 = useMp3(details.params.transcriptId, true);
+ const mp3 = useMp3(details.params.transcriptId, true);
const router = useRouter();
diff --git a/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx b/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx
index c6e9eb69..bc9ff77a 100644
--- a/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx
+++ b/www/app/(app)/transcripts/[transcriptId]/upload/page.tsx
@@ -21,7 +21,7 @@ const TranscriptUpload = (details: TranscriptUpload) => {
const webSockets = useWebSockets(details.params.transcriptId);
- let mp3 = useMp3(details.params.transcriptId, true);
+ const mp3 = useMp3(details.params.transcriptId, true);
const router = useRouter();
diff --git a/www/app/(app)/transcripts/useMp3.ts b/www/app/(app)/transcripts/useMp3.ts
index 178c5089..2a6a8faa 100644
--- a/www/app/(app)/transcripts/useMp3.ts
+++ b/www/app/(app)/transcripts/useMp3.ts
@@ -5,13 +5,19 @@ import getApi from "../../lib/useApi";
export type Mp3Response = {
media: HTMLMediaElement | null;
loading: boolean;
+ error: string | null;
getNow: () => void;
+ audioDeleted: boolean | null;
};
-const useMp3 = (id: string, waiting?: boolean): Mp3Response => {
+const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => {
const [media, setMedia] = useState(null);
const [later, setLater] = useState(waiting);
- const [loading, setLoading] = useState(false);
+ const [audioLoading, setAudioLoading] = useState(true);
+ const [audioLoadingError, setAudioLoadingError] = useState(null);
+ const [transcriptMetadataLoading, setTranscriptMetadataLoading] = useState(true);
+ const [transcriptMetadataLoadingError, setTranscriptMetadataLoadingError] = useState(null);
+ const [audioDeleted, setAudioDeleted] = useState(null);
const api = getApi();
const { api_url } = useContext(DomainContext);
const accessTokenInfo = api?.httpRequest?.config?.TOKEN;
@@ -42,23 +48,69 @@ const useMp3 = (id: string, waiting?: boolean): Mp3Response => {
}, [navigator.serviceWorker, !serviceWorker, accessTokenInfo]);
useEffect(() => {
- if (!id || !api || later) return;
+ if (!transcriptId || !api || later) return;
- // createa a audio element and set the source
- setLoading(true);
+
+ setTranscriptMetadataLoading(true);
+
const audioElement = document.createElement("audio");
- audioElement.src = `${api_url}/v1/transcripts/${id}/audio/mp3`;
+ audioElement.src = `${api_url}/v1/transcripts/${transcriptId}/audio/mp3`;
audioElement.crossOrigin = "anonymous";
audioElement.preload = "auto";
+
+ const handleCanPlay = () => {
+ setAudioLoading(false);
+ setAudioLoadingError(null);
+ };
+
+ const handleError = () => {
+ setAudioLoading(false);
+ setAudioLoadingError("Failed to load audio");
+ };
+
+ audioElement.addEventListener('canplay', handleCanPlay);
+ audioElement.addEventListener('error', handleError);
+
setMedia(audioElement);
- setLoading(false);
- }, [id, !api, later]);
+
+
+ setAudioLoading(true);
+
+ let stopped = false;
+ // Fetch transcript info in parallel
+ api.v1TranscriptGet({ transcriptId })
+ .then((transcript) => {
+ if (stopped) return;
+ setAudioDeleted(transcript.audio_deleted || false);
+ setTranscriptMetadataLoadingError(null);
+ })
+ .catch((error) => {
+ if (stopped) return;
+ console.error("Failed to fetch transcript:", error);
+ setAudioDeleted(null);
+ setTranscriptMetadataLoadingError(error.message);
+ })
+ .finally(() => {
+ if (stopped) return;
+ setTranscriptMetadataLoading(false);
+ })
+
+ // Cleanup
+ return () => {
+ stopped = true;
+ audioElement.removeEventListener('canplay', handleCanPlay);
+ audioElement.removeEventListener('error', handleError);
+ };
+ }, [transcriptId, !api, later, api_url]);
const getNow = () => {
setLater(false);
};
- return { media, loading, getNow };
+ const loading = audioLoading || transcriptMetadataLoading;
+ const error = audioLoadingError || transcriptMetadataLoadingError;
+
+ return { media, loading, error, getNow, audioDeleted };
};
export default useMp3;
diff --git a/www/app/[roomName]/page.tsx b/www/app/[roomName]/page.tsx
index 4ca65107..30b4138c 100644
--- a/www/app/[roomName]/page.tsx
+++ b/www/app/[roomName]/page.tsx
@@ -2,12 +2,11 @@
import "@whereby.com/browser-sdk/embed";
import { useCallback, useEffect, useRef, useState, useContext } from "react";
-import { Box, Button, Text, VStack, HStack, Spinner } from "@chakra-ui/react";
+import { Box, Button, Text, VStack, HStack, Spinner, useToast } from "@chakra-ui/react";
import useRoomMeeting from "./useRoomMeeting";
import { useRouter } from "next/navigation";
import { notFound } from "next/navigation";
import useSessionStatus from "../lib/useSessionStatus";
-import AudioConsentDialog from "../(app)/rooms/audioConsentDialog";
import { DomainContext } from "../domainContext";
import { useRecordingConsent } from "../recordingConsentContext";
import useSessionAccessToken from "../lib/useSessionAccessToken";
@@ -26,13 +25,13 @@ export default function Room(details: RoomDetails) {
const meeting = useRoomMeeting(roomName);
const router = useRouter();
const { isLoading, isAuthenticated } = useSessionStatus();
- const [showConsentDialog, setShowConsentDialog] = useState(false);
const [consentLoading, setConsentLoading] = useState(false);
const { state: consentState, touch, hasConsent } = useRecordingConsent();
const { api_url } = useContext(DomainContext);
const { accessToken } = useSessionAccessToken();
const { id: userId } = useSessionUser();
const api = useApi();
+ const toast = useToast();
const roomUrl = meeting?.response?.host_room_url
@@ -45,10 +44,10 @@ export default function Room(details: RoomDetails) {
router.push("/browse");
}, [router]);
- const handleConsent = useCallback(async (meetingId: string, given: boolean) => {
+ const handleConsent = useCallback(async (meetingId: string, given: boolean, onClose?: () => void) => {
if (!api) return;
- setShowConsentDialog(false);
+ if (onClose) onClose();
setConsentLoading(true);
try {
@@ -77,18 +76,49 @@ export default function Room(details: RoomDetails) {
}
}, [isLoading, meeting?.error]);
- // Show consent dialog when meeting is loaded and consent hasn't been answered yet
+ // Show consent toast when meeting is loaded and consent hasn't been answered yet
useEffect(() => {
if (
consentState.ready &&
meetingId &&
!hasConsent(meetingId) &&
- !showConsentDialog &&
!consentLoading
) {
- setShowConsentDialog(true);
+ const toastId = toast({
+ position: "top",
+ duration: null,
+ render: ({ onClose }) => (
+
+
+
+ Can we have your permission to store this meeting's audio recording on our servers?
+
+
+
+
+
+
+
+ ),
+ });
+
+ return () => {
+ toast.close(toastId);
+ };
}
- }, [consentState.ready, meetingId, hasConsent, showConsentDialog, consentLoading]);
+ }, [consentState.ready, meetingId, hasConsent, consentLoading, toast, handleConsent]);
useEffect(() => {
if (isLoading || !isAuthenticated || !roomUrl) return;
@@ -131,13 +161,6 @@ export default function Room(details: RoomDetails) {
style={{ width: "100vw", height: "100vh" }}
/>
)}
- {meetingId && consentState.ready && !hasConsent(meetingId) && !consentLoading && (
- {}} // No-op: ESC should not close without consent
- onConsent={b => handleConsent(meetingId, b)}
- />
- )}
>
);
}
diff --git a/www/app/api/schemas.gen.ts b/www/app/api/schemas.gen.ts
index 359b6922..fb0e65a3 100644
--- a/www/app/api/schemas.gen.ts
+++ b/www/app/api/schemas.gen.ts
@@ -293,6 +293,17 @@ export const $GetTranscript = {
],
title: "Room Name",
},
+ audio_deleted: {
+ anyOf: [
+ {
+ type: "boolean",
+ },
+ {
+ type: "null",
+ },
+ ],
+ title: "Audio Deleted",
+ },
},
type: "object",
required: [
@@ -1109,6 +1120,17 @@ export const $UpdateTranscript = {
],
title: "Reviewed",
},
+ audio_deleted: {
+ anyOf: [
+ {
+ type: "boolean",
+ },
+ {
+ type: "null",
+ },
+ ],
+ title: "Audio Deleted",
+ },
},
type: "object",
title: "UpdateTranscript",
diff --git a/www/app/api/types.gen.ts b/www/app/api/types.gen.ts
index ef9ec43d..c47eef74 100644
--- a/www/app/api/types.gen.ts
+++ b/www/app/api/types.gen.ts
@@ -56,6 +56,7 @@ export type GetTranscript = {
source_kind: SourceKind;
room_id?: string | null;
room_name?: string | null;
+ audio_deleted?: boolean | null;
};
export type GetTranscriptSegmentTopic = {
@@ -219,6 +220,7 @@ export type UpdateTranscript = {
share_mode?: "public" | "semi-private" | "private" | null;
participants?: Array | null;
reviewed?: boolean | null;
+ audio_deleted?: boolean | null;
};
export type UserInfo = {