mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-21 12:45:19 +00:00
feat: download files, show cloud video, solf deletion with no reprocessing (#920)
* fix: move upd ports out of MacOS internal Range * feat: download files, show cloud video, solf deletion with no reprocessing
This commit is contained in:
committed by
GitHub
parent
cb1beae90d
commit
a76f114378
@@ -19,12 +19,14 @@ from reflector.views.rooms import router as rooms_router
|
||||
from reflector.views.rtc_offer import router as rtc_offer_router
|
||||
from reflector.views.transcripts import router as transcripts_router
|
||||
from reflector.views.transcripts_audio import router as transcripts_audio_router
|
||||
from reflector.views.transcripts_download import router as transcripts_download_router
|
||||
from reflector.views.transcripts_participants import (
|
||||
router as transcripts_participants_router,
|
||||
)
|
||||
from reflector.views.transcripts_process import router as transcripts_process_router
|
||||
from reflector.views.transcripts_speaker import router as transcripts_speaker_router
|
||||
from reflector.views.transcripts_upload import router as transcripts_upload_router
|
||||
from reflector.views.transcripts_video import router as transcripts_video_router
|
||||
from reflector.views.transcripts_webrtc import router as transcripts_webrtc_router
|
||||
from reflector.views.transcripts_websocket import router as transcripts_websocket_router
|
||||
from reflector.views.user import router as user_router
|
||||
@@ -97,6 +99,8 @@ app.include_router(transcripts_audio_router, prefix="/v1")
|
||||
app.include_router(transcripts_participants_router, prefix="/v1")
|
||||
app.include_router(transcripts_speaker_router, prefix="/v1")
|
||||
app.include_router(transcripts_upload_router, prefix="/v1")
|
||||
app.include_router(transcripts_download_router, prefix="/v1")
|
||||
app.include_router(transcripts_video_router, prefix="/v1")
|
||||
app.include_router(transcripts_websocket_router, prefix="/v1")
|
||||
app.include_router(transcripts_webrtc_router, prefix="/v1")
|
||||
app.include_router(transcripts_process_router, prefix="/v1")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from typing import Literal
|
||||
|
||||
import sqlalchemy as sa
|
||||
@@ -24,6 +24,7 @@ recordings = sa.Table(
|
||||
),
|
||||
sa.Column("meeting_id", sa.String),
|
||||
sa.Column("track_keys", sa.JSON, nullable=True),
|
||||
sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Index("idx_recording_meeting_id", "meeting_id"),
|
||||
)
|
||||
|
||||
@@ -40,6 +41,7 @@ class Recording(BaseModel):
|
||||
# track_keys can be empty list [] if recording finished but no audio was captured (silence/muted)
|
||||
# None means not a multitrack recording, [] means multitrack with no tracks
|
||||
track_keys: list[str] | None = None
|
||||
deleted_at: datetime | None = None
|
||||
|
||||
@property
|
||||
def is_multitrack(self) -> bool:
|
||||
@@ -69,7 +71,11 @@ class RecordingController:
|
||||
return Recording(**result) if result else None
|
||||
|
||||
async def remove_by_id(self, id: str) -> None:
|
||||
query = recordings.delete().where(recordings.c.id == id)
|
||||
query = (
|
||||
recordings.update()
|
||||
.where(recordings.c.id == id)
|
||||
.values(deleted_at=datetime.now(timezone.utc))
|
||||
)
|
||||
await get_database().execute(query)
|
||||
|
||||
async def set_meeting_id(
|
||||
@@ -114,6 +120,7 @@ class RecordingController:
|
||||
.where(
|
||||
recordings.c.bucket_name == bucket_name,
|
||||
recordings.c.track_keys.isnot(None),
|
||||
recordings.c.deleted_at.is_(None),
|
||||
or_(
|
||||
transcripts.c.id.is_(None),
|
||||
transcripts.c.status == "error",
|
||||
|
||||
@@ -387,6 +387,8 @@ class SearchController:
|
||||
transcripts.join(rooms, transcripts.c.room_id == rooms.c.id, isouter=True)
|
||||
)
|
||||
|
||||
base_query = base_query.where(transcripts.c.deleted_at.is_(None))
|
||||
|
||||
if params.query_text is not None:
|
||||
# because already initialized based on params.query_text presence above
|
||||
assert search_query is not None
|
||||
|
||||
@@ -91,6 +91,7 @@ transcripts = sqlalchemy.Table(
|
||||
sqlalchemy.Column("webvtt", sqlalchemy.Text),
|
||||
# Hatchet workflow run ID for resumption of failed workflows
|
||||
sqlalchemy.Column("workflow_run_id", sqlalchemy.String),
|
||||
sqlalchemy.Column("deleted_at", sqlalchemy.DateTime(timezone=True), nullable=True),
|
||||
sqlalchemy.Column(
|
||||
"change_seq",
|
||||
sqlalchemy.BigInteger,
|
||||
@@ -238,6 +239,7 @@ class Transcript(BaseModel):
|
||||
webvtt: str | None = None
|
||||
workflow_run_id: str | None = None # Hatchet workflow run ID for resumption
|
||||
change_seq: int | None = None
|
||||
deleted_at: datetime | None = None
|
||||
|
||||
@field_serializer("created_at", when_used="json")
|
||||
def serialize_datetime(self, dt: datetime) -> str:
|
||||
@@ -418,6 +420,8 @@ class TranscriptController:
|
||||
rooms, transcripts.c.room_id == rooms.c.id, isouter=True
|
||||
)
|
||||
|
||||
query = query.where(transcripts.c.deleted_at.is_(None))
|
||||
|
||||
if user_id:
|
||||
query = query.where(
|
||||
or_(transcripts.c.user_id == user_id, rooms.c.is_shared)
|
||||
@@ -500,7 +504,10 @@ class TranscriptController:
|
||||
"""
|
||||
Get transcripts by room_id (direct access without joins)
|
||||
"""
|
||||
query = transcripts.select().where(transcripts.c.room_id == room_id)
|
||||
query = transcripts.select().where(
|
||||
transcripts.c.room_id == room_id,
|
||||
transcripts.c.deleted_at.is_(None),
|
||||
)
|
||||
if "user_id" in kwargs:
|
||||
query = query.where(transcripts.c.user_id == kwargs["user_id"])
|
||||
if "order_by" in kwargs:
|
||||
@@ -531,8 +538,11 @@ class TranscriptController:
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
|
||||
# if the transcript is anonymous, share mode is not checked
|
||||
transcript = Transcript(**result)
|
||||
if transcript.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
|
||||
# if the transcript is anonymous, share mode is not checked
|
||||
if transcript.user_id is None:
|
||||
return transcript
|
||||
|
||||
@@ -632,56 +642,49 @@ class TranscriptController:
|
||||
user_id: str | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Remove a transcript by id
|
||||
Soft-delete a transcript by id.
|
||||
|
||||
Sets deleted_at on the transcript and its associated recording.
|
||||
All files (S3 and local) are preserved for later retrieval.
|
||||
"""
|
||||
transcript = await self.get_by_id(transcript_id)
|
||||
if not transcript:
|
||||
return
|
||||
if user_id is not None and transcript.user_id != user_id:
|
||||
return
|
||||
if transcript.audio_location == "storage" and not transcript.audio_deleted:
|
||||
try:
|
||||
await get_transcripts_storage().delete_file(
|
||||
transcript.storage_audio_path
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to delete transcript audio from storage",
|
||||
exc_info=e,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
transcript.unlink()
|
||||
if transcript.deleted_at is not None:
|
||||
return
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Soft-delete the associated recording (keeps S3 files intact)
|
||||
if transcript.recording_id:
|
||||
try:
|
||||
recording = await recordings_controller.get_by_id(
|
||||
transcript.recording_id
|
||||
)
|
||||
if recording:
|
||||
try:
|
||||
await get_transcripts_storage().delete_file(
|
||||
recording.object_key, bucket=recording.bucket_name
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to delete recording object from S3",
|
||||
exc_info=e,
|
||||
recording_id=transcript.recording_id,
|
||||
)
|
||||
await recordings_controller.remove_by_id(transcript.recording_id)
|
||||
await recordings_controller.remove_by_id(transcript.recording_id)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to delete recording row",
|
||||
"Failed to soft-delete recording",
|
||||
exc_info=e,
|
||||
recording_id=transcript.recording_id,
|
||||
)
|
||||
query = transcripts.delete().where(transcripts.c.id == transcript_id)
|
||||
|
||||
# Soft-delete the transcript (keeps all files intact)
|
||||
query = (
|
||||
transcripts.update()
|
||||
.where(transcripts.c.id == transcript_id)
|
||||
.values(deleted_at=now)
|
||||
)
|
||||
await get_database().execute(query)
|
||||
|
||||
async def remove_by_recording_id(self, recording_id: str):
|
||||
"""
|
||||
Remove a transcript by recording_id
|
||||
Soft-delete a transcript by recording_id
|
||||
"""
|
||||
query = transcripts.delete().where(transcripts.c.recording_id == recording_id)
|
||||
query = (
|
||||
transcripts.update()
|
||||
.where(transcripts.c.recording_id == recording_id)
|
||||
.values(deleted_at=datetime.now(timezone.utc))
|
||||
)
|
||||
await get_database().execute(query)
|
||||
|
||||
@staticmethod
|
||||
|
||||
257
server/reflector/tools/deleted_transcripts.py
Normal file
257
server/reflector/tools/deleted_transcripts.py
Normal file
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
CLI tool for managing soft-deleted transcripts.
|
||||
|
||||
Usage:
|
||||
uv run python -m reflector.tools.deleted_transcripts list
|
||||
uv run python -m reflector.tools.deleted_transcripts files <transcript_id>
|
||||
uv run python -m reflector.tools.deleted_transcripts download <transcript_id> [--output-dir ./]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import structlog
|
||||
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.transcripts import Transcript, transcripts
|
||||
from reflector.storage import get_source_storage, get_transcripts_storage
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
async def list_deleted():
|
||||
"""List all soft-deleted transcripts."""
|
||||
database = get_database()
|
||||
await database.connect()
|
||||
try:
|
||||
query = (
|
||||
transcripts.select()
|
||||
.where(transcripts.c.deleted_at.isnot(None))
|
||||
.order_by(transcripts.c.deleted_at.desc())
|
||||
)
|
||||
results = await database.fetch_all(query)
|
||||
|
||||
if not results:
|
||||
print("No deleted transcripts found.")
|
||||
return
|
||||
|
||||
print(
|
||||
f"{'ID':<40} {'Title':<40} {'Deleted At':<28} {'Recording ID':<40} {'Meeting ID'}"
|
||||
)
|
||||
print("-" * 180)
|
||||
for row in results:
|
||||
t = Transcript(**row)
|
||||
title = (t.title or "")[:38]
|
||||
deleted = t.deleted_at.isoformat() if t.deleted_at else ""
|
||||
print(
|
||||
f"{t.id:<40} {title:<40} {deleted:<28} {t.recording_id or '':<40} {t.meeting_id or ''}"
|
||||
)
|
||||
|
||||
print(f"\nTotal: {len(results)} deleted transcript(s)")
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
async def list_files(transcript_id: str):
|
||||
"""List all S3 keys associated with a deleted transcript."""
|
||||
database = get_database()
|
||||
await database.connect()
|
||||
try:
|
||||
query = transcripts.select().where(transcripts.c.id == transcript_id)
|
||||
result = await database.fetch_one(query)
|
||||
if not result:
|
||||
print(f"Transcript {transcript_id} not found.")
|
||||
return
|
||||
|
||||
t = Transcript(**result)
|
||||
if t.deleted_at is None:
|
||||
print(f"Transcript {transcript_id} is not deleted.")
|
||||
return
|
||||
|
||||
print(f"Transcript: {t.id}")
|
||||
print(f"Title: {t.title}")
|
||||
print(f"Deleted at: {t.deleted_at}")
|
||||
print()
|
||||
|
||||
files = []
|
||||
|
||||
# Transcript audio
|
||||
if t.audio_location == "storage" and not t.audio_deleted:
|
||||
files.append(("Transcript audio", t.storage_audio_path, None))
|
||||
|
||||
# Recording files
|
||||
if t.recording_id:
|
||||
recording = await recordings_controller.get_by_id(t.recording_id)
|
||||
if recording:
|
||||
if recording.object_key:
|
||||
files.append(
|
||||
(
|
||||
"Recording object_key",
|
||||
recording.object_key,
|
||||
recording.bucket_name,
|
||||
)
|
||||
)
|
||||
if recording.track_keys:
|
||||
for i, key in enumerate(recording.track_keys):
|
||||
files.append((f"Track {i}", key, recording.bucket_name))
|
||||
|
||||
# Cloud video
|
||||
if t.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(t.meeting_id)
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
files.append(("Cloud video", meeting.daily_composed_video_s3_key, None))
|
||||
|
||||
if not files:
|
||||
print("No associated files found.")
|
||||
return
|
||||
|
||||
print(f"{'Type':<25} {'Bucket':<30} {'S3 Key'}")
|
||||
print("-" * 120)
|
||||
for label, key, bucket in files:
|
||||
print(f"{label:<25} {bucket or '(default)':<30} {key}")
|
||||
|
||||
# Generate presigned URLs
|
||||
print("\nPresigned URLs (valid for 1 hour):")
|
||||
print("-" * 120)
|
||||
storage = get_transcripts_storage()
|
||||
for label, key, bucket in files:
|
||||
try:
|
||||
url = await storage.get_file_url(key, bucket=bucket, expires_in=3600)
|
||||
print(f"{label}: {url}")
|
||||
except Exception as e:
|
||||
print(f"{label}: ERROR - {e}")
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
async def download_files(transcript_id: str, output_dir: str):
|
||||
"""Download all files associated with a deleted transcript."""
|
||||
database = get_database()
|
||||
await database.connect()
|
||||
try:
|
||||
query = transcripts.select().where(transcripts.c.id == transcript_id)
|
||||
result = await database.fetch_one(query)
|
||||
if not result:
|
||||
print(f"Transcript {transcript_id} not found.")
|
||||
return
|
||||
|
||||
t = Transcript(**result)
|
||||
if t.deleted_at is None:
|
||||
print(f"Transcript {transcript_id} is not deleted.")
|
||||
return
|
||||
|
||||
dest = os.path.join(output_dir, t.id)
|
||||
os.makedirs(dest, exist_ok=True)
|
||||
|
||||
storage = get_transcripts_storage()
|
||||
|
||||
# Download transcript audio
|
||||
if t.audio_location == "storage" and not t.audio_deleted:
|
||||
try:
|
||||
data = await storage.get_file(t.storage_audio_path)
|
||||
path = os.path.join(dest, "audio.mp3")
|
||||
with open(path, "wb") as f:
|
||||
f.write(data)
|
||||
print(f"Downloaded: {path}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download audio: {e}")
|
||||
|
||||
# Download recording files
|
||||
if t.recording_id:
|
||||
recording = await recordings_controller.get_by_id(t.recording_id)
|
||||
if recording and recording.track_keys:
|
||||
tracks_dir = os.path.join(dest, "tracks")
|
||||
os.makedirs(tracks_dir, exist_ok=True)
|
||||
for i, key in enumerate(recording.track_keys):
|
||||
try:
|
||||
data = await storage.get_file(key, bucket=recording.bucket_name)
|
||||
filename = os.path.basename(key) or f"track_{i}"
|
||||
path = os.path.join(tracks_dir, filename)
|
||||
with open(path, "wb") as f:
|
||||
f.write(data)
|
||||
print(f"Downloaded: {path}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download track {i}: {e}")
|
||||
|
||||
# Download cloud video
|
||||
if t.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(t.meeting_id)
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
try:
|
||||
source_storage = get_source_storage("daily")
|
||||
data = await source_storage.get_file(
|
||||
meeting.daily_composed_video_s3_key
|
||||
)
|
||||
path = os.path.join(dest, "cloud_video.mp4")
|
||||
with open(path, "wb") as f:
|
||||
f.write(data)
|
||||
print(f"Downloaded: {path}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download cloud video: {e}")
|
||||
|
||||
# Write metadata
|
||||
metadata = {
|
||||
"id": t.id,
|
||||
"title": t.title,
|
||||
"created_at": t.created_at.isoformat() if t.created_at else None,
|
||||
"deleted_at": t.deleted_at.isoformat() if t.deleted_at else None,
|
||||
"duration": t.duration,
|
||||
"source_language": t.source_language,
|
||||
"target_language": t.target_language,
|
||||
"short_summary": t.short_summary,
|
||||
"long_summary": t.long_summary,
|
||||
"topics": [topic.model_dump() for topic in t.topics] if t.topics else [],
|
||||
"participants": [p.model_dump() for p in t.participants]
|
||||
if t.participants
|
||||
else [],
|
||||
"action_items": t.action_items,
|
||||
"webvtt": t.webvtt,
|
||||
"recording_id": t.recording_id,
|
||||
"meeting_id": t.meeting_id,
|
||||
}
|
||||
path = os.path.join(dest, "metadata.json")
|
||||
with open(path, "w") as f:
|
||||
json.dump(metadata, f, indent=2, default=str)
|
||||
print(f"Downloaded: {path}")
|
||||
|
||||
print(f"\nAll files saved to: {dest}")
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Manage soft-deleted transcripts")
|
||||
subparsers = parser.add_subparsers(dest="command", required=True)
|
||||
|
||||
subparsers.add_parser("list", help="List all deleted transcripts")
|
||||
|
||||
files_parser = subparsers.add_parser(
|
||||
"files", help="List S3 keys for a deleted transcript"
|
||||
)
|
||||
files_parser.add_argument("transcript_id", help="Transcript ID")
|
||||
|
||||
download_parser = subparsers.add_parser(
|
||||
"download", help="Download files for a deleted transcript"
|
||||
)
|
||||
download_parser.add_argument("transcript_id", help="Transcript ID")
|
||||
download_parser.add_argument(
|
||||
"--output-dir", default=".", help="Output directory (default: .)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "list":
|
||||
asyncio.run(list_deleted())
|
||||
elif args.command == "files":
|
||||
asyncio.run(list_files(args.transcript_id))
|
||||
elif args.command == "download":
|
||||
asyncio.run(download_files(args.transcript_id, args.output_dir))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -16,6 +16,7 @@ from pydantic import (
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.rooms import rooms_controller
|
||||
from reflector.db.search import (
|
||||
@@ -112,6 +113,8 @@ class GetTranscriptMinimal(BaseModel):
|
||||
room_name: str | None = None
|
||||
audio_deleted: bool | None = None
|
||||
change_seq: int | None = None
|
||||
has_cloud_video: bool = False
|
||||
cloud_video_duration: int | None = None
|
||||
|
||||
|
||||
class TranscriptParticipantWithEmail(TranscriptParticipant):
|
||||
@@ -501,6 +504,14 @@ async def transcript_get(
|
||||
)
|
||||
)
|
||||
|
||||
has_cloud_video = False
|
||||
cloud_video_duration = None
|
||||
if transcript.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
has_cloud_video = True
|
||||
cloud_video_duration = meeting.daily_composed_video_duration
|
||||
|
||||
base_data = {
|
||||
"id": transcript.id,
|
||||
"user_id": transcript.user_id,
|
||||
@@ -524,6 +535,8 @@ async def transcript_get(
|
||||
"audio_deleted": transcript.audio_deleted,
|
||||
"change_seq": transcript.change_seq,
|
||||
"participants": participants,
|
||||
"has_cloud_video": has_cloud_video,
|
||||
"cloud_video_duration": cloud_video_duration,
|
||||
}
|
||||
|
||||
if transcript_format == "text":
|
||||
|
||||
169
server/reflector/views/transcripts_download.py
Normal file
169
server/reflector/views/transcripts_download.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""
|
||||
Transcript download endpoint — generates a zip archive with all transcript files.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.logger import logger
|
||||
from reflector.storage import get_source_storage, get_transcripts_storage
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/transcripts/{transcript_id}/download/zip",
|
||||
operation_id="transcript_download_zip",
|
||||
)
|
||||
async def transcript_download_zip(
|
||||
transcript_id: str,
|
||||
user: Annotated[auth.UserInfo, Depends(auth.current_user)],
|
||||
):
|
||||
user_id = user["sub"]
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if not transcripts_controller.user_can_mutate(transcript, user_id):
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
|
||||
recording = None
|
||||
if transcript.recording_id:
|
||||
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||
|
||||
meeting = None
|
||||
if transcript.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
|
||||
truncated_id = str(transcript.id).split("-")[0]
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
zip_path = os.path.join(tmpdir, f"transcript_{truncated_id}.zip")
|
||||
|
||||
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
# Transcript audio
|
||||
if transcript.audio_location == "storage" and not transcript.audio_deleted:
|
||||
try:
|
||||
storage = get_transcripts_storage()
|
||||
data = await storage.get_file(transcript.storage_audio_path)
|
||||
audio_path = os.path.join(tmpdir, "audio.mp3")
|
||||
with open(audio_path, "wb") as f:
|
||||
f.write(data)
|
||||
zf.write(audio_path, "audio.mp3")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to download transcript audio for zip",
|
||||
exc_info=e,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
elif (
|
||||
not transcript.audio_deleted
|
||||
and hasattr(transcript, "audio_mp3_filename")
|
||||
and transcript.audio_mp3_filename
|
||||
and transcript.audio_mp3_filename.exists()
|
||||
):
|
||||
zf.write(str(transcript.audio_mp3_filename), "audio.mp3")
|
||||
|
||||
# Recording tracks (multitrack)
|
||||
if recording and recording.track_keys:
|
||||
try:
|
||||
source_storage = get_source_storage(
|
||||
"daily" if recording.track_keys else None
|
||||
)
|
||||
except Exception:
|
||||
source_storage = get_transcripts_storage()
|
||||
|
||||
for i, key in enumerate(recording.track_keys):
|
||||
try:
|
||||
data = await source_storage.get_file(
|
||||
key, bucket=recording.bucket_name
|
||||
)
|
||||
filename = os.path.basename(key) or f"track_{i}"
|
||||
track_path = os.path.join(tmpdir, f"track_{i}")
|
||||
with open(track_path, "wb") as f:
|
||||
f.write(data)
|
||||
zf.write(track_path, f"tracks/{filename}")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to download track for zip",
|
||||
exc_info=e,
|
||||
track_key=key,
|
||||
)
|
||||
|
||||
# Cloud video
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
try:
|
||||
source_storage = get_source_storage("daily")
|
||||
data = await source_storage.get_file(
|
||||
meeting.daily_composed_video_s3_key
|
||||
)
|
||||
video_path = os.path.join(tmpdir, "cloud_video.mp4")
|
||||
with open(video_path, "wb") as f:
|
||||
f.write(data)
|
||||
zf.write(video_path, "cloud_video.mp4")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to download cloud video for zip",
|
||||
exc_info=e,
|
||||
s3_key=meeting.daily_composed_video_s3_key,
|
||||
)
|
||||
|
||||
# Metadata JSON
|
||||
metadata = {
|
||||
"id": transcript.id,
|
||||
"title": transcript.title,
|
||||
"created_at": (
|
||||
transcript.created_at.isoformat() if transcript.created_at else None
|
||||
),
|
||||
"duration": transcript.duration,
|
||||
"source_language": transcript.source_language,
|
||||
"target_language": transcript.target_language,
|
||||
"short_summary": transcript.short_summary,
|
||||
"long_summary": transcript.long_summary,
|
||||
"topics": (
|
||||
[t.model_dump() for t in transcript.topics]
|
||||
if transcript.topics
|
||||
else []
|
||||
),
|
||||
"participants": (
|
||||
[p.model_dump() for p in transcript.participants]
|
||||
if transcript.participants
|
||||
else []
|
||||
),
|
||||
"action_items": transcript.action_items,
|
||||
"webvtt": transcript.webvtt,
|
||||
"recording_id": transcript.recording_id,
|
||||
"meeting_id": transcript.meeting_id,
|
||||
}
|
||||
meta_path = os.path.join(tmpdir, "metadata.json")
|
||||
with open(meta_path, "w") as f:
|
||||
json.dump(metadata, f, indent=2, default=str)
|
||||
zf.write(meta_path, "metadata.json")
|
||||
|
||||
# Read zip into memory before tmpdir is cleaned up
|
||||
with open(zip_path, "rb") as f:
|
||||
zip_bytes = f.read()
|
||||
|
||||
def iter_zip():
|
||||
offset = 0
|
||||
chunk_size = 64 * 1024
|
||||
while offset < len(zip_bytes):
|
||||
yield zip_bytes[offset : offset + chunk_size]
|
||||
offset += chunk_size
|
||||
|
||||
return StreamingResponse(
|
||||
iter_zip(),
|
||||
media_type="application/zip",
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename=transcript_{truncated_id}.zip"
|
||||
},
|
||||
)
|
||||
75
server/reflector/views/transcripts_video.py
Normal file
75
server/reflector/views/transcripts_video.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""
|
||||
Transcript cloud video endpoint — returns a presigned URL for streaming playback.
|
||||
"""
|
||||
|
||||
from typing import Annotated, Optional
|
||||
|
||||
import jwt
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from pydantic import BaseModel
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.settings import settings
|
||||
from reflector.storage import get_source_storage
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class VideoUrlResponse(BaseModel):
|
||||
url: str
|
||||
duration: int | None = None
|
||||
content_type: str = "video/mp4"
|
||||
|
||||
|
||||
@router.get(
|
||||
"/transcripts/{transcript_id}/video/url",
|
||||
operation_id="transcript_get_video_url",
|
||||
response_model=VideoUrlResponse,
|
||||
)
|
||||
async def transcript_get_video_url(
|
||||
transcript_id: str,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
token: str | None = None,
|
||||
):
|
||||
user_id = user["sub"] if user else None
|
||||
if not user_id and token:
|
||||
try:
|
||||
token_user = await auth.verify_raw_token(token)
|
||||
except Exception:
|
||||
token_user = None
|
||||
if not token_user:
|
||||
try:
|
||||
payload = jwt.decode(token, settings.SECRET_KEY, algorithms=["HS256"])
|
||||
user_id = payload.get("sub")
|
||||
except jwt.PyJWTError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid or expired token",
|
||||
)
|
||||
else:
|
||||
user_id = token_user["sub"]
|
||||
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
|
||||
if not transcript.meeting_id:
|
||||
raise HTTPException(status_code=404, detail="No video available")
|
||||
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
if not meeting or not meeting.daily_composed_video_s3_key:
|
||||
raise HTTPException(status_code=404, detail="No video available")
|
||||
|
||||
source_storage = get_source_storage("daily")
|
||||
url = await source_storage.get_file_url(
|
||||
meeting.daily_composed_video_s3_key,
|
||||
operation="get_object",
|
||||
expires_in=3600,
|
||||
)
|
||||
|
||||
return VideoUrlResponse(
|
||||
url=url,
|
||||
duration=meeting.daily_composed_video_duration,
|
||||
)
|
||||
@@ -90,7 +90,9 @@ async def cleanup_old_transcripts(
|
||||
):
|
||||
"""Delete old anonymous transcripts and their associated recordings/meetings."""
|
||||
query = transcripts.select().where(
|
||||
(transcripts.c.created_at < cutoff_date) & (transcripts.c.user_id.is_(None))
|
||||
(transcripts.c.created_at < cutoff_date)
|
||||
& (transcripts.c.user_id.is_(None))
|
||||
& (transcripts.c.deleted_at.is_(None))
|
||||
)
|
||||
old_transcripts = await db.fetch_all(query)
|
||||
|
||||
|
||||
@@ -104,6 +104,12 @@ async def process_recording(bucket_name: str, object_key: str):
|
||||
room = await rooms_controller.get_by_id(meeting.room_id)
|
||||
|
||||
recording = await recordings_controller.get_by_object_key(bucket_name, object_key)
|
||||
if recording and recording.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted recording",
|
||||
recording_id=recording.id,
|
||||
)
|
||||
return
|
||||
if not recording:
|
||||
recording = await recordings_controller.create(
|
||||
Recording(
|
||||
@@ -115,6 +121,13 @@ async def process_recording(bucket_name: str, object_key: str):
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
||||
if transcript and transcript.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted transcript for recording",
|
||||
recording_id=recording.id,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
return
|
||||
if transcript:
|
||||
await transcripts_controller.update(
|
||||
transcript,
|
||||
@@ -262,6 +275,13 @@ async def _process_multitrack_recording_inner(
|
||||
# Check if recording already exists (reprocessing path)
|
||||
recording = await recordings_controller.get_by_id(recording_id)
|
||||
|
||||
if recording and recording.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted recording",
|
||||
recording_id=recording_id,
|
||||
)
|
||||
return
|
||||
|
||||
if recording and recording.meeting_id:
|
||||
# Reprocessing: recording exists with meeting already linked
|
||||
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||
@@ -341,6 +361,13 @@ async def _process_multitrack_recording_inner(
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
||||
if transcript and transcript.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted transcript for recording",
|
||||
recording_id=recording.id,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
return
|
||||
if not transcript:
|
||||
transcript = await transcripts_controller.add(
|
||||
"",
|
||||
|
||||
Reference in New Issue
Block a user