# reflector/server/reflector/views/transcripts_audio.py

"""
Transcripts audio related endpoints
===================================

"""

from typing import Annotated, Optional

import httpx
import jwt
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status

import reflector.auth as auth
from reflector.db.transcripts import AudioWaveform, transcripts_controller
from reflector.settings import settings

from ._range_requests_response import range_requests_response

router = APIRouter()


# Hop-by-hop headers (RFC 9110, section 7.6.1) describe a single connection
# and must not be relayed by a proxy: the framing of the response we send is
# decided by our own server, not by the upstream one.
_HOP_BY_HOP_HEADERS = frozenset(
    {
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailer",
        "transfer-encoding",
        "upgrade",
    }
)


def _relayable_headers(upstream_headers, *, body_decoded: bool) -> dict[str, str]:
    """Return the upstream response headers that are safe to send to the client.

    Args:
        upstream_headers: Mapping of header name to value, as received from
            the upstream server.
        body_decoded: True when the body we relay has already been
            content-decoded by the HTTP client (i.e. it is no longer the
            byte stream the upstream headers describe).

    Returns:
        A plain dict without hop-by-hop headers. When the body was decoded
        and the upstream declared a ``content-encoding``, that header and
        ``content-length`` are dropped as well, since both describe the
        encoded payload rather than the bytes actually sent.
    """
    dropped = set(_HOP_BY_HOP_HEADERS)
    upstream_names = {name.lower() for name in upstream_headers}
    if body_decoded and "content-encoding" in upstream_names:
        dropped |= {"content-encoding", "content-length"}
    return {
        name: value
        for name, value in upstream_headers.items()
        if name.lower() not in dropped
    }


@router.get(
    "/transcripts/{transcript_id}/audio/mp3",
    operation_id="transcript_get_audio_mp3",
)
@router.head(
    "/transcripts/{transcript_id}/audio/mp3",
    operation_id="transcript_head_audio_mp3",
)
async def transcript_get_audio_mp3(
    request: Request,
    transcript_id: str,
    user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
    token: str | None = None,
):
    """Return the MP3 audio of a transcript (GET and HEAD).

    Authentication is resolved in this order:

    1. the optional authenticated ``user`` dependency;
    2. the ``token`` query parameter, checked with ``auth.verify_raw_token``;
    3. the ``token`` query parameter decoded as an internal HS256 JWT signed
       with ``settings.SECRET_KEY``.

    Without any credentials, only transcripts whose ``user_id`` is ``None``
    are served.

    Audio stored in object storage ("storage" location) is fetched and
    relayed by this endpoint; otherwise the local MP3 file is served through
    ``range_requests_response``.

    Raises:
        HTTPException: 401 when ``token`` is neither accepted by
            ``auth.verify_raw_token`` nor a valid internal JWT, or when an
            unauthenticated request targets a transcript that has an owner;
            404 when the transcript is missing or soft-deleted (anonymous
            path), when its audio was deleted, or when the local MP3 file
            does not exist.
    """
    user_id = user["sub"] if user else None
    if not user_id and token:
        try:
            token_user = await auth.verify_raw_token(token)
        except Exception:
            # Deliberate best effort: any failure here only means the token
            # is not accepted by the auth backend; the internal-token check
            # below still gets a chance.
            token_user = None

        if token_user:
            user_id = token_user["sub"]
        else:
            # Fallback: try as internal HS256 token
            # (created by _generate_local_audio_link)
            try:
                payload = jwt.decode(token, settings.SECRET_KEY, algorithms=["HS256"])
            except jwt.PyJWTError as exc:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED,
                    detail="Invalid or expired token",
                    headers={"WWW-Authenticate": "Bearer"},
                ) from exc
            user_id = payload.get("sub")

    if not user_id and not token:
        # No authentication provided at all. Only anonymous transcripts
        # (user_id=None) are accessible without auth, to preserve
        # pipeline access via _generate_local_audio_link().
        transcript = await transcripts_controller.get_by_id(transcript_id)
        if not transcript or transcript.deleted_at is not None:
            raise HTTPException(status_code=404, detail="Transcript not found")
        if transcript.user_id is not None:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Authentication required",
            )
    else:
        # NOTE(review): presumably get_by_id_for_http raises the appropriate
        # HTTP error itself when the transcript is missing or not accessible
        # to user_id - confirm against the controller.
        transcript = await transcripts_controller.get_by_id_for_http(
            transcript_id, user_id=user_id
        )

    # Honour the privacy flag before choosing a serving path, so that deleted
    # audio is refused regardless of where the file is (or was) stored.
    if transcript.audio_deleted:
        raise HTTPException(
            status_code=404, detail="Audio unavailable due to privacy settings"
        )

    if transcript.audio_location == "storage":
        # proxy S3 file, to prevent issue with CORS
        url = await transcript.get_audio_url()
        forwarded_headers = {
            name: request.headers[name]
            for name in ("range", "accept-encoding")
            if name in request.headers
        }

        async with httpx.AsyncClient() as client:
            resp = await client.request(request.method, url, headers=forwarded_headers)
            return Response(
                content=resp.content,
                status_code=resp.status_code,
                # resp.content is content-decoded by httpx, so the upstream
                # encoding/length headers only remain valid for HEAD, where
                # no body is transferred at all.
                headers=_relayable_headers(
                    resp.headers, body_decoded=request.method != "HEAD"
                ),
            )

    if (
        not hasattr(transcript, "audio_mp3_filename")
        or not transcript.audio_mp3_filename
        or not transcript.audio_mp3_filename.exists()
    ):
        raise HTTPException(status_code=404, detail="Audio file not found")

    # Only the first dash-separated segment of the id is exposed in the
    # suggested filename.
    truncated_id = str(transcript.id).split("-")[0]
    filename = f"recording_{truncated_id}.mp3"

    return range_requests_response(
        request,
        transcript.audio_mp3_filename,
        content_type="audio/mpeg",
        content_disposition=f"inline; filename={filename}",
    )


@router.get("/transcripts/{transcript_id}/audio/waveform")
async def transcript_get_audio_waveform(
    transcript_id: str,
    user: Annotated[auth.UserInfo, Depends(auth.current_user)],
) -> AudioWaveform:
    """Return the stored audio waveform of a transcript.

    Requires an authenticated user; the transcript is looked up on behalf of
    that user's ``sub`` identifier.

    Raises:
        HTTPException: 404 when the waveform file does not exist.
    """
    transcript = await transcripts_controller.get_by_id_for_http(
        transcript_id, user_id=user["sub"]
    )

    waveform_path = transcript.audio_waveform_filename
    if waveform_path.exists():
        return transcript.audio_waveform

    raise HTTPException(status_code=404, detail="Audio not found")