Mirror of https://github.com/Monadical-SAS/reflector.git (synced 2026-02-04 18:06:48 +00:00)

Compare commits: brady-bunc...feature/sp (16 commits)
| SHA1 |
| --- |
| 6a57388723 |
| ddef1d4a4a |
| 88e0d11ccd |
| 9f6e7b515b |
| d0110f4dd4 |
| 7dfb37154d |
| 67679e90b2 |
| aa4c368479 |
| deb5ed6010 |
| 30b28eed3b |
| 1b33fba3ba |
| 3ce279daa4 |
| 01650be787 |
| f00c16a41c |
| 859df5513e |
| 2af9918979 |
.gitignore (vendored, 1 line changed)
@@ -1,6 +1,5 @@
.DS_Store
server/.env
server/.env.production
.env
Caddyfile
server/exportdanswer

@@ -4,4 +4,4 @@ docs/docs/installation/daily-setup.md:curl-auth-header:277
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:74
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:83
server/reflector/worker/process.py:generic-api-key:465
server/tests/test_recording_request_flow.py:generic-api-key:121
server/reflector/worker/process.py:generic-api-key:594
CHANGELOG.md (15 lines changed)
@@ -1,20 +1,5 @@
# Changelog

## [0.32.1](https://github.com/Monadical-SAS/reflector/compare/v0.32.0...v0.32.1) (2026-01-30)


### Bug Fixes

* daily multitrack pipeline finalize dependency fix ([23eb137](https://github.com/Monadical-SAS/reflector/commit/23eb1371cb9348c4b81eb12ad506b582f8a4799e))
* match httpx pad with hatchet audio timeout ([c05d1f0](https://github.com/Monadical-SAS/reflector/commit/c05d1f03cd8369fc06efd455527e50246887efd0))

## [0.32.0](https://github.com/Monadical-SAS/reflector/compare/v0.31.0...v0.32.0) (2026-01-30)


### Features

* modal padding ([#837](https://github.com/Monadical-SAS/reflector/issues/837)) ([7fde64e](https://github.com/Monadical-SAS/reflector/commit/7fde64e2529a1d37b0f7507c62d983a7bd0b5b89))

## [0.31.0](https://github.com/Monadical-SAS/reflector/compare/v0.30.0...v0.31.0) (2026-01-23)
@@ -1,8 +1,6 @@
# Reflector Caddyfile (optional reverse proxy)
# Use this only when you run Caddy via: docker compose -f docker-compose.prod.yml --profile caddy up -d
# If Coolify, Traefik, or nginx already use ports 80/443, do NOT start Caddy; point your proxy at web:3000 and server:1250.
#
# Replace example.com with your actual domains. CORS is handled by the backend - Caddy just proxies.
# Reflector Caddyfile
# Replace example.com with your actual domains
# CORS is handled by the backend - Caddy just proxies
#
# For environment variable substitution, set:
# FRONTEND_DOMAIN=app.example.com

@@ -1,14 +1,9 @@
# Production Docker Compose configuration
# Usage: docker compose -f docker-compose.prod.yml up -d
#
# Caddy (reverse proxy on ports 80/443) is OPTIONAL and behind the "caddy" profile:
# - With Caddy (self-hosted, you manage SSL): docker compose -f docker-compose.prod.yml --profile caddy up -d
# - Without Caddy (Coolify/Traefik/nginx already on 80/443): docker compose -f docker-compose.prod.yml up -d
# Then point your proxy at web:3000 (frontend) and server:1250 (API).
#
# Prerequisites:
# 1. Copy .env.example to .env and configure for both server/ and www/
# 2. If using Caddy: copy Caddyfile.example to Caddyfile and edit your domains
# 2. Copy Caddyfile.example to Caddyfile and edit with your domains
# 3. Deploy Modal GPU functions (see gpu/modal_deployments/deploy-all.sh)

services:
@@ -89,8 +84,6 @@ services:
      retries: 3

  caddy:
    profiles:
      - caddy
    image: caddy:2-alpine
    restart: unless-stopped
    ports:
@@ -11,15 +11,15 @@ This page documents the Docker Compose configuration for Reflector. For the comp

The `docker-compose.prod.yml` includes these services:

| Service | Image | Purpose |
| ---------- | --------------------------------- | --------------------------------------------------------------------------- |
| `web` | `monadicalsas/reflector-frontend` | Next.js frontend |
| `server` | `monadicalsas/reflector-backend` | FastAPI backend |
| `worker` | `monadicalsas/reflector-backend` | Celery worker for background tasks |
| `beat` | `monadicalsas/reflector-backend` | Celery beat scheduler |
| `redis` | `redis:7.2-alpine` | Message broker and cache |
| `postgres` | `postgres:17-alpine` | Primary database |
| `caddy` | `caddy:2-alpine` | Reverse proxy with auto-SSL (optional; see [Caddy profile](#caddy-profile)) |
| Service | Image | Purpose |
|---------|-------|---------|
| `web` | `monadicalsas/reflector-frontend` | Next.js frontend |
| `server` | `monadicalsas/reflector-backend` | FastAPI backend |
| `worker` | `monadicalsas/reflector-backend` | Celery worker for background tasks |
| `beat` | `monadicalsas/reflector-backend` | Celery beat scheduler |
| `redis` | `redis:7.2-alpine` | Message broker and cache |
| `postgres` | `postgres:17-alpine` | Primary database |
| `caddy` | `caddy:2-alpine` | Reverse proxy with auto-SSL |

## Environment Files

@@ -30,7 +30,6 @@ Reflector uses two separate environment files:
Used by: `server`, `worker`, `beat`

Key variables:

```env
# Database connection
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
@@ -55,7 +54,6 @@ TRANSCRIPT_MODAL_API_KEY=...
Used by: `web`

Key variables:

```env
# Domain configuration
SITE_URL=https://app.example.com
@@ -72,42 +70,26 @@ Note: `API_URL` is used client-side (browser), `SERVER_API_URL` is used server-s

## Volumes

| Volume | Purpose |
| --------------- | ----------------------------- |
| `redis_data` | Redis persistence |
| `postgres_data` | PostgreSQL data |
| `server_data` | Uploaded files, local storage |
| `caddy_data` | SSL certificates |
| `caddy_config` | Caddy configuration |
| Volume | Purpose |
|--------|---------|
| `redis_data` | Redis persistence |
| `postgres_data` | PostgreSQL data |
| `server_data` | Uploaded files, local storage |
| `caddy_data` | SSL certificates |
| `caddy_config` | Caddy configuration |

## Network

All services share the default network. The network is marked `attachable: true` to allow external containers (like Authentik) to join.

## Caddy profile

Caddy (ports 80 and 443) is **optional** and behind the `caddy` profile so it does not conflict with an existing reverse proxy (e.g. Coolify, Traefik, nginx).

- **With Caddy** (you want Reflector to handle SSL):
  `docker compose -f docker-compose.prod.yml --profile caddy up -d`
- **Without Caddy** (Coolify or another proxy already on 80/443):
  `docker compose -f docker-compose.prod.yml up -d`
  Then configure your proxy to send traffic to `web:3000` (frontend) and `server:1250` (API).

## Common Commands

### Start all services

```bash
# Without Caddy (e.g. when using Coolify)
docker compose -f docker-compose.prod.yml up -d

# With Caddy as reverse proxy
docker compose -f docker-compose.prod.yml --profile caddy up -d
```

### View logs

```bash
# All services
docker compose -f docker-compose.prod.yml logs -f
@@ -117,7 +99,6 @@ docker compose -f docker-compose.prod.yml logs server --tail 50
```

### Restart a service

```bash
# Quick restart (doesn't reload .env changes)
docker compose -f docker-compose.prod.yml restart server
@@ -127,32 +108,27 @@ docker compose -f docker-compose.prod.yml up -d server
```

### Run database migrations

```bash
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
```

### Access database

```bash
docker compose -f docker-compose.prod.yml exec postgres psql -U reflector
```

### Pull latest images

```bash
docker compose -f docker-compose.prod.yml pull
docker compose -f docker-compose.prod.yml up -d
```

### Stop all services

```bash
docker compose -f docker-compose.prod.yml down
```

### Full reset (WARNING: deletes data)

```bash
docker compose -f docker-compose.prod.yml down -v
```
@@ -211,7 +187,6 @@ The Caddyfile supports environment variable substitution:
Set `FRONTEND_DOMAIN` and `API_DOMAIN` environment variables, or edit the file directly.

### Reload Caddy after changes

```bash
docker compose -f docker-compose.prod.yml exec caddy caddy reload --config /etc/caddy/Caddyfile
```
@@ -26,7 +26,7 @@ flowchart LR

Before starting, you need:

- **Production server** - 4+ cores, 8GB+ RAM, public IP
- **Production server** - 4+ cores, 8GB+ RAM, public IP
- **Two domain names** - e.g., `app.example.com` (frontend) and `api.example.com` (backend)
- **GPU processing** - Choose one:
  - Modal.com account, OR
@@ -60,17 +60,16 @@ Type: A Name: api Value: <your-server-ip>

Reflector requires GPU processing for transcription and speaker diarization. Choose one option:

| | **Modal.com (Cloud)** | **Self-Hosted GPU** |
| ------------ | --------------------------------- | ---------------------------- |
| | **Modal.com (Cloud)** | **Self-Hosted GPU** |
|---|---|---|
| **Best for** | No GPU hardware, zero maintenance | Own GPU server, full control |
| **Pricing** | Pay-per-use | Fixed infrastructure cost |
| **Pricing** | Pay-per-use | Fixed infrastructure cost |

### Option A: Modal.com (Serverless Cloud GPU)

#### Accept HuggingFace Licenses

Visit both pages and click "Accept":

- https://huggingface.co/pyannote/speaker-diarization-3.1
- https://huggingface.co/pyannote/segmentation-3.0

@@ -180,7 +179,6 @@ Save these credentials - you'll need them in the next step.
## Configure Environment

Reflector has two env files:

- `server/.env` - Backend configuration
- `www/.env` - Frontend configuration

@@ -192,7 +190,6 @@ nano server/.env
```

**Required settings:**

```env
# Database (defaults work with docker-compose.prod.yml)
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
@@ -252,7 +249,6 @@ nano www/.env
```

**Required settings:**

```env
# Your domains
SITE_URL=https://app.example.com
@@ -270,11 +266,7 @@ FEATURE_REQUIRE_LOGIN=false

---

## Reverse proxy (Caddy or existing)

**If Coolify, Traefik, or nginx already use ports 80/443** (e.g. Coolify on your host): skip Caddy. Start the stack without the Caddy profile (see [Start Services](#start-services) below), then point your proxy at `web:3000` (frontend) and `server:1250` (API).

**If you want Reflector to provide the reverse proxy and SSL:**
## Configure Caddy

```bash
cp Caddyfile.example Caddyfile
@@ -297,18 +289,10 @@ Replace `example.com` with your domains. The `{$VAR:default}` syntax uses Caddy'

## Start Services

**Without Caddy** (e.g. Coolify already on 80/443):

```bash
docker compose -f docker-compose.prod.yml up -d
```

**With Caddy** (Reflector handles SSL):

```bash
docker compose -f docker-compose.prod.yml --profile caddy up -d
```

Wait for containers to start (first run may take 1-2 minutes to pull images and initialize).

---
@@ -316,21 +300,18 @@ Wait for containers to start (first run may take 1-2 minutes to pull images and
## Verify Deployment

### Check services

```bash
docker compose -f docker-compose.prod.yml ps
# All should show "Up"
```

### Test API

```bash
curl https://api.example.com/health
# Should return: {"status":"healthy"}
```

### Test Frontend

- Visit https://app.example.com
- You should see the Reflector interface
- Try uploading an audio file to test transcription
@@ -346,7 +327,6 @@ By default, Reflector is open (no login required). **Authentication is required
See [Authentication Setup](./auth-setup) for full Authentik OAuth configuration.

Quick summary:

1. Deploy Authentik on your server
2. Create OAuth provider in Authentik
3. Extract public key for JWT verification
@@ -378,7 +358,6 @@ DAILYCO_STORAGE_AWS_ROLE_ARN=<arn:aws:iam::ACCOUNT:role/DailyCo>
```

Reload env and restart:

```bash
docker compose -f docker-compose.prod.yml up -d server worker
```
@@ -388,43 +367,35 @@ docker compose -f docker-compose.prod.yml up -d server worker

## Troubleshooting

### Check logs for errors

```bash
docker compose -f docker-compose.prod.yml logs server --tail 20
docker compose -f docker-compose.prod.yml logs worker --tail 20
```

### Services won't start

```bash
docker compose -f docker-compose.prod.yml logs
```

### CORS errors in browser

- Verify `CORS_ORIGIN` in `server/.env` matches your frontend domain exactly (including `https://`)
- Reload env: `docker compose -f docker-compose.prod.yml up -d server`

### SSL certificate errors (when using Caddy)

### SSL certificate errors
- Caddy auto-provisions Let's Encrypt certificates
- Ensure ports 80 and 443 are open and not used by another proxy
- Ensure ports 80 and 443 are open
- Check: `docker compose -f docker-compose.prod.yml logs caddy`
- If port 80 is already in use (e.g. by Coolify), run without Caddy: `docker compose -f docker-compose.prod.yml up -d` and use your existing proxy

### Transcription not working

- Check Modal dashboard: https://modal.com/apps
- Verify URLs in `server/.env` match deployed functions
- Check worker logs: `docker compose -f docker-compose.prod.yml logs worker`

### "Login required" but auth not configured

- Set `FEATURE_REQUIRE_LOGIN=false` in `www/.env`
- Rebuild frontend: `docker compose -f docker-compose.prod.yml up -d --force-recreate web`

### Database migrations or connectivity issues

Migrations run automatically on server startup. To check database connectivity or debug migration failures:

```bash
@@ -437,3 +408,4 @@ docker compose -f docker-compose.prod.yml exec server uv run python -c "from ref
# Manually run migrations (if needed)
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
```
@@ -131,15 +131,6 @@ if [ -z "$DIARIZER_URL" ]; then
fi
echo " -> $DIARIZER_URL"

echo ""
echo "Deploying padding (CPU audio processing via Modal SDK)..."
modal deploy reflector_padding.py
if [ $? -ne 0 ]; then
    echo "Error: Failed to deploy padding. Check Modal dashboard for details."
    exit 1
fi
echo " -> reflector-padding.pad_track (Modal SDK function)"

# --- Output Configuration ---
echo ""
echo "=========================================="
@@ -156,6 +147,4 @@ echo ""
echo "DIARIZATION_BACKEND=modal"
echo "DIARIZATION_URL=$DIARIZER_URL"
echo "DIARIZATION_MODAL_API_KEY=$API_KEY"
echo ""
echo "# Padding uses Modal SDK (requires MODAL_TOKEN_ID/SECRET in worker containers)"
echo "# --- End Modal Configuration ---"
@@ -1,277 +0,0 @@
"""
Reflector GPU backend - audio padding
======================================

CPU-intensive audio padding service for adding silence to audio tracks.
Uses PyAV filter graph (adelay) for precise track synchronization.

IMPORTANT: This padding logic is duplicated from server/reflector/utils/audio_padding.py
for Modal deployment isolation (Modal can't import from server/reflector/). If you modify
the PyAV filter graph or padding algorithm, you MUST update both:
- gpu/modal_deployments/reflector_padding.py (this file)
- server/reflector/utils/audio_padding.py

Constants duplicated from server/reflector/utils/audio_constants.py for same reason.
"""

import os
import tempfile
from fractions import Fraction
import math
import asyncio

import modal

S3_TIMEOUT = 60  # applied twice per request: one download, one upload
PADDING_TIMEOUT = 600 + (S3_TIMEOUT * 2)
SCALEDOWN_WINDOW = 60  # The maximum duration (in seconds) that individual containers can remain idle when scaling down.
DISCONNECT_CHECK_INTERVAL = 2  # Check for client disconnect


app = modal.App("reflector-padding")

# CPU-based image
image = (
    modal.Image.debian_slim(python_version="3.12")
    .apt_install("ffmpeg")  # Required by PyAV
    .pip_install(
        "av==13.1.0",  # PyAV for audio processing
        "requests==2.32.3",  # HTTP for presigned URL downloads/uploads
        "fastapi==0.115.12",  # API framework
    )
)

# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
OPUS_STANDARD_SAMPLE_RATE = 48000
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
OPUS_DEFAULT_BIT_RATE = 128000


@app.function(
    cpu=2.0,
    timeout=PADDING_TIMEOUT,
    scaledown_window=SCALEDOWN_WINDOW,
    image=image,
)
@modal.asgi_app()
def web():
    from fastapi import FastAPI, Request, HTTPException
    from pydantic import BaseModel

    class PaddingRequest(BaseModel):
        track_url: str
        output_url: str
        start_time_seconds: float
        track_index: int

    class PaddingResponse(BaseModel):
        size: int
        cancelled: bool = False

    web_app = FastAPI()

    @web_app.post("/pad")
    async def pad_track_endpoint(request: Request, req: PaddingRequest) -> PaddingResponse:
        """Modal web endpoint for padding audio tracks with disconnect detection."""
        import logging

        logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
        logger = logging.getLogger(__name__)

        if not req.track_url:
            raise HTTPException(status_code=400, detail="track_url cannot be empty")
        if not req.output_url:
            raise HTTPException(status_code=400, detail="output_url cannot be empty")
        if req.start_time_seconds <= 0:
            raise HTTPException(status_code=400, detail=f"start_time_seconds must be positive, got {req.start_time_seconds}")
        if req.start_time_seconds > 18000:
            raise HTTPException(status_code=400, detail="start_time_seconds exceeds maximum 18000s (5 hours)")

        logger.info(f"Padding request: track {req.track_index}, delay={req.start_time_seconds}s")

        # Thread-safe cancellation flag shared between async disconnect checker and blocking thread
        import threading
        cancelled = threading.Event()

        async def check_disconnect():
            """Background task to check for client disconnect every 2 seconds."""
            while not cancelled.is_set():
                await asyncio.sleep(DISCONNECT_CHECK_INTERVAL)
                if await request.is_disconnected():
                    logger.warning("Client disconnected, setting cancellation flag")
                    cancelled.set()
                    break

        # Start disconnect checker in background
        disconnect_task = asyncio.create_task(check_disconnect())

        try:
            result = await asyncio.get_event_loop().run_in_executor(
                None, _pad_track_blocking, req, cancelled, logger
            )
            return PaddingResponse(**result)
        finally:
            cancelled.set()
            disconnect_task.cancel()
            try:
                await disconnect_task
            except asyncio.CancelledError:
                pass

    def _pad_track_blocking(req, cancelled, logger) -> dict:
        """Blocking CPU-bound padding work with periodic cancellation checks.

        Args:
            cancelled: threading.Event for thread-safe cancellation signaling
        """
        import av
        import requests
        from av.audio.resampler import AudioResampler
        import time

        temp_dir = tempfile.mkdtemp()
        input_path = None
        output_path = None
        last_check = time.time()

        try:
            logger.info("Downloading track for padding")
            response = requests.get(req.track_url, stream=True, timeout=S3_TIMEOUT)
            response.raise_for_status()

            input_path = os.path.join(temp_dir, "track.webm")
            total_bytes = 0
            chunk_count = 0
            with open(input_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        total_bytes += len(chunk)
                        chunk_count += 1

                    # Check for cancellation every 12 chunks, throttled to DISCONNECT_CHECK_INTERVAL
                    if chunk_count % 12 == 0:
                        now = time.time()
                        if now - last_check >= DISCONNECT_CHECK_INTERVAL:
                            if cancelled.is_set():
                                logger.info("Cancelled during download, exiting early")
                                return {"size": 0, "cancelled": True}
                            last_check = now
            logger.info(f"Track downloaded: {total_bytes} bytes")

            if cancelled.is_set():
                logger.info("Cancelled after download, exiting early")
                return {"size": 0, "cancelled": True}

            # Apply padding using PyAV
            output_path = os.path.join(temp_dir, "padded.webm")
            delay_ms = math.floor(req.start_time_seconds * 1000)
            logger.info(f"Padding track {req.track_index} with {delay_ms}ms delay using PyAV")

            in_container = av.open(input_path)
            in_stream = next((s for s in in_container.streams if s.type == "audio"), None)
            if in_stream is None:
                raise ValueError("No audio stream in input")

            with av.open(output_path, "w", format="webm") as out_container:
                out_stream = out_container.add_stream("libopus", rate=OPUS_STANDARD_SAMPLE_RATE)
                out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE
                graph = av.filter.Graph()

                abuf_args = (
                    f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
                    f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
                    f"sample_fmt=s16:"
                    f"channel_layout=stereo"
                )
                src = graph.add("abuffer", args=abuf_args, name="src")
                aresample_f = graph.add("aresample", args="async=1", name="ares")
                delays_arg = f"{delay_ms}|{delay_ms}"
                adelay_f = graph.add("adelay", args=f"delays={delays_arg}:all=1", name="delay")
                sink = graph.add("abuffersink", name="sink")
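
                # Filter chain: abuffer (src) -> aresample async=1 -> adelay (same delay on both stereo channels, all=1) -> abuffersink (sink)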
                src.link_to(aresample_f)
                aresample_f.link_to(adelay_f)
                adelay_f.link_to(sink)
                graph.configure()

                resampler = AudioResampler(
                    format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
                )

                for frame in in_container.decode(in_stream):
                    # Check for cancellation periodically
                    now = time.time()
                    if now - last_check >= DISCONNECT_CHECK_INTERVAL:
                        if cancelled.is_set():
                            logger.info("Cancelled during processing, exiting early")
                            in_container.close()
                            return {"size": 0, "cancelled": True}
                        last_check = now

                    out_frames = resampler.resample(frame) or []
                    for rframe in out_frames:
                        rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                        rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                        src.push(rframe)

                    while True:
                        try:
                            f_out = sink.pull()
                        except Exception:
                            break
                        f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                        f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                        for packet in out_stream.encode(f_out):
                            out_container.mux(packet)

                # Flush filter graph
                src.push(None)
                while True:
                    try:
                        f_out = sink.pull()
                    except Exception:
                        break
                    f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                    f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                    for packet in out_stream.encode(f_out):
                        out_container.mux(packet)

                # Flush encoder
                for packet in out_stream.encode(None):
                    out_container.mux(packet)

            in_container.close()

            file_size = os.path.getsize(output_path)
            logger.info(f"Padding complete: {file_size} bytes")

            logger.info("Uploading padded track to S3")

            with open(output_path, "rb") as f:
                upload_response = requests.put(req.output_url, data=f, timeout=S3_TIMEOUT)

            upload_response.raise_for_status()
            logger.info(f"Upload complete: {file_size} bytes")

            return {"size": file_size}

        finally:
            if input_path and os.path.exists(input_path):
                try:
                    os.unlink(input_path)
                except Exception as e:
                    logger.warning(f"Failed to cleanup input file: {e}")
            if output_path and os.path.exists(output_path):
                try:
                    os.unlink(output_path)
                except Exception as e:
                    logger.warning(f"Failed to cleanup output file: {e}")
            try:
                os.rmdir(temp_dir)
            except Exception as e:
                logger.warning(f"Failed to cleanup temp directory: {e}")

    return web_app
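
For reference, a minimal client sketch for the `/pad` endpoint this (now-removed) Modal app exposed. The endpoint URL and the presigned URLs below are placeholders - Modal assigns the real web endpoint URL at deploy time, and the caller generates the S3 presigned URLs:

```python
import requests

# Hypothetical values, for illustration only.
PAD_ENDPOINT = "https://example--reflector-padding-web.modal.run/pad"

resp = requests.post(
    PAD_ENDPOINT,
    json={
        "track_url": "https://s3.example.com/presigned-get-url",
        "output_url": "https://s3.example.com/presigned-put-url",
        "start_time_seconds": 12.5,
        "track_index": 0,
    },
    timeout=720,  # PADDING_TIMEOUT on the server side
)
resp.raise_for_status()
print(resp.json())  # {"size": <bytes>} or {"size": 0, "cancelled": true}
```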
@@ -86,7 +86,7 @@ Daily.co Room: "daily-private-igor-20260110042117"
| **Purpose** | Tracks active session state | Links recordings, transcripts, participants |
| **Scope** | Per room instance | Per Reflector room + timestamp |

**Critical Limitation:** Daily.co's recordings API often does NOT return `mtgSessionId` (can be null), requiring time-based matching (see [Time-Based Matching](#time-based-matching)).
**Critical Limitation:** Daily.co's recordings API often does NOT return `mtgSessionId`, requiring time-based matching (see [Time-Based Matching](#time-based-matching)).

### Recording

@@ -101,30 +101,6 @@ Daily.co Room: "daily-private-igor-20260110042117"

**Critical Behavior:** Recording **stops/restarts** create **separate recording objects** with unique IDs.

### instanceId (Reflector-Generated)

**Definition:** UUID we generate and send when starting recording via REST API.

**Generation:** Deterministic from meeting_id
- Cloud: `instanceId = meeting_id` directly
- Raw-tracks: `instanceId = UUIDv5(meeting_id, namespace)`

**Key behaviors:**
- ✅ **Reuse allowed:** Same instanceId can be used after stop (validated 2026-01-20)
- ❌ **Not returned:** Daily.co does NOT echo instanceId back in GET /recordings response
- ✅ **Present in error webhooks:** `recording.error` webhook includes instanceId
- **Purpose:** Allows multiple concurrent recordings (cloud + raw-tracks) in same room

**Stop/restart example:**
```
Recording 1: POST /start with instanceId="779e6376..." → recording_id="ee00c4e8..."
Stop recording
Recording 2: POST /start with instanceId="779e6376..." (SAME) → recording_id="b702f509..." (DIFFERENT)
✅ Both succeed, different recording_ids returned
```

**Implication:** Cannot match recordings by instanceId (not in response) - must use recording_id.
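
A minimal sketch of the derivation described above. The namespace UUID below is a placeholder, not the production value - it only illustrates the deterministic UUIDv5 step:

```python
import uuid

# Placeholder namespace - NOT the value used by Reflector.
RAW_TRACKS_NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000")

def instance_id_for(meeting_id: str, raw_tracks: bool) -> str:
    """Cloud reuses meeting_id directly; raw-tracks derives UUIDv5(meeting_id,
    namespace) so both recordings can run concurrently in the same room."""
    if not raw_tracks:
        return meeting_id
    return str(uuid.uuid5(RAW_TRACKS_NAMESPACE, meeting_id))
```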

---

## Entity Relationships
@@ -220,19 +196,6 @@ Daily.co Room: "daily-private-igor-20260110042117"

`mtgSessionId` identifies a **Daily.co meeting session** (not individual participants, not a room).

**Reliability:** Can be null or present in GET /recordings response (unreliable).

**When present:** Multiple recordings from same session (stop/restart with participants connected) share same mtgSessionId.

**Example (validated 2026-01-20):**
```json
Recording 1: {"id": "ee00c4e8...", "mtgSessionId": "92c4136a-a8da-41c5-9c45-e9a2baae6bd6"}
Recording 2: {"id": "b702f509...", "mtgSessionId": "92c4136a-a8da-41c5-9c45-e9a2baae6bd6"}
// Same mtgSessionId (stop/restart in same session)
```

**When null:** Common - Daily.co API does not reliably populate this field.

### session_id (Per-Participant)

**Different concept:** Per-participant connection identifier from webhooks.
@@ -257,24 +220,16 @@ TABLE daily_participant_session (

Daily.co's recordings API does not reliably return `mtgSessionId`, making it impossible to directly link recordings to meetings via Daily.co's identifiers.

**Example API response (mtgSessionId can be null OR present):**
**Example API response:**
```json
{
  "id": "recording-uuid",
  "room_name": "daily-private-igor-20260110042117",
  "start_ts": 1768018896,
  "mtgSessionId": null // ← Often null (unreliable)
}

// OR (when present):
{
  "id": "recording-uuid",
  "mtgSessionId": "92c4136a-a8da-41c5-9c45-e9a2baae6bd6" // ← Sometimes present
  "mtgSessionId": null ← Missing!
}
```

**Key insight:** Cannot rely on mtgSessionId for matching (unreliable). instanceId also not returned. Only reliable identifier is recording.id.

### Solution: Time-Based Matching

**Implementation:** `reflector/db/meetings.py:get_by_room_name_and_time()`
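
A hedged usage sketch of that matcher, wired to the recording fields shown in the example above (the `recording` dict is hypothetical, and the call assumes an async context):

```python
from datetime import datetime, timezone

from reflector.db.meetings import meetings_controller

# recording: one item from Daily.co's GET /recordings response (shape shown above)
recording_start = datetime.fromtimestamp(recording["start_ts"], tz=timezone.utc)
meeting = await meetings_controller.get_by_room_name_and_time(
    room_name=recording["room_name"],
    recording_start=recording_start,
)
if meeting is None:
    # No meeting within the 1-week window; the recording stays an orphan.
    ...
```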
@@ -536,10 +491,6 @@ UI: User sees 3 separate transcripts


---
**Document Version:** 1.1
**Last Updated:** 2026-01-20
**Data Source:** Production database + Daily.co API inspection + empirical testing
**Changes in 1.1:**
- Added instanceId behavior documentation (reuse allowed, not returned in API)
- Clarified mtgSessionId reliability (can be null or present)
- Added empirical validation of stop/restart behavior
**Document Version:** 1.0
**Last Verified:** 2026-01-15
**Data Source:** Production database + Daily.co API inspection
@@ -1,67 +0,0 @@
"""add_daily_recording_requests

Revision ID: f5b008fa8a14
Revises: 1b1e6a6fc465
Create Date: 2026-01-20 22:32:06.697144

"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "f5b008fa8a14"
down_revision: Union[str, None] = "1b1e6a6fc465"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    op.create_table(
        "daily_recording_request",
        sa.Column("recording_id", sa.String(), nullable=False),
        sa.Column("meeting_id", sa.String(), nullable=False),
        sa.Column("instance_id", sa.String(), nullable=False),
        sa.Column("type", sa.String(), nullable=False),
        sa.Column("requested_at", sa.DateTime(timezone=True), nullable=False),
        sa.ForeignKeyConstraint(["meeting_id"], ["meeting.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("recording_id"),
    )
    op.create_index("idx_meeting_id", "daily_recording_request", ["meeting_id"])
    op.create_index("idx_instance_id", "daily_recording_request", ["instance_id"])

    # Clean up orphaned recordings before adding FK constraint
    op.execute("""
        UPDATE recording SET status = 'orphan', meeting_id = NULL
        WHERE meeting_id IS NOT NULL
        AND meeting_id NOT IN (SELECT id FROM meeting)
    """)

    # Add FK constraint to recording table (cascade delete recordings when meeting deleted)
    op.execute("""
        ALTER TABLE recording ADD CONSTRAINT fk_recording_meeting
        FOREIGN KEY (meeting_id) REFERENCES meeting(id) ON DELETE CASCADE
    """)

    # Add CHECK constraints to enforce orphan invariants
    op.execute("""
        ALTER TABLE recording ADD CONSTRAINT chk_orphan_no_meeting
        CHECK (status != 'orphan' OR meeting_id IS NULL)
    """)
    op.execute("""
        ALTER TABLE recording ADD CONSTRAINT chk_non_orphan_has_meeting
        CHECK (status = 'orphan' OR meeting_id IS NOT NULL)
    """)


def downgrade() -> None:
    op.execute("ALTER TABLE recording DROP CONSTRAINT IF EXISTS chk_orphan_no_meeting")
    op.execute(
        "ALTER TABLE recording DROP CONSTRAINT IF EXISTS chk_non_orphan_has_meeting"
    )
    op.execute("ALTER TABLE recording DROP CONSTRAINT IF EXISTS fk_recording_meeting")
    op.drop_index("idx_instance_id", table_name="daily_recording_request")
    op.drop_index("idx_meeting_id", table_name="daily_recording_request")
    op.drop_table("daily_recording_request")
@@ -8,7 +8,7 @@ readme = "README.md"
dependencies = [
    "aiohttp>=3.9.0",
    "aiohttp-cors>=0.7.0",
    "av>=15.0.0",
    "av>=10.0.0",
    "requests>=2.31.0",
    "aiortc>=1.5.0",
    "sortedcontainers>=2.4.0",
@@ -1,56 +0,0 @@
"""Utility for creating orphan recordings."""

import os
from datetime import datetime, timezone

from reflector.db.recordings import Recording, recordings_controller
from reflector.logger import logger
from reflector.utils.string import NonEmptyString


async def create_and_log_orphan(
    recording_id: NonEmptyString,
    bucket_name: str,
    room_name: str,
    start_ts: int,
    track_keys: list[str] | None,
    source: str,
) -> bool:
    """Create orphan recording and log if first occurrence.

    Args:
        recording_id: Daily.co recording ID
        bucket_name: S3 bucket (empty string for cloud recordings)
        room_name: Daily.co room name
        start_ts: Unix timestamp
        track_keys: Track keys for raw-tracks, None for cloud
        source: "webhook" or "polling" for logging

    Returns:
        True if created (first poller), False if already exists
    """
    if track_keys:
        object_key = os.path.dirname(track_keys[0]) if track_keys else room_name
    else:
        object_key = room_name

    created = await recordings_controller.create_orphan(
        Recording(
            id=recording_id,
            bucket_name=bucket_name,
            object_key=object_key,
            recorded_at=datetime.fromtimestamp(start_ts, tz=timezone.utc),
            track_keys=track_keys,
            meeting_id=None,
            status="orphan",
        )
    )

    if created:
        logger.error(
            f"Orphan recording ({source})",
            recording_id=recording_id,
            room_name=room_name,
        )

    return created
@@ -26,7 +26,6 @@ def get_database() -> databases.Database:
    # import models
    import reflector.db.calendar_events  # noqa
    import reflector.db.daily_participant_sessions  # noqa
    import reflector.db.daily_recording_requests  # noqa
    import reflector.db.meetings  # noqa
    import reflector.db.recordings  # noqa
    import reflector.db.rooms  # noqa
@@ -1,111 +0,0 @@
from datetime import datetime
from typing import Literal
from uuid import UUID

import sqlalchemy as sa
from pydantic import BaseModel
from sqlalchemy.dialects.postgresql import insert

from reflector.db import get_database, metadata
from reflector.utils.string import NonEmptyString

daily_recording_requests = sa.Table(
    "daily_recording_request",
    metadata,
    sa.Column("recording_id", sa.String, primary_key=True),
    sa.Column(
        "meeting_id",
        sa.String,
        sa.ForeignKey("meeting.id", ondelete="CASCADE"),
        nullable=False,
    ),
    sa.Column("instance_id", sa.String, nullable=False),
    sa.Column("type", sa.String, nullable=False),
    sa.Column("requested_at", sa.DateTime(timezone=True), nullable=False),
    sa.Index("idx_meeting_id", "meeting_id"),
    sa.Index("idx_instance_id", "instance_id"),
)


class DailyRecordingRequest(BaseModel):
    recording_id: NonEmptyString
    meeting_id: NonEmptyString
    instance_id: UUID
    type: Literal["cloud", "raw-tracks"]
    requested_at: datetime


class DailyRecordingRequestsController:
    async def create(self, request: DailyRecordingRequest) -> None:
        stmt = insert(daily_recording_requests).values(
            recording_id=request.recording_id,
            meeting_id=request.meeting_id,
            instance_id=str(request.instance_id),
            type=request.type,
            requested_at=request.requested_at,
        )
        stmt = stmt.on_conflict_do_nothing(index_elements=["recording_id"])
        await get_database().execute(stmt)

    async def find_by_recording_id(
        self,
        recording_id: NonEmptyString,
    ) -> tuple[NonEmptyString, Literal["cloud", "raw-tracks"]] | None:
        query = daily_recording_requests.select().where(
            daily_recording_requests.c.recording_id == recording_id
        )
        result = await get_database().fetch_one(query)

        if not result:
            return None

        req = DailyRecordingRequest(
            recording_id=result["recording_id"],
            meeting_id=result["meeting_id"],
            instance_id=UUID(result["instance_id"]),
            type=result["type"],
            requested_at=result["requested_at"],
        )
        return (req.meeting_id, req.type)

    async def find_by_instance_id(
        self,
        instance_id: UUID,
    ) -> list[DailyRecordingRequest]:
        """Multiple recordings can have same instance_id (stop/restart)."""
        query = daily_recording_requests.select().where(
            daily_recording_requests.c.instance_id == str(instance_id)
        )
        results = await get_database().fetch_all(query)
        return [
            DailyRecordingRequest(
                recording_id=r["recording_id"],
                meeting_id=r["meeting_id"],
                instance_id=UUID(r["instance_id"]),
                type=r["type"],
                requested_at=r["requested_at"],
            )
            for r in results
        ]

    async def get_by_meeting_id(
        self,
        meeting_id: NonEmptyString,
    ) -> list[DailyRecordingRequest]:
        query = daily_recording_requests.select().where(
            daily_recording_requests.c.meeting_id == meeting_id
        )
        results = await get_database().fetch_all(query)
        return [
            DailyRecordingRequest(
                recording_id=r["recording_id"],
                meeting_id=r["meeting_id"],
                instance_id=UUID(r["instance_id"]),
                type=r["type"],
                requested_at=r["requested_at"],
            )
            for r in results
        ]


daily_recording_requests_controller = DailyRecordingRequestsController()
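
For orientation, a short usage sketch of this (now-removed) controller, assuming an async context; all names are from the file above:

```python
# Resolve which meeting a finished Daily.co recording belongs to, using the
# request row written when the recording was started.
match = await daily_recording_requests_controller.find_by_recording_id(recording_id)
if match is not None:
    meeting_id, recording_type = match  # recording_type is "cloud" or "raw-tracks"
```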
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import datetime, timedelta
from typing import Any, Literal

import sqlalchemy as sa
@@ -183,6 +183,84 @@ class MeetingController:
        results = await get_database().fetch_all(query)
        return [Meeting(**r) for r in results]

    async def get_by_room_name_and_time(
        self,
        room_name: NonEmptyString,
        recording_start: datetime,
        time_window_hours: int = 168,
    ) -> Meeting | None:
        """
        Get meeting by room name closest to recording timestamp.

        HACK ALERT: Daily.co doesn't return instanceId in recordings API response,
        and mtgSessionId is separate from our instanceId. Time-based matching is
        the least-bad workaround.

        This handles edge case of duplicate room_name values in DB (race conditions,
        double-clicks, etc.) by matching based on temporal proximity.

        Algorithm:
        1. Find meetings within time_window_hours of recording_start
        2. Return meeting with start_date closest to recording_start
        3. If tie, return first by meeting.id (deterministic)

        Args:
            room_name: Daily.co room name from recording
            recording_start: Timezone-aware datetime from recording.start_ts
            time_window_hours: Search window (default 168 = 1 week)

        Returns:
            Meeting closest to recording timestamp, or None if no matches

        Failure modes:
        - Multiple meetings in same room within ~5 minutes: picks closest
        - All meetings outside time window: returns None
        - Clock skew between Daily.co and DB: 1-week window tolerates this

        Why 1 week window:
        - Handles webhook failures (recording discovered days later)
        - Tolerates clock skew
        - Rejects unrelated meetings from weeks ago
        """
        # Validate timezone-aware datetime
        if recording_start.tzinfo is None:
            raise ValueError(
                f"recording_start must be timezone-aware, got naive datetime: {recording_start}"
            )

        window_start = recording_start - timedelta(hours=time_window_hours)
        window_end = recording_start + timedelta(hours=time_window_hours)

        query = (
            meetings.select()
            .where(
                sa.and_(
                    meetings.c.room_name == room_name,
                    meetings.c.start_date >= window_start,
                    meetings.c.start_date <= window_end,
                )
            )
            .order_by(meetings.c.start_date)
        )

        results = await get_database().fetch_all(query)
        if not results:
            return None

        candidates = [Meeting(**r) for r in results]

        # Find meeting with start_date closest to recording_start
        closest = min(
            candidates,
            key=lambda m: (
                abs((m.start_date - recording_start).total_seconds()),
                m.id,  # Tie-breaker: deterministic by UUID
            ),
        )

        return closest

    async def get_active(self, room: Room, current_time: datetime) -> Meeting | None:
        """
        Get latest active meeting for a room.
@@ -272,6 +350,44 @@
        query = meetings.update().where(meetings.c.id == meeting_id).values(**kwargs)
        await get_database().execute(query)

    async def set_cloud_recording_if_missing(
        self,
        meeting_id: NonEmptyString,
        s3_key: NonEmptyString,
        duration: int,
    ) -> bool:
        """
        Set cloud recording only if not already set.

        Returns True if updated, False if already set.
        Prevents webhook/polling race condition via atomic WHERE clause.
        """
        # Check current value before update to detect actual change
        meeting_before = await self.get_by_id(meeting_id)
        if not meeting_before:
            return False

        was_null = meeting_before.daily_composed_video_s3_key is None

        query = (
            meetings.update()
            .where(
                sa.and_(
                    meetings.c.id == meeting_id,
                    meetings.c.daily_composed_video_s3_key.is_(None),
                )
            )
            .values(
                daily_composed_video_s3_key=s3_key,
                daily_composed_video_duration=duration,
            )
        )
        await get_database().execute(query)

        # Return True only if value was NULL before (actual update occurred)
        # If was_null=False, the WHERE clause prevented the update
        return was_null

    async def increment_num_clients(self, meeting_id: str) -> None:
        """Atomically increment participant count."""
        query = (
@@ -351,27 +467,6 @@ class MeetingConsentController:
        result = await get_database().fetch_one(query)
        return result is not None

    async def set_cloud_recording_if_missing(
        self,
        meeting_id: NonEmptyString,
        s3_key: NonEmptyString,
        duration: int,
    ) -> bool:
        """Returns True if updated, False if already set."""
        query = (
            meetings.update()
            .where(
                meetings.c.id == meeting_id,
                meetings.c.daily_composed_video_s3_key.is_(None),
            )
            .values(
                daily_composed_video_s3_key=s3_key,
                daily_composed_video_duration=duration,
            )
        )
        result = await get_database().execute(query)
        return result.rowcount > 0


meetings_controller = MeetingController()
meeting_consent_controller = MeetingConsentController()
@@ -4,10 +4,10 @@ from typing import Literal
import sqlalchemy as sa
from pydantic import BaseModel, Field
from sqlalchemy import or_
from sqlalchemy.dialects.postgresql import insert

from reflector.db import get_database, metadata
from reflector.utils import generate_uuid4
from reflector.utils.string import NonEmptyString

recordings = sa.Table(
    "recording",
@@ -31,13 +31,14 @@ recordings = sa.Table(
class Recording(BaseModel):
    id: str = Field(default_factory=generate_uuid4)
    bucket_name: str
    # for single-track
    object_key: str
    recorded_at: datetime
    status: Literal["pending", "processing", "completed", "failed", "orphan"] = (
        "pending"
    )
    status: Literal["pending", "processing", "completed", "failed"] = "pending"
    meeting_id: str | None = None
    # None = single-track, [] = multitrack with no audio, [keys...] = multitrack with audio
    # for multitrack reprocessing
    # track_keys can be empty list [] if recording finished but no audio was captured (silence/muted)
    # None means not a multitrack recording, [] means multitrack with no tracks
    track_keys: list[str] | None = None

    @property
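    # Editor's sketch (hypothetical): the property body is elided by this hunk.
    # A reading consistent with the track_keys comments above and with the
    # `r.is_multitrack` filter used later in this file would be:
    #     def is_multitrack(self) -> bool:
    #         return self.track_keys is not None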
@@ -71,6 +72,20 @@ class RecordingController:
        query = recordings.delete().where(recordings.c.id == id)
        await get_database().execute(query)

    async def set_meeting_id(
        self,
        recording_id: NonEmptyString,
        meeting_id: NonEmptyString,
    ) -> None:
        """Link recording to meeting."""
        query = (
            recordings.update()
            .where(recordings.c.id == recording_id)
            .values(meeting_id=meeting_id)
        )
        await get_database().execute(query)

    # no check for existence
    async def get_by_ids(self, recording_ids: list[str]) -> list[Recording]:
        if not recording_ids:
            return []
@@ -89,12 +104,9 @@ class RecordingController:

        This is more efficient than fetching all recordings and filtering in Python.
        """
        # INLINE IMPORT REQUIRED: Circular dependency
        # - recordings.py needs transcripts table for JOIN query
        # - transcripts.py imports recordings_controller
        # - db/__init__.py loads recordings before transcripts (line 31 vs 33)
        # - Top-level import would fail during module initialization
        from reflector.db.transcripts import transcripts
        from reflector.db.transcripts import (
            transcripts,  # noqa: PLC0415 cyclic import
        )

        query = (
            recordings.select()
@@ -112,27 +124,5 @@
        recordings_list = [Recording(**row) for row in results]
        return [r for r in recordings_list if r.is_multitrack]

    async def try_create_with_meeting(self, recording: Recording) -> bool:
        """Returns True if created, False if already exists."""
        assert recording.meeting_id is not None, "meeting_id required for non-orphan"
        assert recording.status != "orphan", "use create_orphan for orphans"

        stmt = insert(recordings).values(**recording.model_dump())
        stmt = stmt.on_conflict_do_nothing(index_elements=["id"])
        result = await get_database().execute(stmt)

        return result.rowcount > 0

    async def create_orphan(self, recording: Recording) -> bool:
        """Returns True if created, False if already exists."""
        assert recording.status == "orphan", "status must be 'orphan'"
        assert recording.meeting_id is None, "meeting_id must be NULL for orphan"

        stmt = insert(recordings).values(**recording.model_dump())
        stmt = stmt.on_conflict_do_nothing(index_elements=["id"])
        result = await get_database().execute(stmt)

        return result.rowcount > 0


recordings_controller = RecordingController()
@@ -8,7 +8,8 @@ from enum import StrEnum
class TaskName(StrEnum):
    GET_RECORDING = "get_recording"
    GET_PARTICIPANTS = "get_participants"
    PROCESS_TRACKS = "process_tracks"
    PROCESS_PADDINGS = "process_paddings"
    PROCESS_TRANSCRIPTIONS = "process_transcriptions"
    MIXDOWN_TRACKS = "mixdown_tracks"
    GENERATE_WAVEFORM = "generate_waveform"
    DETECT_TOPICS = "detect_topics"
@@ -35,9 +36,7 @@ LLM_RATE_LIMIT_PER_SECOND = 10

# Task execution timeouts (seconds)
TIMEOUT_SHORT = 60  # Quick operations: API calls, DB updates
TIMEOUT_MEDIUM = (
    300  # Single LLM calls, waveform generation (5m for slow LLM responses)
)
TIMEOUT_MEDIUM = 120  # Single LLM calls, waveform generation
TIMEOUT_LONG = 180  # Action items (larger context LLM)
TIMEOUT_AUDIO = 720  # Audio processing: padding, mixdown
TIMEOUT_AUDIO = 300  # Audio processing: padding, mixdown
TIMEOUT_HEAVY = 600  # Transcription, fan-out LLM tasks
@@ -1,9 +1,9 @@
"""
CPU-heavy worker pool for audio processing tasks.
Handles ONLY: mixdown_tracks
Handles: mixdown_tracks only (serialized with max_runs=1)

Configuration:
- slots=1: Only mixdown (already serialized globally with max_runs=1)
- slots=1: Only one mixdown at a time
- Worker affinity: pool=cpu-heavy
"""

@@ -26,7 +26,7 @@ def main():

    cpu_worker = hatchet.worker(
        "cpu-worker-pool",
        slots=1,  # Only 1 mixdown at a time (already serialized globally)
        slots=1,
        labels={
            "pool": "cpu-heavy",
        },
@@ -1,15 +1,16 @@
"""
LLM/I/O worker pool for all non-CPU tasks.
Handles: all tasks except mixdown_tracks (transcription, LLM inference, orchestration)
Handles: all tasks except mixdown_tracks (padding, transcription, LLM inference, orchestration)
"""

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
    daily_multitrack_pipeline,
)
from reflector.hatchet.workflows.padding_workflow import padding_workflow
from reflector.hatchet.workflows.subject_processing import subject_workflow
from reflector.hatchet.workflows.topic_chunk_processing import topic_chunk_workflow
from reflector.hatchet.workflows.track_processing import track_workflow
from reflector.hatchet.workflows.transcription_workflow import transcription_workflow
from reflector.logger import logger

SLOTS = 10
@@ -29,7 +30,7 @@ def main():

    llm_worker = hatchet.worker(
        WORKER_NAME,
        slots=SLOTS,  # not all slots are probably used
        slots=SLOTS,
        labels={
            "pool": POOL,
        },
@@ -37,7 +38,8 @@ def main():
            daily_multitrack_pipeline,
            topic_chunk_workflow,
            subject_workflow,
            track_workflow,
            padding_workflow,
            transcription_workflow,
        ],
    )
@@ -4,6 +4,10 @@ from reflector.hatchet.workflows.daily_multitrack_pipeline import (
    PipelineInput,
    daily_multitrack_pipeline,
)
from reflector.hatchet.workflows.padding_workflow import (
    PaddingInput,
    padding_workflow,
)
from reflector.hatchet.workflows.subject_processing import (
    SubjectInput,
    subject_workflow,
@@ -12,15 +16,20 @@ from reflector.hatchet.workflows.topic_chunk_processing import (
    TopicChunkInput,
    topic_chunk_workflow,
)
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
from reflector.hatchet.workflows.transcription_workflow import (
    TranscriptionInput,
    transcription_workflow,
)

__all__ = [
    "daily_multitrack_pipeline",
    "subject_workflow",
    "topic_chunk_workflow",
    "track_workflow",
    "padding_workflow",
    "transcription_workflow",
    "PipelineInput",
    "SubjectInput",
    "TopicChunkInput",
    "TrackInput",
    "PaddingInput",
    "TranscriptionInput",
]
@@ -54,8 +54,9 @@ from reflector.hatchet.workflows.models import (
    PadTrackResult,
    ParticipantInfo,
    ParticipantsResult,
    ProcessPaddingsResult,
    ProcessSubjectsResult,
    ProcessTracksResult,
    ProcessTranscriptionsResult,
    RecapResult,
    RecordingResult,
    SubjectsResult,
@@ -68,6 +69,7 @@ from reflector.hatchet.workflows.models import (
    WebhookResult,
    ZulipResult,
)
from reflector.hatchet.workflows.padding_workflow import PaddingInput, padding_workflow
from reflector.hatchet.workflows.subject_processing import (
    SubjectInput,
    subject_workflow,
@@ -76,7 +78,10 @@ from reflector.hatchet.workflows.topic_chunk_processing import (
    TopicChunkInput,
    topic_chunk_workflow,
)
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
from reflector.hatchet.workflows.transcription_workflow import (
    TranscriptionInput,
    transcription_workflow,
)
from reflector.logger import logger
from reflector.pipelines import topic_processing
from reflector.processors import AudioFileWriterProcessor
@@ -322,7 +327,6 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
    mtg_session_id = recording.mtg_session_id
    async with fresh_db_connection():
        from reflector.db.transcripts import (  # noqa: PLC0415
            TranscriptDuration,
            TranscriptParticipant,
            transcripts_controller,
        )
@@ -331,26 +335,15 @@
        if not transcript:
            raise ValueError(f"Transcript {input.transcript_id} not found")
        # Note: title NOT cleared - preserves existing titles
        # Duration from Daily API (seconds -> milliseconds) - master source
        duration_ms = recording.duration * 1000 if recording.duration else 0
        await transcripts_controller.update(
            transcript,
            {
                "events": [],
                "topics": [],
                "participants": [],
                "duration": duration_ms,
            },
        )

        await append_event_and_broadcast(
            input.transcript_id,
            transcript,
            "DURATION",
            TranscriptDuration(duration=duration_ms),
            logger=logger,
        )

    mtg_session_id = assert_non_none_and_non_empty(
        mtg_session_id, "mtg_session_id is required"
    )
@@ -416,72 +409,115 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
    execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
    retries=3,
)
@with_error_handling(TaskName.PROCESS_TRACKS)
async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksResult:
    """Spawn child workflows for each track (dynamic fan-out)."""
    ctx.log(f"process_tracks: spawning {len(input.tracks)} track workflows")

    participants_result = ctx.task_output(get_participants)
    source_language = participants_result.source_language
@with_error_handling(TaskName.PROCESS_PADDINGS)
async def process_paddings(input: PipelineInput, ctx: Context) -> ProcessPaddingsResult:
    """Spawn child workflows for each track to apply padding (dynamic fan-out)."""
    ctx.log(f"process_paddings: spawning {len(input.tracks)} padding workflows")

    bulk_runs = [
        track_workflow.create_bulk_run_item(
            input=TrackInput(
        padding_workflow.create_bulk_run_item(
            input=PaddingInput(
                track_index=i,
                s3_key=track["s3_key"],
                bucket_name=input.bucket_name,
                transcript_id=input.transcript_id,
                language=source_language,
            )
        )
        for i, track in enumerate(input.tracks)
    ]

    results = await track_workflow.aio_run_many(bulk_runs)
    results = await padding_workflow.aio_run_many(bulk_runs)

    target_language = participants_result.target_language

    track_words: list[list[Word]] = []
    padded_tracks = []
    created_padded_files = set()
    created_padded_files = []

    for result in results:
        transcribe_result = TranscribeTrackResult(**result[TaskName.TRANSCRIBE_TRACK])
        track_words.append(transcribe_result.words)

        pad_result = PadTrackResult(**result[TaskName.PAD_TRACK])

        # Store S3 key info (not presigned URL) - consumer tasks presign on demand
        if pad_result.padded_key:
            padded_tracks.append(
                PaddedTrackInfo(
                    key=pad_result.padded_key, bucket_name=pad_result.bucket_name
                )
        padded_tracks.append(
            PaddedTrackInfo(
                key=pad_result.padded_key,
                bucket_name=pad_result.bucket_name,
                track_index=pad_result.track_index,
            )
        )

        if pad_result.size > 0:
            storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{pad_result.track_index}.webm"
            created_padded_files.add(storage_path)
            created_padded_files.append(storage_path)

    all_words = [word for words in track_words for word in words]
    all_words.sort(key=lambda w: w.start)
    ctx.log(f"process_paddings complete: {len(padded_tracks)} padded tracks")

    ctx.log(
        f"process_tracks complete: {len(all_words)} words from {len(input.tracks)} tracks"
    )

    return ProcessTracksResult(
        all_words=all_words,
    return ProcessPaddingsResult(
        padded_tracks=padded_tracks,
        word_count=len(all_words),
        num_tracks=len(input.tracks),
        target_language=target_language,
        created_padded_files=list(created_padded_files),
    )


@daily_multitrack_pipeline.task(
    parents=[process_tracks],
    parents=[process_paddings],
    execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
    retries=3,
)
@with_error_handling(TaskName.PROCESS_TRANSCRIPTIONS)
async def process_transcriptions(
    input: PipelineInput, ctx: Context
) -> ProcessTranscriptionsResult:
    """Spawn child workflows for each padded track to transcribe (dynamic fan-out)."""
    participants_result = ctx.task_output(get_participants)
    paddings_result = ctx.task_output(process_paddings)

    source_language = participants_result.source_language
    if not source_language:
        raise ValueError("source_language is required for transcription")

    target_language = participants_result.target_language
    padded_tracks = paddings_result.padded_tracks

    if not padded_tracks:
        raise ValueError("No padded tracks available for transcription")

    ctx.log(
        f"process_transcriptions: spawning {len(padded_tracks)} transcription workflows"
    )

    bulk_runs = [
        transcription_workflow.create_bulk_run_item(
            input=TranscriptionInput(
                track_index=padded_track.track_index,
                padded_key=padded_track.key,
                bucket_name=padded_track.bucket_name,
                language=source_language,
            )
        )
        for padded_track in padded_tracks
    ]

    results = await transcription_workflow.aio_run_many(bulk_runs)

    track_words: list[list[Word]] = []
    for result in results:
        transcribe_result = TranscribeTrackResult(**result[TaskName.TRANSCRIBE_TRACK])
        track_words.append(transcribe_result.words)

    all_words = [word for words in track_words for word in words]
    all_words.sort(key=lambda w: w.start)

    ctx.log(
        f"process_transcriptions complete: {len(all_words)} words from {len(padded_tracks)} tracks"
    )

    return ProcessTranscriptionsResult(
        all_words=all_words,
        word_count=len(all_words),
        num_tracks=len(input.tracks),
        target_language=target_language,
    )
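For orientation amid the interleaved before/after lines above, the Hatchet fan-out pattern both new tasks rely on reduces to a short sketch; the calls (create_bulk_run_item, aio_run_many, the TaskName result lookups) are the ones in the diff, while the surrounding scaffolding is illustrative:

# Sketch: one bulk-run item per track, run all children in parallel, then read
# each child's output by task name from its result dict.
bulk_runs = [
    padding_workflow.create_bulk_run_item(
        input=PaddingInput(
            track_index=i,
            s3_key=track["s3_key"],
            bucket_name=input.bucket_name,
            transcript_id=input.transcript_id,
        )
    )
    for i, track in enumerate(input.tracks)
]
results = await padding_workflow.aio_run_many(bulk_runs)
pad_results = [PadTrackResult(**result[TaskName.PAD_TRACK]) for result in results]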
@daily_multitrack_pipeline.task(
    parents=[process_paddings],
    execution_timeout=timedelta(seconds=TIMEOUT_AUDIO),
    retries=3,
    desired_worker_labels={
@@ -501,12 +537,12 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
)
@with_error_handling(TaskName.MIXDOWN_TRACKS)
async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
    """Mix all padded tracks into single audio file using PyAV (same as Celery)."""
    """Mix all padded tracks into single audio file using PyAV."""
    ctx.log("mixdown_tracks: mixing padded tracks into single audio file")

    track_result = ctx.task_output(process_tracks)
    paddings_result = ctx.task_output(process_paddings)
    recording_result = ctx.task_output(get_recording)
    padded_tracks = track_result.padded_tracks
    padded_tracks = paddings_result.padded_tracks

    # Dynamic timeout: scales with track count and recording duration
    # Base 300s + 60s per track + 1s per 10s of recording
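The "Base 300s + 60s per track + 1s per 10s of recording" comment above is a simple linear formula; spelled out as a sketch (the helper name is illustrative, not from the diff):

def mixdown_timeout_seconds(num_tracks: int, recording_duration_s: float) -> float:
    # Base 300s + 60s per track + 1s per 10s of recording.
    return 300 + 60 * num_tracks + recording_duration_s / 10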
@@ -660,7 +696,7 @@ async def generate_waveform(input: PipelineInput, ctx: Context) -> WaveformResul


@daily_multitrack_pipeline.task(
    parents=[process_tracks],
    parents=[process_transcriptions],
    execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
    retries=3,
)
@@ -669,8 +705,8 @@ async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
    """Detect topics using parallel child workflows (one per chunk)."""
    ctx.log("detect_topics: analyzing transcript for topics")

    track_result = ctx.task_output(process_tracks)
    words = track_result.all_words
    transcriptions_result = ctx.task_output(process_transcriptions)
    words = transcriptions_result.all_words

    if not words:
        ctx.log("detect_topics: no words, returning empty topics")
@@ -1107,7 +1143,7 @@ async def identify_action_items(


@daily_multitrack_pipeline.task(
    parents=[process_tracks, generate_title, generate_recap, identify_action_items],
    parents=[generate_title, generate_recap, identify_action_items],
    execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
    retries=3,
)
@@ -1120,10 +1156,15 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
    """
    ctx.log("finalize: saving transcript and setting status to 'ended'")

    track_result = ctx.task_output(process_tracks)
    mixdown_result = ctx.task_output(mixdown_tracks)
    transcriptions_result = ctx.task_output(process_transcriptions)
    paddings_result = ctx.task_output(process_paddings)

    duration = mixdown_result.duration
    all_words = transcriptions_result.all_words

    # Cleanup temporary padded S3 files (deferred until finalize for semantic parity with Celery)
    created_padded_files = track_result.created_padded_files
    created_padded_files = paddings_result.created_padded_files
    if created_padded_files:
        ctx.log(f"Cleaning up {len(created_padded_files)} temporary S3 files")
        storage = _spawn_storage()
@@ -1141,6 +1182,7 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:

    async with fresh_db_connection():
        from reflector.db.transcripts import (  # noqa: PLC0415
            TranscriptDuration,
            TranscriptText,
            transcripts_controller,
        )
@@ -1149,6 +1191,8 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
        if transcript is None:
            raise ValueError(f"Transcript {input.transcript_id} not found in database")

        merged_transcript = TranscriptType(words=all_words, translation=None)

        await append_event_and_broadcast(
            input.transcript_id,
            transcript,
@@ -1160,15 +1204,21 @@
            logger=logger,
        )

        # Clear workflow_run_id (workflow completed successfully)
        # Note: title/long_summary/short_summary/duration already saved by their callbacks
        # Save duration and clear workflow_run_id (workflow completed successfully)
        # Note: title/long_summary/short_summary already saved by their callbacks
        await transcripts_controller.update(
            transcript,
            {
                "duration": duration,
                "workflow_run_id": None,  # Clear on success - no need to resume
            },
        )

        duration_data = TranscriptDuration(duration=duration)
        await append_event_and_broadcast(
            input.transcript_id, transcript, "DURATION", duration_data, logger=logger
        )

    await set_status_and_broadcast(input.transcript_id, "ended", logger=logger)

    ctx.log(
@@ -21,12 +21,14 @@ class ParticipantInfo(BaseModel):


class PadTrackResult(BaseModel):
    """Result from pad_track task."""
    """Result from pad_track task.

    padded_key: NonEmptyString  # S3 key (not presigned URL) - presign on demand to avoid stale URLs on replay
    bucket_name: (
        NonEmptyString | None
    )  # None means use default transcript storage bucket
    If size=0, track required no padding and padded_key contains original S3 key.
    If size>0, track was padded and padded_key contains new padded file S3 key.
    """

    padded_key: NonEmptyString
    bucket_name: NonEmptyString | None
    size: int
    track_index: int

@@ -59,18 +61,25 @@ class PaddedTrackInfo(BaseModel):
    """Info for a padded track - S3 key + bucket for on-demand presigning."""

    key: NonEmptyString
    bucket_name: NonEmptyString | None  # None = use default storage bucket
    bucket_name: NonEmptyString | None
    track_index: int


class ProcessTracksResult(BaseModel):
    """Result from process_tracks task."""
class ProcessPaddingsResult(BaseModel):
    """Result from process_paddings task."""

    padded_tracks: list[PaddedTrackInfo]
    num_tracks: int
    created_padded_files: list[NonEmptyString]


class ProcessTranscriptionsResult(BaseModel):
    """Result from process_transcriptions task."""

    all_words: list[Word]
    padded_tracks: list[PaddedTrackInfo]  # S3 keys, not presigned URLs
    word_count: int
    num_tracks: int
    target_language: NonEmptyString
    created_padded_files: list[NonEmptyString]


class MixdownResult(BaseModel):
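The size field on PadTrackResult doubles as a sentinel, as its new docstring above spells out; a hedged sketch of how a consumer reads it (illustrative helper, not code from this diff, though the storage path format matches the pipeline's):

def collect_cleanup_paths(results: list[PadTrackResult], transcript_id: str) -> list[str]:
    # size == 0: padded_key is the original track's S3 key, nothing to delete.
    # size > 0: a new padded file was written and must be cleaned up at finalize.
    return [
        f"file_pipeline_hatchet/{transcript_id}/tracks/padded_{r.track_index}.webm"
        for r in results
        if r.size > 0
    ]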
@@ -1,9 +1,11 @@
"""
Hatchet child workflow: PaddingWorkflow
Handles individual audio track padding via Modal.com backend.
Handles individual audio track padding only.
"""

import tempfile
from datetime import timedelta
from pathlib import Path

import av
from hatchet_sdk import Context
@@ -14,7 +16,10 @@ from reflector.hatchet.constants import TIMEOUT_AUDIO
from reflector.hatchet.workflows.models import PadTrackResult
from reflector.logger import logger
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
from reflector.utils.audio_padding import extract_stream_start_time_from_container
from reflector.utils.audio_padding import (
    apply_audio_padding_to_file,
    extract_stream_start_time_from_container,
)


class PaddingInput(BaseModel):
@@ -63,83 +68,61 @@ async def pad_track(input: PaddingInput, ctx: Context) -> PadTrackResult:
            bucket=input.bucket_name,
        )

        # Extract start_time to determine if padding needed
        with av.open(source_url) as in_container:
            if in_container.duration:
                try:
                    duration = timedelta(seconds=in_container.duration // 1_000_000)
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration={duration}"
                    )
                except (ValueError, TypeError, OverflowError) as e:
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration error: {str(e)}"
                    )
        with av.open(source_url) as in_container:
            if in_container.duration:
                try:
                    duration = timedelta(seconds=in_container.duration // 1_000_000)
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration={duration}"
                    )
                except (ValueError, TypeError, OverflowError) as e:
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration error: {str(e)}"
                    )

            start_time_seconds = extract_stream_start_time_from_container(
                in_container, input.track_index, logger=logger
            )

            if start_time_seconds <= 0:
                logger.info(
                    f"Track {input.track_index} requires no padding",
                    track_index=input.track_index,
                )
                return PadTrackResult(
                    padded_key=input.s3_key,
                    bucket_name=input.bucket_name,
                    size=0,
                    track_index=input.track_index,
                )
            if start_time_seconds <= 0:
                logger.info(
                    f"Track {input.track_index} requires no padding",
                    track_index=input.track_index,
                )
                return PadTrackResult(
                    padded_key=input.s3_key,
                    bucket_name=input.bucket_name,
                    size=0,
                    track_index=input.track_index,
                )

            storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
            storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"

            # Presign PUT URL for output (Modal will upload directly)
            output_url = await storage.get_file_url(
                storage_path,
                operation="put_object",
                expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            )
            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:
                temp_path = temp_file.name

            import httpx  # noqa: PLC0415
            try:
                apply_audio_padding_to_file(
                    in_container,
                    temp_path,
                    start_time_seconds,
                    input.track_index,
                    logger=logger,
                )

            from reflector.processors.audio_padding_modal import (  # noqa: PLC0415
                AudioPaddingModalProcessor,
            )
                file_size = Path(temp_path).stat().st_size

            try:
                processor = AudioPaddingModalProcessor()
                result = await processor.pad_track(
                    track_url=source_url,
                    output_url=output_url,
                    start_time_seconds=start_time_seconds,
                    track_index=input.track_index,
                )
                file_size = result.size
                with open(temp_path, "rb") as padded_file:
                    await storage.put_file(storage_path, padded_file)

                ctx.log(f"pad_track: Modal returned size={file_size}")
            except httpx.HTTPStatusError as e:
                error_detail = e.response.text if hasattr(e.response, "text") else str(e)
                logger.error(
                    "[Hatchet] Modal padding HTTP error",
                    transcript_id=input.transcript_id,
                    track_index=input.track_index,
                    status_code=e.response.status_code if hasattr(e, "response") else None,
                    error=error_detail,
                    exc_info=True,
                )
                raise Exception(
                    f"Modal padding failed: HTTP {e.response.status_code}"
                ) from e
            except httpx.TimeoutException as e:
                logger.error(
                    "[Hatchet] Modal padding timeout",
                    transcript_id=input.transcript_id,
                    track_index=input.track_index,
                    error=str(e),
                    exc_info=True,
                )
                raise Exception("Modal padding timeout") from e
                logger.info(
                    f"Uploaded padded track to S3",
                    key=storage_path,
                    size=file_size,
                )
            finally:
                Path(temp_path).unlink(missing_ok=True)

        logger.info(
            "[Hatchet] pad_track complete",
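apply_audio_padding_to_file itself is not shown in this diff. As a rough sketch of the underlying idea, prepending silence with FFmpeg's adelay filter through a PyAV filter graph, it could look like the following; this is illustrative only, and the real helper in reflector.utils.audio_padding handles encoder frame sizing and timestamps properly:

import av

def prepend_silence(src_path: str, dst_path: str, start_time_seconds: float) -> None:
    # Illustrative: delay all channels by start_time_seconds via adelay,
    # re-encoding to Opus (dst_path should end in .webm).
    delay_ms = int(start_time_seconds * 1000)
    with av.open(src_path) as in_c, av.open(dst_path, "w") as out_c:
        in_stream = in_c.streams.audio[0]
        out_stream = out_c.add_stream("libopus", rate=48000)
        graph = av.filter.Graph()
        src = graph.add_abuffer(template=in_stream)
        delay = graph.add("adelay", f"delays={delay_ms}:all=1")
        sink = graph.add("abuffersink")
        src.link_to(delay)
        delay.link_to(sink)
        graph.configure()
        for frame in in_c.decode(in_stream):
            graph.push(frame)
            while True:
                try:
                    out_frame = graph.pull()
                except (BlockingIOError, EOFError):
                    break
                for packet in out_stream.encode(out_frame):
                    out_c.mux(packet)
        for packet in out_stream.encode(None):
            out_c.mux(packet)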
@@ -1,205 +0,0 @@
"""
Hatchet child workflow: TrackProcessing

Handles individual audio track processing: padding and transcription.
Spawned dynamically by the main diarization pipeline for each track.

Architecture note: This is a separate workflow (not inline tasks in DailyMultitrackPipeline)
because Hatchet workflow DAGs are defined statically, but the number of tracks varies
at runtime. Child workflow spawning via `aio_run()` + `asyncio.gather()` is the
standard pattern for dynamic fan-out. See `process_tracks` in daily_multitrack_pipeline.py.

Note: This file uses deferred imports (inside tasks) intentionally.
Hatchet workers run in forked processes; fresh imports per task ensure
storage/DB connections are not shared across forks.
"""

from datetime import timedelta

import av
from hatchet_sdk import Context
from pydantic import BaseModel

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.constants import TIMEOUT_AUDIO, TIMEOUT_HEAVY
from reflector.hatchet.workflows.models import PadTrackResult, TranscribeTrackResult
from reflector.logger import logger
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
from reflector.utils.audio_padding import extract_stream_start_time_from_container


class TrackInput(BaseModel):
    """Input for individual track processing."""

    track_index: int
    s3_key: str
    bucket_name: str
    transcript_id: str
    language: str = "en"


hatchet = HatchetClientManager.get_client()

track_workflow = hatchet.workflow(name="TrackProcessing", input_validator=TrackInput)


@track_workflow.task(execution_timeout=timedelta(seconds=TIMEOUT_AUDIO), retries=3)
async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
    """Pad single audio track with silence for alignment.

    Extracts stream.start_time from WebM container metadata and applies
    silence padding using PyAV filter graph (adelay).
    """
    ctx.log(f"pad_track: track {input.track_index}, s3_key={input.s3_key}")
    logger.info(
        "[Hatchet] pad_track",
        track_index=input.track_index,
        s3_key=input.s3_key,
        transcript_id=input.transcript_id,
    )

    try:
        # Create fresh storage instance to avoid aioboto3 fork issues
        from reflector.settings import settings  # noqa: PLC0415
        from reflector.storage.storage_aws import AwsStorage  # noqa: PLC0415

        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )

        source_url = await storage.get_file_url(
            input.s3_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=input.bucket_name,
        )

        with av.open(source_url) as in_container:
            start_time_seconds = extract_stream_start_time_from_container(
                in_container, input.track_index, logger=logger
            )

        # If no padding needed, return original S3 key
        if start_time_seconds <= 0:
            logger.info(
                f"Track {input.track_index} requires no padding",
                track_index=input.track_index,
            )
            return PadTrackResult(
                padded_key=input.s3_key,
                bucket_name=input.bucket_name,
                size=0,
                track_index=input.track_index,
            )

        storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"

        # Presign PUT URL for output (Modal uploads directly)
        output_url = await storage.get_file_url(
            storage_path,
            operation="put_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
        )

        from reflector.processors.audio_padding_modal import (  # noqa: PLC0415
            AudioPaddingModalProcessor,
        )

        processor = AudioPaddingModalProcessor()
        result = await processor.pad_track(
            track_url=source_url,
            output_url=output_url,
            start_time_seconds=start_time_seconds,
            track_index=input.track_index,
        )
        file_size = result.size

        ctx.log(f"pad_track complete: track {input.track_index} -> {storage_path}")
        logger.info(
            "[Hatchet] pad_track complete",
            track_index=input.track_index,
            padded_key=storage_path,
        )

        # Return S3 key (not presigned URL) - consumer tasks presign on demand
        # This avoids stale URLs when workflow is replayed
        return PadTrackResult(
            padded_key=storage_path,
            bucket_name=None,  # None = use default transcript storage bucket
            size=file_size,
            track_index=input.track_index,
        )

    except Exception as e:
        logger.error("[Hatchet] pad_track failed", error=str(e), exc_info=True)
        raise


@track_workflow.task(
    parents=[pad_track], execution_timeout=timedelta(seconds=TIMEOUT_HEAVY), retries=3
)
async def transcribe_track(input: TrackInput, ctx: Context) -> TranscribeTrackResult:
    """Transcribe audio track using GPU (Modal.com) or local Whisper."""
    ctx.log(f"transcribe_track: track {input.track_index}, language={input.language}")
    logger.info(
        "[Hatchet] transcribe_track",
        track_index=input.track_index,
        language=input.language,
    )

    try:
        pad_result = ctx.task_output(pad_track)
        padded_key = pad_result.padded_key
        bucket_name = pad_result.bucket_name

        if not padded_key:
            raise ValueError("Missing padded_key from pad_track")

        # Presign URL on demand (avoids stale URLs on workflow replay)
        from reflector.settings import settings  # noqa: PLC0415
        from reflector.storage.storage_aws import AwsStorage  # noqa: PLC0415

        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )

        audio_url = await storage.get_file_url(
            padded_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=bucket_name,
        )

        from reflector.pipelines.transcription_helpers import (  # noqa: PLC0415
            transcribe_file_with_processor,
        )

        transcript = await transcribe_file_with_processor(audio_url, input.language)

        # Tag all words with speaker index
        for word in transcript.words:
            word.speaker = input.track_index

        ctx.log(
            f"transcribe_track complete: track {input.track_index}, {len(transcript.words)} words"
        )
        logger.info(
            "[Hatchet] transcribe_track complete",
            track_index=input.track_index,
            word_count=len(transcript.words),
        )

        return TranscribeTrackResult(
            words=transcript.words,
            track_index=input.track_index,
        )

    except Exception as e:
        logger.error("[Hatchet] transcribe_track failed", error=str(e), exc_info=True)
        raise
server/reflector/hatchet/workflows/transcription_workflow.py (new file, 98 lines)
@@ -0,0 +1,98 @@
"""
Hatchet child workflow: TranscriptionWorkflow
Handles individual audio track transcription only.
"""

from datetime import timedelta

from hatchet_sdk import Context
from pydantic import BaseModel

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.constants import TIMEOUT_HEAVY
from reflector.hatchet.workflows.models import TranscribeTrackResult
from reflector.logger import logger
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS


class TranscriptionInput(BaseModel):
    """Input for individual track transcription."""

    track_index: int
    padded_key: str  # S3 key from padding step
    bucket_name: str | None  # None = use default bucket
    language: str = "en"


hatchet = HatchetClientManager.get_client()

transcription_workflow = hatchet.workflow(
    name="TranscriptionWorkflow", input_validator=TranscriptionInput
)


@transcription_workflow.task(
    execution_timeout=timedelta(seconds=TIMEOUT_HEAVY), retries=3
)
async def transcribe_track(
    input: TranscriptionInput, ctx: Context
) -> TranscribeTrackResult:
    """Transcribe audio track using GPU (Modal.com) or local Whisper."""
    ctx.log(f"transcribe_track: track {input.track_index}, language={input.language}")
    logger.info(
        "[Hatchet] transcribe_track",
        track_index=input.track_index,
        language=input.language,
    )

    try:
        from reflector.settings import settings  # noqa: PLC0415
        from reflector.storage.storage_aws import AwsStorage  # noqa: PLC0415

        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )

        audio_url = await storage.get_file_url(
            input.padded_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=input.bucket_name,
        )

        from reflector.pipelines.transcription_helpers import (  # noqa: PLC0415
            transcribe_file_with_processor,
        )

        transcript = await transcribe_file_with_processor(audio_url, input.language)

        for word in transcript.words:
            word.speaker = input.track_index

        ctx.log(
            f"transcribe_track complete: track {input.track_index}, {len(transcript.words)} words"
        )
        logger.info(
            "[Hatchet] transcribe_track complete",
            track_index=input.track_index,
            word_count=len(transcript.words),
        )

        return TranscribeTrackResult(
            words=transcript.words,
            track_index=input.track_index,
        )

    except Exception as e:
        logger.error(
            "[Hatchet] transcribe_track failed",
            track_index=input.track_index,
            padded_key=input.padded_key,
            language=input.language,
            error=str(e),
            exc_info=True,
        )
        raise
@@ -1,113 +0,0 @@
"""
Modal.com backend for audio padding.
"""

import asyncio
import os

import httpx
from pydantic import BaseModel

from reflector.hatchet.constants import TIMEOUT_AUDIO
from reflector.logger import logger


class PaddingResponse(BaseModel):
    size: int
    cancelled: bool = False


class AudioPaddingModalProcessor:
    """Audio padding processor using Modal.com CPU backend via HTTP."""

    def __init__(
        self, padding_url: str | None = None, modal_api_key: str | None = None
    ):
        self.padding_url = padding_url or os.getenv("PADDING_URL")
        if not self.padding_url:
            raise ValueError(
                "PADDING_URL required to use AudioPaddingModalProcessor. "
                "Set PADDING_URL environment variable or pass padding_url parameter."
            )

        self.modal_api_key = modal_api_key or os.getenv("MODAL_API_KEY")

    async def pad_track(
        self,
        track_url: str,
        output_url: str,
        start_time_seconds: float,
        track_index: int,
    ) -> PaddingResponse:
        """Pad audio track with silence via Modal backend.

        Args:
            track_url: Presigned GET URL for source audio track
            output_url: Presigned PUT URL for output WebM
            start_time_seconds: Amount of silence to prepend
            track_index: Track index for logging
        """
        if not track_url:
            raise ValueError("track_url cannot be empty")
        if start_time_seconds <= 0:
            raise ValueError(
                f"start_time_seconds must be positive, got {start_time_seconds}"
            )

        log = logger.bind(track_index=track_index, padding_seconds=start_time_seconds)
        log.info("Sending Modal padding HTTP request")

        url = f"{self.padding_url}/pad"

        headers = {}
        if self.modal_api_key:
            headers["Authorization"] = f"Bearer {self.modal_api_key}"

        try:
            async with httpx.AsyncClient(timeout=TIMEOUT_AUDIO) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json={
                        "track_url": track_url,
                        "output_url": output_url,
                        "start_time_seconds": start_time_seconds,
                        "track_index": track_index,
                    },
                    follow_redirects=True,
                )

                if response.status_code != 200:
                    error_body = response.text
                    log.error(
                        "Modal padding API error",
                        status_code=response.status_code,
                        error_body=error_body,
                    )

                response.raise_for_status()
                result = response.json()

                # Check if work was cancelled
                if result.get("cancelled"):
                    log.warning("Modal padding was cancelled by disconnect detection")
                    raise asyncio.CancelledError(
                        "Padding cancelled due to client disconnect"
                    )

                log.info("Modal padding complete", size=result["size"])
                return PaddingResponse(**result)
        except asyncio.CancelledError:
            log.warning(
                "Modal padding cancelled (Hatchet timeout, disconnect detected on Modal side)"
            )
            raise
        except httpx.TimeoutException as e:
            log.error("Modal padding timeout", error=str(e), exc_info=True)
            raise Exception(f"Modal padding timeout: {e}") from e
        except httpx.HTTPStatusError as e:
            log.error("Modal padding HTTP error", error=str(e), exc_info=True)
            raise Exception(f"Modal padding HTTP error: {e}") from e
        except Exception as e:
            log.error("Modal padding unexpected error", error=str(e), exc_info=True)
            raise
@@ -98,10 +98,6 @@ class Settings(BaseSettings):
    # Diarization: local pyannote.audio
    DIARIZATION_PYANNOTE_AUTH_TOKEN: str | None = None

    # Audio Padding (Modal.com backend)
    PADDING_URL: str | None = None
    PADDING_MODAL_API_KEY: str | None = None

    # Sentry
    SENTRY_DSN: str | None = None
@@ -5,9 +5,7 @@ Used by both Hatchet workflows and Celery pipelines for consistent audio encodin
"""

# Opus codec settings
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
OPUS_STANDARD_SAMPLE_RATE = 48000
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
OPUS_DEFAULT_BIT_RATE = 128000  # 128kbps for good speech quality

# S3 presigned URL expiration
@@ -1,6 +1,4 @@
import json
import os
from datetime import datetime, timezone
from typing import assert_never

from fastapi import APIRouter, HTTPException, Request
@@ -14,10 +12,7 @@ from reflector.dailyco_api import (
    RecordingReadyEvent,
    RecordingStartedEvent,
)
from reflector.dailyco_api.recording_orphans import create_and_log_orphan
from reflector.db.daily_recording_requests import daily_recording_requests_controller
from reflector.db.meetings import meetings_controller
from reflector.db.recordings import Recording, recordings_controller
from reflector.logger import logger as _logger
from reflector.settings import settings
from reflector.video_platforms.factory import create_platform_client
@@ -217,73 +212,10 @@ async def _handle_recording_ready(event: RecordingReadyEvent):

    track_keys = [t.s3Key for t in tracks if t.type == "audio"]

    # Lookup request
    match = await daily_recording_requests_controller.find_by_recording_id(
        recording_id
    )

    if not match:
        await create_and_log_orphan(
            recording_id=recording_id,
            bucket_name=bucket_name,
            room_name=room_name,
            start_ts=event.payload.start_ts,
            track_keys=track_keys,
            source="webhook",
        )
        return

    meeting_id, _ = match

    # Verify meeting exists
    meeting = await meetings_controller.get_by_id(meeting_id)
    if not meeting:
        logger.error(
            "Meeting not found (webhook)",
            recording_id=recording_id,
            meeting_id=meeting_id,
        )
        await create_and_log_orphan(
            recording_id=recording_id,
            bucket_name=bucket_name,
            room_name=room_name,
            start_ts=event.payload.start_ts,
            track_keys=track_keys,
            source="webhook",
        )
        return

    # Create recording atomically
    created = await recordings_controller.try_create_with_meeting(
        Recording(
            id=recording_id,
            bucket_name=bucket_name,
            object_key=(
                os.path.dirname(track_keys[0]) if track_keys else room_name
            ),
            recorded_at=datetime.fromtimestamp(
                event.payload.start_ts, tz=timezone.utc
            ),
            track_keys=track_keys,
            meeting_id=meeting_id,
            status="pending",
        )
    )

    if not created:
        # Already created (polling got it first)
        logger.debug(
            "Recording already exists (webhook late)",
            recording_id=recording_id,
            meeting_id=meeting_id,
        )
        return

    logger.info(
        "Raw-tracks recording queuing processing (webhook)",
        "Raw-tracks recording queuing processing",
        recording_id=recording_id,
        room_name=room_name,
        meeting_id=meeting_id,
        num_tracks=len(track_keys),
    )
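try_create_with_meeting is the atomic guard that lets the webhook and the poller race safely; one plausible shape, assuming a PostgreSQL backend (illustrative only, the real controller lives in reflector.db.recordings):

from sqlalchemy.dialects.postgresql import insert

async def try_create_with_meeting(db, recordings_table, recording) -> bool:
    # INSERT ... ON CONFLICT DO NOTHING: whoever commits first wins; the loser
    # sees rowcount 0 and skips queueing entirely.
    stmt = (
        insert(recordings_table)
        .values(**recording.model_dump())
        .on_conflict_do_nothing(index_elements=["id"])
    )
    result = await db.execute(stmt)
    return result.rowcount == 1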
@@ -1,5 +1,4 @@
import json
import logging
from datetime import datetime, timezone
from typing import Annotated, Any, Optional
from uuid import UUID
@@ -10,21 +9,16 @@ from pydantic import BaseModel
import reflector.auth as auth
from reflector.dailyco_api import RecordingType
from reflector.dailyco_api.client import DailyApiError
from reflector.db.daily_recording_requests import (
    DailyRecordingRequest,
    daily_recording_requests_controller,
)
from reflector.db.meetings import (
    MeetingConsent,
    meeting_consent_controller,
    meetings_controller,
)
from reflector.db.rooms import rooms_controller
from reflector.logger import logger
from reflector.utils.string import NonEmptyString
from reflector.video_platforms.factory import create_platform_client

logger = logging.getLogger(__name__)

router = APIRouter()


@@ -108,6 +102,13 @@ async def start_recording(
    if not meeting:
        raise HTTPException(status_code=404, detail="Meeting not found")

    log = logger.bind(
        meeting_id=meeting_id,
        room_name=meeting.room_name,
        recording_type=body.type,
        instance_id=body.instanceId,
    )

    try:
        client = create_platform_client("daily")
        result = await client.start_recording(
@@ -116,30 +117,9 @@ async def start_recording(
            instance_id=body.instanceId,
        )

        recording_id = result["id"]
        log.info(f"Started {body.type} recording via REST API")

        await daily_recording_requests_controller.create(
            DailyRecordingRequest(
                recording_id=recording_id,
                meeting_id=meeting_id,
                instance_id=body.instanceId,
                type=body.type,
                requested_at=datetime.now(timezone.utc),
            )
        )

        logger.info(
            f"Started {body.type} recording via REST API",
            extra={
                "meeting_id": meeting_id,
                "room_name": meeting.room_name,
                "recording_type": body.type,
                "instance_id": body.instanceId,
                "recording_id": recording_id,
            },
        )

        return {"status": "ok", "recording_id": recording_id}
        return {"status": "ok", "result": result}

    except DailyApiError as e:
        # Parse Daily.co error response to detect "has an active stream"
@@ -150,42 +130,22 @@ async def start_recording(
            # "has an active stream" means recording already started by another participant
            # This is SUCCESS from business logic perspective - return 200
            if "has an active stream" in error_info:
                logger.info(
                    f"{body.type} recording already active (started by another participant)",
                    extra={
                        "meeting_id": meeting_id,
                        "room_name": meeting.room_name,
                        "recording_type": body.type,
                        "instance_id": body.instanceId,
                    },
                log.info(
                    f"{body.type} recording already active (started by another participant)"
                )
                return {"status": "already_active", "instanceId": str(body.instanceId)}
        except (json.JSONDecodeError, KeyError):
            pass  # Fall through to error handling

        # All other Daily.co API errors
        logger.error(
            f"Failed to start {body.type} recording",
            extra={
                "meeting_id": meeting_id,
                "recording_type": body.type,
                "error": str(e),
            },
        )
        log.error(f"Failed to start {body.type} recording", error=str(e))
        raise HTTPException(
            status_code=500, detail=f"Failed to start recording: {str(e)}"
        )

    except Exception as e:
        # Non-Daily.co errors
        logger.error(
            f"Failed to start {body.type} recording",
            extra={
                "meeting_id": meeting_id,
                "recording_type": body.type,
                "error": str(e),
            },
        )
        log.error(f"Failed to start {body.type} recording", error=str(e))
        raise HTTPException(
            status_code=500, detail=f"Failed to start recording: {str(e)}"
        )
@@ -1,5 +1,6 @@
import json
import os
import re
from datetime import datetime, timezone
from typing import List, Literal
from urllib.parse import unquote
@@ -12,12 +13,10 @@ from celery.utils.log import get_task_logger
from pydantic import ValidationError

from reflector.dailyco_api import FinishedRecordingResponse, RecordingResponse
from reflector.dailyco_api.recording_orphans import create_and_log_orphan
from reflector.db.daily_participant_sessions import (
    DailyParticipantSession,
    daily_participant_sessions_controller,
)
from reflector.db.daily_recording_requests import daily_recording_requests_controller
from reflector.db.meetings import meetings_controller
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.rooms import rooms_controller
@@ -231,44 +230,79 @@ async def _process_multitrack_recording_inner(
    recording_start_ts: int,
):
    """
    Process multitrack recording.
    Process multitrack recording (first time or reprocessing).

    Recording must already exist with meeting_id set (created by webhook/polling before queueing).
    For first processing (webhook/polling):
    - Uses recording_start_ts for time-based meeting matching (no instanceId available)

    For reprocessing:
    - Uses recording.meeting_id directly (already linked during first processing)
    - recording_start_ts is ignored
    """

    # Get recording (must exist - created by webhook/polling)
    tz = timezone.utc
    recorded_at = datetime.now(tz)
    try:
        if track_keys:
            folder = os.path.basename(os.path.dirname(track_keys[0]))
            ts_match = re.search(r"(\d{14})$", folder)
            if ts_match:
                ts = ts_match.group(1)
                recorded_at = datetime.strptime(ts, "%Y%m%d%H%M%S").replace(tzinfo=tz)
    except Exception as e:
        logger.warning(
            f"Could not parse recorded_at from keys, using now() {recorded_at}",
            e,
            exc_info=True,
        )

    # Check if recording already exists (reprocessing path)
    recording = await recordings_controller.get_by_id(recording_id)

    if not recording:
        logger.error(
            "Recording not found - should have been created by webhook/polling",
            recording_id=recording_id,
        )
        return
    if recording and recording.meeting_id:
        # Reprocessing: recording exists with meeting already linked
        meeting = await meetings_controller.get_by_id(recording.meeting_id)
        if not meeting:
            logger.error(
                "Reprocessing: meeting not found for recording - skipping",
                meeting_id=recording.meeting_id,
                recording_id=recording_id,
            )
            return

    if not recording.meeting_id:
        logger.error(
            "Recording has no meeting_id - orphan should not be queued",
        logger.info(
            "Reprocessing: using existing recording.meeting_id",
            recording_id=recording_id,
            meeting_id=meeting.id,
            room_name=daily_room_name,
        )
        return

    # Get meeting
    meeting = await meetings_controller.get_by_id(recording.meeting_id)
    if not meeting:
        logger.error(
            "Meeting not found for recording",
            meeting_id=recording.meeting_id,
    else:
        # First processing: recording doesn't exist, need time-based matching
        # (Daily.co doesn't return instanceId in API, must match by timestamp)
        recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc)
        meeting = await meetings_controller.get_by_room_name_and_time(
            room_name=daily_room_name,
            recording_start=recording_start,
            time_window_hours=168,  # 1 week
        )
        if not meeting:
            logger.error(
                "Raw-tracks: no meeting found within 1-week window (time-based match) - skipping",
                recording_id=recording_id,
                room_name=daily_room_name,
                recording_start_ts=recording_start_ts,
                recording_start=recording_start.isoformat(),
            )
            return  # Skip processing, will retry on next poll
        logger.info(
            "First processing: found meeting via time-based matching",
            meeting_id=meeting.id,
            room_name=daily_room_name,
            recording_id=recording_id,
            time_delta_seconds=abs(
                (meeting.start_date - recording_start).total_seconds()
            ),
        )
        return

    logger.info(
        "Processing multitrack recording",
        recording_id=recording_id,
        meeting_id=meeting.id,
        room_name=daily_room_name,
    )

    room_name_base = extract_base_room_name(daily_room_name)

@@ -276,6 +310,33 @@ async def _process_multitrack_recording_inner(
    if not room:
        raise Exception(f"Room not found: {room_name_base}")

    if not recording:
        # Create recording (only happens during first processing)
        object_key_dir = os.path.dirname(track_keys[0]) if track_keys else ""
        recording = await recordings_controller.create(
            Recording(
                id=recording_id,
                bucket_name=bucket_name,
                object_key=object_key_dir,
                recorded_at=recorded_at,
                meeting_id=meeting.id,
                track_keys=track_keys,
            )
        )
    elif not recording.meeting_id:
        # Recording exists but meeting_id is null (failed first processing)
        # Update with meeting from time-based matching
        await recordings_controller.set_meeting_id(
            recording_id=recording.id,
            meeting_id=meeting.id,
        )
        recording.meeting_id = meeting.id
        logger.info(
            "Updated existing recording with meeting_id",
            recording_id=recording.id,
            meeting_id=meeting.id,
        )

    transcript = await transcripts_controller.get_by_recording_id(recording.id)
    if not transcript:
        transcript = await transcripts_controller.add(
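Both branches above delegate to meetings_controller.get_by_room_name_and_time; the selection rule it implies, nearest meeting start within the window, can be sketched as follows (illustrative, assuming meeting objects that expose a start_date datetime):

from datetime import datetime, timedelta

def match_meeting_by_time(meetings, recording_start: datetime, time_window_hours: int = 168):
    # Keep meetings whose start falls within the window, then take the closest.
    window = timedelta(hours=time_window_hours)
    candidates = [m for m in meetings if abs(m.start_date - recording_start) <= window]
    return min(candidates, key=lambda m: abs(m.start_date - recording_start), default=None)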
@@ -461,7 +522,7 @@ async def store_cloud_recording(
    Store cloud recording reference in meeting table.

    Common function for both webhook and polling code paths.
    Uses direct recording_id lookup via daily_recording_requests table.
    Uses time-based matching to handle duplicate room_name values.

    Args:
        recording_id: Daily.co recording ID
@@ -474,170 +535,155 @@ async def store_cloud_recording(
    Returns:
        True if stored, False if skipped/failed
    """
    # Lookup request
    match = await daily_recording_requests_controller.find_by_recording_id(recording_id)
    recording_start = datetime.fromtimestamp(start_ts, tz=timezone.utc)

    if not match:
        # ORPHAN: No request found (pre-migration recording or failed request creation)
        await create_and_log_orphan(
    meeting = await meetings_controller.get_by_room_name_and_time(
        room_name=room_name,
        recording_start=recording_start,
        time_window_hours=168,  # 1 week
    )

    if not meeting:
        logger.warning(
            f"Cloud recording ({source}): no meeting found within 1-week window",
            recording_id=recording_id,
            bucket_name="",
            room_name=room_name,
            start_ts=start_ts,
            track_keys=None,
            source=source,
            recording_start_ts=start_ts,
            recording_start=recording_start.isoformat(),
        )
        return False

    meeting_id, _ = match

    success = await meetings_controller.set_cloud_recording_if_missing(
        meeting_id=meeting_id,
        meeting_id=meeting.id,
        s3_key=s3_key,
        duration=duration,
    )

    if not success:
        logger.debug(
            f"Cloud recording ({source}): already set (stop/restart?)",
            f"Cloud recording ({source}): already set (race lost)",
            recording_id=recording_id,
            room_name=room_name,
            meeting_id=meeting_id,
            meeting_id=meeting.id,
        )
        return False

    logger.info(
        f"Cloud recording stored via {source}",
        meeting_id=meeting_id,
        f"Cloud recording stored via {source} (time-based match)",
        meeting_id=meeting.id,
        recording_id=recording_id,
        s3_key=s3_key,
        duration=duration,
        time_delta_seconds=abs((meeting.start_date - recording_start).total_seconds()),
    )
    return True

async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingResponse]):
    """Process cloud recordings (database deduplication, worker-agnostic).
    """
    Store cloud recordings missing from meeting table via polling.

    Cloud recordings stored in meeting.daily_composed_video_s3_key, not recording table.
    Only first cloud recording per meeting is kept (existing behavior).
    Uses time-based matching via store_cloud_recording().
    """
    if not cloud_recordings:
        return

    for rec in cloud_recordings:
        # Lookup request
        match = await daily_recording_requests_controller.find_by_recording_id(rec.id)

        if not match:
            await create_and_log_orphan(
                recording_id=rec.id,
                bucket_name="",
                room_name=rec.room_name,
                start_ts=rec.start_ts,
                track_keys=None,
                source="polling",
            )
            continue

        meeting_id, _ = match

        if not rec.s3key:
            logger.error("Cloud recording missing s3_key", recording_id=rec.id)
            continue

        # Store in meeting table (atomic, only if not already set)
        success = await meetings_controller.set_cloud_recording_if_missing(
            meeting_id=meeting_id,
            s3_key=rec.s3key,
            duration=rec.duration,
        )

        if success:
            logger.info(
                "Stored cloud recording", recording_id=rec.id, meeting_id=meeting_id
            )
        else:
    stored_count = 0
    for recording in cloud_recordings:
        # Extract S3 key from recording (cloud recordings use s3key field)
        s3_key = recording.s3key or (recording.s3.key if recording.s3 else None)
        if not s3_key:
            logger.warning(
                "Cloud recording already exists for meeting (stop/restart?)",
                recording_id=rec.id,
                meeting_id=meeting_id,
                "Cloud recording: missing S3 key",
                recording_id=recording.id,
                room_name=recording.room_name,
            )
            continue

        stored = await store_cloud_recording(
            recording_id=recording.id,
            room_name=recording.room_name,
            s3_key=s3_key,
            duration=recording.duration,
            start_ts=recording.start_ts,
            source="polling",
        )
        if stored:
            stored_count += 1

    logger.info(
        "Cloud recording polling complete",
        total=len(cloud_recordings),
        stored=stored_count,
    )

async def _poll_raw_tracks_recordings(
    raw_tracks_recordings: List[FinishedRecordingResponse],
    bucket_name: NonEmptyString,
) -> None:
    """Process raw-tracks (database deduplication, worker-agnostic)."""
    bucket_name: str,
):
    """Queue raw-tracks recordings missing from DB (existing logic)."""
    if not raw_tracks_recordings:
        return

    for rec in raw_tracks_recordings:
        # Lookup request FIRST (before any DB writes)
        match = await daily_recording_requests_controller.find_by_recording_id(rec.id)
    recording_ids = [rec.id for rec in raw_tracks_recordings]
    existing_recordings = await recordings_controller.get_by_ids(recording_ids)
    existing_ids = {rec.id for rec in existing_recordings}

        if not match:
            await create_and_log_orphan(
                recording_id=rec.id,
                bucket_name=bucket_name,
                room_name=rec.room_name,
                start_ts=rec.start_ts,
                track_keys=[t.s3Key for t in rec.tracks if t.type == "audio"],
                source="polling",
    missing_recordings = [
        rec for rec in raw_tracks_recordings if rec.id not in existing_ids
    ]

    if not missing_recordings:
        logger.debug(
            "All raw-tracks recordings already in DB",
            api_count=len(raw_tracks_recordings),
            existing_count=len(existing_recordings),
        )
        return

    logger.info(
        "Found raw-tracks recordings missing from DB",
        missing_count=len(missing_recordings),
        total_api_count=len(raw_tracks_recordings),
        existing_count=len(existing_recordings),
    )

    for recording in missing_recordings:
        if not recording.tracks:
            logger.warning(
                "Finished raw-tracks recording has no tracks (no audio captured)",
                recording_id=recording.id,
                room_name=recording.room_name,
            )
            continue

        meeting_id, _ = match
        track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"]

        # Verify meeting exists
        meeting = await meetings_controller.get_by_id(meeting_id)
        if not meeting:
            logger.error(
                "Meeting not found", recording_id=rec.id, meeting_id=meeting_id
            )
            await create_and_log_orphan(
                recording_id=rec.id,
                bucket_name=bucket_name,
                room_name=rec.room_name,
                start_ts=rec.start_ts,
                track_keys=[t.s3Key for t in rec.tracks if t.type == "audio"],
                source="polling",
        if not track_keys:
            logger.warning(
                "No audio tracks found in raw-tracks recording",
                recording_id=recording.id,
                room_name=recording.room_name,
                total_tracks=len(recording.tracks),
            )
            continue

        # DEDUPLICATION: Atomically create recording (single operation, no race window)
        # ON CONFLICT → concurrent poller already got it, skip entire logic
        track_keys = [t.s3Key for t in rec.tracks if t.type == "audio"]

        created = await recordings_controller.try_create_with_meeting(
            Recording(
                id=rec.id,
                bucket_name=bucket_name,
                object_key=os.path.dirname(track_keys[0]) if track_keys else "",
                recorded_at=datetime.fromtimestamp(rec.start_ts, tz=timezone.utc),
                track_keys=track_keys,
                meeting_id=meeting_id,  # Set at creation (constraint-safe)
                status="pending",
            )
        logger.info(
            "Queueing missing raw-tracks recording for processing",
            recording_id=recording.id,
            room_name=recording.room_name,
            track_count=len(track_keys),
        )

        if not created:
            # Conflict: another poller already created/queued this
            # Skip all remaining logic (match already done by winner)
            continue

        # Only winner reaches here - queue processing (works with Celery or Hatchet)
        process_multitrack_recording.delay(
            recording_id=rec.id,
            daily_room_name=rec.room_name,
            recording_start_ts=rec.start_ts,
            bucket_name=bucket_name,
            daily_room_name=recording.room_name,
            recording_id=recording.id,
            track_keys=track_keys,
            recording_start_ts=recording.start_ts,
        )

        logger.info("Queued recording", recording_id=rec.id, meeting_id=meeting_id)


async def poll_daily_room_presence(meeting_id: str) -> None:
    """Poll Daily.co room presence and reconcile with DB sessions. New presence is added, old presence is marked as closed.
@@ -1,39 +0,0 @@
#!/usr/bin/env python3
"""Test script to fetch Daily.co recordings for a specific room and show raw API response."""

import asyncio
import json

from reflector.video_platforms.factory import create_platform_client


async def main():
    room_name = "daily-private-igor-20260110042117"

    print(f"\n=== Fetching recordings for room: {room_name} ===\n")

    async with create_platform_client("daily") as client:
        recordings = await client.list_recordings(room_name=room_name)

        print(f"Found {len(recordings)} recording objects from Daily.co API\n")

        for i, rec in enumerate(recordings, 1):
            print(f"--- Recording #{i} ---")
            print(f"ID: {rec.id}")
            print(f"Room: {rec.room_name}")
            print(f"Start TS: {rec.start_ts}")
            print(f"Status: {rec.status}")
            print(f"Duration: {rec.duration}")
            print(f"Type: {rec.type}")
            print(f"Tracks count: {len(rec.tracks)}")

            if rec.tracks:
                print(f"Tracks:")
                for j, track in enumerate(rec.tracks, 1):
                    print(f"  Track {j}: {track.s3Key}")

            print(f"\nRaw JSON:\n{json.dumps(rec.model_dump(), indent=2, default=str)}\n")


if __name__ == "__main__":
    asyncio.run(main())
@@ -1,258 +0,0 @@
from datetime import datetime, timezone
from uuid import UUID

import pytest

from reflector.db.daily_recording_requests import (
    DailyRecordingRequest,
    daily_recording_requests_controller,
)
from reflector.db.meetings import Meeting, meetings_controller
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.rooms import Room, rooms_controller


@pytest.mark.asyncio
async def test_create_request():
    """Test creating a recording request."""
    # Create meeting first
    room = Room(id="test-room", name="Test Room", slug="test-room", user_id="test-user")
    await rooms_controller.create(room)

    meeting = Meeting(
        id="meeting-123",
        room_name="test-room",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="cloud",
    )
    await meetings_controller.create(meeting)

    request = DailyRecordingRequest(
        recording_id="rec-1",
        meeting_id="meeting-123",
        instance_id=UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890"),
        type="cloud",
        requested_at=datetime.now(timezone.utc),
    )

    await daily_recording_requests_controller.create(request)

    result = await daily_recording_requests_controller.find_by_recording_id("rec-1")
    assert result is not None
    assert result[0] == "meeting-123"
    assert result[1] == "cloud"


@pytest.mark.asyncio
async def test_multiple_recordings_same_meeting():
    """Test stop/restart creates multiple request rows."""
    # Create room and meeting
    room = Room(
        id="test-room-2", name="Test Room 2", slug="test-room-2", user_id="test-user"
    )
    await rooms_controller.create(room)

    meeting_id = "meeting-456"
    meeting = Meeting(
        id=meeting_id,
        room_name="test-room-2",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="cloud",
    )
    await meetings_controller.create(meeting)

    instance_id = UUID("b1c2d3e4-f5a6-7890-abcd-ef1234567890")

    # First recording
    await daily_recording_requests_controller.create(
        DailyRecordingRequest(
            recording_id="rec-1",
            meeting_id=meeting_id,
            instance_id=instance_id,
            type="cloud",
            requested_at=datetime.now(timezone.utc),
        )
    )

    # Stop, then restart (new recording_id, same instance_id)
    await daily_recording_requests_controller.create(
        DailyRecordingRequest(
            recording_id="rec-2",  # DIFFERENT
            meeting_id=meeting_id,
            instance_id=instance_id,  # SAME
            type="cloud",
            requested_at=datetime.now(timezone.utc),
        )
    )

    # Both exist
    requests = await daily_recording_requests_controller.get_by_meeting_id(meeting_id)
    assert len(requests) == 2
    assert {r.recording_id for r in requests} == {"rec-1", "rec-2"}


@pytest.mark.asyncio
async def test_deduplication_via_database():
    """Test concurrent pollers use database for deduplication."""
    # Create room and meeting
    room = Room(
        id="test-room-3", name="Test Room 3", slug="test-room-3", user_id="test-user"
    )
    await rooms_controller.create(room)

    meeting = Meeting(
        id="meeting-789",
        room_name="test-room-3",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="raw-tracks",
    )
    await meetings_controller.create(meeting)

    recording_id = "rec-123"

    # Poller 1
    created1 = await recordings_controller.try_create_with_meeting(
        Recording(
            id=recording_id,
            bucket_name="test-bucket",
            object_key="test-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id="meeting-789",
            status="pending",
            track_keys=["track1.webm", "track2.webm"],
        )
    )
    assert created1 is True  # First wins

    # Poller 2 (concurrent)
    created2 = await recordings_controller.try_create_with_meeting(
        Recording(
            id=recording_id,
            bucket_name="test-bucket",
            object_key="test-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id="meeting-789",
            status="pending",
            track_keys=["track1.webm", "track2.webm"],
        )
    )
    assert created2 is False  # Conflict, skip


@pytest.mark.asyncio
async def test_orphan_logged_once():
    """Test orphan marked once, skipped on re-poll."""
    # First poll
    created1 = await recordings_controller.create_orphan(
        Recording(
            id="orphan-123",
            bucket_name="test-bucket",
            object_key="orphan-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id=None,
            status="orphan",
            track_keys=None,
        )
    )
    assert created1 is True

    # Second poll (same orphan discovered again)
    created2 = await recordings_controller.create_orphan(
        Recording(
            id="orphan-123",
            bucket_name="test-bucket",
            object_key="orphan-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id=None,
            status="orphan",
            track_keys=None,
        )
    )
    assert created2 is False  # Already exists

    # Verify it exists
    existing = await recordings_controller.get_by_id("orphan-123")
    assert existing is not None
    assert existing.status == "orphan"


@pytest.mark.asyncio
async def test_orphan_constraints():
    """Test orphan invariants are enforced."""
    # Can't create orphan with meeting_id
    with pytest.raises(AssertionError, match="meeting_id must be NULL"):
        await recordings_controller.create_orphan(
            Recording(
                id="bad-orphan-1",
                bucket_name="test",
                object_key="test",
                recorded_at=datetime.now(timezone.utc),
                meeting_id="meeting-123",  # Should be None
                status="orphan",
                track_keys=None,
            )
        )

    # Can't create orphan with wrong status
    with pytest.raises(AssertionError, match="status must be 'orphan'"):
        await recordings_controller.create_orphan(
            Recording(
                id="bad-orphan-2",
                bucket_name="test",
                object_key="test",
                recorded_at=datetime.now(timezone.utc),
                meeting_id=None,
                status="pending",  # Should be "orphan"
                track_keys=None,
            )
        )


@pytest.mark.asyncio
async def test_try_create_with_meeting_constraints():
    """Test try_create_with_meeting enforces constraints."""
    # Create room and meeting
    room = Room(
        id="test-room-4", name="Test Room 4", slug="test-room-4", user_id="test-user"
    )
    await rooms_controller.create(room)

    meeting = Meeting(
        id="meeting-999",
        room_name="test-room-4",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="cloud",
    )
    await meetings_controller.create(meeting)

    # Can't create with orphan status
    with pytest.raises(AssertionError, match="use create_orphan"):
        await recordings_controller.try_create_with_meeting(
            Recording(
                id="bad-rec-1",
                bucket_name="test",
                object_key="test",
                recorded_at=datetime.now(timezone.utc),
                meeting_id="meeting-999",
                status="orphan",  # Should not be orphan
                track_keys=None,
            )
        )

    # Can't create without meeting_id
    with pytest.raises(AssertionError, match="meeting_id required"):
        await recordings_controller.try_create_with_meeting(
            Recording(
                id="bad-rec-2",
                bucket_name="test",
                object_key="test",
                recorded_at=datetime.now(timezone.utc),
                meeting_id=None,  # Should have meeting_id
                status="pending",
                track_keys=None,
            )
        )
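The match= strings in the two constraint tests above effectively document the controller's guard clauses. A sketch of how those invariants might be enforced (method bodies elided with `...`; only the asserts are pinned down by the tests):

    class RecordingsController:
        async def create_orphan(self, recording: Recording) -> bool:
            # Orphans must not reference a meeting and must carry orphan status
            assert recording.meeting_id is None, "meeting_id must be NULL"
            assert recording.status == "orphan", "status must be 'orphan'"
            ...  # atomic insert; returns False if the orphan already exists

        async def try_create_with_meeting(self, recording: Recording) -> bool:
            # Mirror-image invariants of create_orphan
            assert recording.status != "orphan", "orphan recordings: use create_orphan"
            assert recording.meeting_id is not None, "meeting_id required"
            ...  # atomic insert with ON CONFLICT semantics (see sketch above)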
@@ -1,300 +0,0 @@
"""
Integration tests for recording request flow.

These tests verify the end-to-end flow of:
1. Starting a recording (creates request)
2. Webhook/polling discovering recording (matches via request)
3. Recording processing (uses existing meeting_id)
"""

from datetime import datetime, timezone
from uuid import UUID, uuid4

import pytest

from reflector.db.daily_recording_requests import (
    DailyRecordingRequest,
    daily_recording_requests_controller,
)
from reflector.db.meetings import Meeting, meetings_controller
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.rooms import Room, rooms_controller


@pytest.mark.asyncio
async def test_recording_request_flow_cloud(client):
    """Test full cloud recording flow: start -> webhook -> match"""
    # Create room and meeting
    room = Room(id="test-room", name="Test Room", slug="test-room", user_id="test-user")
    await rooms_controller.create(room)

    meeting_id = f"meeting-{uuid4()}"
    meeting = Meeting(
        id=meeting_id,
        room_name="test-room",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="cloud",
    )
    await meetings_controller.create(meeting)

    # Simulate recording start (what endpoint does)
    recording_id = "rec-cloud-123"
    instance_id = UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890")

    request = DailyRecordingRequest(
        recording_id=recording_id,
        meeting_id=meeting_id,
        instance_id=instance_id,
        type="cloud",
        requested_at=datetime.now(timezone.utc),
    )
    await daily_recording_requests_controller.create(request)

    # Verify request exists
    match = await daily_recording_requests_controller.find_by_recording_id(recording_id)
    assert match is not None
    assert match[0] == meeting_id
    assert match[1] == "cloud"

    # Simulate webhook/polling storing cloud recording
    success = await meetings_controller.set_cloud_recording_if_missing(
        meeting_id=meeting_id,
        s3_key="s3://bucket/recording.mp4",
        duration=120,
    )
    assert success is True

    # Verify meeting updated
    updated_meeting = await meetings_controller.get_by_id(meeting_id)
    assert updated_meeting.daily_composed_video_s3_key == "s3://bucket/recording.mp4"
    assert updated_meeting.daily_composed_video_duration == 120


@pytest.mark.asyncio
async def test_recording_request_flow_raw_tracks(client):
    """Test full raw-tracks recording flow: start -> webhook/polling -> process"""
    # Create room and meeting
    room = Room(
        id="test-room-2",
        name="Test Room 2",
        slug="test-room-2",
        user_id="test-user",
    )
    await rooms_controller.create(room)

    meeting_id = f"meeting-{uuid4()}"
    meeting = Meeting(
        id=meeting_id,
        room_name="test-room-2",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="raw-tracks",
    )
    await meetings_controller.create(meeting)

    # Simulate recording start
    recording_id = "rec-raw-456"
    instance_id = UUID("b1c2d3e4-f5a6-7890-abcd-ef1234567890")

    request = DailyRecordingRequest(
        recording_id=recording_id,
        meeting_id=meeting_id,
        instance_id=instance_id,
        type="raw-tracks",
        requested_at=datetime.now(timezone.utc),
    )
    await daily_recording_requests_controller.create(request)

    # Simulate webhook/polling discovering recording
    match = await daily_recording_requests_controller.find_by_recording_id(recording_id)
    assert match is not None
    found_meeting_id, recording_type = match
    assert found_meeting_id == meeting_id
    assert recording_type == "raw-tracks"

    # Create recording (what webhook/polling does)
    created = await recordings_controller.try_create_with_meeting(
        Recording(
            id=recording_id,
            bucket_name="test-bucket",
            object_key="recordings/20260120/",
            recorded_at=datetime.now(timezone.utc),
            track_keys=["track1.webm", "track2.webm"],
            meeting_id=meeting_id,
            status="pending",
        )
    )
    assert created is True

    # Verify recording exists with meeting_id
    recording = await recordings_controller.get_by_id(recording_id)
    assert recording is not None
    assert recording.meeting_id == meeting_id
    assert recording.status == "pending"
    assert len(recording.track_keys) == 2


@pytest.mark.asyncio
async def test_stop_restart_creates_multiple_requests(client):
    """Test stop/restart creates multiple request rows with same instance_id"""
    # Create room and meeting
    room = Room(
        id="test-room-3",
        name="Test Room 3",
        slug="test-room-3",
        user_id="test-user",
    )
    await rooms_controller.create(room)

    meeting_id = f"meeting-{uuid4()}"
    meeting = Meeting(
        id=meeting_id,
        room_name="test-room-3",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="cloud",
    )
    await meetings_controller.create(meeting)

    instance_id = UUID("c1d2e3f4-a5b6-7890-abcd-ef1234567890")

    # First recording
    await daily_recording_requests_controller.create(
        DailyRecordingRequest(
            recording_id="rec-first",
            meeting_id=meeting_id,
            instance_id=instance_id,
            type="cloud",
            requested_at=datetime.now(timezone.utc),
        )
    )

    # Stop, then restart (new recording_id, same instance_id)
    await daily_recording_requests_controller.create(
        DailyRecordingRequest(
            recording_id="rec-second",  # DIFFERENT
            meeting_id=meeting_id,
            instance_id=instance_id,  # SAME
            type="cloud",
            requested_at=datetime.now(timezone.utc),
        )
    )

    # Both exist
    requests = await daily_recording_requests_controller.get_by_meeting_id(meeting_id)
    assert len(requests) == 2
    assert {r.recording_id for r in requests} == {"rec-first", "rec-second"}
    assert all(r.instance_id == instance_id for r in requests)


@pytest.mark.asyncio
async def test_orphan_recording_no_request(client):
    """Test orphan recording (no request found)"""
    # Simulate polling discovering recording with no request
    recording_id = "rec-orphan"

    match = await daily_recording_requests_controller.find_by_recording_id(recording_id)
    assert match is None  # No request

    # Mark as orphan
    created = await recordings_controller.create_orphan(
        Recording(
            id=recording_id,
            bucket_name="test-bucket",
            object_key="orphan-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id=None,
            status="orphan",
            track_keys=None,
        )
    )
    assert created is True

    # Verify orphan exists
    recording = await recordings_controller.get_by_id(recording_id)
    assert recording is not None
    assert recording.status == "orphan"
    assert recording.meeting_id is None

    # Second poll - already exists
    created_again = await recordings_controller.create_orphan(
        Recording(
            id=recording_id,
            bucket_name="test-bucket",
            object_key="orphan-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id=None,
            status="orphan",
            track_keys=None,
        )
    )
    assert created_again is False  # Already exists


@pytest.mark.asyncio
async def test_concurrent_polling_deduplication(client):
    """Test concurrent pollers only queue once"""
    # Create room and meeting
    room = Room(
        id="test-room-4",
        name="Test Room 4",
        slug="test-room-4",
        user_id="test-user",
    )
    await rooms_controller.create(room)

    meeting_id = f"meeting-{uuid4()}"
    meeting = Meeting(
        id=meeting_id,
        room_name="test-room-4",
        start_date=datetime.now(timezone.utc),
        end_date=None,
        recording_type="raw-tracks",
    )
    await meetings_controller.create(meeting)

    # Create request
    recording_id = "rec-concurrent"
    await daily_recording_requests_controller.create(
        DailyRecordingRequest(
            recording_id=recording_id,
            meeting_id=meeting_id,
            instance_id=UUID("d1e2f3a4-b5c6-7890-abcd-ef1234567890"),
            type="raw-tracks",
            requested_at=datetime.now(timezone.utc),
        )
    )

    # Poller 1
    created1 = await recordings_controller.try_create_with_meeting(
        Recording(
            id=recording_id,
            bucket_name="test-bucket",
            object_key="test-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id=meeting_id,
            status="pending",
            track_keys=["track1.webm"],
        )
    )
    assert created1 is True  # First wins

    # Poller 2 (concurrent)
    created2 = await recordings_controller.try_create_with_meeting(
        Recording(
            id=recording_id,
            bucket_name="test-bucket",
            object_key="test-key",
            recorded_at=datetime.now(timezone.utc),
            meeting_id=meeting_id,
            status="pending",
            track_keys=["track1.webm"],
        )
    )
    assert created2 is False  # Conflict, skip

    # Only one recording exists
    recording = await recordings_controller.get_by_id(recording_id)
    assert recording is not None
    assert recording.meeting_id == meeting_id
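Taken together, these tests trace the handler that the webhook and the poller share. A sketch of how that handler presumably stitches the tested pieces together; `payload` and its fields are hypothetical stand-ins for whatever Daily.co delivers, not the project's actual wiring:

    from datetime import datetime, timezone


    async def on_recording_discovered(payload) -> None:
        """Step 2 of the flow: match an incoming recording to its meeting."""
        match = await daily_recording_requests_controller.find_by_recording_id(
            payload.recording_id
        )
        if match is None:
            # No request row: record the orphan exactly once (idempotent re-polls)
            await recordings_controller.create_orphan(
                Recording(
                    id=payload.recording_id,
                    bucket_name=payload.bucket_name,
                    object_key=payload.object_key,
                    recorded_at=datetime.now(timezone.utc),
                    meeting_id=None,
                    status="orphan",
                    track_keys=None,
                )
            )
            return

        meeting_id, recording_type = match
        if recording_type == "cloud":
            # Composed video: attach to the meeting, first writer wins
            await meetings_controller.set_cloud_recording_if_missing(
                meeting_id=meeting_id,
                s3_key=payload.s3_key,
                duration=payload.duration,
            )
        else:
            # raw-tracks: create the recording row; only the winner queues work
            created = await recordings_controller.try_create_with_meeting(
                Recording(
                    id=payload.recording_id,
                    bucket_name=payload.bucket_name,
                    object_key=payload.object_key,
                    recorded_at=datetime.now(timezone.utc),
                    meeting_id=meeting_id,
                    status="pending",
                    track_keys=payload.track_keys,
                )
            )
            if created:
                process_multitrack_recording.delay(
                    recording_id=payload.recording_id,
                    bucket_name=payload.bucket_name,
                    daily_room_name=payload.room_name,
                    recording_start_ts=payload.start_ts,
                    track_keys=payload.track_keys,
                )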
server/tests/test_time_based_meeting_matching.py (new file, 374 lines)
@@ -0,0 +1,374 @@
"""
Integration tests for time-based meeting-to-recording matching.

Tests the critical path for matching Daily.co recordings to meetings when
the API doesn't return instanceId.
"""

from datetime import datetime, timedelta, timezone

import pytest

from reflector.db.meetings import meetings_controller
from reflector.db.rooms import rooms_controller


@pytest.fixture
async def test_room():
    """Create a test room for meetings."""
    room = await rooms_controller.add(
        name="test-room-time",
        user_id="test-user-id",
        zulip_auto_post=False,
        zulip_stream="",
        zulip_topic="",
        is_locked=False,
        room_mode="normal",
        recording_type="cloud",
        recording_trigger="automatic",
        is_shared=False,
        platform="daily",
    )
    return room


@pytest.fixture
def base_time():
    """Fixed timestamp for deterministic tests."""
    return datetime(2026, 1, 14, 9, 0, 0, tzinfo=timezone.utc)


class TestTimeBasedMatching:
    """Test get_by_room_name_and_time() matching logic."""

    async def test_exact_time_match(self, test_room, base_time):
        """Recording timestamp exactly matches meeting start_date."""
        meeting = await meetings_controller.create(
            id="meeting-exact",
            room_name="daily-test-20260114090000",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-20260114090000",
            recording_start=base_time,
            time_window_hours=168,
        )

        assert result is not None
        assert result.id == meeting.id

    async def test_recording_slightly_after_meeting_start(self, test_room, base_time):
        """Recording started 1 minute after meeting (participants joined late)."""
        meeting = await meetings_controller.create(
            id="meeting-late",
            room_name="daily-test-20260114090100",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        recording_start = base_time + timedelta(minutes=1)

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-20260114090100",
            recording_start=recording_start,
            time_window_hours=168,
        )

        assert result is not None
        assert result.id == meeting.id

    async def test_duplicate_room_names_picks_closest(self, test_room, base_time):
        """
        Two meetings with same room_name (duplicate/race condition).
        Should pick closest by timestamp.
        """
        meeting1 = await meetings_controller.create(
            id="meeting-1-first",
            room_name="daily-duplicate-room",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        meeting2 = await meetings_controller.create(
            id="meeting-2-second",
            room_name="daily-duplicate-room",  # Same room_name!
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time + timedelta(seconds=0.99),  # 0.99s later
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        # Recording started 0.5s after meeting1
        # Distance: meeting1 = 0.5s, meeting2 = 0.49s → meeting2 is closer
        recording_start = base_time + timedelta(seconds=0.5)

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-duplicate-room",
            recording_start=recording_start,
            time_window_hours=168,
        )

        assert result is not None
        assert result.id == meeting2.id  # meeting2 is closer (0.49s vs 0.5s)

    async def test_outside_time_window_returns_none(self, test_room, base_time):
        """Recording outside 1-week window returns None."""
        await meetings_controller.create(
            id="meeting-old",
            room_name="daily-test-old",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        # Recording 8 days later (outside 7-day window)
        recording_start = base_time + timedelta(days=8)

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-old",
            recording_start=recording_start,
            time_window_hours=168,
        )

        assert result is None

    async def test_tie_breaker_deterministic(self, test_room, base_time):
        """When time delta identical, tie-breaker by meeting.id is deterministic."""
        meeting_z = await meetings_controller.create(
            id="zzz-last-uuid",
            room_name="daily-test-tie",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        meeting_a = await meetings_controller.create(
            id="aaa-first-uuid",
            room_name="daily-test-tie",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,  # Exact same start_date
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-tie",
            recording_start=base_time,
            time_window_hours=168,
        )

        assert result is not None
        # Tie-breaker: lexicographically first UUID
        assert result.id == "aaa-first-uuid"

    async def test_timezone_naive_datetime_raises(self, test_room, base_time):
        """Timezone-naive datetime raises ValueError."""
        await meetings_controller.create(
            id="meeting-tz",
            room_name="daily-test-tz",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        # Naive datetime (no timezone)
        naive_dt = datetime(2026, 1, 14, 9, 0, 0)

        with pytest.raises(ValueError, match="timezone-aware"):
            await meetings_controller.get_by_room_name_and_time(
                room_name="daily-test-tz",
                recording_start=naive_dt,
                time_window_hours=168,
            )

    async def test_one_week_boundary_after_included(self, test_room, base_time):
        """Meeting 1-week AFTER recording is included (window_end boundary)."""
        meeting_time = base_time + timedelta(hours=168)

        await meetings_controller.create(
            id="meeting-boundary-after",
            room_name="daily-test-boundary-after",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=meeting_time,
            end_date=meeting_time + timedelta(hours=1),
            room=test_room,
        )

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-boundary-after",
            recording_start=base_time,
            time_window_hours=168,
        )

        assert result is not None
        assert result.id == "meeting-boundary-after"

    async def test_one_week_boundary_before_included(self, test_room, base_time):
        """Meeting 1-week BEFORE recording is included (window_start boundary)."""
        meeting_time = base_time - timedelta(hours=168)

        await meetings_controller.create(
            id="meeting-boundary-before",
            room_name="daily-test-boundary-before",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=meeting_time,
            end_date=meeting_time + timedelta(hours=1),
            room=test_room,
        )

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-boundary-before",
            recording_start=base_time,
            time_window_hours=168,
        )

        assert result is not None
        assert result.id == "meeting-boundary-before"

    async def test_recording_before_meeting_start(self, test_room, base_time):
        """Recording started before meeting (clock skew or early join)."""
        await meetings_controller.create(
            id="meeting-early",
            room_name="daily-test-early",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        recording_start = base_time - timedelta(minutes=2)

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-early",
            recording_start=recording_start,
            time_window_hours=168,
        )

        assert result is not None
        assert result.id == "meeting-early"

    async def test_mixed_inside_outside_window(self, test_room, base_time):
        """Multiple meetings, only one inside window - returns the inside one."""
        await meetings_controller.create(
            id="meeting-old",
            room_name="daily-test-mixed",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time - timedelta(days=10),
            end_date=base_time - timedelta(days=10, hours=-1),
            room=test_room,
        )

        await meetings_controller.create(
            id="meeting-inside",
            room_name="daily-test-mixed",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time - timedelta(days=2),
            end_date=base_time - timedelta(days=2, hours=-1),
            room=test_room,
        )

        await meetings_controller.create(
            id="meeting-future",
            room_name="daily-test-mixed",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time + timedelta(days=10),
            end_date=base_time + timedelta(days=10, hours=1),
            room=test_room,
        )

        result = await meetings_controller.get_by_room_name_and_time(
            room_name="daily-test-mixed",
            recording_start=base_time,
            time_window_hours=168,
        )

        assert result is not None
        assert result.id == "meeting-inside"
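Read together, the cases above pin down the selection rule: same room_name, within ±time_window_hours of the recording start (both boundaries inclusive), ordered by absolute time delta with meeting id as a deterministic tie-breaker. A sketch of that query, assuming SQLAlchemy Core with the `databases` package and an illustrative `meetings` Table (not the project's exact schema):

    from datetime import datetime, timedelta

    import sqlalchemy as sa


    async def get_by_room_name_and_time(
        room_name: str, recording_start: datetime, time_window_hours: int
    ):
        if recording_start.tzinfo is None:
            raise ValueError("recording_start must be timezone-aware")

        window = timedelta(hours=time_window_hours)
        # Absolute distance in seconds between meeting start and recording start;
        # fractional seconds survive, which the 0.49s-vs-0.5s case relies on.
        delta = sa.func.abs(
            sa.extract("epoch", meetings.c.start_date) - recording_start.timestamp()
        )
        query = (
            sa.select(meetings)
            .where(meetings.c.room_name == room_name)
            .where(meetings.c.start_date >= recording_start - window)  # inclusive
            .where(meetings.c.start_date <= recording_start + window)  # inclusive
            .order_by(delta, meetings.c.id)  # closest first; id breaks exact ties
            .limit(1)
        )
        row = await database.fetch_one(query)  # `databases` Database assumed in scope
        return Meeting(**dict(row)) if row else None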
class TestAtomicCloudRecordingUpdate:
    """Test atomic update prevents race conditions."""

    async def test_first_update_succeeds(self, test_room, base_time):
        """First call to set_cloud_recording_if_missing succeeds."""
        meeting = await meetings_controller.create(
            id="meeting-atomic-1",
            room_name="daily-test-atomic",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        success = await meetings_controller.set_cloud_recording_if_missing(
            meeting_id=meeting.id,
            s3_key="first-s3-key",
            duration=100,
        )

        assert success is True

        updated = await meetings_controller.get_by_id(meeting.id)
        assert updated.daily_composed_video_s3_key == "first-s3-key"
        assert updated.daily_composed_video_duration == 100

    async def test_second_update_fails_atomically(self, test_room, base_time):
        """Second call to update same meeting doesn't overwrite (atomic check)."""
        meeting = await meetings_controller.create(
            id="meeting-atomic-2",
            room_name="daily-test-atomic2",
            room_url="https://example.daily.co/test",
            host_room_url="https://example.daily.co/test?t=host",
            start_date=base_time,
            end_date=base_time + timedelta(hours=1),
            room=test_room,
        )

        success1 = await meetings_controller.set_cloud_recording_if_missing(
            meeting_id=meeting.id,
            s3_key="first-s3-key",
            duration=100,
        )

        assert success1 is True

        after_first = await meetings_controller.get_by_id(meeting.id)
        assert after_first.daily_composed_video_s3_key == "first-s3-key"

        success2 = await meetings_controller.set_cloud_recording_if_missing(
            meeting_id=meeting.id,
            s3_key="bucket/path/should-not-overwrite",
            duration=200,
        )

        assert success2 is False

        final = await meetings_controller.get_by_id(meeting.id)
        assert final.daily_composed_video_s3_key == "first-s3-key"
        assert final.daily_composed_video_duration == 100
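set_cloud_recording_if_missing only stays race-free if the "already set?" check happens inside the UPDATE itself rather than in a separate read. A sketch of that conditional write, under the same illustrative-schema assumptions as the query sketch above:

    import sqlalchemy as sa


    async def set_cloud_recording_if_missing(
        meeting_id: str, s3_key: str, duration: int
    ) -> bool:
        query = (
            sa.update(meetings)
            .where(meetings.c.id == meeting_id)
            # The guard rides inside the WHERE clause, so two concurrent callers
            # cannot both observe "missing" and both overwrite.
            .where(meetings.c.daily_composed_video_s3_key.is_(None))
            .values(
                daily_composed_video_s3_key=s3_key,
                daily_composed_video_duration=duration,
            )
        )
        result = await session.execute(query)  # AsyncSession assumed, as above
        return result.rowcount == 1  # zero rows matched -> value was already set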
server/uv.lock (generated, 45 lines changed)
@@ -159,20 +159,21 @@ wheels = [

[[package]]
name = "aiortc"
version = "1.14.0"
version = "1.13.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "aioice" },
    { name = "av" },
    { name = "cffi" },
    { name = "cryptography" },
    { name = "google-crc32c" },
    { name = "pyee" },
    { name = "pylibsrtp" },
    { name = "pyopenssl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/51/9c/4e027bfe0195de0442da301e2389329496745d40ae44d2d7c4571c4290ce/aiortc-1.14.0.tar.gz", hash = "sha256:adc8a67ace10a085721e588e06a00358ed8eaf5f6b62f0a95358ff45628dd762", size = 1180864 }
sdist = { url = "https://files.pythonhosted.org/packages/62/03/bc947d74c548e0c17cf94e5d5bdacaed0ee9e5b2bb7b8b8cf1ac7a7c01ec/aiortc-1.13.0.tar.gz", hash = "sha256:5d209975c22d0910fb5a0f0e2caa828f2da966c53580f7c7170ac3a16a871620", size = 1179894 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/57/ab/31646a49209568cde3b97eeade0d28bb78b400e6645c56422c101df68932/aiortc-1.14.0-py3-none-any.whl", hash = "sha256:4b244d7e482f4e1f67e685b3468269628eca1ec91fa5b329ab517738cfca086e", size = 93183 },
    { url = "https://files.pythonhosted.org/packages/87/29/765633cab5f1888890f5f172d1d53009b9b14e079cdfa01a62d9896a9ea9/aiortc-1.13.0-py3-none-any.whl", hash = "sha256:9ccccec98796f6a96bd1c3dd437a06da7e0f57521c96bd56e4b965a91b03a0a0", size = 92910 },
]

[[package]]

@@ -326,24 +327,28 @@ wheels = [

[[package]]
name = "av"
version = "16.1.0"
version = "14.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/78/cd/3a83ffbc3cc25b39721d174487fb0d51a76582f4a1703f98e46170ce83d4/av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd", size = 4285203 }
sdist = { url = "https://files.pythonhosted.org/packages/86/f6/0b473dab52dfdea05f28f3578b1c56b6c796ce85e76951bab7c4e38d5a74/av-14.4.0.tar.gz", hash = "sha256:3ecbf803a7fdf67229c0edada0830d6bfaea4d10bfb24f0c3f4e607cd1064b42", size = 3892203 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/48/d0/b71b65d1b36520dcb8291a2307d98b7fc12329a45614a303ff92ada4d723/av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f", size = 26927747 },
    { url = "https://files.pythonhosted.org/packages/2f/79/720a5a6ccdee06eafa211b945b0a450e3a0b8fc3d12922f0f3c454d870d2/av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b", size = 21492232 },
    { url = "https://files.pythonhosted.org/packages/8e/4f/a1ba8d922f2f6d1a3d52419463ef26dd6c4d43ee364164a71b424b5ae204/av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879", size = 39291737 },
    { url = "https://files.pythonhosted.org/packages/1a/31/fc62b9fe8738d2693e18d99f040b219e26e8df894c10d065f27c6b4f07e3/av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e", size = 40846822 },
    { url = "https://files.pythonhosted.org/packages/53/10/ab446583dbce730000e8e6beec6ec3c2753e628c7f78f334a35cad0317f4/av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83", size = 40675604 },
    { url = "https://files.pythonhosted.org/packages/31/d7/1003be685277005f6d63fd9e64904ee222fe1f7a0ea70af313468bb597db/av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63", size = 42015955 },
    { url = "https://files.pythonhosted.org/packages/2f/4a/fa2a38ee9306bf4579f556f94ecbc757520652eb91294d2a99c7cf7623b9/av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62", size = 31750339 },
    { url = "https://files.pythonhosted.org/packages/9c/84/2535f55edcd426cebec02eb37b811b1b0c163f26b8d3f53b059e2ec32665/av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6", size = 26945785 },
    { url = "https://files.pythonhosted.org/packages/b6/17/ffb940c9e490bf42e86db4db1ff426ee1559cd355a69609ec1efe4d3a9eb/av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35", size = 21481147 },
    { url = "https://files.pythonhosted.org/packages/15/c1/e0d58003d2d83c3921887d5c8c9b8f5f7de9b58dc2194356a2656a45cfdc/av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86", size = 39517197 },
    { url = "https://files.pythonhosted.org/packages/32/77/787797b43475d1b90626af76f80bfb0c12cfec5e11eafcfc4151b8c80218/av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2", size = 41174337 },
    { url = "https://files.pythonhosted.org/packages/8e/ac/d90df7f1e3b97fc5554cf45076df5045f1e0a6adf13899e10121229b826c/av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a", size = 40817720 },
    { url = "https://files.pythonhosted.org/packages/80/6f/13c3a35f9dbcebafd03fe0c4cbd075d71ac8968ec849a3cfce406c35a9d2/av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829", size = 42267396 },
    { url = "https://files.pythonhosted.org/packages/c8/b9/275df9607f7fb44317ccb1d4be74827185c0d410f52b6e2cd770fe209118/av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd", size = 31752045 },
    { url = "https://files.pythonhosted.org/packages/18/8a/d57418b686ffd05fabd5a0a9cfa97e63b38c35d7101af00e87c51c8cc43c/av-14.4.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b21d5586a88b9fce0ab78e26bd1c38f8642f8e2aad5b35e619f4d202217c701", size = 19965048 },
    { url = "https://files.pythonhosted.org/packages/f5/aa/3f878b0301efe587e9b07bb773dd6b47ef44ca09a3cffb4af50c08a170f3/av-14.4.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:cf8762d90b0f94a20c9f6e25a94f1757db5a256707964dfd0b1d4403e7a16835", size = 23750064 },
    { url = "https://files.pythonhosted.org/packages/9a/b4/6fe94a31f9ed3a927daa72df67c7151968587106f30f9f8fcd792b186633/av-14.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0ac9f08920c7bbe0795319689d901e27cb3d7870b9a0acae3f26fc9daa801a6", size = 33648775 },
    { url = "https://files.pythonhosted.org/packages/6c/f3/7f3130753521d779450c935aec3f4beefc8d4645471159f27b54e896470c/av-14.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a56d9ad2afdb638ec0404e962dc570960aae7e08ae331ad7ff70fbe99a6cf40e", size = 32216915 },
    { url = "https://files.pythonhosted.org/packages/f8/9a/8ffabfcafb42154b4b3a67d63f9b69e68fa8c34cb39ddd5cb813dd049ed4/av-14.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bed513cbcb3437d0ae47743edc1f5b4a113c0b66cdd4e1aafc533abf5b2fbf2", size = 35287279 },
    { url = "https://files.pythonhosted.org/packages/ad/11/7023ba0a2ca94a57aedf3114ab8cfcecb0819b50c30982a4c5be4d31df41/av-14.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d030c2d3647931e53d51f2f6e0fcf465263e7acf9ec6e4faa8dbfc77975318c3", size = 36294683 },
    { url = "https://files.pythonhosted.org/packages/3d/fa/b8ac9636bd5034e2b899354468bef9f4dadb067420a16d8a493a514b7817/av-14.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1cc21582a4f606271d8c2036ec7a6247df0831050306c55cf8a905701d0f0474", size = 34552391 },
    { url = "https://files.pythonhosted.org/packages/fb/29/0db48079c207d1cba7a2783896db5aec3816e17de55942262c244dffbc0f/av-14.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce7c9cd452153d36f1b1478f904ed5f9ab191d76db873bdd3a597193290805d4", size = 37265250 },
    { url = "https://files.pythonhosted.org/packages/1c/55/715858c3feb7efa4d667ce83a829c8e6ee3862e297fb2b568da3f968639d/av-14.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd261e31cc6b43ca722f80656c39934199d8f2eb391e0147e704b6226acebc29", size = 27925845 },
    { url = "https://files.pythonhosted.org/packages/a6/75/b8641653780336c90ba89e5352cac0afa6256a86a150c7703c0b38851c6d/av-14.4.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:a53e682b239dd23b4e3bc9568cfb1168fc629ab01925fdb2e7556eb426339e94", size = 19954125 },
    { url = "https://files.pythonhosted.org/packages/99/e6/37fe6fa5853a48d54d749526365780a63a4bc530be6abf2115e3a21e292a/av-14.4.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5aa0b901751a32703fa938d2155d56ce3faf3630e4a48d238b35d2f7e49e5395", size = 23751479 },
    { url = "https://files.pythonhosted.org/packages/f7/75/9a5f0e6bda5f513b62bafd1cff2b495441a8b07ab7fb7b8e62f0c0d1683f/av-14.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3b316fed3597675fe2aacfed34e25fc9d5bb0196dc8c0b014ae5ed4adda48de", size = 33801401 },
    { url = "https://files.pythonhosted.org/packages/6a/c9/e4df32a2ad1cb7f3a112d0ed610c5e43c89da80b63c60d60e3dc23793ec0/av-14.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a587b5c5014c3c0e16143a0f8d99874e46b5d0c50db6111aa0b54206b5687c81", size = 32364330 },
    { url = "https://files.pythonhosted.org/packages/ca/f0/64e7444a41817fde49a07d0239c033f7e9280bec4a4bb4784f5c79af95e6/av-14.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d53f75e8ac1ec8877a551c0db32a83c0aaeae719d05285281eaaba211bbc30", size = 35519508 },
    { url = "https://files.pythonhosted.org/packages/c2/a8/a370099daa9033a3b6f9b9bd815304b3d8396907a14d09845f27467ba138/av-14.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c8558cfde79dd8fc92d97c70e0f0fa8c94c7a66f68ae73afdf58598f0fe5e10d", size = 36448593 },
    { url = "https://files.pythonhosted.org/packages/27/bb/edb6ceff8fa7259cb6330c51dbfbc98dd1912bd6eb5f7bc05a4bb14a9d6e/av-14.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:455b6410dea0ab2d30234ffb28df7d62ca3cdf10708528e247bec3a4cdcced09", size = 34701485 },
    { url = "https://files.pythonhosted.org/packages/a7/8a/957da1f581aa1faa9a5dfa8b47ca955edb47f2b76b949950933b457bfa1d/av-14.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1661efbe9d975f927b8512d654704223d936f39016fad2ddab00aee7c40f412c", size = 37521981 },
    { url = "https://files.pythonhosted.org/packages/28/76/3f1cf0568592f100fd68eb40ed8c491ce95ca3c1378cc2d4c1f6d1bd295d/av-14.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:fbbeef1f421a3461086853d6464ad5526b56ffe8ccb0ab3fd0a1f121dfbf26ad", size = 27925944 },
]

[[package]]

@@ -3262,7 +3267,7 @@ requires-dist = [
    { name = "aiohttp-cors", specifier = ">=0.7.0" },
    { name = "aiortc", specifier = ">=1.5.0" },
    { name = "alembic", specifier = ">=1.11.3" },
    { name = "av", specifier = ">=15.0.0" },
    { name = "av", specifier = ">=10.0.0" },
    { name = "celery", specifier = ">=5.3.4" },
    { name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
    { name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },