mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-04 12:56:49 +00:00
Compare commits
35 Commits
v0.29.0
...
feat/dag-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b1eeb651f6 | ||
|
|
499de45fdb | ||
|
|
b4ccbe6928 | ||
|
|
38f100a83e | ||
|
|
faec509a33 | ||
|
|
4d9f5fa4b4 | ||
|
|
455cb3d099 | ||
|
|
2410688559 | ||
|
|
6dd96bfa5e | ||
|
|
0acaa0de93 | ||
|
|
c45d3182ee | ||
|
|
0c06cdd117 | ||
|
|
ebae9124b6 | ||
|
|
a6a5d35e44 | ||
|
|
025e6da539 | ||
|
|
4b79b0c989 | ||
|
|
a359c845ff | ||
| cd2255cfbc | |||
| 15ab2e306e | |||
| 1ce1c7a910 | |||
|
|
984795357e | ||
| fa3cf5da0f | |||
| 8707c6694a | |||
| 4acde4b7fd | |||
| a2ed7d60d5 | |||
| a08f94a5bf | |||
|
|
c05d1f03cd | ||
|
|
23eb1371cb | ||
| 2592e369f6 | |||
| 7fde64e252 | |||
| 2ca624f052 | |||
| fc3ef6c893 | |||
| 5d26461477 | |||
| 6c175a11d8 | |||
| 6e786b7631 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,5 +1,6 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
server/.env
|
server/.env
|
||||||
|
server/.env.production
|
||||||
.env
|
.env
|
||||||
Caddyfile
|
Caddyfile
|
||||||
server/exportdanswer
|
server/exportdanswer
|
||||||
|
|||||||
@@ -3,3 +3,5 @@ docs/docs/installation/auth-setup.md:curl-auth-header:250
|
|||||||
docs/docs/installation/daily-setup.md:curl-auth-header:277
|
docs/docs/installation/daily-setup.md:curl-auth-header:277
|
||||||
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:74
|
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:74
|
||||||
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:83
|
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:83
|
||||||
|
server/reflector/worker/process.py:generic-api-key:465
|
||||||
|
server/reflector/worker/process.py:generic-api-key:594
|
||||||
|
|||||||
50
CHANGELOG.md
50
CHANGELOG.md
@@ -1,5 +1,55 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## [0.33.0](https://github.com/Monadical-SAS/reflector/compare/v0.32.2...v0.33.0) (2026-02-05)
|
||||||
|
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
* Daily+hatchet default ([#846](https://github.com/Monadical-SAS/reflector/issues/846)) ([15ab2e3](https://github.com/Monadical-SAS/reflector/commit/15ab2e306eacf575494b4b5d2b2ad779d44a1c7f))
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* websocket tests ([#825](https://github.com/Monadical-SAS/reflector/issues/825)) ([1ce1c7a](https://github.com/Monadical-SAS/reflector/commit/1ce1c7a910b6c374115d2437b17f9d288ef094dc))
|
||||||
|
|
||||||
|
## [0.32.2](https://github.com/Monadical-SAS/reflector/compare/v0.32.1...v0.32.2) (2026-02-03)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* increase TIMEOUT_MEDIUM from 2m to 5m for LLM tasks ([#843](https://github.com/Monadical-SAS/reflector/issues/843)) ([4acde4b](https://github.com/Monadical-SAS/reflector/commit/4acde4b7fdef88cc02ca12cf38c9020b05ed96ac))
|
||||||
|
* make caddy optional ([#841](https://github.com/Monadical-SAS/reflector/issues/841)) ([a2ed7d6](https://github.com/Monadical-SAS/reflector/commit/a2ed7d60d557b551a5b64e4dfd909b63a791d9fc))
|
||||||
|
* use Daily API recording.duration as master source for transcript duration ([#844](https://github.com/Monadical-SAS/reflector/issues/844)) ([8707c66](https://github.com/Monadical-SAS/reflector/commit/8707c6694a80c939b6214bbc13331741f192e082))
|
||||||
|
|
||||||
|
## [0.32.1](https://github.com/Monadical-SAS/reflector/compare/v0.32.0...v0.32.1) (2026-01-30)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* daily multitrack pipeline finalize dependency fix ([23eb137](https://github.com/Monadical-SAS/reflector/commit/23eb1371cb9348c4b81eb12ad506b582f8a4799e))
|
||||||
|
* match httpx pad with hatchet audio timeout ([c05d1f0](https://github.com/Monadical-SAS/reflector/commit/c05d1f03cd8369fc06efd455527e50246887efd0))
|
||||||
|
|
||||||
|
## [0.32.0](https://github.com/Monadical-SAS/reflector/compare/v0.31.0...v0.32.0) (2026-01-30)
|
||||||
|
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
* modal padding ([#837](https://github.com/Monadical-SAS/reflector/issues/837)) ([7fde64e](https://github.com/Monadical-SAS/reflector/commit/7fde64e2529a1d37b0f7507c62d983a7bd0b5b89))
|
||||||
|
|
||||||
|
## [0.31.0](https://github.com/Monadical-SAS/reflector/compare/v0.30.0...v0.31.0) (2026-01-23)
|
||||||
|
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
* mixdown optional ([#834](https://github.com/Monadical-SAS/reflector/issues/834)) ([fc3ef6c](https://github.com/Monadical-SAS/reflector/commit/fc3ef6c8933231c731fad84e7477a476a6220a5e))
|
||||||
|
|
||||||
|
## [0.30.0](https://github.com/Monadical-SAS/reflector/compare/v0.29.0...v0.30.0) (2026-01-23)
|
||||||
|
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
* brady bunch ([#816](https://github.com/Monadical-SAS/reflector/issues/816)) ([6c175a1](https://github.com/Monadical-SAS/reflector/commit/6c175a11d8a3745095bfad06a4ad3ccdfd278433))
|
||||||
|
|
||||||
## [0.29.0](https://github.com/Monadical-SAS/reflector/compare/v0.28.1...v0.29.0) (2026-01-21)
|
## [0.29.0](https://github.com/Monadical-SAS/reflector/compare/v0.28.1...v0.29.0) (2026-01-21)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
# Reflector Caddyfile
|
# Reflector Caddyfile (optional reverse proxy)
|
||||||
# Replace example.com with your actual domains
|
# Use this only when you run Caddy via: docker compose -f docker-compose.prod.yml --profile caddy up -d
|
||||||
# CORS is handled by the backend - Caddy just proxies
|
# If Coolify, Traefik, or nginx already use ports 80/443, do NOT start Caddy; point your proxy at web:3000 and server:1250.
|
||||||
|
#
|
||||||
|
# Replace example.com with your actual domains. CORS is handled by the backend - Caddy just proxies.
|
||||||
#
|
#
|
||||||
# For environment variable substitution, set:
|
# For environment variable substitution, set:
|
||||||
# FRONTEND_DOMAIN=app.example.com
|
# FRONTEND_DOMAIN=app.example.com
|
||||||
|
|||||||
@@ -1,9 +1,14 @@
|
|||||||
# Production Docker Compose configuration
|
# Production Docker Compose configuration
|
||||||
# Usage: docker compose -f docker-compose.prod.yml up -d
|
# Usage: docker compose -f docker-compose.prod.yml up -d
|
||||||
#
|
#
|
||||||
|
# Caddy (reverse proxy on ports 80/443) is OPTIONAL and behind the "caddy" profile:
|
||||||
|
# - With Caddy (self-hosted, you manage SSL): docker compose -f docker-compose.prod.yml --profile caddy up -d
|
||||||
|
# - Without Caddy (Coolify/Traefik/nginx already on 80/443): docker compose -f docker-compose.prod.yml up -d
|
||||||
|
# Then point your proxy at web:3000 (frontend) and server:1250 (API).
|
||||||
|
#
|
||||||
# Prerequisites:
|
# Prerequisites:
|
||||||
# 1. Copy .env.example to .env and configure for both server/ and www/
|
# 1. Copy .env.example to .env and configure for both server/ and www/
|
||||||
# 2. Copy Caddyfile.example to Caddyfile and edit with your domains
|
# 2. If using Caddy: copy Caddyfile.example to Caddyfile and edit it with your domains
|
||||||
# 3. Deploy Modal GPU functions (see gpu/modal_deployments/deploy-all.sh)
|
# 3. Deploy Modal GPU functions (see gpu/modal_deployments/deploy-all.sh)
|
||||||
|
|
||||||
services:
|
services:
|
||||||
@@ -84,6 +89,8 @@ services:
|
|||||||
retries: 3
|
retries: 3
|
||||||
|
|
||||||
caddy:
|
caddy:
|
||||||
|
profiles:
|
||||||
|
- caddy
|
||||||
image: caddy:2-alpine
|
image: caddy:2-alpine
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
@@ -11,15 +11,15 @@ This page documents the Docker Compose configuration for Reflector. For the comp
|
|||||||
|
|
||||||
The `docker-compose.prod.yml` includes these services:
|
The `docker-compose.prod.yml` includes these services:
|
||||||
|
|
||||||
| Service | Image | Purpose |
|
| Service | Image | Purpose |
|
||||||
|---------|-------|---------|
|
| ---------- | --------------------------------- | --------------------------------------------------------------------------- |
|
||||||
| `web` | `monadicalsas/reflector-frontend` | Next.js frontend |
|
| `web` | `monadicalsas/reflector-frontend` | Next.js frontend |
|
||||||
| `server` | `monadicalsas/reflector-backend` | FastAPI backend |
|
| `server` | `monadicalsas/reflector-backend` | FastAPI backend |
|
||||||
| `worker` | `monadicalsas/reflector-backend` | Celery worker for background tasks |
|
| `worker` | `monadicalsas/reflector-backend` | Celery worker for background tasks |
|
||||||
| `beat` | `monadicalsas/reflector-backend` | Celery beat scheduler |
|
| `beat` | `monadicalsas/reflector-backend` | Celery beat scheduler |
|
||||||
| `redis` | `redis:7.2-alpine` | Message broker and cache |
|
| `redis` | `redis:7.2-alpine` | Message broker and cache |
|
||||||
| `postgres` | `postgres:17-alpine` | Primary database |
|
| `postgres` | `postgres:17-alpine` | Primary database |
|
||||||
| `caddy` | `caddy:2-alpine` | Reverse proxy with auto-SSL |
|
| `caddy` | `caddy:2-alpine` | Reverse proxy with auto-SSL (optional; see [Caddy profile](#caddy-profile)) |
|
||||||
|
|
||||||
## Environment Files
|
## Environment Files
|
||||||
|
|
||||||
@@ -30,6 +30,7 @@ Reflector uses two separate environment files:
|
|||||||
Used by: `server`, `worker`, `beat`
|
Used by: `server`, `worker`, `beat`
|
||||||
|
|
||||||
Key variables:
|
Key variables:
|
||||||
|
|
||||||
```env
|
```env
|
||||||
# Database connection
|
# Database connection
|
||||||
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||||
@@ -54,6 +55,7 @@ TRANSCRIPT_MODAL_API_KEY=...
|
|||||||
Used by: `web`
|
Used by: `web`
|
||||||
|
|
||||||
Key variables:
|
Key variables:
|
||||||
|
|
||||||
```env
|
```env
|
||||||
# Domain configuration
|
# Domain configuration
|
||||||
SITE_URL=https://app.example.com
|
SITE_URL=https://app.example.com
|
||||||
@@ -70,26 +72,42 @@ Note: `API_URL` is used client-side (browser), `SERVER_API_URL` is used server-s
|
|||||||
|
|
||||||
## Volumes
|
## Volumes
|
||||||
|
|
||||||
| Volume | Purpose |
|
| Volume | Purpose |
|
||||||
|--------|---------|
|
| --------------- | ----------------------------- |
|
||||||
| `redis_data` | Redis persistence |
|
| `redis_data` | Redis persistence |
|
||||||
| `postgres_data` | PostgreSQL data |
|
| `postgres_data` | PostgreSQL data |
|
||||||
| `server_data` | Uploaded files, local storage |
|
| `server_data` | Uploaded files, local storage |
|
||||||
| `caddy_data` | SSL certificates |
|
| `caddy_data` | SSL certificates |
|
||||||
| `caddy_config` | Caddy configuration |
|
| `caddy_config` | Caddy configuration |
|
||||||
|
|
||||||
## Network
|
## Network
|
||||||
|
|
||||||
All services share the default network. The network is marked `attachable: true` to allow external containers (like Authentik) to join.
|
All services share the default network. The network is marked `attachable: true` to allow external containers (like Authentik) to join.
|
||||||
|
|
||||||
|
## Caddy profile
|
||||||
|
|
||||||
|
Caddy (ports 80 and 443) is **optional** and behind the `caddy` profile so it does not conflict with an existing reverse proxy (e.g. Coolify, Traefik, nginx).
|
||||||
|
|
||||||
|
- **With Caddy** (you want Reflector to handle SSL):
|
||||||
|
`docker compose -f docker-compose.prod.yml --profile caddy up -d`
|
||||||
|
- **Without Caddy** (Coolify or another proxy already on 80/443):
|
||||||
|
`docker compose -f docker-compose.prod.yml up -d`
|
||||||
|
Then configure your proxy to send traffic to `web:3000` (frontend) and `server:1250` (API).
|
||||||
|
|
||||||
## Common Commands
|
## Common Commands
|
||||||
|
|
||||||
### Start all services
|
### Start all services
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Without Caddy (e.g. when using Coolify)
|
||||||
docker compose -f docker-compose.prod.yml up -d
|
docker compose -f docker-compose.prod.yml up -d
|
||||||
|
|
||||||
|
# With Caddy as reverse proxy
|
||||||
|
docker compose -f docker-compose.prod.yml --profile caddy up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
### View logs
|
### View logs
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# All services
|
# All services
|
||||||
docker compose -f docker-compose.prod.yml logs -f
|
docker compose -f docker-compose.prod.yml logs -f
|
||||||
@@ -99,6 +117,7 @@ docker compose -f docker-compose.prod.yml logs server --tail 50
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Restart a service
|
### Restart a service
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Quick restart (doesn't reload .env changes)
|
# Quick restart (doesn't reload .env changes)
|
||||||
docker compose -f docker-compose.prod.yml restart server
|
docker compose -f docker-compose.prod.yml restart server
|
||||||
@@ -108,27 +127,32 @@ docker compose -f docker-compose.prod.yml up -d server
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Run database migrations
|
### Run database migrations
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
|
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
|
||||||
```
|
```
|
||||||
|
|
||||||
### Access database
|
### Access database
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml exec postgres psql -U reflector
|
docker compose -f docker-compose.prod.yml exec postgres psql -U reflector
|
||||||
```
|
```
|
||||||
|
|
||||||
### Pull latest images
|
### Pull latest images
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml pull
|
docker compose -f docker-compose.prod.yml pull
|
||||||
docker compose -f docker-compose.prod.yml up -d
|
docker compose -f docker-compose.prod.yml up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
### Stop all services
|
### Stop all services
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml down
|
docker compose -f docker-compose.prod.yml down
|
||||||
```
|
```
|
||||||
|
|
||||||
### Full reset (WARNING: deletes data)
|
### Full reset (WARNING: deletes data)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml down -v
|
docker compose -f docker-compose.prod.yml down -v
|
||||||
```
|
```
|
||||||
@@ -187,6 +211,7 @@ The Caddyfile supports environment variable substitution:
|
|||||||
Set `FRONTEND_DOMAIN` and `API_DOMAIN` environment variables, or edit the file directly.
|
Set `FRONTEND_DOMAIN` and `API_DOMAIN` environment variables, or edit the file directly.
|
||||||
|
|
||||||
### Reload Caddy after changes
|
### Reload Caddy after changes
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml exec caddy caddy reload --config /etc/caddy/Caddyfile
|
docker compose -f docker-compose.prod.yml exec caddy caddy reload --config /etc/caddy/Caddyfile
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ flowchart LR
|
|||||||
|
|
||||||
Before starting, you need:
|
Before starting, you need:
|
||||||
|
|
||||||
- **Production server** - 4+ cores, 8GB+ RAM, public IP
|
- **Production server** - 4+ cores, 8GB+ RAM, public IP
|
||||||
- **Two domain names** - e.g., `app.example.com` (frontend) and `api.example.com` (backend)
|
- **Two domain names** - e.g., `app.example.com` (frontend) and `api.example.com` (backend)
|
||||||
- **GPU processing** - Choose one:
|
- **GPU processing** - Choose one:
|
||||||
- Modal.com account, OR
|
- Modal.com account, OR
|
||||||
@@ -60,16 +60,17 @@ Type: A Name: api Value: <your-server-ip>
|
|||||||
|
|
||||||
Reflector requires GPU processing for transcription and speaker diarization. Choose one option:
|
Reflector requires GPU processing for transcription and speaker diarization. Choose one option:
|
||||||
|
|
||||||
| | **Modal.com (Cloud)** | **Self-Hosted GPU** |
|
| | **Modal.com (Cloud)** | **Self-Hosted GPU** |
|
||||||
|---|---|---|
|
| ------------ | --------------------------------- | ---------------------------- |
|
||||||
| **Best for** | No GPU hardware, zero maintenance | Own GPU server, full control |
|
| **Best for** | No GPU hardware, zero maintenance | Own GPU server, full control |
|
||||||
| **Pricing** | Pay-per-use | Fixed infrastructure cost |
|
| **Pricing** | Pay-per-use | Fixed infrastructure cost |
|
||||||
|
|
||||||
### Option A: Modal.com (Serverless Cloud GPU)
|
### Option A: Modal.com (Serverless Cloud GPU)
|
||||||
|
|
||||||
#### Accept HuggingFace Licenses
|
#### Accept HuggingFace Licenses
|
||||||
|
|
||||||
Visit both pages and click "Accept":
|
Visit both pages and click "Accept":
|
||||||
|
|
||||||
- https://huggingface.co/pyannote/speaker-diarization-3.1
|
- https://huggingface.co/pyannote/speaker-diarization-3.1
|
||||||
- https://huggingface.co/pyannote/segmentation-3.0
|
- https://huggingface.co/pyannote/segmentation-3.0
|
||||||
|
|
||||||
@@ -179,6 +180,7 @@ Save these credentials - you'll need them in the next step.
|
|||||||
## Configure Environment
|
## Configure Environment
|
||||||
|
|
||||||
Reflector has two env files:
|
Reflector has two env files:
|
||||||
|
|
||||||
- `server/.env` - Backend configuration
|
- `server/.env` - Backend configuration
|
||||||
- `www/.env` - Frontend configuration
|
- `www/.env` - Frontend configuration
|
||||||
|
|
||||||
@@ -190,6 +192,7 @@ nano server/.env
|
|||||||
```
|
```
|
||||||
|
|
||||||
**Required settings:**
|
**Required settings:**
|
||||||
|
|
||||||
```env
|
```env
|
||||||
# Database (defaults work with docker-compose.prod.yml)
|
# Database (defaults work with docker-compose.prod.yml)
|
||||||
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||||
@@ -249,6 +252,7 @@ nano www/.env
|
|||||||
```
|
```
|
||||||
|
|
||||||
**Required settings:**
|
**Required settings:**
|
||||||
|
|
||||||
```env
|
```env
|
||||||
# Your domains
|
# Your domains
|
||||||
SITE_URL=https://app.example.com
|
SITE_URL=https://app.example.com
|
||||||
@@ -266,7 +270,11 @@ FEATURE_REQUIRE_LOGIN=false
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Configure Caddy
|
## Reverse proxy (Caddy or existing)
|
||||||
|
|
||||||
|
**If Coolify, Traefik, or nginx already use ports 80/443** (e.g. Coolify on your host): skip Caddy. Start the stack without the Caddy profile (see [Start Services](#start-services) below), then point your proxy at `web:3000` (frontend) and `server:1250` (API).
|
||||||
|
|
||||||
|
**If you want Reflector to provide the reverse proxy and SSL:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cp Caddyfile.example Caddyfile
|
cp Caddyfile.example Caddyfile
|
||||||
@@ -289,10 +297,18 @@ Replace `example.com` with your domains. The `{$VAR:default}` syntax uses Caddy'
|
|||||||
|
|
||||||
## Start Services
|
## Start Services
|
||||||
|
|
||||||
|
**Without Caddy** (e.g. Coolify already on 80/443):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml up -d
|
docker compose -f docker-compose.prod.yml up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**With Caddy** (Reflector handles SSL):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f docker-compose.prod.yml --profile caddy up -d
|
||||||
|
```
|
||||||
|
|
||||||
Wait for containers to start (first run may take 1-2 minutes to pull images and initialize).
|
Wait for containers to start (first run may take 1-2 minutes to pull images and initialize).
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -300,18 +316,21 @@ Wait for containers to start (first run may take 1-2 minutes to pull images and
|
|||||||
## Verify Deployment
|
## Verify Deployment
|
||||||
|
|
||||||
### Check services
|
### Check services
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml ps
|
docker compose -f docker-compose.prod.yml ps
|
||||||
# All should show "Up"
|
# All should show "Up"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Test API
|
### Test API
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl https://api.example.com/health
|
curl https://api.example.com/health
|
||||||
# Should return: {"status":"healthy"}
|
# Should return: {"status":"healthy"}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Test Frontend
|
### Test Frontend
|
||||||
|
|
||||||
- Visit https://app.example.com
|
- Visit https://app.example.com
|
||||||
- You should see the Reflector interface
|
- You should see the Reflector interface
|
||||||
- Try uploading an audio file to test transcription
|
- Try uploading an audio file to test transcription
|
||||||
@@ -327,6 +346,7 @@ By default, Reflector is open (no login required). **Authentication is required
|
|||||||
See [Authentication Setup](./auth-setup) for full Authentik OAuth configuration.
|
See [Authentication Setup](./auth-setup) for full Authentik OAuth configuration.
|
||||||
|
|
||||||
Quick summary:
|
Quick summary:
|
||||||
|
|
||||||
1. Deploy Authentik on your server
|
1. Deploy Authentik on your server
|
||||||
2. Create OAuth provider in Authentik
|
2. Create OAuth provider in Authentik
|
||||||
3. Extract public key for JWT verification
|
3. Extract public key for JWT verification
|
||||||
@@ -358,6 +378,7 @@ DAILYCO_STORAGE_AWS_ROLE_ARN=<arn:aws:iam::ACCOUNT:role/DailyCo>
|
|||||||
```
|
```
|
||||||
|
|
||||||
Reload env and restart:
|
Reload env and restart:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml up -d server worker
|
docker compose -f docker-compose.prod.yml up -d server worker
|
||||||
```
|
```
|
||||||
@@ -367,35 +388,43 @@ docker compose -f docker-compose.prod.yml up -d server worker
|
|||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
### Check logs for errors
|
### Check logs for errors
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml logs server --tail 20
|
docker compose -f docker-compose.prod.yml logs server --tail 20
|
||||||
docker compose -f docker-compose.prod.yml logs worker --tail 20
|
docker compose -f docker-compose.prod.yml logs worker --tail 20
|
||||||
```
|
```
|
||||||
|
|
||||||
### Services won't start
|
### Services won't start
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose -f docker-compose.prod.yml logs
|
docker compose -f docker-compose.prod.yml logs
|
||||||
```
|
```
|
||||||
|
|
||||||
### CORS errors in browser
|
### CORS errors in browser
|
||||||
|
|
||||||
- Verify `CORS_ORIGIN` in `server/.env` matches your frontend domain exactly (including `https://`)
|
- Verify `CORS_ORIGIN` in `server/.env` matches your frontend domain exactly (including `https://`)
|
||||||
- Reload env: `docker compose -f docker-compose.prod.yml up -d server`
|
- Reload env: `docker compose -f docker-compose.prod.yml up -d server`
|
||||||
|
|
||||||
### SSL certificate errors
|
### SSL certificate errors (when using Caddy)
|
||||||
|
|
||||||
- Caddy auto-provisions Let's Encrypt certificates
|
- Caddy auto-provisions Let's Encrypt certificates
|
||||||
- Ensure ports 80 and 443 are open
|
- Ensure ports 80 and 443 are open and not used by another proxy
|
||||||
- Check: `docker compose -f docker-compose.prod.yml logs caddy`
|
- Check: `docker compose -f docker-compose.prod.yml logs caddy`
|
||||||
|
- If port 80 or 443 is already in use (e.g. by Coolify), run without Caddy: `docker compose -f docker-compose.prod.yml up -d` and use your existing proxy
|
||||||
|
|
||||||
### Transcription not working
|
### Transcription not working
|
||||||
|
|
||||||
- Check Modal dashboard: https://modal.com/apps
|
- Check Modal dashboard: https://modal.com/apps
|
||||||
- Verify URLs in `server/.env` match deployed functions
|
- Verify URLs in `server/.env` match deployed functions
|
||||||
- Check worker logs: `docker compose -f docker-compose.prod.yml logs worker`
|
- Check worker logs: `docker compose -f docker-compose.prod.yml logs worker`
|
||||||
|
|
||||||
### "Login required" but auth not configured
|
### "Login required" but auth not configured
|
||||||
|
|
||||||
- Set `FEATURE_REQUIRE_LOGIN=false` in `www/.env`
|
- Set `FEATURE_REQUIRE_LOGIN=false` in `www/.env`
|
||||||
- Rebuild frontend: `docker compose -f docker-compose.prod.yml up -d --force-recreate web`
|
- Rebuild frontend: `docker compose -f docker-compose.prod.yml up -d --force-recreate web`
|
||||||
|
|
||||||
### Database migrations or connectivity issues
|
### Database migrations or connectivity issues
|
||||||
|
|
||||||
Migrations run automatically on server startup. To check database connectivity or debug migration failures:
|
Migrations run automatically on server startup. To check database connectivity or debug migration failures:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -408,4 +437,3 @@ docker compose -f docker-compose.prod.yml exec server uv run python -c "from ref
|
|||||||
# Manually run migrations (if needed)
|
# Manually run migrations (if needed)
|
||||||
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
|
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -131,6 +131,15 @@ if [ -z "$DIARIZER_URL" ]; then
|
|||||||
fi
|
fi
|
||||||
echo " -> $DIARIZER_URL"
|
echo " -> $DIARIZER_URL"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "Deploying padding (CPU audio processing via Modal SDK)..."
|
||||||
|
modal deploy reflector_padding.py
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Error: Failed to deploy padding. Check Modal dashboard for details."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " -> reflector-padding.pad_track (Modal SDK function)"
|
||||||
|
|
||||||
# --- Output Configuration ---
|
# --- Output Configuration ---
|
||||||
echo ""
|
echo ""
|
||||||
echo "=========================================="
|
echo "=========================================="
|
||||||
@@ -147,4 +156,6 @@ echo ""
|
|||||||
echo "DIARIZATION_BACKEND=modal"
|
echo "DIARIZATION_BACKEND=modal"
|
||||||
echo "DIARIZATION_URL=$DIARIZER_URL"
|
echo "DIARIZATION_URL=$DIARIZER_URL"
|
||||||
echo "DIARIZATION_MODAL_API_KEY=$API_KEY"
|
echo "DIARIZATION_MODAL_API_KEY=$API_KEY"
|
||||||
|
echo ""
|
||||||
|
echo "# Padding uses Modal SDK (requires MODAL_TOKEN_ID/SECRET in worker containers)"
|
||||||
echo "# --- End Modal Configuration ---"
|
echo "# --- End Modal Configuration ---"
|
||||||
|
|||||||
277
gpu/modal_deployments/reflector_padding.py
Normal file
277
gpu/modal_deployments/reflector_padding.py
Normal file
@@ -0,0 +1,277 @@
|
|||||||
|
"""
|
||||||
|
Reflector GPU backend - audio padding
|
||||||
|
======================================
|
||||||
|
|
||||||
|
CPU-intensive audio padding service for adding silence to audio tracks.
|
||||||
|
Uses PyAV filter graph (adelay) for precise track synchronization.
|
||||||
|
|
||||||
|
IMPORTANT: This padding logic is duplicated from server/reflector/utils/audio_padding.py
|
||||||
|
for Modal deployment isolation (Modal can't import from server/reflector/). If you modify
|
||||||
|
the PyAV filter graph or padding algorithm, you MUST update both:
|
||||||
|
- gpu/modal_deployments/reflector_padding.py (this file)
|
||||||
|
- server/reflector/utils/audio_padding.py
|
||||||
|
|
||||||
|
Constants are duplicated from server/reflector/utils/audio_constants.py for the same reason.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from fractions import Fraction
|
||||||
|
import math
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
import modal
|
||||||
|
|
||||||
|
S3_TIMEOUT = 60 # happens 2 times
|
||||||
|
PADDING_TIMEOUT = 600 + (S3_TIMEOUT * 2)
|
||||||
|
SCALEDOWN_WINDOW = 60 # The maximum duration (in seconds) that individual containers can remain idle when scaling down.
|
||||||
|
DISCONNECT_CHECK_INTERVAL = 2 # Check for client disconnect
|
||||||
|
|
||||||
|
|
||||||
|
app = modal.App("reflector-padding")
|
||||||
|
|
||||||
|
# CPU-based image
|
||||||
|
image = (
|
||||||
|
modal.Image.debian_slim(python_version="3.12")
|
||||||
|
.apt_install("ffmpeg") # Required by PyAV
|
||||||
|
.pip_install(
|
||||||
|
"av==13.1.0", # PyAV for audio processing
|
||||||
|
"requests==2.32.3", # HTTP for presigned URL downloads/uploads
|
||||||
|
"fastapi==0.115.12", # API framework
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
|
||||||
|
OPUS_STANDARD_SAMPLE_RATE = 48000
|
||||||
|
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
|
||||||
|
OPUS_DEFAULT_BIT_RATE = 128000
|
||||||
|
|
||||||
|
|
||||||
|
@app.function(
|
||||||
|
cpu=2.0,
|
||||||
|
timeout=PADDING_TIMEOUT,
|
||||||
|
scaledown_window=SCALEDOWN_WINDOW,
|
||||||
|
image=image,
|
||||||
|
)
|
||||||
|
@modal.asgi_app()
|
||||||
|
def web():
|
||||||
|
from fastapi import FastAPI, Request, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
class PaddingRequest(BaseModel):
|
||||||
|
track_url: str
|
||||||
|
output_url: str
|
||||||
|
start_time_seconds: float
|
||||||
|
track_index: int
|
||||||
|
|
||||||
|
class PaddingResponse(BaseModel):
|
||||||
|
size: int
|
||||||
|
cancelled: bool = False
|
||||||
|
|
||||||
|
web_app = FastAPI()
|
||||||
|
|
||||||
|
@web_app.post("/pad")
|
||||||
|
async def pad_track_endpoint(request: Request, req: PaddingRequest) -> PaddingResponse:
|
||||||
|
"""Modal web endpoint for padding audio tracks with disconnect detection.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
if not req.track_url:
|
||||||
|
raise HTTPException(status_code=400, detail="track_url cannot be empty")
|
||||||
|
if not req.output_url:
|
||||||
|
raise HTTPException(status_code=400, detail="output_url cannot be empty")
|
||||||
|
if req.start_time_seconds <= 0:
|
||||||
|
raise HTTPException(status_code=400, detail=f"start_time_seconds must be positive, got {req.start_time_seconds}")
|
||||||
|
if req.start_time_seconds > 18000:
|
||||||
|
raise HTTPException(status_code=400, detail=f"start_time_seconds exceeds maximum 18000s (5 hours)")
|
||||||
|
|
||||||
|
logger.info(f"Padding request: track {req.track_index}, delay={req.start_time_seconds}s")
|
||||||
|
|
||||||
|
# Thread-safe cancellation flag shared between async disconnect checker and blocking thread
|
||||||
|
import threading
|
||||||
|
cancelled = threading.Event()
|
||||||
|
|
||||||
|
async def check_disconnect():
|
||||||
|
"""Background task to check for client disconnect every 2 seconds."""
|
||||||
|
while not cancelled.is_set():
|
||||||
|
await asyncio.sleep(DISCONNECT_CHECK_INTERVAL)
|
||||||
|
if await request.is_disconnected():
|
||||||
|
logger.warning("Client disconnected, setting cancellation flag")
|
||||||
|
cancelled.set()
|
||||||
|
break
|
||||||
|
|
||||||
|
# Start disconnect checker in background
|
||||||
|
disconnect_task = asyncio.create_task(check_disconnect())
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = await asyncio.get_event_loop().run_in_executor(
|
||||||
|
None, _pad_track_blocking, req, cancelled, logger
|
||||||
|
)
|
||||||
|
return PaddingResponse(**result)
|
||||||
|
finally:
|
||||||
|
cancelled.set()
|
||||||
|
disconnect_task.cancel()
|
||||||
|
try:
|
||||||
|
await disconnect_task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _pad_track_blocking(req, cancelled, logger) -> dict:
    """Download a track, prepend silence with PyAV, and upload the result.

    Runs synchronously (the caller dispatches it to an executor thread) and
    polls ``cancelled`` so that work stops early once the flag is set.

    Args:
        req: Padding request; this function reads ``track_url``,
            ``output_url``, ``start_time_seconds`` and ``track_index``.
        cancelled: threading.Event for thread-safe cancellation signaling.
        logger: Logger used for progress and cleanup messages.

    Returns:
        ``{"size": <output size in bytes>}`` on success, or
        ``{"size": 0, "cancelled": True}`` when cancellation was observed.

    Raises:
        requests.HTTPError: If the download or upload returns an error status.
        ValueError: If the downloaded file contains no audio stream.
    """
    import shutil
    import time

    import av
    import requests
    from av.audio.resampler import AudioResampler

    temp_dir = tempfile.mkdtemp()
    # Monotonic clock: interval checks must not be affected by wall-clock jumps.
    last_check = time.monotonic()

    try:
        logger.info("Downloading track for padding")
        input_path = os.path.join(temp_dir, "track.webm")
        total_bytes = 0
        chunk_count = 0

        # The context manager releases the streamed connection on every exit
        # path, including the early cancellation return below.
        with requests.get(req.track_url, stream=True, timeout=S3_TIMEOUT) as response:
            response.raise_for_status()
            with open(input_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    total_bytes += len(chunk)
                    chunk_count += 1

                    # Only consult the clock every 12th chunk, and the flag
                    # only once per DISCONNECT_CHECK_INTERVAL.
                    if chunk_count % 12 == 0:
                        now = time.monotonic()
                        if now - last_check >= DISCONNECT_CHECK_INTERVAL:
                            if cancelled.is_set():
                                logger.info("Cancelled during download, exiting early")
                                return {"size": 0, "cancelled": True}
                            last_check = now
        logger.info(f"Track downloaded: {total_bytes} bytes")

        if cancelled.is_set():
            logger.info("Cancelled after download, exiting early")
            return {"size": 0, "cancelled": True}

        # Apply padding using PyAV
        output_path = os.path.join(temp_dir, "padded.webm")
        delay_ms = math.floor(req.start_time_seconds * 1000)
        logger.info(f"Padding track {req.track_index} with {delay_ms}ms delay using PyAV")

        # Both containers are context-managed so they are closed even when
        # decoding/encoding raises or the job is cancelled mid-way.
        with av.open(input_path) as in_container:
            in_stream = next(
                (s for s in in_container.streams if s.type == "audio"), None
            )
            if in_stream is None:
                raise ValueError("No audio stream in input")

            with av.open(output_path, "w", format="webm") as out_container:
                out_stream = out_container.add_stream(
                    "libopus", rate=OPUS_STANDARD_SAMPLE_RATE
                )
                out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE

                # Filter chain: abuffer -> aresample(async=1) -> adelay -> abuffersink
                graph = av.filter.Graph()
                abuf_args = (
                    f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
                    f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
                    f"sample_fmt=s16:"
                    f"channel_layout=stereo"
                )
                src = graph.add("abuffer", args=abuf_args, name="src")
                aresample_f = graph.add("aresample", args="async=1", name="ares")
                delays_arg = f"{delay_ms}|{delay_ms}"
                adelay_f = graph.add(
                    "adelay", args=f"delays={delays_arg}:all=1", name="delay"
                )
                sink = graph.add("abuffersink", name="sink")

                src.link_to(aresample_f)
                aresample_f.link_to(adelay_f)
                adelay_f.link_to(sink)
                graph.configure()

                # Normalise every decoded frame to the format the abuffer
                # source was declared with above.
                resampler = AudioResampler(
                    format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
                )

                def mux_available_frames():
                    """Encode and mux every frame the sink can currently deliver."""
                    while True:
                        try:
                            f_out = sink.pull()
                        except Exception:
                            # Any failure to pull is treated as "nothing more
                            # to read right now" (best-effort, as before).
                            # NOTE(review): consider narrowing to PyAV's
                            # would-block / EOF errors so real filter errors
                            # are not hidden - confirm against PyAV docs.
                            return
                        f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                        f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                        for packet in out_stream.encode(f_out):
                            out_container.mux(packet)

                for frame in in_container.decode(in_stream):
                    # Check for cancellation periodically
                    now = time.monotonic()
                    if now - last_check >= DISCONNECT_CHECK_INTERVAL:
                        if cancelled.is_set():
                            logger.info("Cancelled during processing, exiting early")
                            return {"size": 0, "cancelled": True}
                        last_check = now

                    for rframe in resampler.resample(frame) or []:
                        rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                        rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                        src.push(rframe)

                    mux_available_frames()

                # Flush filter graph
                src.push(None)
                mux_available_frames()

                # Flush encoder
                for packet in out_stream.encode(None):
                    out_container.mux(packet)

        file_size = os.path.getsize(output_path)
        logger.info(f"Padding complete: {file_size} bytes")

        # Do not upload a result nobody is waiting for.
        if cancelled.is_set():
            logger.info("Cancelled before upload, exiting early")
            return {"size": 0, "cancelled": True}

        logger.info("Uploading padded track to S3")
        with open(output_path, "rb") as f:
            upload_response = requests.put(req.output_url, data=f, timeout=S3_TIMEOUT)
        upload_response.raise_for_status()
        logger.info(f"Upload complete: {file_size} bytes")

        return {"size": file_size}

    finally:
        # Remove the whole scratch directory, including partially written
        # files; cleanup failures are logged but never mask the real outcome.
        try:
            shutil.rmtree(temp_dir)
        except Exception as e:
            logger.warning(f"Failed to cleanup temp directory: {e}")
|
||||||
|
|
||||||
|
return web_app
|
||||||
|
|
||||||
@@ -4,27 +4,31 @@ ENV PYTHONUNBUFFERED=1 \
|
|||||||
UV_LINK_MODE=copy \
|
UV_LINK_MODE=copy \
|
||||||
UV_NO_CACHE=1
|
UV_NO_CACHE=1
|
||||||
|
|
||||||
|
# patch until nvidia updates the sha1 repo
|
||||||
|
ADD sequoia.config /etc/crypto-policies/back-ends/sequoia.config
|
||||||
|
|
||||||
WORKDIR /tmp
|
WORKDIR /tmp
|
||||||
RUN apt-get update \
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||||
|
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||||
|
apt-get update \
|
||||||
&& apt-get install -y \
|
&& apt-get install -y \
|
||||||
ffmpeg \
|
ffmpeg \
|
||||||
curl \
|
curl \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
gnupg \
|
gnupg \
|
||||||
wget \
|
wget
|
||||||
&& apt-get clean
|
|
||||||
# Add NVIDIA CUDA repo for Debian 12 (bookworm) and install cuDNN 9 for CUDA 12
|
# Add NVIDIA CUDA repo for Debian 12 (bookworm) and install cuDNN 9 for CUDA 12
|
||||||
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb /cuda-keyring.deb
|
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb /cuda-keyring.deb
|
||||||
RUN dpkg -i /cuda-keyring.deb \
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||||
|
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||||
|
dpkg -i /cuda-keyring.deb \
|
||||||
&& rm /cuda-keyring.deb \
|
&& rm /cuda-keyring.deb \
|
||||||
&& apt-get update \
|
&& apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends \
|
&& apt-get install -y --no-install-recommends \
|
||||||
cuda-cudart-12-6 \
|
cuda-cudart-12-6 \
|
||||||
libcublas-12-6 \
|
libcublas-12-6 \
|
||||||
libcudnn9-cuda-12 \
|
libcudnn9-cuda-12 \
|
||||||
libcudnn9-dev-cuda-12 \
|
libcudnn9-dev-cuda-12
|
||||||
&& apt-get clean \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
ADD https://astral.sh/uv/install.sh /uv-installer.sh
|
ADD https://astral.sh/uv/install.sh /uv-installer.sh
|
||||||
RUN sh /uv-installer.sh && rm /uv-installer.sh
|
RUN sh /uv-installer.sh && rm /uv-installer.sh
|
||||||
ENV PATH="/root/.local/bin/:$PATH"
|
ENV PATH="/root/.local/bin/:$PATH"
|
||||||
@@ -39,6 +43,13 @@ COPY ./app /app/app
|
|||||||
COPY ./main.py /app/
|
COPY ./main.py /app/
|
||||||
COPY ./runserver.sh /app/
|
COPY ./runserver.sh /app/
|
||||||
|
|
||||||
|
# prevent uv failing with too many open files on big cpus
|
||||||
|
ENV UV_CONCURRENT_INSTALLS=16
|
||||||
|
|
||||||
|
# first install
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv sync --compile-bytecode --locked
|
||||||
|
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
|
|
||||||
CMD ["sh", "/app/runserver.sh"]
|
CMD ["sh", "/app/runserver.sh"]
|
||||||
|
|||||||
2
gpu/self_hosted/sequoia.config
Normal file
2
gpu/self_hosted/sequoia.config
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
[hash_algorithms]
|
||||||
|
sha1 = "always"
|
||||||
496
server/docs/DAILY_REFLECTOR_DATA_MODEL.md
Normal file
496
server/docs/DAILY_REFLECTOR_DATA_MODEL.md
Normal file
@@ -0,0 +1,496 @@
|
|||||||
|
# Daily.co and Reflector Data Model
|
||||||
|
|
||||||
|
This document explains the data model relationships between Daily.co's API concepts and Reflector's database schema, clarifying common sources of confusion.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
1. [Core Entities Overview](#core-entities-overview)
|
||||||
|
2. [Daily.co vs Reflector Terminology](#dailyco-vs-reflector-terminology)
|
||||||
|
3. [Entity Relationships](#entity-relationships)
|
||||||
|
4. [Recording Multiplicity](#recording-multiplicity)
|
||||||
|
5. [Session Identifiers Explained](#session-identifiers-explained)
|
||||||
|
6. [Time-Based Matching](#time-based-matching)
|
||||||
|
7. [Multitrack Recording Details](#multitrack-recording-details)
|
||||||
|
8. [Example](#example)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Core Entities Overview
|
||||||
|
|
||||||
|
### Reflector's Four Primary Entities
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Room (Reflector) │
|
||||||
|
│ - Persistent meeting template │
|
||||||
|
│ - User-created configuration │
|
||||||
|
│ - Example: "team-standup" │
|
||||||
|
└────────────────────┬────────────────────────────────────────────┘
|
||||||
|
│ 1:N
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Meeting (Reflector) │
|
||||||
|
│ - Single session instance │
|
||||||
|
│ - Creates NEW Daily.co room with timestamp │
|
||||||
|
│ - Example: "team-standup-20260115120000" │
|
||||||
|
└────────────────────┬────────────────────────────────────────────┘
|
||||||
|
│ 1:N
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Recording (Reflector + Daily.co) │
|
||||||
|
│ - One segment of audio/video │
|
||||||
|
│ - New recording created on stop/restart │
|
||||||
|
│ - track_keys: JSON array of S3 file paths │
|
||||||
|
└────────────────────┬────────────────────────────────────────────┘
|
||||||
|
│ 1:1
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Transcript (Reflector) │
|
||||||
|
│ - Processed audio with transcription │
|
||||||
|
│ - Diarization, summaries, topics │
|
||||||
|
│ - One transcript per recording │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Daily.co vs Reflector Terminology
|
||||||
|
|
||||||
|
### Room
|
||||||
|
|
||||||
|
| Aspect | Daily.co | Reflector |
|
||||||
|
|--------|----------|-----------|
|
||||||
|
| **Definition** | Virtual meeting space on Daily.co platform | User-created meeting template/configuration |
|
||||||
|
| **Lifetime** | Configurable expiration | Persistent until user deletes |
|
||||||
|
| **Creation** | API call for each meeting | Pre-created by user once |
|
||||||
|
| **Reuse** | Can host multiple sessions | Generates new Daily.co room per meeting |
|
||||||
|
| **Name Format** | `room-name` (reusable) | `room-name` (base identifier) |
|
||||||
|
| **Timestamping** | Not required | Meeting adds timestamp: `{name}-YYYYMMDDHHMMSS` |
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```
|
||||||
|
Reflector Room: "daily-private-igor" (persistent config)
|
||||||
|
↓ starts meeting
|
||||||
|
Daily.co Room: "daily-private-igor-20260110042117"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Meeting
|
||||||
|
|
||||||
|
| Aspect | Daily.co | Reflector |
|
||||||
|
|--------|----------|-----------|
|
||||||
|
| **Definition** | Session that starts when first participant joins | Explicit database record of a session |
|
||||||
|
| **Identifier** | `mtgSessionId` (generated by Daily.co) | `meeting.id` (UUID, generated by Reflector) |
|
||||||
|
| **Creation** | Implicit (first participant join) | Explicit API call before participants join |
|
||||||
|
| **Purpose** | Tracks active session state | Links recordings, transcripts, participants |
|
||||||
|
| **Scope** | Per room instance | Per Reflector room + timestamp |
|
||||||
|
|
||||||
|
**Critical Limitation:** Daily.co's recordings API often does NOT return `mtgSessionId`, requiring time-based matching (see [Time-Based Matching](#time-based-matching)).
|
||||||
|
|
||||||
|
### Recording
|
||||||
|
|
||||||
|
| Aspect | Daily.co | Reflector |
|
||||||
|
|--------|----------|-----------|
|
||||||
|
| **Definition** | Audio/video files on S3 | Metadata + processing status |
|
||||||
|
| **Types** | `cloud` (composed video), `raw-tracks` (multitrack) | Stores references + `track_keys` array |
|
||||||
|
| **Multiplicity** | One recording object per start/stop cycle | One DB row per Daily.co recording object |
|
||||||
|
| **Identifier** | Daily.co `recording_id` | Same `recording_id` (stored in DB) |
|
||||||
|
| **Multitrack** | Array of `.webm` files (one per participant) | `track_keys` JSON array with S3 paths |
|
||||||
|
| **Linkage** | Via `room_name` + `start_ts` | FK `meeting_id` (set via time-based match) |
|
||||||
|
|
||||||
|
**Critical Behavior:** Recording **stops/restarts** create **separate recording objects** with unique IDs.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Entity Relationships
|
||||||
|
|
||||||
|
### Database Schema Relationships
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Simplified schema showing key relationships
|
||||||
|
|
||||||
|
TABLE room (
|
||||||
|
id VARCHAR PRIMARY KEY,
|
||||||
|
name VARCHAR UNIQUE,
|
||||||
|
platform VARCHAR -- 'whereby' | 'daily'
|
||||||
|
)
|
||||||
|
|
||||||
|
TABLE meeting (
|
||||||
|
id VARCHAR PRIMARY KEY,
|
||||||
|
room_id VARCHAR REFERENCES room(id) ON DELETE CASCADE, -- nullable
|
||||||
|
room_name VARCHAR, -- Daily.co room name (timestamped)
|
||||||
|
start_date TIMESTAMP,
|
||||||
|
platform VARCHAR
|
||||||
|
)
|
||||||
|
|
||||||
|
TABLE recording (
|
||||||
|
id VARCHAR PRIMARY KEY, -- Daily.co recording_id
|
||||||
|
meeting_id VARCHAR, -- FK to meeting (set via time-based match)
|
||||||
|
bucket_name VARCHAR,
|
||||||
|
object_key VARCHAR, -- S3 prefix
|
||||||
|
track_keys JSON, -- Array of S3 keys for multitrack
|
||||||
|
recorded_at TIMESTAMP
|
||||||
|
)
|
||||||
|
|
||||||
|
TABLE transcript (
|
||||||
|
id VARCHAR PRIMARY KEY,
|
||||||
|
recording_id VARCHAR, -- nullable FK
|
||||||
|
meeting_id VARCHAR, -- nullable FK
|
||||||
|
room_id VARCHAR, -- nullable FK
|
||||||
|
participants JSON, -- [{id, speaker, name, user_id}, ...]
|
||||||
|
title VARCHAR,
|
||||||
|
long_summary VARCHAR,
|
||||||
|
webvtt TEXT
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Relationship Cardinalities:**
|
||||||
|
```
|
||||||
|
1 Room → N Meetings
|
||||||
|
1 Meeting → N Recordings (common: 1-21 recordings per meeting)
|
||||||
|
1 Recording → 1 Transcript
|
||||||
|
1 Meeting → N Transcripts (via recordings)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recording Multiplicity
|
||||||
|
|
||||||
|
### Why Multiple Recordings Per Meeting?
|
||||||
|
|
||||||
|
Daily.co creates a **new recording object** (new ID, new files) whenever recording stops and restarts. This happens due to:
|
||||||
|
|
||||||
|
1. **Manual stop/start** - User clicks stop, then start recording again
|
||||||
|
2. **Network reconnection** - Participant drops, reconnects → triggers restart
|
||||||
|
3. **Participant rejoin** - Last participant leaves, new one joins → new session
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Session Identifiers Explained
|
||||||
|
|
||||||
|
### The Hidden Entity: Daily.co Meeting Session
|
||||||
|
|
||||||
|
Daily.co has an **implicit ephemeral entity** that sits between Room and Recording:
|
||||||
|
|
||||||
|
```
|
||||||
|
Daily.co Room: "daily-private-igor-20260110042117"
|
||||||
|
│
|
||||||
|
├─ Daily.co Meeting Session #1 (mtgSessionId: c04334de...)
|
||||||
|
│ └─ Recording #3 (f4a50f94) - 4s, 1 track
|
||||||
|
│
|
||||||
|
└─ Daily.co Meeting Session #2 (mtgSessionId: 4cdae3c0...)
|
||||||
|
├─ Recording #2 (b0fa94da) - 80s, 2 tracks ← recording stopped
|
||||||
|
└─ Recording #1 (05edf519) - 62s, 1 track ← then restarted
|
||||||
|
```
|
||||||
|
|
||||||
|
**Daily.co Meeting Session:**
|
||||||
|
- **Lifecycle:** Starts when first participant joins, ends when last participant leaves
|
||||||
|
- **Identifier:** `mtgSessionId` (generated by Daily.co)
|
||||||
|
- **Persistence:** Ephemeral - new ID if everyone leaves and someone rejoins
|
||||||
|
- **Relationship:** 1 Session → N Recordings (if recording stops/restarts during session)
|
||||||
|
|
||||||
|
**Key Insight:** Multiple recordings can share the same `mtgSessionId` if recording was stopped and restarted while participants remained connected.
|
||||||
|
|
||||||
|
### mtgSessionId (Meeting Session Identifier)
|
||||||
|
|
||||||
|
`mtgSessionId` identifies a **Daily.co meeting session** (not individual participants, not a room).
|
||||||
|
|
||||||
|
### session_id (Per-Participant)
|
||||||
|
|
||||||
|
**Different concept:** Per-participant connection identifier from webhooks.
|
||||||
|
|
||||||
|
**Reflector Tracking:** `daily_participant_session` table
|
||||||
|
```sql
|
||||||
|
TABLE daily_participant_session (
|
||||||
|
id VARCHAR PRIMARY KEY, -- {meeting_id}:{user_id}:{joined_at_ms}
|
||||||
|
meeting_id VARCHAR,
|
||||||
|
session_id VARCHAR, -- From webhook (per-participant)
|
||||||
|
user_id VARCHAR,
|
||||||
|
user_name VARCHAR,
|
||||||
|
joined_at TIMESTAMP,
|
||||||
|
left_at TIMESTAMP
|
||||||
|
)
|
||||||
|
```
|
||||||
|
---
|
||||||
|
|
||||||
|
## Time-Based Matching
|
||||||
|
|
||||||
|
### Problem Statement
|
||||||
|
|
||||||
|
Daily.co's recordings API does not reliably return `mtgSessionId`, making it impossible to directly link recordings to meetings via Daily.co's identifiers.
|
||||||
|
|
||||||
|
**Example API response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "recording-uuid",
|
||||||
|
"room_name": "daily-private-igor-20260110042117",
|
||||||
|
"start_ts": 1768018896,
|
||||||
|
"mtgSessionId": null ← Missing!
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Solution: Time-Based Matching
|
||||||
|
|
||||||
|
**Implementation:** `reflector/db/meetings.py:get_by_room_name_and_time()`
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Multitrack Recording Details
|
||||||
|
|
||||||
|
### track_keys JSON Array
|
||||||
|
|
||||||
|
**Schema:** `recording.track_keys` (JSON, nullable)
|
||||||
|
```sql
|
||||||
|
-- Example recording with 2 audio tracks
|
||||||
|
{
|
||||||
|
"id": "b0fa94da-73b5-4f95-9239-5216a682a505",
|
||||||
|
"track_keys": [
|
||||||
|
"igormonadical/daily-private-igor-20260110042117/1768018896877-890c0eae-e186-4534-a7bd-7c794b7d6d7f-cam-audio-1768018914565",
|
||||||
|
"igormonadical/daily-private-igor-20260110042117/1768018896877-9660e8e9-4297-4f17-951d-0b2bf2401803-cam-audio-1768018899286"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Semantics:**
|
||||||
|
- `track_keys = null` → Not multitrack (cloud recording)
|
||||||
|
- `track_keys = []` → Multitrack (raw-tracks) recording with no audio captured (silence/muted); note that the `is_multitrack` property below returns `False` for an empty list
|
||||||
|
- `track_keys = [...]` → Multitrack with N audio tracks
|
||||||
|
|
||||||
|
**Property:** `recording.is_multitrack` (Python)
|
||||||
|
```python
|
||||||
|
@property
|
||||||
|
def is_multitrack(self) -> bool:
|
||||||
|
return self.track_keys is not None and len(self.track_keys) > 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### Track Filename Format
|
||||||
|
|
||||||
|
Daily.co multitrack filenames encode timing and participant information:
|
||||||
|
|
||||||
|
**Format:** `{recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}`
|
||||||
|
|
||||||
|
**Example:** `1768018896877-890c0eae-e186-4534-a7bd-7c794b7d6d7f-cam-audio-1768018914565`
|
||||||
|
|
||||||
|
**Parsed Components:**
|
||||||
|
```python
|
||||||
|
# reflector/utils/daily.py:25-60
|
||||||
|
class DailyRecordingFilename(NamedTuple):
|
||||||
|
recording_start_ts: int # 1768018896877 (milliseconds)
|
||||||
|
participant_id: str # 890c0eae-e186-4534-a7bd-7c794b7d6d7f
|
||||||
|
track_start_ts: int # 1768018914565 (milliseconds)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** Browser downloads from S3 add `.webm` extension due to MIME headers, but S3 object keys have no extension.
|
||||||
|
|
||||||
|
### Video Track Filtering
|
||||||
|
|
||||||
|
Daily.co API returns both audio and video tracks, but Reflector only processes audio.
|
||||||
|
|
||||||
|
**Filtering Logic:** `reflector/worker/process.py:660`
|
||||||
|
```python
|
||||||
|
track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example API Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tracks": [
|
||||||
|
{"type": "audio", "s3Key": "...cam-audio-1768018914565"},
|
||||||
|
{"type": "audio", "s3Key": "...cam-audio-1768018899286"},
|
||||||
|
{"type": "video", "s3Key": "...cam-video-1768018897095"} ← Filtered out
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Result:** Only 2 audio tracks stored in `recording.track_keys`, video track discarded.
|
||||||
|
|
||||||
|
**Rationale:** Reflector is an audio transcription system; video is not needed for processing.
|
||||||
|
|
||||||
|
### Track-to-Participant Mapping
|
||||||
|
|
||||||
|
**Flow:**
|
||||||
|
1. Daily.co webhook/polling provides `track_keys` array
|
||||||
|
2. Each track filename contains `participant_id`
|
||||||
|
3. Reflector queries Daily.co API: `GET /meetings/{mtgSessionId}/participants`
|
||||||
|
4. Maps `participant_id` → `user_name`
|
||||||
|
5. Stores in `transcript.participants` JSON:
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "890c0eae-e186-4534-a7bd-7c794b7d6d7f",
|
||||||
|
"speaker": 0,
|
||||||
|
"name": "test2",
|
||||||
|
"user_id": "907f2cc1-eaab-435f-8ee2-09185f416b22"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "9660e8e9-4297-4f17-951d-0b2bf2401803",
|
||||||
|
"speaker": 1,
|
||||||
|
"name": "test",
|
||||||
|
"user_id": "907f2cc1-eaab-435f-8ee2-09185f416b22"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Diarization:** Multitrack recordings don't need speaker diarization AI — speaker identity comes from separate audio tracks.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
### Meeting: daily-private-igor-20260110042117
|
||||||
|
|
||||||
|
**Context:** A user conducted a test recording with start/stop cycles, producing 3 recordings.
|
||||||
|
|
||||||
|
#### Database State
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Meeting
|
||||||
|
id: 034804b8-cee2-4fb4-94d7-122f6f068a61
|
||||||
|
room_name: daily-private-igor-20260110042117
|
||||||
|
start_date: 2026-01-10 04:21:17+00
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Daily.co API Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": "f4a50f94-053c-4f9d-bda6-78ad051fbc36",
|
||||||
|
"room_name": "daily-private-igor-20260110042117",
|
||||||
|
"start_ts": 1768018885,
|
||||||
|
"duration": 4,
|
||||||
|
"status": "finished",
|
||||||
|
"mtgSessionId": "c04334de-42a0-4c2a-96be-a49b068dca85",
|
||||||
|
"tracks": [
|
||||||
|
{"type": "audio", "s3Key": "...62e8f3ae...cam-audio-1768018885417"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "b0fa94da-73b5-4f95-9239-5216a682a505",
|
||||||
|
"room_name": "daily-private-igor-20260110042117",
|
||||||
|
"start_ts": 1768018896,
|
||||||
|
"duration": 80,
|
||||||
|
"status": "finished",
|
||||||
|
"mtgSessionId": "4cdae3c0-86cb-4578-8a6d-3a228bb48345",
|
||||||
|
"tracks": [
|
||||||
|
{"type": "audio", "s3Key": "...890c0eae...cam-audio-1768018914565"},
|
||||||
|
{"type": "audio", "s3Key": "...9660e8e9...cam-audio-1768018899286"},
|
||||||
|
{"type": "video", "s3Key": "...9660e8e9...cam-video-1768018897095"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "05edf519-9048-4b49-9a75-73e9826fd950",
|
||||||
|
"room_name": "daily-private-igor-20260110042117",
|
||||||
|
"start_ts": 1768018914,
|
||||||
|
"duration": 62,
|
||||||
|
"status": "finished",
|
||||||
|
"mtgSessionId": "4cdae3c0-86cb-4578-8a6d-3a228bb48345",
|
||||||
|
"tracks": [
|
||||||
|
{"type": "audio", "s3Key": "...890c0eae...cam-audio-1768018914948"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key Observations:**
|
||||||
|
- 3 recording objects returned by Daily.co
|
||||||
|
- 2 different `mtgSessionId` values (2 different Daily.co meeting sessions)
|
||||||
|
- Recording #2 has 3 tracks (2 audio + 1 video)
|
||||||
|
- Timestamps: 1768018885 → 1768018896 (+11s) → 1768018914 (+18s)
|
||||||
|
|
||||||
|
#### Reflector Database
|
||||||
|
|
||||||
|
**Recordings:**
|
||||||
|
```
|
||||||
|
┌──────────────────────────────────────┬──────────────┬────────────┬──────────────────────────────────────┐
|
||||||
|
│ id │ track_count │ duration │ mtgSessionId │
|
||||||
|
├──────────────────────────────────────┼──────────────┼────────────┼──────────────────────────────────────┤
|
||||||
|
│ f4a50f94-053c-4f9d-bda6-78ad051fbc36 │ 1 │ 4s │ c04334de-42a0-4c2a-96be-a49b068dca85 │
|
||||||
|
│ b0fa94da-73b5-4f95-9239-5216a682a505 │ 2 (video=0) │ 80s │ 4cdae3c0-86cb-4578-8a6d-3a228bb48345 │
|
||||||
|
│ 05edf519-9048-4b49-9a75-73e9826fd950 │ 1 │ 62s │ 4cdae3c0-86cb-4578-8a6d-3a228bb48345 │
|
||||||
|
└──────────────────────────────────────┴──────────────┴────────────┴──────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
**Note:** Recording #2 has 2 audio tracks (video filtered out), not 3.
|
||||||
|
|
||||||
|
**Transcripts:**
|
||||||
|
```
|
||||||
|
┌──────────────────────────────────────┬──────────────────────────────────────┬──────────────┬──────────────────────────────────────────────┐
|
||||||
|
│ id │ recording_id │ participants │ title │
|
||||||
|
├──────────────────────────────────────┼──────────────────────────────────────┼──────────────┼──────────────────────────────────────────────┤
|
||||||
|
│ 17149b1f-546c-4837-80a0-f8140bd16592 │ f4a50f94-053c-4f9d-bda6-78ad051fbc36 │ 1 (test) │ (empty - no speech) │
|
||||||
|
│ 49801332-3222-4c11-bdb2-375479fc87f2 │ b0fa94da-73b5-4f95-9239-5216a682a505 │ 2 (test, │ "Examination and Validation Procedures │
|
||||||
|
│ │ │ test2) │ Review" │
|
||||||
|
│ e5271e12-20fb-42d2-b5a8-21438abadef9 │ 05edf519-9048-4b49-9a75-73e9826fd950 │ 1 (test2) │ "Technical Sound Check Procedure Review" │
|
||||||
|
└──────────────────────────────────────┴──────────────────────────────────────┴──────────────┴──────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
**Transcript Content:**
|
||||||
|
|
||||||
|
*Transcript #1* (17149b1f): Empty WebVTT (no audio captured)
|
||||||
|
|
||||||
|
*Transcript #2* (49801332):
|
||||||
|
```webvtt
|
||||||
|
WEBVTT
|
||||||
|
|
||||||
|
00:00:03.109 --> 00:00:05.589
|
||||||
|
<v Speaker1>Test, test, test. Test, test, test, test, test.
|
||||||
|
|
||||||
|
00:00:19.829 --> 00:00:22.710
|
||||||
|
<v Speaker0>Test test test test test test test test test test test.
|
||||||
|
```
|
||||||
|
**AI-Generated Summary:**
|
||||||
|
> "The meeting focused on the critical importance of rigorous testing for ensuring reliability and quality, with test and test2 emphasizing the need for a structured testing framework and meticulous documentation..."
|
||||||
|
|
||||||
|
*Transcript #3* (e5271e12):
|
||||||
|
```webvtt
|
||||||
|
WEBVTT
|
||||||
|
|
||||||
|
00:00:02.029 --> 00:00:04.910
|
||||||
|
<v Speaker0>Test, test, test, test, test, test, test, test, test, test, test.
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Validation: track_keys → participants
|
||||||
|
|
||||||
|
**Recording #2 (b0fa94da) tracks:**
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
".../890c0eae-e186-4534-a7bd-7c794b7d6d7f-cam-audio-...",
|
||||||
|
".../9660e8e9-4297-4f17-951d-0b2bf2401803-cam-audio-..."
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Transcript #2 (49801332) participants:**
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{"id": "890c0eae-e186-4534-a7bd-7c794b7d6d7f", "speaker": 0, "name": "test2"},
|
||||||
|
{"id": "9660e8e9-4297-4f17-951d-0b2bf2401803", "speaker": 1, "name": "test"}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Daily.co API: 3 recordings
|
||||||
|
↓
|
||||||
|
Polling: _poll_raw_tracks_recordings()
|
||||||
|
↓
|
||||||
|
Worker: process_multitrack_recording.delay() × 3
|
||||||
|
↓
|
||||||
|
DB: 3 recording rows created
|
||||||
|
↓
|
||||||
|
Pipeline: Audio processing + transcription × 3
|
||||||
|
↓
|
||||||
|
DB: 3 transcript rows created (1:1 with recordings)
|
||||||
|
↓
|
||||||
|
UI: User sees 3 separate transcripts
|
||||||
|
```
|
||||||
|
|
||||||
|
**Result:** ✅ 1:1 Recording → Transcript relationship maintained.
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
**Document Version:** 1.0
|
||||||
|
**Last Verified:** 2026-01-15
|
||||||
|
**Data Source:** Production database + Daily.co API inspection
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
"""add cloud recording support
|
||||||
|
|
||||||
|
Revision ID: 1b1e6a6fc465
|
||||||
|
Revises: bd3a729bb379
|
||||||
|
Create Date: 2026-01-09 17:17:33.535620
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "1b1e6a6fc465"
|
||||||
|
down_revision: Union[str, None] = "bd3a729bb379"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the nullable composed-video S3 key and duration columns to ``meeting``."""
    new_columns = (
        sa.Column("daily_composed_video_s3_key", sa.String(), nullable=True),
        sa.Column("daily_composed_video_duration", sa.Integer(), nullable=True),
    )
    with op.batch_alter_table("meeting", schema=None) as batch_op:
        for column in new_columns:
            batch_op.add_column(column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the composed-video columns from ``meeting`` in reverse order of creation."""
    columns_to_drop = (
        "daily_composed_video_duration",
        "daily_composed_video_s3_key",
    )
    with op.batch_alter_table("meeting", schema=None) as batch_op:
        for column_name in columns_to_drop:
            batch_op.drop_column(column_name)
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
"""drop_use_celery_column
|
||||||
|
|
||||||
|
Revision ID: 3aa20b96d963
|
||||||
|
Revises: e69f08ead8ea
|
||||||
|
Create Date: 2026-02-05 10:12:44.065279
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "3aa20b96d963"
|
||||||
|
down_revision: Union[str, None] = "e69f08ead8ea"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Remove the ``use_celery`` flag from the ``room`` table."""
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.drop_column("use_celery")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Re-create ``room.use_celery`` as a NOT NULL boolean defaulting to false."""
    # The server default lets existing rows satisfy the NOT NULL constraint.
    use_celery_column = sa.Column(
        "use_celery",
        sa.Boolean(),
        server_default=sa.text("false"),
        nullable=False,
    )
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.add_column(use_celery_column)
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
"""merge cloud recording and celery heads
|
||||||
|
|
||||||
|
Revision ID: e69f08ead8ea
|
||||||
|
Revises: 1b1e6a6fc465, 80beb1ea3269
|
||||||
|
Create Date: 2026-01-21 21:39:10.326841
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "e69f08ead8ea"
|
||||||
|
down_revision: Union[str, None] = ("1b1e6a6fc465", "80beb1ea3269")
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """No schema change: this revision only merges two migration heads."""
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """No schema change: un-merging the heads requires no database work."""
    return None
|
||||||
@@ -8,7 +8,7 @@ readme = "README.md"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"aiohttp>=3.9.0",
|
"aiohttp>=3.9.0",
|
||||||
"aiohttp-cors>=0.7.0",
|
"aiohttp-cors>=0.7.0",
|
||||||
"av>=10.0.0",
|
"av>=15.0.0",
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"aiortc>=1.5.0",
|
"aiortc>=1.5.0",
|
||||||
"sortedcontainers>=2.4.0",
|
"sortedcontainers>=2.4.0",
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ Daily.co API Module
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Client
|
# Client
|
||||||
from .client import DailyApiClient, DailyApiError
|
from .client import DailyApiClient, DailyApiError, RecordingType
|
||||||
|
|
||||||
# Request models
|
# Request models
|
||||||
from .requests import (
|
from .requests import (
|
||||||
@@ -64,6 +64,7 @@ __all__ = [
|
|||||||
# Client
|
# Client
|
||||||
"DailyApiClient",
|
"DailyApiClient",
|
||||||
"DailyApiError",
|
"DailyApiError",
|
||||||
|
"RecordingType",
|
||||||
# Requests
|
# Requests
|
||||||
"CreateRoomRequest",
|
"CreateRoomRequest",
|
||||||
"RoomProperties",
|
"RoomProperties",
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ Reference: https://docs.daily.co/reference/rest-api
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
from typing import Any
|
from typing import Any, Literal
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import structlog
|
import structlog
|
||||||
@@ -32,6 +33,8 @@ from .responses import (
|
|||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
|
RecordingType = Literal["cloud", "raw-tracks"]
|
||||||
|
|
||||||
|
|
||||||
class DailyApiError(Exception):
|
class DailyApiError(Exception):
|
||||||
"""Daily.co API error with full request/response context."""
|
"""Daily.co API error with full request/response context."""
|
||||||
@@ -395,6 +398,38 @@ class DailyApiClient:
|
|||||||
|
|
||||||
return [RecordingResponse(**r) for r in data["data"]]
|
return [RecordingResponse(**r) for r in data["data"]]
|
||||||
|
|
||||||
|
async def start_recording(
    self,
    room_name: NonEmptyString,
    recording_type: RecordingType,
    instance_id: UUID,
) -> dict[str, Any]:
    """Ask Daily.co to start a recording in a room.

    Reference: https://docs.daily.co/reference/rest-api/rooms/recordings/start

    Args:
        room_name: Daily.co room name (interpolated into the request path).
        recording_type: Recording type sent as the ``type`` field.
        instance_id: UUID sent, stringified, as the ``instanceId`` field.

    Returns:
        Whatever ``_handle_response`` produces for the Daily.co reply.

    Raises:
        DailyApiError: If API request fails
    """
    http_client = await self._get_client()
    endpoint = f"{self.base_url}/rooms/{room_name}/recordings/start"
    payload = {
        "type": recording_type,
        "instanceId": str(instance_id),
    }
    response = await http_client.post(endpoint, headers=self.headers, json=payload)
    return await self._handle_response(response, "start_recording")
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# MEETING TOKENS
|
# MEETING TOKENS
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|||||||
37
server/reflector/dailyco_api/instance_id.py
Normal file
37
server/reflector/dailyco_api/instance_id.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
"""
|
||||||
|
Daily.co recording instanceId generation utilities.
|
||||||
|
|
||||||
|
Deterministic instance ID generation for cloud and raw-tracks recordings.
|
||||||
|
MUST match frontend logic
|
||||||
|
"""
|
||||||
|
|
||||||
|
from uuid import UUID, uuid5
|
||||||
|
|
||||||
|
from reflector.utils.string import NonEmptyString
|
||||||
|
|
||||||
|
# Namespace UUID for UUIDv5 generation of raw-tracks instanceIds
|
||||||
|
# DO NOT CHANGE: Breaks instanceId determinism across deployments and frontend/backend matching
|
||||||
|
RAW_TRACKS_NAMESPACE = UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_cloud_instance_id(meeting_id: NonEmptyString) -> UUID:
    """
    Build the instanceId used for a meeting's cloud recording.

    The meeting ID itself is parsed as a UUID and used unchanged, so the same
    meeting always maps to the same cloud instanceId. ``uuid.UUID`` raises
    ``ValueError`` if ``meeting_id`` is not a valid UUID string.
    """
    cloud_instance_id = UUID(hex=meeting_id)
    return cloud_instance_id
|
||||||
|
|
||||||
|
|
||||||
|
def generate_raw_tracks_instance_id(meeting_id: NonEmptyString) -> UUID:
    """
    Build the instanceId used for a meeting's raw-tracks recording.

    The ID is the UUIDv5 of ``meeting_id`` under ``RAW_TRACKS_NAMESPACE``:
    deterministic for a given meeting, yet distinct from the cloud instanceId
    (which is the meeting ID itself). The original notes state that Daily.co
    needs different instanceIds for the two recording types to run concurrently.
    """
    raw_tracks_instance_id = uuid5(RAW_TRACKS_NAMESPACE, meeting_id)
    return raw_tracks_instance_id
|
||||||
@@ -88,13 +88,6 @@ class MeetingTokenProperties(BaseModel):
|
|||||||
is_owner: bool = Field(
|
is_owner: bool = Field(
|
||||||
default=False, description="Grant owner privileges to token holder"
|
default=False, description="Grant owner privileges to token holder"
|
||||||
)
|
)
|
||||||
start_cloud_recording: bool = Field(
|
|
||||||
default=False, description="Automatically start cloud recording on join"
|
|
||||||
)
|
|
||||||
start_cloud_recording_opts: dict | None = Field(
|
|
||||||
default=None,
|
|
||||||
description="Options for startRecording when start_cloud_recording is true (e.g., maxDuration)",
|
|
||||||
)
|
|
||||||
enable_recording_ui: bool = Field(
|
enable_recording_ui: bool = Field(
|
||||||
default=True, description="Show recording controls in UI"
|
default=True, description="Show recording controls in UI"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -116,6 +116,7 @@ class RecordingS3Info(BaseModel):
|
|||||||
|
|
||||||
bucket_name: NonEmptyString
|
bucket_name: NonEmptyString
|
||||||
bucket_region: NonEmptyString
|
bucket_region: NonEmptyString
|
||||||
|
key: NonEmptyString | None = None
|
||||||
endpoint: NonEmptyString | None = None
|
endpoint: NonEmptyString | None = None
|
||||||
|
|
||||||
|
|
||||||
@@ -132,6 +133,9 @@ class RecordingResponse(BaseModel):
|
|||||||
id: NonEmptyString = Field(description="Recording identifier")
|
id: NonEmptyString = Field(description="Recording identifier")
|
||||||
room_name: NonEmptyString = Field(description="Room where recording occurred")
|
room_name: NonEmptyString = Field(description="Room where recording occurred")
|
||||||
start_ts: int = Field(description="Recording start timestamp (Unix epoch seconds)")
|
start_ts: int = Field(description="Recording start timestamp (Unix epoch seconds)")
|
||||||
|
type: Literal["cloud", "raw-tracks"] | None = Field(
|
||||||
|
None, description="Recording type (may be missing from API)"
|
||||||
|
)
|
||||||
status: RecordingStatus = Field(
|
status: RecordingStatus = Field(
|
||||||
description="Recording status ('in-progress' or 'finished')"
|
description="Recording status ('in-progress' or 'finished')"
|
||||||
)
|
)
|
||||||
@@ -145,6 +149,9 @@ class RecordingResponse(BaseModel):
|
|||||||
None, description="Token for sharing recording"
|
None, description="Token for sharing recording"
|
||||||
)
|
)
|
||||||
s3: RecordingS3Info | None = Field(None, description="S3 bucket information")
|
s3: RecordingS3Info | None = Field(None, description="S3 bucket information")
|
||||||
|
s3key: NonEmptyString | None = Field(
|
||||||
|
None, description="S3 key for cloud recordings (top-level field)"
|
||||||
|
)
|
||||||
tracks: list[DailyTrack] = Field(
|
tracks: list[DailyTrack] = Field(
|
||||||
default_factory=list,
|
default_factory=list,
|
||||||
description="Track list for raw-tracks recordings (always array, never null)",
|
description="Track list for raw-tracks recordings (always array, never null)",
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from typing import Any, Literal
|
from typing import Any, Literal
|
||||||
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
@@ -9,7 +9,7 @@ from reflector.db import get_database, metadata
|
|||||||
from reflector.db.rooms import Room
|
from reflector.db.rooms import Room
|
||||||
from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
|
from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
|
||||||
from reflector.utils import generate_uuid4
|
from reflector.utils import generate_uuid4
|
||||||
from reflector.utils.string import assert_equal
|
from reflector.utils.string import NonEmptyString, assert_equal
|
||||||
|
|
||||||
meetings = sa.Table(
|
meetings = sa.Table(
|
||||||
"meeting",
|
"meeting",
|
||||||
@@ -63,6 +63,9 @@ meetings = sa.Table(
|
|||||||
nullable=False,
|
nullable=False,
|
||||||
server_default=assert_equal(WHEREBY_PLATFORM, "whereby"),
|
server_default=assert_equal(WHEREBY_PLATFORM, "whereby"),
|
||||||
),
|
),
|
||||||
|
# Daily.co composed video (Brady Bunch grid layout) - Daily.co only, not Whereby
|
||||||
|
sa.Column("daily_composed_video_s3_key", sa.String, nullable=True),
|
||||||
|
sa.Column("daily_composed_video_duration", sa.Integer, nullable=True),
|
||||||
sa.Index("idx_meeting_room_id", "room_id"),
|
sa.Index("idx_meeting_room_id", "room_id"),
|
||||||
sa.Index("idx_meeting_calendar_event", "calendar_event_id"),
|
sa.Index("idx_meeting_calendar_event", "calendar_event_id"),
|
||||||
)
|
)
|
||||||
@@ -110,6 +113,9 @@ class Meeting(BaseModel):
|
|||||||
calendar_event_id: str | None = None
|
calendar_event_id: str | None = None
|
||||||
calendar_metadata: dict[str, Any] | None = None
|
calendar_metadata: dict[str, Any] | None = None
|
||||||
platform: Platform = WHEREBY_PLATFORM
|
platform: Platform = WHEREBY_PLATFORM
|
||||||
|
# Daily.co composed video (Brady Bunch grid) - Daily.co only
|
||||||
|
daily_composed_video_s3_key: str | None = None
|
||||||
|
daily_composed_video_duration: int | None = None
|
||||||
|
|
||||||
|
|
||||||
class MeetingController:
|
class MeetingController:
|
||||||
@@ -171,6 +177,90 @@ class MeetingController:
|
|||||||
return None
|
return None
|
||||||
return Meeting(**result)
|
return Meeting(**result)
|
||||||
|
|
||||||
|
async def get_by_room_name_all(self, room_name: str) -> list[Meeting]:
    """Return every meeting stored under ``room_name`` (no ordering, no limit)."""
    rows = await get_database().fetch_all(
        meetings.select().where(meetings.c.room_name == room_name)
    )
    return [Meeting(**row) for row in rows]
|
||||||
|
|
||||||
|
async def get_by_room_name_and_time(
    self,
    room_name: NonEmptyString,
    recording_start: datetime,
    time_window_hours: int = 168,
) -> Meeting | None:
    """
    Find the meeting in a room whose start time is nearest to a recording.

    HACK ALERT: per the original notes, Daily.co does not return instanceId in
    its recordings API response and mtgSessionId is unrelated to our
    instanceId, so matching on time is the least-bad workaround. It also
    copes with duplicate ``room_name`` rows (race conditions, double-clicks).

    Algorithm:
        1. Select meetings of ``room_name`` whose ``start_date`` lies within
           ``time_window_hours`` before or after ``recording_start``.
        2. Pick the one with the smallest absolute time difference.
        3. On a tie, pick the smallest ``meeting.id`` (deterministic).

    Args:
        room_name: Daily.co room name from recording
        recording_start: Timezone-aware datetime from recording.start_ts
        time_window_hours: Half-width of the search window
            (default 168 = 1 week each side)

    Returns:
        The closest meeting, or None when no meeting falls inside the window.

    Raises:
        ValueError: If ``recording_start`` is a naive datetime.

    Why a 1 week window (from the original notes): it tolerates late
    discovery after webhook failures and clock skew, while still rejecting
    unrelated meetings from weeks ago.
    """
    if recording_start.tzinfo is None:
        raise ValueError(
            f"recording_start must be timezone-aware, got naive datetime: {recording_start}"
        )

    half_window = timedelta(hours=time_window_hours)
    inside_window = sa.and_(
        meetings.c.room_name == room_name,
        meetings.c.start_date >= recording_start - half_window,
        meetings.c.start_date <= recording_start + half_window,
    )
    rows = await get_database().fetch_all(
        meetings.select().where(inside_window).order_by(meetings.c.start_date)
    )
    if not rows:
        return None

    def distance_then_id(meeting: Meeting):
        # Primary key: seconds away from the recording; secondary: id tie-break.
        seconds_apart = abs((meeting.start_date - recording_start).total_seconds())
        return (seconds_apart, meeting.id)

    return min([Meeting(**row) for row in rows], key=distance_then_id)
|
||||||
|
|
||||||
async def get_active(self, room: Room, current_time: datetime) -> Meeting | None:
|
async def get_active(self, room: Room, current_time: datetime) -> Meeting | None:
|
||||||
"""
|
"""
|
||||||
Get latest active meeting for a room.
|
Get latest active meeting for a room.
|
||||||
@@ -260,6 +350,44 @@ class MeetingController:
|
|||||||
query = meetings.update().where(meetings.c.id == meeting_id).values(**kwargs)
|
query = meetings.update().where(meetings.c.id == meeting_id).values(**kwargs)
|
||||||
await get_database().execute(query)
|
await get_database().execute(query)
|
||||||
|
|
||||||
|
async def set_cloud_recording_if_missing(
    self,
    meeting_id: NonEmptyString,
    s3_key: NonEmptyString,
    duration: int,
) -> bool:
    """
    Store the cloud recording key/duration only if no key is stored yet.

    The decision and the write happen in one conditional UPDATE
    (``WHERE id = ... AND daily_composed_video_s3_key IS NULL``), and the
    return value is taken from that statement's RETURNING clause. A separate
    read-before-write would let two concurrent callers (webhook and polling)
    both observe NULL and both report success, although only one of them
    actually wrote.

    Args:
        meeting_id: ID of the meeting row to update.
        s3_key: S3 key of the composed video.
        duration: Duration value stored in ``daily_composed_video_duration``.

    Returns:
        True if this call wrote the values; False if the meeting does not
        exist or already had a key.
    """
    query = (
        meetings.update()
        .where(
            sa.and_(
                meetings.c.id == meeting_id,
                meetings.c.daily_composed_video_s3_key.is_(None),
            )
        )
        .values(
            daily_composed_video_s3_key=s3_key,
            daily_composed_video_duration=duration,
        )
        # NOTE(review): relies on the backend supporting UPDATE ... RETURNING
        # (e.g. PostgreSQL) — confirm against the deployed database.
        .returning(meetings.c.id)
    )
    updated_row = await get_database().fetch_one(query)

    # A row comes back only when the WHERE clause matched, i.e. we wrote it.
    return updated_row is not None
|
||||||
|
|
||||||
async def increment_num_clients(self, meeting_id: str) -> None:
|
async def increment_num_clients(self, meeting_id: str) -> None:
|
||||||
"""Atomically increment participant count."""
|
"""Atomically increment participant count."""
|
||||||
query = (
|
query = (
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from sqlalchemy import or_
|
|||||||
|
|
||||||
from reflector.db import get_database, metadata
|
from reflector.db import get_database, metadata
|
||||||
from reflector.utils import generate_uuid4
|
from reflector.utils import generate_uuid4
|
||||||
|
from reflector.utils.string import NonEmptyString
|
||||||
|
|
||||||
recordings = sa.Table(
|
recordings = sa.Table(
|
||||||
"recording",
|
"recording",
|
||||||
@@ -71,6 +72,19 @@ class RecordingController:
|
|||||||
query = recordings.delete().where(recordings.c.id == id)
|
query = recordings.delete().where(recordings.c.id == id)
|
||||||
await get_database().execute(query)
|
await get_database().execute(query)
|
||||||
|
|
||||||
|
async def set_meeting_id(
    self,
    recording_id: NonEmptyString,
    meeting_id: NonEmptyString,
) -> None:
    """Attach a recording to a meeting by writing ``meeting_id`` on its row."""
    link_statement = (
        recordings.update()
        .where(recordings.c.id == recording_id)
        .values(meeting_id=meeting_id)
    )
    await get_database().execute(link_statement)
|
||||||
|
|
||||||
# no check for existence
|
# no check for existence
|
||||||
async def get_by_ids(self, recording_ids: list[str]) -> list[Recording]:
|
async def get_by_ids(self, recording_ids: list[str]) -> list[Recording]:
|
||||||
if not recording_ids:
|
if not recording_ids:
|
||||||
|
|||||||
@@ -57,12 +57,6 @@ rooms = sqlalchemy.Table(
|
|||||||
sqlalchemy.String,
|
sqlalchemy.String,
|
||||||
nullable=False,
|
nullable=False,
|
||||||
),
|
),
|
||||||
sqlalchemy.Column(
|
|
||||||
"use_celery",
|
|
||||||
sqlalchemy.Boolean,
|
|
||||||
nullable=False,
|
|
||||||
server_default=false(),
|
|
||||||
),
|
|
||||||
sqlalchemy.Column(
|
sqlalchemy.Column(
|
||||||
"skip_consent",
|
"skip_consent",
|
||||||
sqlalchemy.Boolean,
|
sqlalchemy.Boolean,
|
||||||
@@ -97,7 +91,6 @@ class Room(BaseModel):
|
|||||||
ics_last_sync: datetime | None = None
|
ics_last_sync: datetime | None = None
|
||||||
ics_last_etag: str | None = None
|
ics_last_etag: str | None = None
|
||||||
platform: Platform = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)
|
platform: Platform = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)
|
||||||
use_celery: bool = False
|
|
||||||
skip_consent: bool = False
|
skip_consent: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Search functionality for transcripts and other entities."""
|
"""Search functionality for transcripts and other entities."""
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
@@ -172,6 +173,9 @@ class SearchResult(BaseModel):
|
|||||||
total_match_count: NonNegativeInt = Field(
|
total_match_count: NonNegativeInt = Field(
|
||||||
default=0, description="Total number of matches found in the transcript"
|
default=0, description="Total number of matches found in the transcript"
|
||||||
)
|
)
|
||||||
|
dag_status: list[dict] | None = Field(
|
||||||
|
default=None, description="Latest DAG task status for processing transcripts"
|
||||||
|
)
|
||||||
|
|
||||||
@field_serializer("created_at", when_used="json")
|
@field_serializer("created_at", when_used="json")
|
||||||
def serialize_datetime(self, dt: datetime) -> str:
|
def serialize_datetime(self, dt: datetime) -> str:
|
||||||
@@ -328,6 +332,42 @@ class SnippetGenerator:
|
|||||||
return summary_snippets + webvtt_snippets, total_matches
|
return summary_snippets + webvtt_snippets, total_matches
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_dag_statuses(transcript_ids: list[str]) -> dict[str, list[dict]]:
|
||||||
|
"""Fetch latest DAG_STATUS event data for given transcript IDs.
|
||||||
|
|
||||||
|
Returns dict mapping transcript_id -> tasks list from the last DAG_STATUS event.
|
||||||
|
"""
|
||||||
|
if not transcript_ids:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
db = get_database()
|
||||||
|
query = sqlalchemy.select(
|
||||||
|
[
|
||||||
|
transcripts.c.id,
|
||||||
|
transcripts.c.events,
|
||||||
|
]
|
||||||
|
).where(transcripts.c.id.in_(transcript_ids))
|
||||||
|
|
||||||
|
rows = await db.fetch_all(query)
|
||||||
|
result: dict[str, list[dict]] = {}
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
events_raw = row["events"]
|
||||||
|
if not events_raw:
|
||||||
|
continue
|
||||||
|
# events is stored as JSON list
|
||||||
|
events = events_raw if isinstance(events_raw, list) else json.loads(events_raw)
|
||||||
|
# Find last DAG_STATUS event
|
||||||
|
for ev in reversed(events):
|
||||||
|
if isinstance(ev, dict) and ev.get("event") == "DAG_STATUS":
|
||||||
|
tasks = ev.get("data", {}).get("tasks")
|
||||||
|
if tasks:
|
||||||
|
result[row["id"]] = tasks
|
||||||
|
break
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
class SearchController:
|
class SearchController:
|
||||||
"""Controller for search operations across different entities."""
|
"""Controller for search operations across different entities."""
|
||||||
|
|
||||||
@@ -470,6 +510,14 @@ class SearchController:
|
|||||||
logger.error(f"Error processing search results: {e}", exc_info=True)
|
logger.error(f"Error processing search results: {e}", exc_info=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
# Enrich processing transcripts with DAG status
|
||||||
|
processing_ids = [r.id for r in results if r.status == "processing"]
|
||||||
|
if processing_ids:
|
||||||
|
dag_statuses = await _fetch_dag_statuses(processing_ids)
|
||||||
|
for r in results:
|
||||||
|
if r.id in dag_statuses:
|
||||||
|
r.dag_status = dag_statuses[r.id]
|
||||||
|
|
||||||
return results, total
|
return results, total
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -234,7 +234,7 @@ class Transcript(BaseModel):
|
|||||||
return dt.isoformat()
|
return dt.isoformat()
|
||||||
|
|
||||||
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
|
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
|
||||||
ev = TranscriptEvent(event=event, data=data.model_dump())
|
ev = TranscriptEvent(event=event, data=data.model_dump(mode="json"))
|
||||||
self.events.append(ev)
|
self.events.append(ev)
|
||||||
return ev
|
return ev
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from reflector.utils.string import NonEmptyString
|
|||||||
from reflector.ws_manager import get_ws_manager
|
from reflector.ws_manager import get_ws_manager
|
||||||
|
|
||||||
# Events that should also be sent to user room (matches Celery behavior)
|
# Events that should also be sent to user room (matches Celery behavior)
|
||||||
USER_ROOM_EVENTS = {"STATUS", "FINAL_TITLE", "DURATION"}
|
USER_ROOM_EVENTS = {"STATUS", "FINAL_TITLE", "DURATION", "DAG_STATUS"}
|
||||||
|
|
||||||
|
|
||||||
async def broadcast_event(
|
async def broadcast_event(
|
||||||
|
|||||||
@@ -35,7 +35,9 @@ LLM_RATE_LIMIT_PER_SECOND = 10
|
|||||||
|
|
||||||
# Task execution timeouts (seconds)
|
# Task execution timeouts (seconds)
|
||||||
TIMEOUT_SHORT = 60 # Quick operations: API calls, DB updates
|
TIMEOUT_SHORT = 60 # Quick operations: API calls, DB updates
|
||||||
TIMEOUT_MEDIUM = 120 # Single LLM calls, waveform generation
|
TIMEOUT_MEDIUM = (
|
||||||
|
300 # Single LLM calls, waveform generation (5m for slow LLM responses)
|
||||||
|
)
|
||||||
TIMEOUT_LONG = 180 # Action items (larger context LLM)
|
TIMEOUT_LONG = 180 # Action items (larger context LLM)
|
||||||
TIMEOUT_AUDIO = 300 # Audio processing: padding, mixdown
|
TIMEOUT_AUDIO = 720 # Audio processing: padding, mixdown
|
||||||
TIMEOUT_HEAVY = 600 # Transcription, fan-out LLM tasks
|
TIMEOUT_HEAVY = 600 # Transcription, fan-out LLM tasks
|
||||||
|
|||||||
230
server/reflector/hatchet/dag_progress.py
Normal file
230
server/reflector/hatchet/dag_progress.py
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
"""
|
||||||
|
DAG Progress Reporting — models and transform.
|
||||||
|
|
||||||
|
Converts Hatchet V1WorkflowRunDetails into structured DagTask list
|
||||||
|
for frontend WebSocket/REST consumption.
|
||||||
|
|
||||||
|
Ported from render_hatchet_run.py (feat-dag-zulip) which renders markdown;
|
||||||
|
this module produces structured Pydantic models instead.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from enum import StrEnum
|
||||||
|
|
||||||
|
from hatchet_sdk.clients.rest.models import (
|
||||||
|
V1TaskStatus,
|
||||||
|
V1WorkflowRunDetails,
|
||||||
|
WorkflowRunShapeItemForWorkflowRunDetails,
|
||||||
|
)
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class DagTaskStatus(StrEnum):
    # Task states used in DAG progress payloads. Each member is the target of
    # exactly one V1TaskStatus entry in _HATCHET_TO_DAG_STATUS; QUEUED is also
    # the fallback used by extract_dag_tasks() when no task/status is known.
    QUEUED = "queued"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
|
||||||
|
|
||||||
|
|
||||||
|
_HATCHET_TO_DAG_STATUS: dict[V1TaskStatus, DagTaskStatus] = {
    # Translation table from Hatchet task statuses to our own enum.
    # extract_dag_tasks() looks statuses up with .get(..., QUEUED), so any
    # Hatchet status not listed here is reported as QUEUED.
    V1TaskStatus.QUEUED: DagTaskStatus.QUEUED,
    V1TaskStatus.RUNNING: DagTaskStatus.RUNNING,
    V1TaskStatus.COMPLETED: DagTaskStatus.COMPLETED,
    V1TaskStatus.FAILED: DagTaskStatus.FAILED,
    V1TaskStatus.CANCELLED: DagTaskStatus.CANCELLED,
}
|
||||||
|
|
||||||
|
|
||||||
|
class DagTask(BaseModel):
    # One workflow step as reported to the frontend; built by extract_dag_tasks().

    # Task name taken from the workflow shape item (task_name).
    name: str
    # Mapped Hatchet status; QUEUED when the step has no task record yet.
    status: DagTaskStatus
    # Copied from the Hatchet task summary; None when no task record exists.
    started_at: datetime | None
    finished_at: datetime | None
    # Hatchet's `duration` divided by 1000.0 (presumably ms -> s; confirm
    # against the Hatchet SDK); None when Hatchet reports no duration.
    duration_seconds: float | None
    # Names (not step ids) of the steps that list this step as a child.
    parents: list[str]
    # One-line summary from _extract_error_summary(); None when no error text.
    error: str | None
    # Fan-out info: number of spawned children and how many of the listed
    # children are COMPLETED. Both None when the task spawned no children.
    children_total: int | None
    children_completed: int | None
    # Always set to None by extract_dag_tasks() in this module.
    progress_pct: float | None
|
||||||
|
|
||||||
|
|
||||||
|
class DagStatusData(BaseModel):
    # Payload of a "DAG_STATUS" transcript event (see broadcast_dag_status()).

    # Hatchet workflow run the snapshot was fetched for.
    workflow_run_id: str
    # Task snapshot in the order returned by extract_dag_tasks().
    tasks: list[DagTask]
|
||||||
|
|
||||||
|
|
||||||
|
def _topo_sort(
    shape: list[WorkflowRunShapeItemForWorkflowRunDetails],
) -> list[str]:
    """Topological sort of step_ids from shape DAG (Kahn's algorithm).

    Among the steps that are ready at any moment, the smallest step_id is
    emitted first, which makes the order deterministic. A min-heap holds the
    ready set, so each step costs O(log n) instead of re-sorting the whole
    list after every insertion.

    Ported from render_hatchet_run.py.

    Args:
        shape: Workflow shape items; only ``step_id`` and
            ``children_step_ids`` are read. Children that are not themselves
            part of ``shape`` are ignored.

    Returns:
        step_ids in dependency order (parents before children). Steps that
        can never become ready (e.g. members of a cycle) are not included.
    """
    import heapq  # noqa: PLC0415

    step_ids = {s.step_id for s in shape}
    children_map: dict[str, list[str]] = {}
    in_degree: dict[str, int] = {sid: 0 for sid in step_ids}

    for s in shape:
        children = [c for c in (s.children_step_ids or []) if c in step_ids]
        children_map[s.step_id] = children
        for c in children:
            in_degree[c] += 1

    # Steps without (known) parents are ready immediately.
    ready = [sid for sid, deg in in_degree.items() if deg == 0]
    heapq.heapify(ready)

    result: list[str] = []
    while ready:
        node = heapq.heappop(ready)
        result.append(node)
        for c in children_map.get(node, []):
            in_degree[c] -= 1
            if in_degree[c] == 0:
                heapq.heappush(ready, c)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_error_summary(error_message: str | None) -> str | None:
|
||||||
|
"""Extract first meaningful line from error message, skipping traceback frames."""
|
||||||
|
if not error_message or not error_message.strip():
|
||||||
|
return None
|
||||||
|
|
||||||
|
err_lines = error_message.strip().split("\n")
|
||||||
|
err_summary = err_lines[0]
|
||||||
|
for line in err_lines:
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped and not stripped.startswith(("Traceback", "File ", "{", ")")):
|
||||||
|
err_summary = stripped
|
||||||
|
return err_summary
|
||||||
|
|
||||||
|
|
||||||
|
def extract_dag_tasks(details: V1WorkflowRunDetails) -> list[DagTask]:
    """Turn Hatchet workflow run details into a list of DagTask models.

    The workflow ``shape`` defines which steps exist and how they depend on
    each other; ``tasks`` carries the runtime state, joined on ``step_id``.

    Args:
        details: Workflow run details as returned by the Hatchet client.

    Returns:
        One DagTask per shape step, in topological order. Steps without a
        task record yet are reported as QUEUED with all runtime fields None.
        An empty list is returned when the run has no shape.
    """
    shape_items = details.shape or []
    task_summaries = details.tasks or []

    if not shape_items:
        return []

    name_of: dict[str, str] = {item.step_id: item.task_name for item in shape_items}

    # Shape items only list children; invert the edges to get parent names.
    parent_names: dict[str, list[str]] = {item.step_id: [] for item in shape_items}
    for item in shape_items:
        for child_id in item.children_step_ids or []:
            if child_id in parent_names:
                parent_names[child_id].append(name_of[item.step_id])

    from hatchet_sdk.clients.rest.models import V1TaskSummary  # noqa: PLC0415

    # Runtime records keyed by step; records for unknown steps are ignored.
    summary_for: dict[str, V1TaskSummary] = {
        summary.step_id: summary
        for summary in task_summaries
        if summary.step_id and summary.step_id in name_of
    }

    dag_tasks: list[DagTask] = []
    for step_id in _topo_sort(shape_items):
        summary = summary_for.get(step_id)

        # Defaults describe a step that has not produced a task record yet.
        status = DagTaskStatus.QUEUED
        started_at = None
        finished_at = None
        duration_seconds: float | None = None
        error: str | None = None
        children_total: int | None = None
        children_completed: int | None = None

        if summary:
            status = _HATCHET_TO_DAG_STATUS.get(summary.status, DagTaskStatus.QUEUED)
            started_at = summary.started_at
            finished_at = summary.finished_at
            if summary.duration is not None:
                duration_seconds = summary.duration / 1000.0

            # Fan-out: report how many spawned children have completed.
            spawned = summary.num_spawned_children
            if spawned and spawned > 0:
                children_total = spawned
                children_completed = sum(
                    1
                    for child in (summary.children or [])
                    if child.status == V1TaskStatus.COMPLETED
                )

            error = _extract_error_summary(summary.error_message)

        dag_tasks.append(
            DagTask(
                name=name_of[step_id],
                status=status,
                started_at=started_at,
                finished_at=finished_at,
                duration_seconds=duration_seconds,
                parents=parent_names.get(step_id, []),
                error=error,
                children_total=children_total,
                children_completed=children_completed,
                progress_pct=None,
            )
        )

    return dag_tasks
|
||||||
|
|
||||||
|
|
||||||
|
async def broadcast_dag_status(transcript_id: str, workflow_run_id: str) -> None:
    """Push a snapshot of the workflow's DAG state to WebSocket listeners.

    The run details are fetched from Hatchet, converted with
    ``extract_dag_tasks`` and sent as a ``DAG_STATUS`` event for the
    transcript. Nothing is sent when the transcript cannot be found.

    Best effort only: any ``Exception`` is logged as a warning and swallowed.
    Imports happen inside the function for fork-safety (Hatchet workers fork
    processes).

    Args:
        transcript_id: Transcript whose listeners receive the event.
        workflow_run_id: Hatchet workflow run to inspect.
    """
    try:
        from reflector.db.transcripts import transcripts_controller  # noqa: I001, PLC0415
        from reflector.hatchet.broadcast import append_event_and_broadcast  # noqa: PLC0415
        from reflector.hatchet.client import HatchetClientManager  # noqa: PLC0415
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (  # noqa: PLC0415
            fresh_db_connection,
        )
        from reflector.logger import logger  # noqa: PLC0415

        async with fresh_db_connection():
            # Build the payload first; the transcript lookup comes afterwards.
            run_details = await HatchetClientManager.get_client().runs.aio_get(
                workflow_run_id
            )
            payload = DagStatusData(
                workflow_run_id=workflow_run_id,
                tasks=extract_dag_tasks(run_details),
            )

            target = await transcripts_controller.get_by_id(transcript_id)
            if not target:
                return

            await append_event_and_broadcast(
                transcript_id,
                target,
                "DAG_STATUS",
                payload,
                logger,
            )
    except Exception:
        # Re-import here: the failure may have happened before the import above ran.
        from reflector.logger import logger  # noqa: PLC0415

        logger.warning(
            "[DAG Progress] Failed to broadcast DAG status",
            transcript_id=transcript_id,
            workflow_run_id=workflow_run_id,
            exc_info=True,
        )
|
||||||
@@ -184,7 +184,10 @@ class Loggable(Protocol):
|
|||||||
|
|
||||||
|
|
||||||
def make_audio_progress_logger(
|
def make_audio_progress_logger(
|
||||||
ctx: Loggable, task_name: TaskName, interval: float = 5.0
|
ctx: Loggable,
|
||||||
|
task_name: TaskName,
|
||||||
|
interval: float = 5.0,
|
||||||
|
transcript_id: str | None = None,
|
||||||
) -> Callable[[float | None, float], None]:
|
) -> Callable[[float | None, float], None]:
|
||||||
"""Create a throttled progress logger callback for audio processing.
|
"""Create a throttled progress logger callback for audio processing.
|
||||||
|
|
||||||
@@ -192,6 +195,7 @@ def make_audio_progress_logger(
|
|||||||
ctx: Object with .log() method (e.g., Hatchet Context).
|
ctx: Object with .log() method (e.g., Hatchet Context).
|
||||||
task_name: Name to prefix in log messages.
|
task_name: Name to prefix in log messages.
|
||||||
interval: Minimum seconds between log messages.
|
interval: Minimum seconds between log messages.
|
||||||
|
transcript_id: If provided, broadcasts transient DAG_TASK_PROGRESS events.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Callback(progress_pct, audio_position) that logs at most every `interval` seconds.
|
Callback(progress_pct, audio_position) that logs at most every `interval` seconds.
|
||||||
@@ -213,6 +217,27 @@ def make_audio_progress_logger(
|
|||||||
)
|
)
|
||||||
last_log_time[0] = now
|
last_log_time[0] = now
|
||||||
|
|
||||||
|
if transcript_id and progress_pct is not None:
|
||||||
|
try:
|
||||||
|
import asyncio # noqa: PLC0415
|
||||||
|
|
||||||
|
from reflector.db.transcripts import TranscriptEvent # noqa: PLC0415
|
||||||
|
from reflector.hatchet.broadcast import broadcast_event # noqa: PLC0415
|
||||||
|
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
loop.create_task(
|
||||||
|
broadcast_event(
|
||||||
|
transcript_id,
|
||||||
|
TranscriptEvent(
|
||||||
|
event="DAG_TASK_PROGRESS",
|
||||||
|
data={"task_name": task_name, "progress_pct": progress_pct},
|
||||||
|
),
|
||||||
|
logger=logger,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass # transient, never fail the callback
|
||||||
|
|
||||||
return callback
|
return callback
|
||||||
|
|
||||||
|
|
||||||
@@ -237,8 +262,15 @@ def with_error_handling(
|
|||||||
) -> Callable[[PipelineInput, Context], Coroutine[Any, Any, R]]:
|
) -> Callable[[PipelineInput, Context], Coroutine[Any, Any, R]]:
|
||||||
@functools.wraps(func)
|
@functools.wraps(func)
|
||||||
async def wrapper(input: PipelineInput, ctx: Context) -> R:
|
async def wrapper(input: PipelineInput, ctx: Context) -> R:
|
||||||
|
from reflector.hatchet.dag_progress import broadcast_dag_status # noqa: I001, PLC0415
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return await func(input, ctx)
|
result = await func(input, ctx)
|
||||||
|
try:
|
||||||
|
await broadcast_dag_status(input.transcript_id, ctx.workflow_run_id)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[Hatchet] {step_name} failed",
|
f"[Hatchet] {step_name} failed",
|
||||||
@@ -246,6 +278,10 @@ def with_error_handling(
|
|||||||
error=str(e),
|
error=str(e),
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
await broadcast_dag_status(input.transcript_id, ctx.workflow_run_id)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
if set_error_status:
|
if set_error_status:
|
||||||
await set_workflow_error_status(input.transcript_id)
|
await set_workflow_error_status(input.transcript_id)
|
||||||
raise
|
raise
|
||||||
@@ -322,6 +358,7 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
|||||||
mtg_session_id = recording.mtg_session_id
|
mtg_session_id = recording.mtg_session_id
|
||||||
async with fresh_db_connection():
|
async with fresh_db_connection():
|
||||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||||
|
TranscriptDuration,
|
||||||
TranscriptParticipant,
|
TranscriptParticipant,
|
||||||
transcripts_controller,
|
transcripts_controller,
|
||||||
)
|
)
|
||||||
@@ -330,15 +367,26 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
|||||||
if not transcript:
|
if not transcript:
|
||||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||||
# Note: title NOT cleared - preserves existing titles
|
# Note: title NOT cleared - preserves existing titles
|
||||||
|
# Duration from Daily API (seconds -> milliseconds) - master source
|
||||||
|
duration_ms = recording.duration * 1000 if recording.duration else 0
|
||||||
await transcripts_controller.update(
|
await transcripts_controller.update(
|
||||||
transcript,
|
transcript,
|
||||||
{
|
{
|
||||||
"events": [],
|
"events": [],
|
||||||
"topics": [],
|
"topics": [],
|
||||||
"participants": [],
|
"participants": [],
|
||||||
|
"duration": duration_ms,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
await append_event_and_broadcast(
|
||||||
|
input.transcript_id,
|
||||||
|
transcript,
|
||||||
|
"DURATION",
|
||||||
|
TranscriptDuration(duration=duration_ms),
|
||||||
|
logger=logger,
|
||||||
|
)
|
||||||
|
|
||||||
mtg_session_id = assert_non_none_and_non_empty(
|
mtg_session_id = assert_non_none_and_non_empty(
|
||||||
mtg_session_id, "mtg_session_id is required"
|
mtg_session_id, "mtg_session_id is required"
|
||||||
)
|
)
|
||||||
@@ -548,7 +596,9 @@ async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
|
|||||||
target_sample_rate,
|
target_sample_rate,
|
||||||
offsets_seconds=None,
|
offsets_seconds=None,
|
||||||
logger=logger,
|
logger=logger,
|
||||||
progress_callback=make_audio_progress_logger(ctx, TaskName.MIXDOWN_TRACKS),
|
progress_callback=make_audio_progress_logger(
|
||||||
|
ctx, TaskName.MIXDOWN_TRACKS, transcript_id=input.transcript_id
|
||||||
|
),
|
||||||
expected_duration_sec=recording_duration if recording_duration > 0 else None,
|
expected_duration_sec=recording_duration if recording_duration > 0 else None,
|
||||||
)
|
)
|
||||||
await writer.flush()
|
await writer.flush()
|
||||||
@@ -1095,7 +1145,7 @@ async def identify_action_items(
|
|||||||
|
|
||||||
|
|
||||||
@daily_multitrack_pipeline.task(
|
@daily_multitrack_pipeline.task(
|
||||||
parents=[generate_waveform, generate_title, generate_recap, identify_action_items],
|
parents=[process_tracks, generate_title, generate_recap, identify_action_items],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||||
retries=3,
|
retries=3,
|
||||||
)
|
)
|
||||||
@@ -1108,12 +1158,8 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
|
|||||||
"""
|
"""
|
||||||
ctx.log("finalize: saving transcript and setting status to 'ended'")
|
ctx.log("finalize: saving transcript and setting status to 'ended'")
|
||||||
|
|
||||||
mixdown_result = ctx.task_output(mixdown_tracks)
|
|
||||||
track_result = ctx.task_output(process_tracks)
|
track_result = ctx.task_output(process_tracks)
|
||||||
|
|
||||||
duration = mixdown_result.duration
|
|
||||||
all_words = track_result.all_words
|
|
||||||
|
|
||||||
# Cleanup temporary padded S3 files (deferred until finalize for semantic parity with Celery)
|
# Cleanup temporary padded S3 files (deferred until finalize for semantic parity with Celery)
|
||||||
created_padded_files = track_result.created_padded_files
|
created_padded_files = track_result.created_padded_files
|
||||||
if created_padded_files:
|
if created_padded_files:
|
||||||
@@ -1133,7 +1179,6 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
|
|||||||
|
|
||||||
async with fresh_db_connection():
|
async with fresh_db_connection():
|
||||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||||
TranscriptDuration,
|
|
||||||
TranscriptText,
|
TranscriptText,
|
||||||
transcripts_controller,
|
transcripts_controller,
|
||||||
)
|
)
|
||||||
@@ -1142,34 +1187,26 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
|
|||||||
if transcript is None:
|
if transcript is None:
|
||||||
raise ValueError(f"Transcript {input.transcript_id} not found in database")
|
raise ValueError(f"Transcript {input.transcript_id} not found in database")
|
||||||
|
|
||||||
merged_transcript = TranscriptType(words=all_words, translation=None)
|
|
||||||
|
|
||||||
await append_event_and_broadcast(
|
await append_event_and_broadcast(
|
||||||
input.transcript_id,
|
input.transcript_id,
|
||||||
transcript,
|
transcript,
|
||||||
"TRANSCRIPT",
|
"TRANSCRIPT",
|
||||||
TranscriptText(
|
TranscriptText(
|
||||||
text=merged_transcript.text,
|
text="",
|
||||||
translation=merged_transcript.translation,
|
translation=None,
|
||||||
),
|
),
|
||||||
logger=logger,
|
logger=logger,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save duration and clear workflow_run_id (workflow completed successfully)
|
# Clear workflow_run_id (workflow completed successfully)
|
||||||
# Note: title/long_summary/short_summary already saved by their callbacks
|
# Note: title/long_summary/short_summary/duration already saved by their callbacks
|
||||||
await transcripts_controller.update(
|
await transcripts_controller.update(
|
||||||
transcript,
|
transcript,
|
||||||
{
|
{
|
||||||
"duration": duration,
|
|
||||||
"workflow_run_id": None, # Clear on success - no need to resume
|
"workflow_run_id": None, # Clear on success - no need to resume
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
duration_data = TranscriptDuration(duration=duration)
|
|
||||||
await append_event_and_broadcast(
|
|
||||||
input.transcript_id, transcript, "DURATION", duration_data, logger=logger
|
|
||||||
)
|
|
||||||
|
|
||||||
await set_status_and_broadcast(input.transcript_id, "ended", logger=logger)
|
await set_status_and_broadcast(input.transcript_id, "ended", logger=logger)
|
||||||
|
|
||||||
ctx.log(
|
ctx.log(
|
||||||
@@ -1347,14 +1384,34 @@ async def send_webhook(input: PipelineInput, ctx: Context) -> WebhookResult:
|
|||||||
f"participants={len(payload.transcript.participants)})"
|
f"participants={len(payload.transcript.participants)})"
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await send_webhook_request(
|
try:
|
||||||
url=room.webhook_url,
|
response = await send_webhook_request(
|
||||||
payload=payload,
|
url=room.webhook_url,
|
||||||
event_type="transcript.completed",
|
payload=payload,
|
||||||
webhook_secret=room.webhook_secret,
|
event_type="transcript.completed",
|
||||||
timeout=30.0,
|
webhook_secret=room.webhook_secret,
|
||||||
)
|
timeout=30.0,
|
||||||
|
)
|
||||||
|
|
||||||
ctx.log(f"send_webhook complete: status_code={response.status_code}")
|
ctx.log(f"send_webhook complete: status_code={response.status_code}")
|
||||||
|
return WebhookResult(webhook_sent=True, response_code=response.status_code)
|
||||||
|
|
||||||
return WebhookResult(webhook_sent=True, response_code=response.status_code)
|
except httpx.HTTPStatusError as e:
|
||||||
|
ctx.log(
|
||||||
|
f"send_webhook failed (HTTP {e.response.status_code}), continuing anyway"
|
||||||
|
)
|
||||||
|
return WebhookResult(
|
||||||
|
webhook_sent=False, response_code=e.response.status_code
|
||||||
|
)
|
||||||
|
|
||||||
|
except httpx.ConnectError as e:
|
||||||
|
ctx.log(f"send_webhook failed (connection error), continuing anyway: {e}")
|
||||||
|
return WebhookResult(webhook_sent=False)
|
||||||
|
|
||||||
|
except httpx.TimeoutException as e:
|
||||||
|
ctx.log(f"send_webhook failed (timeout), continuing anyway: {e}")
|
||||||
|
return WebhookResult(webhook_sent=False)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
ctx.log(f"send_webhook unexpected error, continuing anyway: {e}")
|
||||||
|
return WebhookResult(webhook_sent=False)
|
||||||
|
|||||||
165
server/reflector/hatchet/workflows/padding_workflow.py
Normal file
165
server/reflector/hatchet/workflows/padding_workflow.py
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
"""
|
||||||
|
Hatchet child workflow: PaddingWorkflow
|
||||||
|
Handles individual audio track padding via Modal.com backend.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
import av
|
||||||
|
from hatchet_sdk import Context
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from reflector.hatchet.client import HatchetClientManager
|
||||||
|
from reflector.hatchet.constants import TIMEOUT_AUDIO
|
||||||
|
from reflector.hatchet.workflows.models import PadTrackResult
|
||||||
|
from reflector.logger import logger
|
||||||
|
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
|
||||||
|
from reflector.utils.audio_padding import extract_stream_start_time_from_container
|
||||||
|
|
||||||
|
|
||||||
|
class PaddingInput(BaseModel):
    """Parameters for padding a single audio track."""

    # Index of the track; used in log messages and in the padded file name.
    track_index: int
    # S3 key of the source track to read.
    s3_key: str
    # Bucket that holds the source track.
    bucket_name: str
    # Transcript the track belongs to; part of the padded file's storage path.
    transcript_id: str
|
||||||
|
|
||||||
|
|
||||||
|
# Hatchet client used to declare the workflow below.
hatchet = HatchetClientManager.get_client()

# Workflow whose input is validated against PaddingInput.
padding_workflow = hatchet.workflow(
    name="PaddingWorkflow",
    input_validator=PaddingInput,
)
|
||||||
|
|
||||||
|
|
||||||
|
@padding_workflow.task(execution_timeout=timedelta(seconds=TIMEOUT_AUDIO), retries=3)
async def pad_track(input: PaddingInput, ctx: Context) -> PadTrackResult:
    """Prepend silence to one track according to its container start time.

    The source is opened through a presigned GET URL to read its start time.
    When that start time is zero or negative the original key is returned
    unchanged with ``size=0``. Otherwise a presigned PUT URL is created and the
    padding itself is delegated to ``AudioPaddingModalProcessor``.

    Args:
        input: Track index, source key/bucket and owning transcript id.
        ctx: Hatchet task context; used here for ``ctx.log``.

    Returns:
        PadTrackResult referencing either the original key or the padded file.

    Raises:
        Exception: every failure is logged and re-raised. httpx status and
            timeout errors caught here are re-raised as a plain ``Exception``
            with a short message.
    """
    ctx.log(f"pad_track: track {input.track_index}, s3_key={input.s3_key}")
    logger.info(
        "[Hatchet] pad_track",
        track_index=input.track_index,
        s3_key=input.s3_key,
        transcript_id=input.transcript_id,
    )

    try:
        # Create fresh storage instance to avoid aioboto3 fork issues
        from reflector.settings import settings  # noqa: PLC0415
        from reflector.storage.storage_aws import AwsStorage  # noqa: PLC0415

        s3_storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )
        download_url = await s3_storage.get_file_url(
            input.s3_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=input.bucket_name,
        )

        # Probe the container: log its duration (best effort) and read the
        # start time that decides whether padding is needed.
        with av.open(download_url) as container:
            if container.duration:
                try:
                    duration = timedelta(seconds=container.duration // 1_000_000)
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration={duration}"
                    )
                except (ValueError, TypeError, OverflowError) as e:
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration error: {str(e)}"
                    )

            silence_seconds = extract_stream_start_time_from_container(
                container, input.track_index, logger=logger
            )

        # Nothing to prepend: hand back the untouched source.
        if silence_seconds <= 0:
            logger.info(
                f"Track {input.track_index} requires no padding",
                track_index=input.track_index,
            )
            return PadTrackResult(
                padded_key=input.s3_key,
                bucket_name=input.bucket_name,
                size=0,
                track_index=input.track_index,
            )

        storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"

        # Presign PUT URL for output (Modal will upload directly)
        upload_url = await s3_storage.get_file_url(
            storage_path,
            operation="put_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
        )

        import httpx  # noqa: PLC0415

        from reflector.processors.audio_padding_modal import (  # noqa: PLC0415
            AudioPaddingModalProcessor,
        )

        try:
            modal_reply = await AudioPaddingModalProcessor().pad_track(
                track_url=download_url,
                output_url=upload_url,
                start_time_seconds=silence_seconds,
                track_index=input.track_index,
            )
            file_size = modal_reply.size

            ctx.log(f"pad_track: Modal returned size={file_size}")
        except httpx.HTTPStatusError as e:
            if hasattr(e.response, "text"):
                error_detail = e.response.text
            else:
                error_detail = str(e)
            status_code = e.response.status_code if hasattr(e, "response") else None
            logger.error(
                "[Hatchet] Modal padding HTTP error",
                transcript_id=input.transcript_id,
                track_index=input.track_index,
                status_code=status_code,
                error=error_detail,
                exc_info=True,
            )
            raise Exception(
                f"Modal padding failed: HTTP {e.response.status_code}"
            ) from e
        except httpx.TimeoutException as e:
            logger.error(
                "[Hatchet] Modal padding timeout",
                transcript_id=input.transcript_id,
                track_index=input.track_index,
                error=str(e),
                exc_info=True,
            )
            raise Exception("Modal padding timeout") from e

        logger.info(
            "[Hatchet] pad_track complete",
            track_index=input.track_index,
            padded_key=storage_path,
        )

        return PadTrackResult(
            padded_key=storage_path,
            bucket_name=None,  # None = use default transcript storage bucket
            size=file_size,
            track_index=input.track_index,
        )

    except Exception as e:
        # Outermost safety net: record the failure, then propagate it unchanged.
        logger.error(
            "[Hatchet] pad_track failed",
            transcript_id=input.transcript_id,
            track_index=input.track_index,
            error=str(e),
            exc_info=True,
        )
        raise
|
||||||
@@ -14,9 +14,7 @@ Hatchet workers run in forked processes; fresh imports per task ensure
|
|||||||
storage/DB connections are not shared across forks.
|
storage/DB connections are not shared across forks.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import tempfile
|
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import av
|
import av
|
||||||
from hatchet_sdk import Context
|
from hatchet_sdk import Context
|
||||||
@@ -27,10 +25,7 @@ from reflector.hatchet.constants import TIMEOUT_AUDIO, TIMEOUT_HEAVY
|
|||||||
from reflector.hatchet.workflows.models import PadTrackResult, TranscribeTrackResult
|
from reflector.hatchet.workflows.models import PadTrackResult, TranscribeTrackResult
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
|
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
|
||||||
from reflector.utils.audio_padding import (
|
from reflector.utils.audio_padding import extract_stream_start_time_from_container
|
||||||
apply_audio_padding_to_file,
|
|
||||||
extract_stream_start_time_from_container,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TrackInput(BaseModel):
|
class TrackInput(BaseModel):
|
||||||
@@ -83,63 +78,44 @@ async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
|
|||||||
)
|
)
|
||||||
|
|
||||||
with av.open(source_url) as in_container:
|
with av.open(source_url) as in_container:
|
||||||
if in_container.duration:
|
|
||||||
try:
|
|
||||||
duration = timedelta(seconds=in_container.duration // 1_000_000)
|
|
||||||
ctx.log(
|
|
||||||
f"pad_track: track {input.track_index}, duration={duration}"
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
ctx.log(f"pad_track: track {input.track_index}, duration=ERROR")
|
|
||||||
|
|
||||||
start_time_seconds = extract_stream_start_time_from_container(
|
start_time_seconds = extract_stream_start_time_from_container(
|
||||||
in_container, input.track_index, logger=logger
|
in_container, input.track_index, logger=logger
|
||||||
)
|
)
|
||||||
|
|
||||||
# If no padding needed, return original S3 key
|
# If no padding needed, return original S3 key
|
||||||
if start_time_seconds <= 0:
|
if start_time_seconds <= 0:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Track {input.track_index} requires no padding",
|
f"Track {input.track_index} requires no padding",
|
||||||
track_index=input.track_index,
|
track_index=input.track_index,
|
||||||
)
|
)
|
||||||
return PadTrackResult(
|
return PadTrackResult(
|
||||||
padded_key=input.s3_key,
|
padded_key=input.s3_key,
|
||||||
bucket_name=input.bucket_name,
|
bucket_name=input.bucket_name,
|
||||||
size=0,
|
size=0,
|
||||||
track_index=input.track_index,
|
track_index=input.track_index,
|
||||||
)
|
)
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:
|
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
|
||||||
temp_path = temp_file.name
|
|
||||||
|
|
||||||
try:
|
# Presign PUT URL for output (Modal uploads directly)
|
||||||
apply_audio_padding_to_file(
|
output_url = await storage.get_file_url(
|
||||||
in_container,
|
storage_path,
|
||||||
temp_path,
|
operation="put_object",
|
||||||
start_time_seconds,
|
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||||
input.track_index,
|
)
|
||||||
logger=logger,
|
|
||||||
)
|
|
||||||
|
|
||||||
file_size = Path(temp_path).stat().st_size
|
from reflector.processors.audio_padding_modal import ( # noqa: PLC0415
|
||||||
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
|
AudioPaddingModalProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
processor = AudioPaddingModalProcessor()
|
||||||
f"About to upload padded track",
|
result = await processor.pad_track(
|
||||||
key=storage_path,
|
track_url=source_url,
|
||||||
size=file_size,
|
output_url=output_url,
|
||||||
)
|
start_time_seconds=start_time_seconds,
|
||||||
|
track_index=input.track_index,
|
||||||
with open(temp_path, "rb") as padded_file:
|
)
|
||||||
await storage.put_file(storage_path, padded_file)
|
file_size = result.size
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"Uploaded padded track to S3",
|
|
||||||
key=storage_path,
|
|
||||||
size=file_size,
|
|
||||||
)
|
|
||||||
finally:
|
|
||||||
Path(temp_path).unlink(missing_ok=True)
|
|
||||||
|
|
||||||
ctx.log(f"pad_track complete: track {input.track_index} -> {storage_path}")
|
ctx.log(f"pad_track complete: track {input.track_index} -> {storage_path}")
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
113
server/reflector/processors/audio_padding_modal.py
Normal file
113
server/reflector/processors/audio_padding_modal.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
"""
|
||||||
|
Modal.com backend for audio padding.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from reflector.hatchet.constants import TIMEOUT_AUDIO
|
||||||
|
from reflector.logger import logger
|
||||||
|
|
||||||
|
|
||||||
|
class PaddingResponse(BaseModel):
    """JSON body returned by the padding endpoint."""

    # Reported size of the padded output.
    size: int
    # Set by the backend when it reports the work as cancelled.
    cancelled: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class AudioPaddingModalProcessor:
    """Audio padding processor using Modal.com CPU backend via HTTP."""

    def __init__(
        self, padding_url: str | None = None, modal_api_key: str | None = None
    ):
        """Resolve the backend URL and optional API key.

        Args:
            padding_url: Base URL of the padding service; falls back to the
                ``PADDING_URL`` environment variable.
            modal_api_key: Bearer token; falls back to ``MODAL_API_KEY``.
                When neither is set, requests are sent without an
                Authorization header.

        Raises:
            ValueError: if no padding URL is available.
        """
        self.padding_url = padding_url or os.getenv("PADDING_URL")
        if not self.padding_url:
            raise ValueError(
                "PADDING_URL required to use AudioPaddingModalProcessor. "
                "Set PADDING_URL environment variable or pass padding_url parameter."
            )

        self.modal_api_key = modal_api_key or os.getenv("MODAL_API_KEY")

    async def pad_track(
        self,
        track_url: str,
        output_url: str,
        start_time_seconds: float,
        track_index: int,
    ) -> PaddingResponse:
        """Pad audio track with silence via Modal backend.

        Args:
            track_url: Presigned GET URL for source audio track
            output_url: Presigned PUT URL for output WebM
            start_time_seconds: Amount of silence to prepend
            track_index: Track index for logging

        Returns:
            PaddingResponse parsed from the backend's JSON reply.

        Raises:
            ValueError: if ``track_url`` or ``output_url`` is empty, or
                ``start_time_seconds`` is not positive.
            asyncio.CancelledError: if the call is cancelled, or the backend
                reports ``cancelled`` in its reply.
            Exception: timeouts and HTTP status errors are re-raised as a plain
                ``Exception`` chained to the httpx error; anything else is
                logged and re-raised unchanged.
        """
        if not track_url:
            raise ValueError("track_url cannot be empty")
        # Reject a missing upload target locally instead of after a round trip.
        if not output_url:
            raise ValueError("output_url cannot be empty")
        if start_time_seconds <= 0:
            raise ValueError(
                f"start_time_seconds must be positive, got {start_time_seconds}"
            )

        log = logger.bind(track_index=track_index, padding_seconds=start_time_seconds)
        log.info("Sending Modal padding HTTP request")

        # Tolerate a trailing slash in the configured base URL ("…/" + "/pad").
        url = f"{self.padding_url.rstrip('/')}/pad"

        headers = {}
        if self.modal_api_key:
            headers["Authorization"] = f"Bearer {self.modal_api_key}"

        try:
            async with httpx.AsyncClient(timeout=TIMEOUT_AUDIO) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json={
                        "track_url": track_url,
                        "output_url": output_url,
                        "start_time_seconds": start_time_seconds,
                        "track_index": track_index,
                    },
                    follow_redirects=True,
                )

                if response.status_code != 200:
                    # Capture the body before raise_for_status() discards the context.
                    error_body = response.text
                    log.error(
                        "Modal padding API error",
                        status_code=response.status_code,
                        error_body=error_body,
                    )

                response.raise_for_status()
                result = response.json()

                # Check if work was cancelled
                if result.get("cancelled"):
                    log.warning("Modal padding was cancelled by disconnect detection")
                    # NOTE(review): CancelledError is raised for a backend-reported
                    # cancel; handlers written as `except Exception` typically
                    # will not see it. Confirm this is intended.
                    raise asyncio.CancelledError(
                        "Padding cancelled due to client disconnect"
                    )

                log.info("Modal padding complete", size=result["size"])
                return PaddingResponse(**result)
        except asyncio.CancelledError:
            log.warning(
                "Modal padding cancelled (Hatchet timeout, disconnect detected on Modal side)"
            )
            raise
        except httpx.TimeoutException as e:
            log.error("Modal padding timeout", error=str(e), exc_info=True)
            raise Exception(f"Modal padding timeout: {e}") from e
        except httpx.HTTPStatusError as e:
            log.error("Modal padding HTTP error", error=str(e), exc_info=True)
            raise Exception(f"Modal padding HTTP error: {e}") from e
        except Exception as e:
            log.error("Modal padding unexpected error", error=str(e), exc_info=True)
            raise
|
||||||
@@ -11,18 +11,14 @@ from typing import Literal, Union, assert_never
|
|||||||
|
|
||||||
import celery
|
import celery
|
||||||
from celery.result import AsyncResult
|
from celery.result import AsyncResult
|
||||||
from hatchet_sdk.clients.rest.exceptions import ApiException
|
from hatchet_sdk.clients.rest.exceptions import ApiException, NotFoundException
|
||||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||||
|
|
||||||
from reflector.db.recordings import recordings_controller
|
from reflector.db.recordings import recordings_controller
|
||||||
from reflector.db.rooms import rooms_controller
|
|
||||||
from reflector.db.transcripts import Transcript, transcripts_controller
|
from reflector.db.transcripts import Transcript, transcripts_controller
|
||||||
from reflector.hatchet.client import HatchetClientManager
|
from reflector.hatchet.client import HatchetClientManager
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||||
from reflector.pipelines.main_multitrack_pipeline import (
|
|
||||||
task_pipeline_multitrack_process,
|
|
||||||
)
|
|
||||||
from reflector.utils.string import NonEmptyString
|
from reflector.utils.string import NonEmptyString
|
||||||
|
|
||||||
|
|
||||||
@@ -181,39 +177,24 @@ async def dispatch_transcript_processing(
|
|||||||
Returns AsyncResult for Celery tasks, None for Hatchet workflows.
|
Returns AsyncResult for Celery tasks, None for Hatchet workflows.
|
||||||
"""
|
"""
|
||||||
if isinstance(config, MultitrackProcessingConfig):
|
if isinstance(config, MultitrackProcessingConfig):
|
||||||
use_celery = False
|
# Multitrack processing always uses Hatchet (no Celery fallback)
|
||||||
if config.room_id:
|
# First check if we can replay (outside transaction since it's read-only)
|
||||||
room = await rooms_controller.get_by_id(config.room_id)
|
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
||||||
use_celery = room.use_celery if room else False
|
if transcript and transcript.workflow_run_id and not force:
|
||||||
|
can_replay = await HatchetClientManager.can_replay(
|
||||||
use_hatchet = not use_celery
|
transcript.workflow_run_id
|
||||||
|
|
||||||
if use_celery:
|
|
||||||
logger.info(
|
|
||||||
"Room uses legacy Celery processing",
|
|
||||||
room_id=config.room_id,
|
|
||||||
transcript_id=config.transcript_id,
|
|
||||||
)
|
)
|
||||||
|
if can_replay:
|
||||||
if use_hatchet:
|
await HatchetClientManager.replay_workflow(transcript.workflow_run_id)
|
||||||
# First check if we can replay (outside transaction since it's read-only)
|
logger.info(
|
||||||
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
"Replaying Hatchet workflow",
|
||||||
if transcript and transcript.workflow_run_id and not force:
|
workflow_id=transcript.workflow_run_id,
|
||||||
can_replay = await HatchetClientManager.can_replay(
|
|
||||||
transcript.workflow_run_id
|
|
||||||
)
|
)
|
||||||
if can_replay:
|
return None
|
||||||
await HatchetClientManager.replay_workflow(
|
else:
|
||||||
transcript.workflow_run_id
|
# Workflow can't replay (CANCELLED, COMPLETED, or 404 deleted)
|
||||||
)
|
# Log and proceed to start new workflow
|
||||||
logger.info(
|
try:
|
||||||
"Replaying Hatchet workflow",
|
|
||||||
workflow_id=transcript.workflow_run_id,
|
|
||||||
)
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
# Workflow exists but can't replay (CANCELLED, COMPLETED, etc.)
|
|
||||||
# Log and proceed to start new workflow
|
|
||||||
status = await HatchetClientManager.get_workflow_run_status(
|
status = await HatchetClientManager.get_workflow_run_status(
|
||||||
transcript.workflow_run_id
|
transcript.workflow_run_id
|
||||||
)
|
)
|
||||||
@@ -222,68 +203,85 @@ async def dispatch_transcript_processing(
|
|||||||
old_workflow_id=transcript.workflow_run_id,
|
old_workflow_id=transcript.workflow_run_id,
|
||||||
old_status=status.value,
|
old_status=status.value,
|
||||||
)
|
)
|
||||||
|
except NotFoundException:
|
||||||
|
# Workflow deleted from Hatchet but ID still in DB
|
||||||
|
logger.info(
|
||||||
|
"Old workflow not found in Hatchet, starting new",
|
||||||
|
old_workflow_id=transcript.workflow_run_id,
|
||||||
|
)
|
||||||
|
|
||||||
# Force: cancel old workflow if exists
|
# Force: cancel old workflow if exists
|
||||||
if force and transcript and transcript.workflow_run_id:
|
if force and transcript and transcript.workflow_run_id:
|
||||||
|
try:
|
||||||
await HatchetClientManager.cancel_workflow(transcript.workflow_run_id)
|
await HatchetClientManager.cancel_workflow(transcript.workflow_run_id)
|
||||||
logger.info(
|
logger.info(
|
||||||
"Cancelled old workflow (--force)",
|
"Cancelled old workflow (--force)",
|
||||||
workflow_id=transcript.workflow_run_id,
|
workflow_id=transcript.workflow_run_id,
|
||||||
)
|
)
|
||||||
await transcripts_controller.update(
|
except NotFoundException:
|
||||||
transcript, {"workflow_run_id": None}
|
logger.info(
|
||||||
|
"Old workflow already deleted (--force)",
|
||||||
|
workflow_id=transcript.workflow_run_id,
|
||||||
)
|
)
|
||||||
|
await transcripts_controller.update(transcript, {"workflow_run_id": None})
|
||||||
|
|
||||||
# Re-fetch and check for concurrent dispatch (optimistic approach).
|
# Re-fetch and check for concurrent dispatch (optimistic approach).
|
||||||
# No database lock - worst case is duplicate dispatch, but Hatchet
|
# No database lock - worst case is duplicate dispatch, but Hatchet
|
||||||
# workflows are idempotent so this is acceptable.
|
# workflows are idempotent so this is acceptable.
|
||||||
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
||||||
if transcript and transcript.workflow_run_id:
|
if transcript and transcript.workflow_run_id:
|
||||||
# Another process started a workflow between validation and now
|
# Another process started a workflow between validation and now
|
||||||
try:
|
try:
|
||||||
status = await HatchetClientManager.get_workflow_run_status(
|
status = await HatchetClientManager.get_workflow_run_status(
|
||||||
transcript.workflow_run_id
|
transcript.workflow_run_id
|
||||||
|
)
|
||||||
|
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
|
||||||
|
logger.info(
|
||||||
|
"Concurrent workflow detected, skipping dispatch",
|
||||||
|
workflow_id=transcript.workflow_run_id,
|
||||||
)
|
)
|
||||||
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
|
return None
|
||||||
logger.info(
|
except ApiException:
|
||||||
"Concurrent workflow detected, skipping dispatch",
|
# Workflow might be gone (404) or API issue - proceed with new workflow
|
||||||
workflow_id=transcript.workflow_run_id,
|
pass
|
||||||
)
|
|
||||||
return None
|
|
||||||
except ApiException:
|
|
||||||
# Workflow might be gone (404) or API issue - proceed with new workflow
|
|
||||||
pass
|
|
||||||
|
|
||||||
workflow_id = await HatchetClientManager.start_workflow(
|
workflow_id = await HatchetClientManager.start_workflow(
|
||||||
workflow_name="DiarizationPipeline",
|
workflow_name="DiarizationPipeline",
|
||||||
input_data={
|
input_data={
|
||||||
"recording_id": config.recording_id,
|
"recording_id": config.recording_id,
|
||||||
"tracks": [{"s3_key": k} for k in config.track_keys],
|
"tracks": [{"s3_key": k} for k in config.track_keys],
|
||||||
"bucket_name": config.bucket_name,
|
"bucket_name": config.bucket_name,
|
||||||
"transcript_id": config.transcript_id,
|
"transcript_id": config.transcript_id,
|
||||||
"room_id": config.room_id,
|
"room_id": config.room_id,
|
||||||
},
|
},
|
||||||
additional_metadata={
|
additional_metadata={
|
||||||
"transcript_id": config.transcript_id,
|
"transcript_id": config.transcript_id,
|
||||||
"recording_id": config.recording_id,
|
"recording_id": config.recording_id,
|
||||||
"daily_recording_id": config.recording_id,
|
"daily_recording_id": config.recording_id,
|
||||||
},
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if transcript:
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript, {"workflow_run_id": workflow_id}
|
||||||
)
|
)
|
||||||
|
|
||||||
if transcript:
|
logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
|
||||||
await transcripts_controller.update(
|
|
||||||
transcript, {"workflow_run_id": workflow_id}
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
|
try:
|
||||||
return None
|
from reflector.hatchet.dag_progress import broadcast_dag_status # noqa: I001, PLC0415
|
||||||
|
|
||||||
|
await broadcast_dag_status(config.transcript_id, workflow_id)
|
||||||
|
except Exception:
|
||||||
|
logger.warning(
|
||||||
|
"[DAG Progress] Failed initial broadcast",
|
||||||
|
transcript_id=config.transcript_id,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
exc_info=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
# Celery pipeline (durable workflows disabled)
|
|
||||||
return task_pipeline_multitrack_process.delay(
|
|
||||||
transcript_id=config.transcript_id,
|
|
||||||
bucket_name=config.bucket_name,
|
|
||||||
track_keys=config.track_keys,
|
|
||||||
)
|
|
||||||
elif isinstance(config, FileProcessingConfig):
|
elif isinstance(config, FileProcessingConfig):
|
||||||
return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
|
return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from pydantic.types import PositiveInt
|
from pydantic.types import PositiveInt
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
|
from reflector.schemas.platform import DAILY_PLATFORM, Platform
|
||||||
from reflector.utils.string import NonEmptyString
|
from reflector.utils.string import NonEmptyString
|
||||||
|
|
||||||
|
|
||||||
@@ -98,6 +98,10 @@ class Settings(BaseSettings):
|
|||||||
# Diarization: local pyannote.audio
|
# Diarization: local pyannote.audio
|
||||||
DIARIZATION_PYANNOTE_AUTH_TOKEN: str | None = None
|
DIARIZATION_PYANNOTE_AUTH_TOKEN: str | None = None
|
||||||
|
|
||||||
|
# Audio Padding (Modal.com backend)
|
||||||
|
PADDING_URL: str | None = None
|
||||||
|
PADDING_MODAL_API_KEY: str | None = None
|
||||||
|
|
||||||
# Sentry
|
# Sentry
|
||||||
SENTRY_DSN: str | None = None
|
SENTRY_DSN: str | None = None
|
||||||
|
|
||||||
@@ -151,7 +155,7 @@ class Settings(BaseSettings):
|
|||||||
None # Webhook UUID for this environment. Not used by production code
|
None # Webhook UUID for this environment. Not used by production code
|
||||||
)
|
)
|
||||||
# Platform Configuration
|
# Platform Configuration
|
||||||
DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM
|
DEFAULT_VIDEO_PLATFORM: Platform = DAILY_PLATFORM
|
||||||
|
|
||||||
# Zulip integration
|
# Zulip integration
|
||||||
ZULIP_REALM: str | None = None
|
ZULIP_REALM: str | None = None
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ Used by both Hatchet workflows and Celery pipelines for consistent audio encodin
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Opus codec settings
|
# Opus codec settings
|
||||||
|
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
|
||||||
OPUS_STANDARD_SAMPLE_RATE = 48000
|
OPUS_STANDARD_SAMPLE_RATE = 48000
|
||||||
|
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
|
||||||
OPUS_DEFAULT_BIT_RATE = 128000 # 128kbps for good speech quality
|
OPUS_DEFAULT_BIT_RATE = 128000 # 128kbps for good speech quality
|
||||||
|
|
||||||
# S3 presigned URL expiration
|
# S3 presigned URL expiration
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
from reflector.dailyco_api import (
|
from reflector.dailyco_api import (
|
||||||
CreateMeetingTokenRequest,
|
CreateMeetingTokenRequest,
|
||||||
@@ -12,9 +13,11 @@ from reflector.dailyco_api import (
|
|||||||
RoomProperties,
|
RoomProperties,
|
||||||
verify_webhook_signature,
|
verify_webhook_signature,
|
||||||
)
|
)
|
||||||
|
from reflector.dailyco_api import RecordingType as DailyRecordingType
|
||||||
from reflector.db.daily_participant_sessions import (
|
from reflector.db.daily_participant_sessions import (
|
||||||
daily_participant_sessions_controller,
|
daily_participant_sessions_controller,
|
||||||
)
|
)
|
||||||
|
from reflector.db.meetings import meetings_controller
|
||||||
from reflector.db.rooms import Room
|
from reflector.db.rooms import Room
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
from reflector.storage import get_dailyco_storage
|
from reflector.storage import get_dailyco_storage
|
||||||
@@ -58,10 +61,9 @@ class DailyClient(VideoPlatformClient):
|
|||||||
enable_recording = None
|
enable_recording = None
|
||||||
if room.recording_type == self.RECORDING_LOCAL:
|
if room.recording_type == self.RECORDING_LOCAL:
|
||||||
enable_recording = "local"
|
enable_recording = "local"
|
||||||
elif (
|
elif room.recording_type == self.RECORDING_CLOUD:
|
||||||
room.recording_type == self.RECORDING_CLOUD
|
# Don't set enable_recording - recordings started via REST API (not auto-start)
|
||||||
): # daily "cloud" is not our "cloud"
|
enable_recording = None
|
||||||
enable_recording = "raw-tracks"
|
|
||||||
|
|
||||||
properties = RoomProperties(
|
properties = RoomProperties(
|
||||||
enable_recording=enable_recording,
|
enable_recording=enable_recording,
|
||||||
@@ -106,8 +108,6 @@ class DailyClient(VideoPlatformClient):
|
|||||||
Daily.co doesn't provide historical session API, so we query our database
|
Daily.co doesn't provide historical session API, so we query our database
|
||||||
where participant.joined/left webhooks are stored.
|
where participant.joined/left webhooks are stored.
|
||||||
"""
|
"""
|
||||||
from reflector.db.meetings import meetings_controller # noqa: PLC0415
|
|
||||||
|
|
||||||
meeting = await meetings_controller.get_by_room_name(room_name)
|
meeting = await meetings_controller.get_by_room_name(room_name)
|
||||||
if not meeting:
|
if not meeting:
|
||||||
return []
|
return []
|
||||||
@@ -179,21 +179,14 @@ class DailyClient(VideoPlatformClient):
|
|||||||
async def create_meeting_token(
|
async def create_meeting_token(
|
||||||
self,
|
self,
|
||||||
room_name: DailyRoomName,
|
room_name: DailyRoomName,
|
||||||
start_cloud_recording: bool,
|
|
||||||
enable_recording_ui: bool,
|
enable_recording_ui: bool,
|
||||||
user_id: NonEmptyString | None = None,
|
user_id: NonEmptyString | None = None,
|
||||||
is_owner: bool = False,
|
is_owner: bool = False,
|
||||||
max_recording_duration_seconds: int | None = None,
|
max_recording_duration_seconds: int | None = None,
|
||||||
) -> NonEmptyString:
|
) -> NonEmptyString:
|
||||||
start_cloud_recording_opts = None
|
|
||||||
if start_cloud_recording and max_recording_duration_seconds:
|
|
||||||
start_cloud_recording_opts = {"maxDuration": max_recording_duration_seconds}
|
|
||||||
|
|
||||||
properties = MeetingTokenProperties(
|
properties = MeetingTokenProperties(
|
||||||
room_name=room_name,
|
room_name=room_name,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
start_cloud_recording=start_cloud_recording,
|
|
||||||
start_cloud_recording_opts=start_cloud_recording_opts,
|
|
||||||
enable_recording_ui=enable_recording_ui,
|
enable_recording_ui=enable_recording_ui,
|
||||||
is_owner=is_owner,
|
is_owner=is_owner,
|
||||||
)
|
)
|
||||||
@@ -201,6 +194,23 @@ class DailyClient(VideoPlatformClient):
|
|||||||
result = await self._api_client.create_meeting_token(request)
|
result = await self._api_client.create_meeting_token(request)
|
||||||
return result.token
|
return result.token
|
||||||
|
|
||||||
|
async def start_recording(
|
||||||
|
self,
|
||||||
|
room_name: DailyRoomName,
|
||||||
|
recording_type: DailyRecordingType,
|
||||||
|
instance_id: UUID,
|
||||||
|
) -> dict:
|
||||||
|
"""Start recording via Daily.co REST API.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
instance_id: UUID for this recording session - one UUID per "room" in Daily (which is "meeting" in Reflector)
|
||||||
|
"""
|
||||||
|
return await self._api_client.start_recording(
|
||||||
|
room_name=room_name,
|
||||||
|
recording_type=recording_type,
|
||||||
|
instance_id=instance_id,
|
||||||
|
)
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""Clean up API client resources."""
|
"""Clean up API client resources."""
|
||||||
await self._api_client.close()
|
await self._api_client.close()
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ from reflector.video_platforms.factory import create_platform_client
|
|||||||
from reflector.worker.process import (
|
from reflector.worker.process import (
|
||||||
poll_daily_room_presence_task,
|
poll_daily_room_presence_task,
|
||||||
process_multitrack_recording,
|
process_multitrack_recording,
|
||||||
|
store_cloud_recording,
|
||||||
)
|
)
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
@@ -174,46 +175,64 @@ async def _handle_recording_started(event: RecordingStartedEvent):
|
|||||||
async def _handle_recording_ready(event: RecordingReadyEvent):
|
async def _handle_recording_ready(event: RecordingReadyEvent):
|
||||||
room_name = event.payload.room_name
|
room_name = event.payload.room_name
|
||||||
recording_id = event.payload.recording_id
|
recording_id = event.payload.recording_id
|
||||||
tracks = event.payload.tracks
|
recording_type = event.payload.type
|
||||||
|
|
||||||
if not tracks:
|
|
||||||
logger.warning(
|
|
||||||
"recording.ready-to-download: missing tracks",
|
|
||||||
room_name=room_name,
|
|
||||||
recording_id=recording_id,
|
|
||||||
payload=event.payload,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Recording ready for download",
|
"Recording ready for download",
|
||||||
room_name=room_name,
|
room_name=room_name,
|
||||||
recording_id=recording_id,
|
recording_id=recording_id,
|
||||||
num_tracks=len(tracks),
|
recording_type=recording_type,
|
||||||
platform="daily",
|
platform="daily",
|
||||||
)
|
)
|
||||||
|
|
||||||
bucket_name = settings.DAILYCO_STORAGE_AWS_BUCKET_NAME
|
bucket_name = settings.DAILYCO_STORAGE_AWS_BUCKET_NAME
|
||||||
if not bucket_name:
|
if not bucket_name:
|
||||||
logger.error(
|
logger.error("DAILYCO_STORAGE_AWS_BUCKET_NAME not configured")
|
||||||
"DAILYCO_STORAGE_AWS_BUCKET_NAME not configured; cannot process Daily recording"
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
track_keys = [t.s3Key for t in tracks if t.type == "audio"]
|
if recording_type == "cloud":
|
||||||
|
await store_cloud_recording(
|
||||||
|
recording_id=recording_id,
|
||||||
|
room_name=room_name,
|
||||||
|
s3_key=event.payload.s3_key,
|
||||||
|
duration=event.payload.duration,
|
||||||
|
start_ts=event.payload.start_ts,
|
||||||
|
source="webhook",
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
elif recording_type == "raw-tracks":
|
||||||
"Recording webhook queuing processing",
|
tracks = event.payload.tracks
|
||||||
recording_id=recording_id,
|
if not tracks:
|
||||||
room_name=room_name,
|
logger.warning(
|
||||||
)
|
"raw-tracks recording: missing tracks array",
|
||||||
|
room_name=room_name,
|
||||||
|
recording_id=recording_id,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
process_multitrack_recording.delay(
|
track_keys = [t.s3Key for t in tracks if t.type == "audio"]
|
||||||
bucket_name=bucket_name,
|
|
||||||
daily_room_name=room_name,
|
logger.info(
|
||||||
recording_id=recording_id,
|
"Raw-tracks recording queuing processing",
|
||||||
track_keys=track_keys,
|
recording_id=recording_id,
|
||||||
)
|
room_name=room_name,
|
||||||
|
num_tracks=len(track_keys),
|
||||||
|
)
|
||||||
|
|
||||||
|
process_multitrack_recording.delay(
|
||||||
|
bucket_name=bucket_name,
|
||||||
|
daily_room_name=room_name,
|
||||||
|
recording_id=recording_id,
|
||||||
|
track_keys=track_keys,
|
||||||
|
recording_start_ts=event.payload.start_ts,
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"Unknown recording type",
|
||||||
|
recording_type=recording_type,
|
||||||
|
recording_id=recording_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _handle_recording_error(event: RecordingErrorEvent):
|
async def _handle_recording_error(event: RecordingErrorEvent):
|
||||||
|
|||||||
@@ -1,16 +1,23 @@
|
|||||||
|
import json
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Annotated, Optional
|
from typing import Annotated, Any, Optional
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
import reflector.auth as auth
|
import reflector.auth as auth
|
||||||
|
from reflector.dailyco_api import RecordingType
|
||||||
|
from reflector.dailyco_api.client import DailyApiError
|
||||||
from reflector.db.meetings import (
|
from reflector.db.meetings import (
|
||||||
MeetingConsent,
|
MeetingConsent,
|
||||||
meeting_consent_controller,
|
meeting_consent_controller,
|
||||||
meetings_controller,
|
meetings_controller,
|
||||||
)
|
)
|
||||||
from reflector.db.rooms import rooms_controller
|
from reflector.db.rooms import rooms_controller
|
||||||
|
from reflector.logger import logger
|
||||||
|
from reflector.utils.string import NonEmptyString
|
||||||
|
from reflector.video_platforms.factory import create_platform_client
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -73,3 +80,72 @@ async def meeting_deactivate(
|
|||||||
await meetings_controller.update_meeting(meeting_id, is_active=False)
|
await meetings_controller.update_meeting(meeting_id, is_active=False)
|
||||||
|
|
||||||
return {"status": "success", "meeting_id": meeting_id}
|
return {"status": "success", "meeting_id": meeting_id}
|
||||||
|
|
||||||
|
|
||||||
|
class StartRecordingRequest(BaseModel):
|
||||||
|
type: RecordingType
|
||||||
|
instanceId: UUID
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/meetings/{meeting_id}/recordings/start")
|
||||||
|
async def start_recording(
|
||||||
|
meeting_id: NonEmptyString, body: StartRecordingRequest
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Start cloud or raw-tracks recording via Daily.co REST API.
|
||||||
|
|
||||||
|
Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation of allowing only 1 recording at a time.
|
||||||
|
Uses different instanceIds for cloud vs raw-tracks (same won't work)
|
||||||
|
|
||||||
|
Note: No authentication required - anonymous users supported. TODO this is a DOS vector
|
||||||
|
"""
|
||||||
|
meeting = await meetings_controller.get_by_id(meeting_id)
|
||||||
|
if not meeting:
|
||||||
|
raise HTTPException(status_code=404, detail="Meeting not found")
|
||||||
|
|
||||||
|
log = logger.bind(
|
||||||
|
meeting_id=meeting_id,
|
||||||
|
room_name=meeting.room_name,
|
||||||
|
recording_type=body.type,
|
||||||
|
instance_id=body.instanceId,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
client = create_platform_client("daily")
|
||||||
|
result = await client.start_recording(
|
||||||
|
room_name=meeting.room_name,
|
||||||
|
recording_type=body.type,
|
||||||
|
instance_id=body.instanceId,
|
||||||
|
)
|
||||||
|
|
||||||
|
log.info(f"Started {body.type} recording via REST API")
|
||||||
|
|
||||||
|
return {"status": "ok", "result": result}
|
||||||
|
|
||||||
|
except DailyApiError as e:
|
||||||
|
# Parse Daily.co error response to detect "has an active stream"
|
||||||
|
try:
|
||||||
|
error_body = json.loads(e.response_body)
|
||||||
|
error_info = error_body.get("info", "")
|
||||||
|
|
||||||
|
# "has an active stream" means recording already started by another participant
|
||||||
|
# This is SUCCESS from business logic perspective - return 200
|
||||||
|
if "has an active stream" in error_info:
|
||||||
|
log.info(
|
||||||
|
f"{body.type} recording already active (started by another participant)"
|
||||||
|
)
|
||||||
|
return {"status": "already_active", "instanceId": str(body.instanceId)}
|
||||||
|
except (json.JSONDecodeError, KeyError):
|
||||||
|
pass # Fall through to error handling
|
||||||
|
|
||||||
|
# All other Daily.co API errors
|
||||||
|
log.error(f"Failed to start {body.type} recording", error=str(e))
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500, detail=f"Failed to start recording: {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Non-Daily.co errors
|
||||||
|
log.error(f"Failed to start {body.type} recording", error=str(e))
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500, detail=f"Failed to start recording: {str(e)}"
|
||||||
|
)
|
||||||
|
|||||||
@@ -73,6 +73,8 @@ class Meeting(BaseModel):
|
|||||||
calendar_event_id: str | None = None
|
calendar_event_id: str | None = None
|
||||||
calendar_metadata: dict[str, Any] | None = None
|
calendar_metadata: dict[str, Any] | None = None
|
||||||
platform: Platform
|
platform: Platform
|
||||||
|
daily_composed_video_s3_key: str | None = None
|
||||||
|
daily_composed_video_duration: int | None = None
|
||||||
|
|
||||||
|
|
||||||
class CreateRoom(BaseModel):
|
class CreateRoom(BaseModel):
|
||||||
@@ -586,7 +588,6 @@ async def rooms_join_meeting(
|
|||||||
)
|
)
|
||||||
token = await client.create_meeting_token(
|
token = await client.create_meeting_token(
|
||||||
meeting.room_name,
|
meeting.room_name,
|
||||||
start_cloud_recording=meeting.recording_type == "cloud",
|
|
||||||
enable_recording_ui=enable_recording_ui,
|
enable_recording_ui=enable_recording_ui,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
is_owner=user_id == room.user_id,
|
is_owner=user_id == room.user_id,
|
||||||
|
|||||||
@@ -111,6 +111,7 @@ class GetTranscriptMinimal(BaseModel):
|
|||||||
room_id: str | None = None
|
room_id: str | None = None
|
||||||
room_name: str | None = None
|
room_name: str | None = None
|
||||||
audio_deleted: bool | None = None
|
audio_deleted: bool | None = None
|
||||||
|
dag_status: list[dict] | None = None
|
||||||
|
|
||||||
|
|
||||||
class TranscriptParticipantWithEmail(TranscriptParticipant):
|
class TranscriptParticipantWithEmail(TranscriptParticipant):
|
||||||
@@ -491,6 +492,13 @@ async def transcript_get(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
dag_status = None
|
||||||
|
if transcript.status == "processing" and transcript.events:
|
||||||
|
for ev in reversed(transcript.events):
|
||||||
|
if ev.event == "DAG_STATUS":
|
||||||
|
dag_status = ev.data.get("tasks") if isinstance(ev.data, dict) else None
|
||||||
|
break
|
||||||
|
|
||||||
base_data = {
|
base_data = {
|
||||||
"id": transcript.id,
|
"id": transcript.id,
|
||||||
"user_id": transcript.user_id,
|
"user_id": transcript.user_id,
|
||||||
@@ -512,6 +520,7 @@ async def transcript_get(
|
|||||||
"room_id": transcript.room_id,
|
"room_id": transcript.room_id,
|
||||||
"room_name": room_name,
|
"room_name": room_name,
|
||||||
"audio_deleted": transcript.audio_deleted,
|
"audio_deleted": transcript.audio_deleted,
|
||||||
|
"dag_status": dag_status,
|
||||||
"participants": participants,
|
"participants": participants,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -41,13 +41,19 @@ async def transcript_events_websocket(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# on first connection, send all events only to the current user
|
# on first connection, send all events only to the current user
|
||||||
|
# Find the last DAG_STATUS to send after other historical events
|
||||||
|
last_dag_status = None
|
||||||
for event in transcript.events:
|
for event in transcript.events:
|
||||||
# for now, do not send TRANSCRIPT or STATUS options - theses are live event
|
|
||||||
# not necessary to be sent to the client; but keep the rest
|
|
||||||
name = event.event
|
name = event.event
|
||||||
if name in ("TRANSCRIPT", "STATUS"):
|
if name in ("TRANSCRIPT", "STATUS"):
|
||||||
continue
|
continue
|
||||||
|
if name == "DAG_STATUS":
|
||||||
|
last_dag_status = event
|
||||||
|
continue
|
||||||
await websocket.send_json(event.model_dump(mode="json"))
|
await websocket.send_json(event.model_dump(mode="json"))
|
||||||
|
# Send only the most recent DAG_STATUS so reconnecting clients get current state
|
||||||
|
if last_dag_status is not None:
|
||||||
|
await websocket.send_json(last_dag_status.model_dump(mode="json"))
|
||||||
|
|
||||||
# XXX if transcript is final (locked=True and status=ended)
|
# XXX if transcript is final (locked=True and status=ended)
|
||||||
# XXX send a final event to the client and close the connection
|
# XXX send a final event to the client and close the connection
|
||||||
|
|||||||
@@ -6,6 +6,11 @@ from celery.schedules import crontab
|
|||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
|
# Polling intervals (seconds)
|
||||||
|
# Webhook-aware: 180s when webhook configured (backup mode), 15s when no webhook (primary discovery)
|
||||||
|
POLL_DAILY_RECORDINGS_INTERVAL_SEC = 180.0 if settings.DAILY_WEBHOOK_SECRET else 15.0
|
||||||
|
|
||||||
if celery.current_app.main != "default":
|
if celery.current_app.main != "default":
|
||||||
logger.info(f"Celery already configured ({celery.current_app})")
|
logger.info(f"Celery already configured ({celery.current_app})")
|
||||||
app = celery.current_app
|
app = celery.current_app
|
||||||
@@ -44,7 +49,7 @@ else:
|
|||||||
},
|
},
|
||||||
"poll_daily_recordings": {
|
"poll_daily_recordings": {
|
||||||
"task": "reflector.worker.process.poll_daily_recordings",
|
"task": "reflector.worker.process.poll_daily_recordings",
|
||||||
"schedule": 180.0, # Every 3 minutes (configurable lookback window)
|
"schedule": POLL_DAILY_RECORDINGS_INTERVAL_SEC,
|
||||||
},
|
},
|
||||||
"trigger_daily_reconciliation": {
|
"trigger_daily_reconciliation": {
|
||||||
"task": "reflector.worker.process.trigger_daily_reconciliation",
|
"task": "reflector.worker.process.trigger_daily_reconciliation",
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import json
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import List
|
from typing import List, Literal
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
import av
|
import av
|
||||||
@@ -27,9 +27,6 @@ from reflector.db.transcripts import (
|
|||||||
from reflector.hatchet.client import HatchetClientManager
|
from reflector.hatchet.client import HatchetClientManager
|
||||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||||
from reflector.pipelines.main_live_pipeline import asynctask
|
from reflector.pipelines.main_live_pipeline import asynctask
|
||||||
from reflector.pipelines.main_multitrack_pipeline import (
|
|
||||||
task_pipeline_multitrack_process,
|
|
||||||
)
|
|
||||||
from reflector.pipelines.topic_processing import EmptyPipeline
|
from reflector.pipelines.topic_processing import EmptyPipeline
|
||||||
from reflector.processors import AudioFileWriterProcessor
|
from reflector.processors import AudioFileWriterProcessor
|
||||||
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
|
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
|
||||||
@@ -42,6 +39,7 @@ from reflector.utils.daily import (
|
|||||||
filter_cam_audio_tracks,
|
filter_cam_audio_tracks,
|
||||||
recording_lock_key,
|
recording_lock_key,
|
||||||
)
|
)
|
||||||
|
from reflector.utils.string import NonEmptyString
|
||||||
from reflector.video_platforms.factory import create_platform_client
|
from reflector.video_platforms.factory import create_platform_client
|
||||||
from reflector.video_platforms.whereby_utils import (
|
from reflector.video_platforms.whereby_utils import (
|
||||||
parse_whereby_recording_filename,
|
parse_whereby_recording_filename,
|
||||||
@@ -175,13 +173,18 @@ async def process_multitrack_recording(
|
|||||||
daily_room_name: DailyRoomName,
|
daily_room_name: DailyRoomName,
|
||||||
recording_id: str,
|
recording_id: str,
|
||||||
track_keys: list[str],
|
track_keys: list[str],
|
||||||
|
recording_start_ts: int,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Process raw-tracks (multitrack) recording from Daily.co.
|
||||||
|
"""
|
||||||
logger.info(
|
logger.info(
|
||||||
"Processing multitrack recording",
|
"Processing multitrack recording",
|
||||||
bucket=bucket_name,
|
bucket=bucket_name,
|
||||||
room_name=daily_room_name,
|
room_name=daily_room_name,
|
||||||
recording_id=recording_id,
|
recording_id=recording_id,
|
||||||
provided_keys=len(track_keys),
|
provided_keys=len(track_keys),
|
||||||
|
recording_start_ts=recording_start_ts,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not track_keys:
|
if not track_keys:
|
||||||
@@ -212,7 +215,7 @@ async def process_multitrack_recording(
|
|||||||
)
|
)
|
||||||
|
|
||||||
await _process_multitrack_recording_inner(
|
await _process_multitrack_recording_inner(
|
||||||
bucket_name, daily_room_name, recording_id, track_keys
|
bucket_name, daily_room_name, recording_id, track_keys, recording_start_ts
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -221,8 +224,18 @@ async def _process_multitrack_recording_inner(
|
|||||||
daily_room_name: DailyRoomName,
|
daily_room_name: DailyRoomName,
|
||||||
recording_id: str,
|
recording_id: str,
|
||||||
track_keys: list[str],
|
track_keys: list[str],
|
||||||
|
recording_start_ts: int,
|
||||||
):
|
):
|
||||||
"""Inner function containing the actual processing logic."""
|
"""
|
||||||
|
Process multitrack recording (first time or reprocessing).
|
||||||
|
|
||||||
|
For first processing (webhook/polling):
|
||||||
|
- Uses recording_start_ts for time-based meeting matching (no instanceId available)
|
||||||
|
|
||||||
|
For reprocessing:
|
||||||
|
- Uses recording.meeting_id directly (already linked during first processing)
|
||||||
|
- recording_start_ts is ignored
|
||||||
|
"""
|
||||||
|
|
||||||
tz = timezone.utc
|
tz = timezone.utc
|
||||||
recorded_at = datetime.now(tz)
|
recorded_at = datetime.now(tz)
|
||||||
@@ -240,7 +253,53 @@ async def _process_multitrack_recording_inner(
|
|||||||
exc_info=True,
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
meeting = await meetings_controller.get_by_room_name(daily_room_name)
|
# Check if recording already exists (reprocessing path)
|
||||||
|
recording = await recordings_controller.get_by_id(recording_id)
|
||||||
|
|
||||||
|
if recording and recording.meeting_id:
|
||||||
|
# Reprocessing: recording exists with meeting already linked
|
||||||
|
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||||
|
if not meeting:
|
||||||
|
logger.error(
|
||||||
|
"Reprocessing: meeting not found for recording - skipping",
|
||||||
|
meeting_id=recording.meeting_id,
|
||||||
|
recording_id=recording_id,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Reprocessing: using existing recording.meeting_id",
|
||||||
|
recording_id=recording_id,
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
room_name=daily_room_name,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# First processing: recording doesn't exist, need time-based matching
|
||||||
|
# (Daily.co doesn't return instanceId in API, must match by timestamp)
|
||||||
|
recording_start = datetime.fromtimestamp(recording_start_ts, tz=timezone.utc)
|
||||||
|
meeting = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name=daily_room_name,
|
||||||
|
recording_start=recording_start,
|
||||||
|
time_window_hours=168, # 1 week
|
||||||
|
)
|
||||||
|
if not meeting:
|
||||||
|
logger.error(
|
||||||
|
"Raw-tracks: no meeting found within 1-week window (time-based match) - skipping",
|
||||||
|
recording_id=recording_id,
|
||||||
|
room_name=daily_room_name,
|
||||||
|
recording_start_ts=recording_start_ts,
|
||||||
|
recording_start=recording_start.isoformat(),
|
||||||
|
)
|
||||||
|
return # Skip processing, will retry on next poll
|
||||||
|
logger.info(
|
||||||
|
"First processing: found meeting via time-based matching",
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
room_name=daily_room_name,
|
||||||
|
recording_id=recording_id,
|
||||||
|
time_delta_seconds=abs(
|
||||||
|
(meeting.start_date - recording_start).total_seconds()
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
room_name_base = extract_base_room_name(daily_room_name)
|
room_name_base = extract_base_room_name(daily_room_name)
|
||||||
|
|
||||||
@@ -248,18 +307,8 @@ async def _process_multitrack_recording_inner(
|
|||||||
if not room:
|
if not room:
|
||||||
raise Exception(f"Room not found: {room_name_base}")
|
raise Exception(f"Room not found: {room_name_base}")
|
||||||
|
|
||||||
if not meeting:
|
|
||||||
raise Exception(f"Meeting not found: {room_name_base}")
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"Found existing Meeting for recording",
|
|
||||||
meeting_id=meeting.id,
|
|
||||||
room_name=daily_room_name,
|
|
||||||
recording_id=recording_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
recording = await recordings_controller.get_by_id(recording_id)
|
|
||||||
if not recording:
|
if not recording:
|
||||||
|
# Create recording (only happens during first processing)
|
||||||
object_key_dir = os.path.dirname(track_keys[0]) if track_keys else ""
|
object_key_dir = os.path.dirname(track_keys[0]) if track_keys else ""
|
||||||
recording = await recordings_controller.create(
|
recording = await recordings_controller.create(
|
||||||
Recording(
|
Recording(
|
||||||
@@ -271,7 +320,19 @@ async def _process_multitrack_recording_inner(
|
|||||||
track_keys=track_keys,
|
track_keys=track_keys,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
# else: Recording already exists; metadata set at creation time
|
elif not recording.meeting_id:
|
||||||
|
# Recording exists but meeting_id is null (failed first processing)
|
||||||
|
# Update with meeting from time-based matching
|
||||||
|
await recordings_controller.set_meeting_id(
|
||||||
|
recording_id=recording.id,
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
)
|
||||||
|
recording.meeting_id = meeting.id
|
||||||
|
logger.info(
|
||||||
|
"Updated existing recording with meeting_id",
|
||||||
|
recording_id=recording.id,
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
)
|
||||||
|
|
||||||
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
||||||
if not transcript:
|
if not transcript:
|
||||||
@@ -287,49 +348,29 @@ async def _process_multitrack_recording_inner(
|
|||||||
room_id=room.id,
|
room_id=room.id,
|
||||||
)
|
)
|
||||||
|
|
||||||
use_celery = room and room.use_celery
|
# Multitrack processing always uses Hatchet (no Celery fallback)
|
||||||
use_hatchet = not use_celery
|
workflow_id = await HatchetClientManager.start_workflow(
|
||||||
|
workflow_name="DiarizationPipeline",
|
||||||
if use_celery:
|
input_data={
|
||||||
logger.info(
|
"recording_id": recording_id,
|
||||||
"Room uses legacy Celery processing",
|
"tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
|
||||||
room_id=room.id,
|
"bucket_name": bucket_name,
|
||||||
transcript_id=transcript.id,
|
"transcript_id": transcript.id,
|
||||||
)
|
"room_id": room.id,
|
||||||
|
},
|
||||||
if use_hatchet:
|
additional_metadata={
|
||||||
workflow_id = await HatchetClientManager.start_workflow(
|
"transcript_id": transcript.id,
|
||||||
workflow_name="DiarizationPipeline",
|
"recording_id": recording_id,
|
||||||
input_data={
|
"daily_recording_id": recording_id,
|
||||||
"recording_id": recording_id,
|
},
|
||||||
"tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
|
|
||||||
"bucket_name": bucket_name,
|
|
||||||
"transcript_id": transcript.id,
|
|
||||||
"room_id": room.id,
|
|
||||||
},
|
|
||||||
additional_metadata={
|
|
||||||
"transcript_id": transcript.id,
|
|
||||||
"recording_id": recording_id,
|
|
||||||
"daily_recording_id": recording_id,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
"Started Hatchet workflow",
|
|
||||||
workflow_id=workflow_id,
|
|
||||||
transcript_id=transcript.id,
|
|
||||||
)
|
|
||||||
|
|
||||||
await transcripts_controller.update(
|
|
||||||
transcript, {"workflow_run_id": workflow_id}
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Celery pipeline (runs when durable workflows disabled)
|
|
||||||
task_pipeline_multitrack_process.delay(
|
|
||||||
transcript_id=transcript.id,
|
|
||||||
bucket_name=bucket_name,
|
|
||||||
track_keys=filter_cam_audio_tracks(track_keys),
|
|
||||||
)
|
)
|
||||||
|
logger.info(
|
||||||
|
"Started Hatchet workflow",
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
transcript_id=transcript.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
await transcripts_controller.update(transcript, {"workflow_run_id": workflow_id})
|
||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
@@ -338,9 +379,11 @@ async def poll_daily_recordings():
|
|||||||
"""Poll Daily.co API for recordings and process missing ones.
|
"""Poll Daily.co API for recordings and process missing ones.
|
||||||
|
|
||||||
Fetches latest recordings from Daily.co API (default limit 100), compares with DB,
|
Fetches latest recordings from Daily.co API (default limit 100), compares with DB,
|
||||||
and queues processing for recordings not already in DB.
|
and stores/queues missing recordings:
|
||||||
|
- Cloud recordings: Store S3 key in meeting table
|
||||||
|
- Raw-tracks recordings: Queue multitrack processing
|
||||||
|
|
||||||
For each missing recording, uses audio tracks from API response.
|
Acts as fallback when webhooks active, primary discovery when webhooks unavailable.
|
||||||
|
|
||||||
Worker-level locking provides idempotency (see process_multitrack_recording).
|
Worker-level locking provides idempotency (see process_multitrack_recording).
|
||||||
"""
|
"""
|
||||||
@@ -381,51 +424,222 @@ async def poll_daily_recordings():
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
recording_ids = [rec.id for rec in finished_recordings]
|
# Separate cloud and raw-tracks recordings
|
||||||
|
cloud_recordings = []
|
||||||
|
raw_tracks_recordings = []
|
||||||
|
for rec in finished_recordings:
|
||||||
|
if rec.type:
|
||||||
|
# Daily.co API returns null type - make sure this assumption stays
|
||||||
|
# If this logs, Daily.co API changed - we can remove inference logic.
|
||||||
|
recording_type = rec.type
|
||||||
|
logger.warning(
|
||||||
|
"Recording has explicit type field from Daily.co API (unexpected, API may have changed)",
|
||||||
|
recording_id=rec.id,
|
||||||
|
room_name=rec.room_name,
|
||||||
|
recording_type=recording_type,
|
||||||
|
has_s3key=bool(rec.s3key),
|
||||||
|
tracks_count=len(rec.tracks),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# DAILY.CO API LIMITATION:
|
||||||
|
# GET /recordings response does NOT include type field.
|
||||||
|
# Daily.co docs mention type field exists, but API never returns it.
|
||||||
|
# Verified: 84 recordings from Nov 2025 - Jan 2026 ALL have type=None.
|
||||||
|
#
|
||||||
|
# This is not a recent API change - Daily.co has never returned type.
|
||||||
|
# Must infer from structural properties.
|
||||||
|
#
|
||||||
|
# Inference heuristic (reliable for finished recordings):
|
||||||
|
# - Has tracks array → raw-tracks
|
||||||
|
# - Has s3key but no tracks → cloud
|
||||||
|
# - Neither → failed/incomplete recording
|
||||||
|
if len(rec.tracks) > 0:
|
||||||
|
recording_type = "raw-tracks"
|
||||||
|
elif rec.s3key and len(rec.tracks) == 0:
|
||||||
|
recording_type = "cloud"
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"Recording has no type, no s3key, and no tracks - likely failed recording",
|
||||||
|
recording_id=rec.id,
|
||||||
|
room_name=rec.room_name,
|
||||||
|
status=rec.status,
|
||||||
|
duration=rec.duration,
|
||||||
|
mtg_session_id=rec.mtgSessionId,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if recording_type == "cloud":
|
||||||
|
cloud_recordings.append(rec)
|
||||||
|
else:
|
||||||
|
raw_tracks_recordings.append(rec)
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
"Poll results",
|
||||||
|
total=len(finished_recordings),
|
||||||
|
cloud=len(cloud_recordings),
|
||||||
|
raw_tracks=len(raw_tracks_recordings),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process cloud recordings
|
||||||
|
await _poll_cloud_recordings(cloud_recordings)
|
||||||
|
|
||||||
|
# Process raw-tracks recordings
|
||||||
|
await _poll_raw_tracks_recordings(raw_tracks_recordings, bucket_name)
|
||||||
|
|
||||||
|
|
||||||
|
async def store_cloud_recording(
|
||||||
|
recording_id: NonEmptyString,
|
||||||
|
room_name: NonEmptyString,
|
||||||
|
s3_key: NonEmptyString,
|
||||||
|
duration: int,
|
||||||
|
start_ts: int,
|
||||||
|
source: Literal["webhook", "polling"],
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Store cloud recording reference in meeting table.
|
||||||
|
|
||||||
|
Common function for both webhook and polling code paths.
|
||||||
|
Uses time-based matching to handle duplicate room_name values.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
recording_id: Daily.co recording ID
|
||||||
|
room_name: Daily.co room name
|
||||||
|
s3_key: S3 key where recording is stored
|
||||||
|
duration: Recording duration in seconds
|
||||||
|
start_ts: Unix timestamp when recording started
|
||||||
|
source: "webhook" or "polling" (for logging)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if stored, False if skipped/failed
|
||||||
|
"""
|
||||||
|
recording_start = datetime.fromtimestamp(start_ts, tz=timezone.utc)
|
||||||
|
|
||||||
|
meeting = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name=room_name,
|
||||||
|
recording_start=recording_start,
|
||||||
|
time_window_hours=168, # 1 week
|
||||||
|
)
|
||||||
|
|
||||||
|
if not meeting:
|
||||||
|
logger.warning(
|
||||||
|
f"Cloud recording ({source}): no meeting found within 1-week window",
|
||||||
|
recording_id=recording_id,
|
||||||
|
room_name=room_name,
|
||||||
|
recording_start_ts=start_ts,
|
||||||
|
recording_start=recording_start.isoformat(),
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
success = await meetings_controller.set_cloud_recording_if_missing(
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
s3_key=s3_key,
|
||||||
|
duration=duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
logger.debug(
|
||||||
|
f"Cloud recording ({source}): already set (race lost)",
|
||||||
|
recording_id=recording_id,
|
||||||
|
room_name=room_name,
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Cloud recording stored via {source} (time-based match)",
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
recording_id=recording_id,
|
||||||
|
s3_key=s3_key,
|
||||||
|
duration=duration,
|
||||||
|
time_delta_seconds=abs((meeting.start_date - recording_start).total_seconds()),
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
async def _poll_cloud_recordings(cloud_recordings: List[FinishedRecordingResponse]):
|
||||||
|
"""
|
||||||
|
Store cloud recordings missing from meeting table via polling.
|
||||||
|
|
||||||
|
Uses time-based matching via store_cloud_recording().
|
||||||
|
"""
|
||||||
|
if not cloud_recordings:
|
||||||
|
return
|
||||||
|
|
||||||
|
stored_count = 0
|
||||||
|
for recording in cloud_recordings:
|
||||||
|
# Extract S3 key from recording (cloud recordings use s3key field)
|
||||||
|
s3_key = recording.s3key or (recording.s3.key if recording.s3 else None)
|
||||||
|
if not s3_key:
|
||||||
|
logger.warning(
|
||||||
|
"Cloud recording: missing S3 key",
|
||||||
|
recording_id=recording.id,
|
||||||
|
room_name=recording.room_name,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
stored = await store_cloud_recording(
|
||||||
|
recording_id=recording.id,
|
||||||
|
room_name=recording.room_name,
|
||||||
|
s3_key=s3_key,
|
||||||
|
duration=recording.duration,
|
||||||
|
start_ts=recording.start_ts,
|
||||||
|
source="polling",
|
||||||
|
)
|
||||||
|
if stored:
|
||||||
|
stored_count += 1
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Cloud recording polling complete",
|
||||||
|
total=len(cloud_recordings),
|
||||||
|
stored=stored_count,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _poll_raw_tracks_recordings(
|
||||||
|
raw_tracks_recordings: List[FinishedRecordingResponse],
|
||||||
|
bucket_name: str,
|
||||||
|
):
|
||||||
|
"""Queue raw-tracks recordings missing from DB (existing logic)."""
|
||||||
|
if not raw_tracks_recordings:
|
||||||
|
return
|
||||||
|
|
||||||
|
recording_ids = [rec.id for rec in raw_tracks_recordings]
|
||||||
existing_recordings = await recordings_controller.get_by_ids(recording_ids)
|
existing_recordings = await recordings_controller.get_by_ids(recording_ids)
|
||||||
existing_ids = {rec.id for rec in existing_recordings}
|
existing_ids = {rec.id for rec in existing_recordings}
|
||||||
|
|
||||||
missing_recordings = [
|
missing_recordings = [
|
||||||
rec for rec in finished_recordings if rec.id not in existing_ids
|
rec for rec in raw_tracks_recordings if rec.id not in existing_ids
|
||||||
]
|
]
|
||||||
|
|
||||||
if not missing_recordings:
|
if not missing_recordings:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"All recordings already in DB",
|
"All raw-tracks recordings already in DB",
|
||||||
api_count=len(finished_recordings),
|
api_count=len(raw_tracks_recordings),
|
||||||
existing_count=len(existing_recordings),
|
existing_count=len(existing_recordings),
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Found recordings missing from DB",
|
"Found raw-tracks recordings missing from DB",
|
||||||
missing_count=len(missing_recordings),
|
missing_count=len(missing_recordings),
|
||||||
total_api_count=len(finished_recordings),
|
total_api_count=len(raw_tracks_recordings),
|
||||||
existing_count=len(existing_recordings),
|
existing_count=len(existing_recordings),
|
||||||
)
|
)
|
||||||
|
|
||||||
for recording in missing_recordings:
|
for recording in missing_recordings:
|
||||||
if not recording.tracks:
|
if not recording.tracks:
|
||||||
if recording.status == "finished":
|
logger.warning(
|
||||||
logger.warning(
|
"Finished raw-tracks recording has no tracks (no audio captured)",
|
||||||
"Finished recording has no tracks (no audio captured)",
|
recording_id=recording.id,
|
||||||
recording_id=recording.id,
|
room_name=recording.room_name,
|
||||||
room_name=recording.room_name,
|
)
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.debug(
|
|
||||||
"No tracks in recording yet",
|
|
||||||
recording_id=recording.id,
|
|
||||||
room_name=recording.room_name,
|
|
||||||
status=recording.status,
|
|
||||||
)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"]
|
track_keys = [t.s3Key for t in recording.tracks if t.type == "audio"]
|
||||||
|
|
||||||
if not track_keys:
|
if not track_keys:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"No audio tracks found in recording (only video tracks)",
|
"No audio tracks found in raw-tracks recording",
|
||||||
recording_id=recording.id,
|
recording_id=recording.id,
|
||||||
room_name=recording.room_name,
|
room_name=recording.room_name,
|
||||||
total_tracks=len(recording.tracks),
|
total_tracks=len(recording.tracks),
|
||||||
@@ -433,7 +647,7 @@ async def poll_daily_recordings():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Queueing missing recording for processing",
|
"Queueing missing raw-tracks recording for processing",
|
||||||
recording_id=recording.id,
|
recording_id=recording.id,
|
||||||
room_name=recording.room_name,
|
room_name=recording.room_name,
|
||||||
track_count=len(track_keys),
|
track_count=len(track_keys),
|
||||||
@@ -444,6 +658,7 @@ async def poll_daily_recordings():
|
|||||||
daily_room_name=recording.room_name,
|
daily_room_name=recording.room_name,
|
||||||
recording_id=recording.id,
|
recording_id=recording.id,
|
||||||
track_keys=track_keys,
|
track_keys=track_keys,
|
||||||
|
recording_start_ts=recording.start_ts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -834,61 +1049,43 @@ async def reprocess_failed_daily_recordings():
|
|||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
use_celery = room and room.use_celery
|
# Multitrack reprocessing always uses Hatchet (no Celery fallback)
|
||||||
use_hatchet = not use_celery
|
if not transcript:
|
||||||
|
logger.warning(
|
||||||
if use_hatchet:
|
"No transcript for Hatchet reprocessing, skipping",
|
||||||
if not transcript:
|
|
||||||
logger.warning(
|
|
||||||
"No transcript for Hatchet reprocessing, skipping",
|
|
||||||
recording_id=recording.id,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
workflow_id = await HatchetClientManager.start_workflow(
|
|
||||||
workflow_name="DiarizationPipeline",
|
|
||||||
input_data={
|
|
||||||
"recording_id": recording.id,
|
|
||||||
"tracks": [
|
|
||||||
{"s3_key": k}
|
|
||||||
for k in filter_cam_audio_tracks(recording.track_keys)
|
|
||||||
],
|
|
||||||
"bucket_name": bucket_name,
|
|
||||||
"transcript_id": transcript.id,
|
|
||||||
"room_id": room.id if room else None,
|
|
||||||
},
|
|
||||||
additional_metadata={
|
|
||||||
"transcript_id": transcript.id,
|
|
||||||
"recording_id": recording.id,
|
|
||||||
"reprocess": True,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
await transcripts_controller.update(
|
|
||||||
transcript, {"workflow_run_id": workflow_id}
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"Queued Daily recording for Hatchet reprocessing",
|
|
||||||
recording_id=recording.id,
|
recording_id=recording.id,
|
||||||
workflow_id=workflow_id,
|
|
||||||
room_name=meeting.room_name,
|
|
||||||
track_count=len(recording.track_keys),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.info(
|
|
||||||
"Queueing Daily recording for Celery reprocessing",
|
|
||||||
recording_id=recording.id,
|
|
||||||
room_name=meeting.room_name,
|
|
||||||
track_count=len(recording.track_keys),
|
|
||||||
transcript_status=transcript.status if transcript else None,
|
|
||||||
)
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
process_multitrack_recording.delay(
|
workflow_id = await HatchetClientManager.start_workflow(
|
||||||
bucket_name=bucket_name,
|
workflow_name="DiarizationPipeline",
|
||||||
daily_room_name=meeting.room_name,
|
input_data={
|
||||||
recording_id=recording.id,
|
"recording_id": recording.id,
|
||||||
track_keys=recording.track_keys,
|
"tracks": [
|
||||||
)
|
{"s3_key": k}
|
||||||
|
for k in filter_cam_audio_tracks(recording.track_keys)
|
||||||
|
],
|
||||||
|
"bucket_name": bucket_name,
|
||||||
|
"transcript_id": transcript.id,
|
||||||
|
"room_id": room.id if room else None,
|
||||||
|
},
|
||||||
|
additional_metadata={
|
||||||
|
"transcript_id": transcript.id,
|
||||||
|
"recording_id": recording.id,
|
||||||
|
"reprocess": True,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript, {"workflow_run_id": workflow_id}
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Queued Daily recording for Hatchet reprocessing",
|
||||||
|
recording_id=recording.id,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
room_name=meeting.room_name,
|
||||||
|
track_count=len(recording.track_keys),
|
||||||
|
)
|
||||||
|
|
||||||
reprocessed_count += 1
|
reprocessed_count += 1
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ broadcast messages to all connected websockets.
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import threading
|
|
||||||
|
|
||||||
import redis.asyncio as redis
|
import redis.asyncio as redis
|
||||||
from fastapi import WebSocket
|
from fastapi import WebSocket
|
||||||
@@ -98,6 +97,7 @@ class WebsocketManager:
|
|||||||
|
|
||||||
async def _pubsub_data_reader(self, pubsub_subscriber):
|
async def _pubsub_data_reader(self, pubsub_subscriber):
|
||||||
while True:
|
while True:
|
||||||
|
# timeout=1.0 prevents tight CPU loop when no messages available
|
||||||
message = await pubsub_subscriber.get_message(
|
message = await pubsub_subscriber.get_message(
|
||||||
ignore_subscribe_messages=True
|
ignore_subscribe_messages=True
|
||||||
)
|
)
|
||||||
@@ -109,29 +109,38 @@ class WebsocketManager:
|
|||||||
await socket.send_json(data)
|
await socket.send_json(data)
|
||||||
|
|
||||||
|
|
||||||
|
# Process-global singleton to ensure only one WebsocketManager instance exists.
|
||||||
|
# Multiple instances would cause resource leaks and CPU issues.
|
||||||
|
_ws_manager: WebsocketManager | None = None
|
||||||
|
|
||||||
|
|
||||||
def get_ws_manager() -> WebsocketManager:
|
def get_ws_manager() -> WebsocketManager:
|
||||||
"""
|
"""
|
||||||
Returns the WebsocketManager instance for managing websockets.
|
Returns the global WebsocketManager singleton.
|
||||||
|
|
||||||
This function initializes and returns the WebsocketManager instance,
|
Creates instance on first call, subsequent calls return cached instance.
|
||||||
which is responsible for managing websockets and handling websocket
|
Thread-safe via GIL. Concurrent initialization may create duplicate
|
||||||
connections.
|
instances but last write wins (acceptable for this use case).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
WebsocketManager: The initialized WebsocketManager instance.
|
WebsocketManager: The global WebsocketManager instance.
|
||||||
|
|
||||||
Raises:
|
|
||||||
ImportError: If the 'reflector.settings' module cannot be imported.
|
|
||||||
RedisConnectionError: If there is an error connecting to the Redis server.
|
|
||||||
"""
|
"""
|
||||||
local = threading.local()
|
global _ws_manager
|
||||||
if hasattr(local, "ws_manager"):
|
|
||||||
return local.ws_manager
|
|
||||||
|
|
||||||
|
if _ws_manager is not None:
|
||||||
|
return _ws_manager
|
||||||
|
|
||||||
|
# No lock needed - GIL makes this safe enough
|
||||||
|
# Worst case: race creates two instances, last assignment wins
|
||||||
pubsub_client = RedisPubSubManager(
|
pubsub_client = RedisPubSubManager(
|
||||||
host=settings.REDIS_HOST,
|
host=settings.REDIS_HOST,
|
||||||
port=settings.REDIS_PORT,
|
port=settings.REDIS_PORT,
|
||||||
)
|
)
|
||||||
ws_manager = WebsocketManager(pubsub_client=pubsub_client)
|
_ws_manager = WebsocketManager(pubsub_client=pubsub_client)
|
||||||
local.ws_manager = ws_manager
|
return _ws_manager
|
||||||
return ws_manager
|
|
||||||
|
|
||||||
|
def reset_ws_manager() -> None:
|
||||||
|
"""Reset singleton for testing. DO NOT use in production."""
|
||||||
|
global _ws_manager
|
||||||
|
_ws_manager = None
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
import os
|
import os
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from tempfile import NamedTemporaryFile
|
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from reflector.schemas.platform import WHEREBY_PLATFORM
|
from reflector.schemas.platform import DAILY_PLATFORM, WHEREBY_PLATFORM
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
@@ -15,6 +14,7 @@ def register_mock_platform():
|
|||||||
from reflector.video_platforms.registry import register_platform
|
from reflector.video_platforms.registry import register_platform
|
||||||
|
|
||||||
register_platform(WHEREBY_PLATFORM, MockPlatformClient)
|
register_platform(WHEREBY_PLATFORM, MockPlatformClient)
|
||||||
|
register_platform(DAILY_PLATFORM, MockPlatformClient)
|
||||||
yield
|
yield
|
||||||
|
|
||||||
|
|
||||||
@@ -333,11 +333,14 @@ def celery_enable_logging():
|
|||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def celery_config():
|
def celery_config():
|
||||||
with NamedTemporaryFile() as f:
|
redis_host = os.environ.get("REDIS_HOST", "localhost")
|
||||||
yield {
|
redis_port = os.environ.get("REDIS_PORT", "6379")
|
||||||
"broker_url": "memory://",
|
# Use db 2 to avoid conflicts with main app
|
||||||
"result_backend": f"db+sqlite:///{f.name}",
|
redis_url = f"redis://{redis_host}:{redis_port}/2"
|
||||||
}
|
yield {
|
||||||
|
"broker_url": redis_url,
|
||||||
|
"result_backend": redis_url,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
@@ -370,9 +373,12 @@ async def ws_manager_in_memory(monkeypatch):
|
|||||||
def __init__(self, queue: asyncio.Queue):
|
def __init__(self, queue: asyncio.Queue):
|
||||||
self.queue = queue
|
self.queue = queue
|
||||||
|
|
||||||
async def get_message(self, ignore_subscribe_messages: bool = True):
|
async def get_message(
|
||||||
|
self, ignore_subscribe_messages: bool = True, timeout: float | None = None
|
||||||
|
):
|
||||||
|
wait_timeout = timeout if timeout is not None else 0.05
|
||||||
try:
|
try:
|
||||||
return await asyncio.wait_for(self.queue.get(), timeout=0.05)
|
return await asyncio.wait_for(self.queue.get(), timeout=wait_timeout)
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
959
server/tests/test_dag_progress.py
Normal file
959
server/tests/test_dag_progress.py
Normal file
@@ -0,0 +1,959 @@
|
|||||||
|
"""Tests for DAG progress models and transform function.
|
||||||
|
|
||||||
|
Tests the extract_dag_tasks function that converts Hatchet V1WorkflowRunDetails
|
||||||
|
into structured DagTask list for frontend consumption.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from reflector.hatchet.constants import TaskName
|
||||||
|
from reflector.hatchet.dag_progress import (
|
||||||
|
DagStatusData,
|
||||||
|
DagTask,
|
||||||
|
DagTaskStatus,
|
||||||
|
extract_dag_tasks,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_shape_item(
|
||||||
|
step_id: str,
|
||||||
|
task_name: str,
|
||||||
|
children_step_ids: list[str] | None = None,
|
||||||
|
) -> MagicMock:
|
||||||
|
"""Create a mock WorkflowRunShapeItemForWorkflowRunDetails."""
|
||||||
|
item = MagicMock()
|
||||||
|
item.step_id = step_id
|
||||||
|
item.task_name = task_name
|
||||||
|
item.children_step_ids = children_step_ids or []
|
||||||
|
return item
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_summary(
|
||||||
|
step_id: str,
|
||||||
|
status: str = "QUEUED",
|
||||||
|
started_at: datetime | None = None,
|
||||||
|
finished_at: datetime | None = None,
|
||||||
|
duration: int | None = None,
|
||||||
|
error_message: str | None = None,
|
||||||
|
task_external_id: str | None = None,
|
||||||
|
num_spawned_children: int | None = None,
|
||||||
|
children: list | None = None,
|
||||||
|
) -> MagicMock:
|
||||||
|
"""Create a mock V1TaskSummary."""
|
||||||
|
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||||
|
|
||||||
|
task = MagicMock()
|
||||||
|
task.step_id = step_id
|
||||||
|
task.status = V1TaskStatus(status)
|
||||||
|
task.started_at = started_at
|
||||||
|
task.finished_at = finished_at
|
||||||
|
task.duration = duration
|
||||||
|
task.error_message = error_message
|
||||||
|
task.task_external_id = task_external_id or f"ext-{step_id}"
|
||||||
|
task.num_spawned_children = num_spawned_children
|
||||||
|
task.children = children or []
|
||||||
|
return task
|
||||||
|
|
||||||
|
|
||||||
|
def _make_details(
|
||||||
|
shape: list,
|
||||||
|
tasks: list,
|
||||||
|
run_id: str = "test-run-id",
|
||||||
|
) -> MagicMock:
|
||||||
|
"""Create a mock V1WorkflowRunDetails."""
|
||||||
|
details = MagicMock()
|
||||||
|
details.shape = shape
|
||||||
|
details.tasks = tasks
|
||||||
|
details.task_events = []
|
||||||
|
details.run = MagicMock()
|
||||||
|
details.run.metadata = MagicMock()
|
||||||
|
details.run.metadata.id = run_id
|
||||||
|
return details
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractDagTasksBasic:
    """Single-task workflow runs: status, timing and error fields."""

    def test_empty_shape_returns_empty_list(self):
        """An empty shape with no tasks yields an empty list."""
        run_details = _make_details(shape=[], tasks=[])

        assert extract_dag_tasks(run_details) == []

    def test_single_task_queued(self):
        """A QUEUED task is returned with every optional field left unset."""
        run_details = _make_details(
            [_make_shape_item("s1", "get_recording")],
            [_make_task_summary("s1", status="QUEUED")],
        )

        extracted = extract_dag_tasks(run_details)

        assert len(extracted) == 1
        queued = extracted[0]
        assert queued.name == "get_recording"
        assert queued.status == DagTaskStatus.QUEUED
        assert queued.parents == []
        assert queued.started_at is None
        assert queued.finished_at is None
        assert queued.duration_seconds is None
        assert queued.error is None
        assert queued.children_total is None
        assert queued.children_completed is None
        assert queued.progress_pct is None

    def test_completed_task_with_duration(self):
        """A duration of 1500 (milliseconds) is reported as 1.5 seconds."""
        timestamp = datetime.now(timezone.utc)
        completed_summary = _make_task_summary(
            "s1",
            status="COMPLETED",
            started_at=timestamp,
            finished_at=timestamp,
            duration=1500,  # milliseconds
        )
        run_details = _make_details(
            [_make_shape_item("s1", "get_recording")],
            [completed_summary],
        )

        completed = extract_dag_tasks(run_details)[0]

        assert completed.status == DagTaskStatus.COMPLETED
        assert completed.duration_seconds == 1.5
        assert completed.started_at == timestamp
        assert completed.finished_at == timestamp

    def test_failed_task_with_error(self):
        """A traceback error message is reduced to its final exception line."""
        traceback_text = (
            "Traceback (most recent call last):\n"
            " File something\n"
            "ConnectionError: connection refused"
        )
        run_details = _make_details(
            [_make_shape_item("s1", "get_recording")],
            [_make_task_summary("s1", status="FAILED", error_message=traceback_text)],
        )

        failed = extract_dag_tasks(run_details)[0]

        assert failed.status == DagTaskStatus.FAILED
        assert failed.error == "ConnectionError: connection refused"

    def test_running_task(self):
        """A RUNNING task keeps its start time; duration 5000 becomes 5.0 seconds."""
        timestamp = datetime.now(timezone.utc)
        running_summary = _make_task_summary(
            "s1",
            status="RUNNING",
            started_at=timestamp,
            duration=5000,
        )
        run_details = _make_details(
            [_make_shape_item("s1", "mixdown_tracks")],
            [running_summary],
        )

        running = extract_dag_tasks(run_details)[0]

        assert running.status == DagTaskStatus.RUNNING
        assert running.started_at == timestamp
        assert running.duration_seconds == 5.0

    def test_cancelled_task(self):
        """A CANCELLED task summary maps to DagTaskStatus.CANCELLED."""
        run_details = _make_details(
            [_make_shape_item("s1", "post_zulip")],
            [_make_task_summary("s1", status="CANCELLED")],
        )

        cancelled = extract_dag_tasks(run_details)[0]

        assert cancelled.status == DagTaskStatus.CANCELLED
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractDagTasksTopology:
    """Parent resolution and topological ordering of extracted tasks."""

    @staticmethod
    def _assert_parents_precede_children(result):
        """Assert that every task is listed after each of its parents."""
        name_to_index = {t.name: i for i, t in enumerate(result)}
        for task in result:
            for parent_name in task.parents:
                assert name_to_index[parent_name] < name_to_index[task.name], (
                    f"Parent {parent_name} (idx {name_to_index[parent_name]}) "
                    f"must appear before {task.name} (idx {name_to_index[task.name]})"
                )

    def test_linear_chain_parents(self):
        """A -> B -> C: each task's only parent is its predecessor."""
        chain_shape = [
            _make_shape_item("s1", "get_recording", children_step_ids=["s2"]),
            _make_shape_item("s2", "get_participants", children_step_ids=["s3"]),
            _make_shape_item("s3", "process_tracks"),
        ]
        chain_tasks = [
            _make_task_summary(step_id, status=state)
            for step_id, state in (
                ("s1", "COMPLETED"),
                ("s2", "COMPLETED"),
                ("s3", "QUEUED"),
            )
        ]

        ordered = extract_dag_tasks(_make_details(chain_shape, chain_tasks))

        assert [t.name for t in ordered] == [
            "get_recording",
            "get_participants",
            "process_tracks",
        ]
        first, second, third = ordered[0], ordered[1], ordered[2]
        assert first.parents == []
        assert second.parents == ["get_recording"]
        assert third.parents == ["get_participants"]

    def test_diamond_dag(self):
        """
        Diamond: A -> B, A -> C, B -> D, C -> D.
        D must list both B and C as parents; their relative order is not checked.
        """
        diamond_shape = [
            _make_shape_item("s1", "get_recording", children_step_ids=["s2", "s3"]),
            _make_shape_item("s2", "mixdown_tracks", children_step_ids=["s4"]),
            _make_shape_item("s3", "detect_topics", children_step_ids=["s4"]),
            _make_shape_item("s4", "finalize"),
        ]
        diamond_tasks = [
            _make_task_summary(step_id, status=state)
            for step_id, state in (
                ("s1", "COMPLETED"),
                ("s2", "RUNNING"),
                ("s3", "RUNNING"),
                ("s4", "QUEUED"),
            )
        ]

        ordered = extract_dag_tasks(_make_details(diamond_shape, diamond_tasks))

        # Topological: s1 first, s2/s3 in either order, s4 last.
        assert ordered[0].name == "get_recording"
        assert ordered[-1].name == "finalize"
        sink = ordered[-1]
        assert set(sink.parents) == {"mixdown_tracks", "detect_topics"}

    def test_topological_order_is_stable(self):
        """Ordering is deterministic: the two roots come out sorted by step_id."""
        shape = [
            _make_shape_item("s_c", "task_c"),
            _make_shape_item("s_a", "task_a", children_step_ids=["s_c"]),
            _make_shape_item("s_b", "task_b", children_step_ids=["s_c"]),
        ]
        tasks = [
            _make_task_summary(step_id, status=state)
            for step_id, state in (
                ("s_c", "QUEUED"),
                ("s_a", "COMPLETED"),
                ("s_b", "COMPLETED"),
            )
        ]

        ordered = extract_dag_tasks(_make_details(shape, tasks))

        # s_a and s_b are both roots (in-degree 0); the shared child comes last.
        ordered_names = [t.name for t in ordered]
        assert ordered_names[-1] == "task_c"
        # Roots are emitted by step_id order: s_a < s_b.
        assert ordered_names[0] == "task_a"
        assert ordered_names[1] == "task_b"

    def test_production_dag_shape(self):
        """Exercise the real 15-task pipeline topology with mixed statuses.

        Models a mid-pipeline snapshot: the early tasks are completed, the
        middle ones are running and the remaining ones are still queued.
        """
        # Production DAG, one row per task: (step_id, task name[, child step ids]).
        # Leaf tasks omit the third element so the helper's default is used.
        #   get_recording -> get_participants
        #   get_participants -> process_tracks
        #   process_tracks -> mixdown_tracks, detect_topics, finalize
        #   mixdown_tracks -> generate_waveform
        #   detect_topics -> generate_title, extract_subjects
        #   extract_subjects -> process_subjects, identify_action_items
        #   process_subjects -> generate_recap
        #   generate_title -> finalize
        #   generate_recap -> finalize
        #   identify_action_items -> finalize
        #   finalize -> cleanup_consent
        #   cleanup_consent -> post_zulip, send_webhook
        topology = [
            ("s_get_recording", TaskName.GET_RECORDING, ["s_get_participants"]),
            ("s_get_participants", TaskName.GET_PARTICIPANTS, ["s_process_tracks"]),
            (
                "s_process_tracks",
                TaskName.PROCESS_TRACKS,
                ["s_mixdown_tracks", "s_detect_topics", "s_finalize"],
            ),
            ("s_mixdown_tracks", TaskName.MIXDOWN_TRACKS, ["s_generate_waveform"]),
            ("s_generate_waveform", TaskName.GENERATE_WAVEFORM),
            (
                "s_detect_topics",
                TaskName.DETECT_TOPICS,
                ["s_generate_title", "s_extract_subjects"],
            ),
            ("s_generate_title", TaskName.GENERATE_TITLE, ["s_finalize"]),
            (
                "s_extract_subjects",
                TaskName.EXTRACT_SUBJECTS,
                ["s_process_subjects", "s_identify_action_items"],
            ),
            ("s_process_subjects", TaskName.PROCESS_SUBJECTS, ["s_generate_recap"]),
            ("s_generate_recap", TaskName.GENERATE_RECAP, ["s_finalize"]),
            (
                "s_identify_action_items",
                TaskName.IDENTIFY_ACTION_ITEMS,
                ["s_finalize"],
            ),
            ("s_finalize", TaskName.FINALIZE, ["s_cleanup_consent"]),
            (
                "s_cleanup_consent",
                TaskName.CLEANUP_CONSENT,
                ["s_post_zulip", "s_send_webhook"],
            ),
            ("s_post_zulip", TaskName.POST_ZULIP),
            ("s_send_webhook", TaskName.SEND_WEBHOOK),
        ]
        shape = [_make_shape_item(*row) for row in topology]

        # Mid-pipeline: early tasks done, middle running, later queued.
        step_states = [
            ("s_get_recording", "COMPLETED"),
            ("s_get_participants", "COMPLETED"),
            ("s_process_tracks", "COMPLETED"),
            ("s_mixdown_tracks", "RUNNING"),
            ("s_generate_waveform", "QUEUED"),
            ("s_detect_topics", "RUNNING"),
            ("s_generate_title", "QUEUED"),
            ("s_extract_subjects", "QUEUED"),
            ("s_process_subjects", "QUEUED"),
            ("s_generate_recap", "QUEUED"),
            ("s_identify_action_items", "QUEUED"),
            ("s_finalize", "QUEUED"),
            ("s_cleanup_consent", "QUEUED"),
            ("s_post_zulip", "QUEUED"),
            ("s_send_webhook", "QUEUED"),
        ]
        tasks = [
            _make_task_summary(step_id, status=state)
            for step_id, state in step_states
        ]

        result = extract_dag_tasks(_make_details(shape, tasks))

        def find(name):
            # First extracted task carrying the given name.
            return next(t for t in result if t.name == name)

        # All 15 tasks are present.
        assert len(result) == 15
        result_names = [t.name for t in result]
        assert set(result_names) == {
            TaskName.GET_RECORDING,
            TaskName.GET_PARTICIPANTS,
            TaskName.PROCESS_TRACKS,
            TaskName.MIXDOWN_TRACKS,
            TaskName.GENERATE_WAVEFORM,
            TaskName.DETECT_TOPICS,
            TaskName.GENERATE_TITLE,
            TaskName.EXTRACT_SUBJECTS,
            TaskName.PROCESS_SUBJECTS,
            TaskName.GENERATE_RECAP,
            TaskName.IDENTIFY_ACTION_ITEMS,
            TaskName.FINALIZE,
            TaskName.CLEANUP_CONSENT,
            TaskName.POST_ZULIP,
            TaskName.SEND_WEBHOOK,
        }

        # Topological order invariant: no task appears before its parents.
        self._assert_parents_precede_children(result)

        # finalize has exactly 4 parents.
        finalize = find(TaskName.FINALIZE)
        assert set(finalize.parents) == {
            TaskName.PROCESS_TRACKS,
            TaskName.GENERATE_TITLE,
            TaskName.GENERATE_RECAP,
            TaskName.IDENTIFY_ACTION_ITEMS,
        }

        # cleanup_consent has a single parent (finalize).
        cleanup = find(TaskName.CLEANUP_CONSENT)
        assert cleanup.parents == [TaskName.FINALIZE]

        # post_zulip and send_webhook both hang off cleanup_consent.
        post_zulip = find(TaskName.POST_ZULIP)
        send_webhook = find(TaskName.SEND_WEBHOOK)
        assert post_zulip.parents == [TaskName.CLEANUP_CONSENT]
        assert send_webhook.parents == [TaskName.CLEANUP_CONSENT]

        # Statuses are carried over from the task summaries.
        assert find(TaskName.GET_RECORDING).status == DagTaskStatus.COMPLETED
        assert find(TaskName.MIXDOWN_TRACKS).status == DagTaskStatus.RUNNING
        assert find(TaskName.FINALIZE).status == DagTaskStatus.QUEUED

    def test_topological_sort_invariant_complex_dag(self):
        """In a complex DAG every task's parents appear earlier in the list.

        The graph branches and merges more widely than the diamond case.
        """
        # DAG: A -> B, A -> C, A -> D, B -> E, C -> E, C -> F, D -> F, E -> G, F -> G
        topology = [
            ("s_a", "task_a", ["s_b", "s_c", "s_d"]),
            ("s_b", "task_b", ["s_e"]),
            ("s_c", "task_c", ["s_e", "s_f"]),
            ("s_d", "task_d", ["s_f"]),
            ("s_e", "task_e", ["s_g"]),
            ("s_f", "task_f", ["s_g"]),
            ("s_g", "task_g"),
        ]
        shape = [_make_shape_item(*row) for row in topology]
        tasks = [
            _make_task_summary(step_id, status=state)
            for step_id, state in (
                ("s_a", "COMPLETED"),
                ("s_b", "COMPLETED"),
                ("s_c", "RUNNING"),
                ("s_d", "COMPLETED"),
                ("s_e", "QUEUED"),
                ("s_f", "QUEUED"),
                ("s_g", "QUEUED"),
            )
        ]

        result = extract_dag_tasks(_make_details(shape, tasks))

        def find(name):
            # First extracted task carrying the given name.
            return next(t for t in result if t.name == name)

        assert len(result) == 7

        # Invariant: every parent appears before its child.
        self._assert_parents_precede_children(result)

        # task_g has 2 parents.
        assert set(find("task_g").parents) == {"task_e", "task_f"}

        # task_e has 2 parents.
        assert set(find("task_e").parents) == {"task_b", "task_c"}

        # task_a is the root, so it comes first and has no parents.
        root = result[0]
        assert root.name == "task_a"
        assert root.parents == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractDagTasksFanOut:
    """Child counters reported for tasks that spawn children."""

    def test_fan_out_children_counts(self):
        """Four spawned children, two of them COMPLETED, give totals 4 and 2."""
        from hatchet_sdk.clients.rest.models import V1TaskStatus

        def make_child(state):
            # Only the ``status`` attribute is configured on each child mock.
            spawned = MagicMock()
            spawned.status = V1TaskStatus(state)
            return spawned

        spawned_children = [
            make_child(state)
            for state in ["COMPLETED", "COMPLETED", "RUNNING", "QUEUED"]
        ]
        parent_summary = _make_task_summary(
            "s1",
            status="RUNNING",
            num_spawned_children=4,
            children=spawned_children,
        )
        run_details = _make_details(
            [_make_shape_item("s1", "process_tracks")],
            [parent_summary],
        )

        fan_out = extract_dag_tasks(run_details)[0]

        assert fan_out.children_total == 4
        assert fan_out.children_completed == 2

    def test_no_children_when_no_spawn(self):
        """num_spawned_children=None leaves both child counters as None."""
        run_details = _make_details(
            [_make_shape_item("s1", "get_recording")],
            [_make_task_summary("s1", status="COMPLETED", num_spawned_children=None)],
        )

        plain = extract_dag_tasks(run_details)[0]

        assert plain.children_total is None
        assert plain.children_completed is None

    def test_zero_spawned_children(self):
        """num_spawned_children=0 also leaves both child counters as None."""
        run_details = _make_details(
            [_make_shape_item("s1", "process_tracks")],
            [_make_task_summary("s1", status="COMPLETED", num_spawned_children=0)],
        )

        plain = extract_dag_tasks(run_details)[0]

        assert plain.children_total is None
        assert plain.children_completed is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractDagTasksErrorExtraction:
    """How the task's error_message is turned into the ``error`` field."""

    def test_simple_error(self):
        """A single-line error message is passed through unchanged."""
        failed_summary = _make_task_summary(
            "s1", status="FAILED", error_message="ValueError: no tracks"
        )
        run_details = _make_details(
            [_make_shape_item("s1", "mixdown_tracks")],
            [failed_summary],
        )

        extracted = extract_dag_tasks(run_details)

        assert extracted[0].error == "ValueError: no tracks"

    def test_traceback_extracts_meaningful_line(self):
        """For a traceback, the final exception line is reported."""
        traceback_lines = [
            "Traceback (most recent call last):\n",
            ' File "/app/something.py", line 42\n',
            "RuntimeError: out of memory",
        ]
        traceback_text = "".join(traceback_lines)
        run_details = _make_details(
            [_make_shape_item("s1", "mixdown_tracks")],
            [_make_task_summary("s1", status="FAILED", error_message=traceback_text)],
        )

        extracted = extract_dag_tasks(run_details)

        assert extracted[0].error == "RuntimeError: out of memory"

    def test_no_error_when_none(self):
        """error_message=None results in error being None."""
        run_details = _make_details(
            [_make_shape_item("s1", "get_recording")],
            [_make_task_summary("s1", status="COMPLETED", error_message=None)],
        )

        extracted = extract_dag_tasks(run_details)

        assert extracted[0].error is None

    def test_empty_error_message(self):
        """An empty error_message on a FAILED task also results in error being None."""
        run_details = _make_details(
            [_make_shape_item("s1", "get_recording")],
            [_make_task_summary("s1", status="FAILED", error_message="")],
        )

        extracted = extract_dag_tasks(run_details)

        assert extracted[0].error is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractDagTasksMissingData:
    """Edge cases where the run details lack task data."""

    def test_shape_without_matching_task(self):
        """A shape step with no entry in the tasks list is still reported."""
        orphan_shape = [_make_shape_item("s1", "get_recording")]
        run_details = _make_details(orphan_shape, [])  # no matching task

        extracted = extract_dag_tasks(run_details)

        assert len(extracted) == 1
        orphan = extracted[0]
        assert orphan.name == "get_recording"
        # QUEUED is the default when there is no task data.
        assert orphan.status == DagTaskStatus.QUEUED
        assert orphan.started_at is None

    def test_none_shape_returns_empty(self):
        """A shape of None is handled like an empty shape."""
        run_details = _make_details(shape=[], tasks=[])
        run_details.shape = None

        assert extract_dag_tasks(run_details) == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestDagStatusData:
    """JSON serialization of the DagStatusData model."""

    def test_serialization(self):
        """model_dump(mode="json") exposes the run id and the task's fields."""
        began = datetime(2025, 1, 1, tzinfo=timezone.utc)
        ended = datetime(2025, 1, 1, 0, 0, 1, tzinfo=timezone.utc)
        completed_task = DagTask(
            name="get_recording",
            status=DagTaskStatus.COMPLETED,
            started_at=began,
            finished_at=ended,
            duration_seconds=1.0,
            parents=[],
            error=None,
            children_total=None,
            children_completed=None,
            progress_pct=None,
        )
        status_data = DagStatusData(workflow_run_id="test-123", tasks=[completed_task])

        dumped = status_data.model_dump(mode="json")

        assert dumped["workflow_run_id"] == "test-123"
        assert len(dumped["tasks"]) == 1
        dumped_task = dumped["tasks"][0]
        assert dumped_task["name"] == "get_recording"
        assert dumped_task["status"] == "completed"
        assert dumped_task["duration_seconds"] == 1.0
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncContextManager:
    """Do-nothing async context manager used to mock fresh_db_connection."""

    async def __aenter__(self):
        # Implicitly returns None, so ``async with ... as x`` binds x to None.
        pass

    async def __aexit__(self, *exc_details):
        # Implicitly returns None (falsy): exceptions from the body propagate.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestBroadcastDagStatus:
    """Tests for the broadcast_dag_status function.

    broadcast_dag_status does its imports lazily inside the function body, so
    the collaborators are patched at their source modules before it is called.
    Importing daily_multitrack_pipeline sets off a cascade
    (subject_processing -> HatchetClientManager.get_client at module level),
    which is why _instance is populated before the import: it keeps the real
    SDK from being initialised.
    """

    @pytest.fixture(autouse=True)
    def _setup_hatchet_mock(self):
        """Swap HatchetClientManager._instance for a mock around each test.

        Workflow modules call get_client() in module-level code while being
        imported; having _instance set beforehand avoids ClientConfig
        validation. The previous value is put back after the test.
        """
        from reflector.hatchet.client import HatchetClientManager

        previous_instance = HatchetClientManager._instance
        HatchetClientManager._instance = MagicMock()
        yield
        HatchetClientManager._instance = previous_instance

    @pytest.mark.asyncio
    async def test_broadcasts_dag_status(self):
        """The run is fetched, transformed and broadcast as a DAG_STATUS event."""
        transcript = MagicMock()
        transcript.id = "t-123"

        run_details = _make_details(
            shape=[_make_shape_item("s1", "get_recording")],
            tasks=[_make_task_summary("s1", status="COMPLETED")],
            run_id="wf-abc",
        )

        hatchet_client = MagicMock()
        hatchet_client.runs.aio_get = AsyncMock(return_value=run_details)

        with (
            patch(
                "reflector.hatchet.client.HatchetClientManager.get_client",
                return_value=hatchet_client,
            ),
            patch(
                "reflector.hatchet.broadcast.append_event_and_broadcast",
                new_callable=AsyncMock,
            ) as broadcast_spy,
            patch(
                "reflector.db.transcripts.transcripts_controller.get_by_id",
                new_callable=AsyncMock,
                return_value=transcript,
            ),
            patch(
                "reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
                return_value=AsyncContextManager(),
            ),
        ):
            from reflector.hatchet.dag_progress import broadcast_dag_status

            await broadcast_dag_status("t-123", "wf-abc")

        hatchet_client.runs.aio_get.assert_called_once_with("wf-abc")
        broadcast_spy.assert_called_once()

        positional = broadcast_spy.call_args[0]
        assert positional[0] == "t-123"  # transcript_id
        assert positional[1] is transcript  # transcript
        assert positional[2] == "DAG_STATUS"  # event_name
        payload = positional[3]
        assert isinstance(payload, DagStatusData)
        assert payload.workflow_run_id == "wf-abc"
        assert len(payload.tasks) == 1

    @pytest.mark.asyncio
    async def test_swallows_exceptions(self):
        """A failure inside broadcast_dag_status must not reach the caller."""
        from reflector.hatchet.dag_progress import broadcast_dag_status

        failing_db = patch(
            "reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
            side_effect=RuntimeError("db exploded"),
        )
        with failing_db:
            # Passing means the call returned without raising.
            await broadcast_dag_status("t-123", "wf-abc")

    @pytest.mark.asyncio
    async def test_no_broadcast_when_transcript_not_found(self):
        """Nothing is broadcast when the transcript lookup returns None."""
        run_details = _make_details(
            shape=[_make_shape_item("s1", "get_recording")],
            tasks=[_make_task_summary("s1", status="COMPLETED")],
        )

        hatchet_client = MagicMock()
        hatchet_client.runs.aio_get = AsyncMock(return_value=run_details)

        with (
            patch(
                "reflector.hatchet.client.HatchetClientManager.get_client",
                return_value=hatchet_client,
            ),
            patch(
                "reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
                return_value=AsyncContextManager(),
            ),
            patch(
                "reflector.db.transcripts.transcripts_controller.get_by_id",
                new_callable=AsyncMock,
                return_value=None,
            ),
            patch(
                "reflector.hatchet.broadcast.append_event_and_broadcast",
                new_callable=AsyncMock,
            ) as broadcast_spy,
        ):
            from reflector.hatchet.dag_progress import broadcast_dag_status

            await broadcast_dag_status("t-123", "wf-abc")

        broadcast_spy.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
class TestMakeAudioProgressLoggerWithBroadcast:
|
||||||
|
"""Test make_audio_progress_logger with transcript_id for transient broadcasts."""
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def _setup_hatchet_mock(self):
    """Ensure HatchetClientManager._instance is set so imports skip real SDK init.

    A mock is installed only when no instance exists yet; whatever value was
    there before the test is written back afterwards.
    """
    from reflector.hatchet.client import HatchetClientManager

    previous_instance = HatchetClientManager._instance
    needs_placeholder = previous_instance is None
    if needs_placeholder:
        HatchetClientManager._instance = MagicMock()
    yield
    HatchetClientManager._instance = previous_instance
|
||||||
|
|
||||||
|
def test_broadcasts_transient_progress_event(self):
    """With a transcript_id and a non-None progress_pct, one event is broadcast.

    The test runs on a private event loop. ``loop.create_task`` is wrapped so
    the tasks scheduled by the callback can be awaited before the assertions.
    """
    import asyncio

    from reflector.hatchet.workflows.daily_multitrack_pipeline import (
        make_audio_progress_logger,
    )

    ctx = MagicMock()
    ctx.log = MagicMock()

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    mock_broadcast = AsyncMock()
    tasks_created = []

    original_create_task = loop.create_task

    def capture_create_task(coro, **kwargs):
        # Forward optional keyword arguments (e.g. name=) that
        # loop.create_task accepts, and remember the task for later awaiting.
        task = original_create_task(coro, **kwargs)
        tasks_created.append(task)
        return task

    try:
        with (
            patch(
                "reflector.hatchet.broadcast.broadcast_event",
                mock_broadcast,
            ),
            patch.object(loop, "create_task", side_effect=capture_create_task),
        ):
            callback = make_audio_progress_logger(
                ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id="t-123"
            )
            callback(50.0, 100.0)

            # Run pending tasks
            if tasks_created:
                loop.run_until_complete(asyncio.gather(*tasks_created))

            mock_broadcast.assert_called_once()
            event_arg = mock_broadcast.call_args[0][1]
            assert event_arg.event == "DAG_TASK_PROGRESS"
            assert event_arg.data["task_name"] == TaskName.MIXDOWN_TRACKS
            assert event_arg.data["progress_pct"] == 50.0
    finally:
        loop.close()
        # Uninstall the loop: leaving a closed loop registered as the current
        # event loop would leak into whatever runs next in this thread.
        asyncio.set_event_loop(None)
|
||||||
|
|
||||||
|
def test_no_broadcast_without_transcript_id(self):
    """No event is broadcast when transcript_id is None."""
    from reflector.hatchet.workflows.daily_multitrack_pipeline import (
        make_audio_progress_logger,
    )

    hatchet_ctx = MagicMock()
    broadcast_patch = patch(
        "reflector.hatchet.broadcast.broadcast_event",
        new_callable=AsyncMock,
    )

    with broadcast_patch as broadcast_spy:
        report_progress = make_audio_progress_logger(
            hatchet_ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id=None
        )
        report_progress(50.0, 100.0)
        broadcast_spy.assert_not_called()
|
||||||
|
|
||||||
|
def test_no_broadcast_when_progress_pct_is_none(self):
    """No event is broadcast for a None progress_pct, even with a transcript_id."""
    from reflector.hatchet.workflows.daily_multitrack_pipeline import (
        make_audio_progress_logger,
    )

    hatchet_ctx = MagicMock()
    broadcast_patch = patch(
        "reflector.hatchet.broadcast.broadcast_event",
        new_callable=AsyncMock,
    )

    with broadcast_patch as broadcast_spy:
        report_progress = make_audio_progress_logger(
            hatchet_ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id="t-123"
        )
        report_progress(None, 100.0)
        broadcast_spy.assert_not_called()
|
||||||
|
|
||||||
|
def test_logging_throttled_by_interval(self):
    """With interval=5.0, rapid calls only log once the interval has elapsed.

    The throttle applies to ctx.log() calls. Broadcasts (fire-and-forget)
    are not throttled: one is issued on every call that has both a
    transcript_id and a progress_pct.
    """
    import asyncio
    import time as time_mod

    from reflector.hatchet.workflows.daily_multitrack_pipeline import (
        make_audio_progress_logger,
    )

    ctx = MagicMock()
    ctx.log = MagicMock()

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    mock_broadcast = AsyncMock()
    tasks_created = []
    original_create_task = loop.create_task

    def capture_create_task(coro, **kwargs):
        # Forward optional keyword arguments (e.g. name=) that
        # loop.create_task accepts, and remember the task for later awaiting.
        task = original_create_task(coro, **kwargs)
        tasks_created.append(task)
        return task

    # Controlled monotonic values for the 4 calls from make_audio_progress_logger:
    # init (start_time, last_log_time), call1 (now), call2 (now), call3 (now)
    # After those, fall back to real time.monotonic() for asyncio internals.
    controlled_values = [100.0, 100.0, 101.0, 106.0]
    call_index = [0]
    real_monotonic = time_mod.monotonic

    def mock_monotonic():
        if call_index[0] < len(controlled_values):
            val = controlled_values[call_index[0]]
            call_index[0] += 1
            return val
        return real_monotonic()

    try:
        with (
            patch(
                "reflector.hatchet.workflows.daily_multitrack_pipeline.time.monotonic",
                side_effect=mock_monotonic,
            ),
            patch(
                "reflector.hatchet.broadcast.broadcast_event",
                mock_broadcast,
            ),
            patch.object(loop, "create_task", side_effect=capture_create_task),
        ):
            callback = make_audio_progress_logger(
                ctx, TaskName.MIXDOWN_TRACKS, interval=5.0, transcript_id="t-123"
            )

            # Call 1 at t=100.0: 100.0 - 100.0 = 0.0 < 5.0 => no log
            callback(25.0, 50.0)
            assert ctx.log.call_count == 0

            # Call 2 at t=101.0: 101.0 - 100.0 = 1.0 < 5.0 => no log
            callback(50.0, 100.0)
            assert ctx.log.call_count == 0

            # Call 3 at t=106.0: 106.0 - 100.0 = 6.0 >= 5.0 => logs
            callback(75.0, 150.0)
            assert ctx.log.call_count == 1

            # Run pending broadcast tasks
            if tasks_created:
                loop.run_until_complete(asyncio.gather(*tasks_created))

            # Broadcasts happen on every call (not throttled): 3 calls total
            assert mock_broadcast.call_count == 3
    finally:
        loop.close()
        # Uninstall the loop: leaving a closed loop registered as the current
        # event loop would leak into whatever runs next in this thread.
        asyncio.set_event_loop(None)
|
||||||
|
|
||||||
|
def test_uses_broadcast_event_not_append_event_and_broadcast(self):
    """Progress events use broadcast_event (transient), not append_event_and_broadcast (persisted)."""
    import asyncio

    from reflector.hatchet.workflows.daily_multitrack_pipeline import (
        make_audio_progress_logger,
    )

    ctx = MagicMock()
    ctx.log = MagicMock()

    # Private loop for this test; it is unregistered and closed in `finally`.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    mock_broadcast_event = AsyncMock()
    mock_append = AsyncMock()
    tasks_created = []
    original_create_task = loop.create_task

    def capture_create_task(coro):
        # Keep a handle on every scheduled task so it can be awaited below.
        task = original_create_task(coro)
        tasks_created.append(task)
        return task

    try:
        with (
            patch(
                "reflector.hatchet.broadcast.broadcast_event",
                mock_broadcast_event,
            ),
            patch(
                "reflector.hatchet.broadcast.append_event_and_broadcast",
                mock_append,
            ),
            patch.object(loop, "create_task", side_effect=capture_create_task),
        ):
            callback = make_audio_progress_logger(
                ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id="t-123"
            )
            callback(50.0, 100.0)

            if tasks_created:
                loop.run_until_complete(asyncio.gather(*tasks_created))

            # broadcast_event (transient) IS called
            mock_broadcast_event.assert_called_once()
            # append_event_and_broadcast (persisted) is NOT called
            mock_append.assert_not_called()
    finally:
        # Unregister before closing so later tests never see a closed loop
        # as the current event loop.
        asyncio.set_event_loop(None)
        loop.close()
|
||||||
181
server/tests/test_dag_progress_decorator.py
Normal file
181
server/tests/test_dag_progress_decorator.py
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
"""Tests for with_error_handling decorator integration with broadcast_dag_status.
|
||||||
|
|
||||||
|
The decorator wraps each pipeline task and calls broadcast_dag_status on both
|
||||||
|
success and failure paths. These tests verify that integration rather than
|
||||||
|
testing broadcast_dag_status in isolation (which test_dag_progress.py covers).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from reflector.hatchet.constants import TaskName
|
||||||
|
|
||||||
|
|
||||||
|
class TestWithErrorHandlingBroadcast:
    """Checks how the with_error_handling decorator drives broadcast_dag_status."""

    # Dotted paths patched by the tests below.
    _BROADCAST_TARGET = "reflector.hatchet.dag_progress.broadcast_dag_status"
    _SET_ERROR_TARGET = (
        "reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status"
    )

    @pytest.fixture(autouse=True)
    def _setup_hatchet_mock(self):
        """Swap HatchetClientManager._instance for a mock while a test runs.

        Workflow modules call get_client() at import time; installing the mock
        first keeps the real SDK (and its ClientConfig validation) out of the
        picture. The previous value is put back afterwards.
        """
        from reflector.hatchet.client import HatchetClientManager

        saved_instance = HatchetClientManager._instance
        HatchetClientManager._instance = MagicMock()
        yield
        HatchetClientManager._instance = saved_instance

    def _make_input(self, transcript_id: str = "t-123") -> MagicMock:
        """Build a stand-in PipelineInput exposing only transcript_id."""
        return MagicMock(transcript_id=transcript_id)

    def _make_ctx(self, workflow_run_id: str = "wf-abc") -> MagicMock:
        """Build a stand-in Context exposing only workflow_run_id."""
        return MagicMock(workflow_run_id=workflow_run_id)

    @pytest.mark.asyncio
    async def test_calls_broadcast_on_success(self):
        """A successful task triggers exactly one broadcast_dag_status call."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            with_error_handling,
        )

        task_fn = AsyncMock(return_value="ok")
        decorated = with_error_handling(TaskName.GET_RECORDING)(task_fn)

        with patch(self._BROADCAST_TARGET, new_callable=AsyncMock) as mock_broadcast:
            outcome = await decorated(self._make_input(), self._make_ctx())

        assert outcome == "ok"
        mock_broadcast.assert_called_once_with("t-123", "wf-abc")

    @pytest.mark.asyncio
    async def test_calls_broadcast_on_failure(self):
        """A raising task still triggers exactly one broadcast_dag_status call."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            with_error_handling,
        )

        task_fn = AsyncMock(side_effect=RuntimeError("boom"))
        decorated = with_error_handling(TaskName.GET_RECORDING)(task_fn)

        with (
            patch(self._BROADCAST_TARGET, new_callable=AsyncMock) as mock_broadcast,
            patch(self._SET_ERROR_TARGET, new_callable=AsyncMock),
        ):
            with pytest.raises(RuntimeError, match="boom"):
                await decorated(self._make_input(), self._make_ctx())

        mock_broadcast.assert_called_once_with("t-123", "wf-abc")

    @pytest.mark.asyncio
    async def test_swallows_broadcast_exception_on_success(self):
        """A failing broadcast must not break an otherwise successful task."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            with_error_handling,
        )

        task_fn = AsyncMock(return_value="ok")
        decorated = with_error_handling(TaskName.GET_RECORDING)(task_fn)

        with patch(
            self._BROADCAST_TARGET,
            new_callable=AsyncMock,
            side_effect=RuntimeError("broadcast exploded"),
        ):
            outcome = await decorated(self._make_input(), self._make_ctx())

        assert outcome == "ok"

    @pytest.mark.asyncio
    async def test_swallows_broadcast_exception_on_failure(self):
        """The task's own exception wins even when the broadcast fails too."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            with_error_handling,
        )

        task_fn = AsyncMock(side_effect=ValueError("original error"))
        decorated = with_error_handling(TaskName.GET_RECORDING)(task_fn)

        with (
            patch(
                self._BROADCAST_TARGET,
                new_callable=AsyncMock,
                side_effect=RuntimeError("broadcast exploded"),
            ),
            patch(self._SET_ERROR_TARGET, new_callable=AsyncMock),
        ):
            with pytest.raises(ValueError, match="original error"):
                await decorated(self._make_input(), self._make_ctx())

    @pytest.mark.asyncio
    async def test_calls_set_workflow_error_status_on_failure(self):
        """With the default set_error_status=True, a failure calls set_workflow_error_status."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            with_error_handling,
        )

        task_fn = AsyncMock(side_effect=RuntimeError("boom"))
        decorated = with_error_handling(TaskName.GET_RECORDING)(task_fn)

        with (
            patch(self._BROADCAST_TARGET, new_callable=AsyncMock),
            patch(self._SET_ERROR_TARGET, new_callable=AsyncMock) as mock_set_error,
        ):
            with pytest.raises(RuntimeError, match="boom"):
                await decorated(self._make_input(), self._make_ctx())

        mock_set_error.assert_called_once_with("t-123")

    @pytest.mark.asyncio
    async def test_no_set_workflow_error_status_when_disabled(self):
        """With set_error_status=False, a failure does NOT call set_workflow_error_status."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            with_error_handling,
        )

        task_fn = AsyncMock(side_effect=RuntimeError("boom"))
        decorate = with_error_handling(TaskName.GET_RECORDING, set_error_status=False)
        decorated = decorate(task_fn)

        with (
            patch(self._BROADCAST_TARGET, new_callable=AsyncMock),
            patch(self._SET_ERROR_TARGET, new_callable=AsyncMock) as mock_set_error,
        ):
            with pytest.raises(RuntimeError, match="boom"):
                await decorated(self._make_input(), self._make_ctx())

        mock_set_error.assert_not_called()
|
||||||
421
server/tests/test_dag_progress_rest.py
Normal file
421
server/tests/test_dag_progress_rest.py
Normal file
@@ -0,0 +1,421 @@
|
|||||||
|
"""Tests for DAG status REST enrichment on search and transcript GET endpoints."""
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import reflector.db.search as search_module
|
||||||
|
from reflector.db.search import SearchResult, _fetch_dag_statuses
|
||||||
|
from reflector.db.transcripts import TranscriptEvent
|
||||||
|
|
||||||
|
|
||||||
|
class TestFetchDagStatuses:
    """Unit tests for the _fetch_dag_statuses helper."""

    @staticmethod
    async def _fetch_with_rows(rows, transcript_ids):
        """Call _fetch_dag_statuses with get_database().fetch_all stubbed to return `rows`."""
        with patch("reflector.db.search.get_database") as mock_db:
            mock_db.return_value.fetch_all = AsyncMock(return_value=rows)
            return await _fetch_dag_statuses(transcript_ids)

    @pytest.mark.asyncio
    async def test_returns_empty_for_empty_ids(self):
        """An empty id list yields an empty mapping."""
        statuses = await _fetch_dag_statuses([])
        assert statuses == {}

    @pytest.mark.asyncio
    async def test_extracts_last_dag_status(self):
        """The tasks of the most recent DAG_STATUS event are returned."""
        first_snapshot = {
            "event": "DAG_STATUS",
            "data": {
                "workflow_run_id": "r1",
                "tasks": [{"name": "get_recording", "status": "completed"}],
            },
        }
        second_snapshot = {
            "event": "DAG_STATUS",
            "data": {
                "workflow_run_id": "r1",
                "tasks": [
                    {"name": "get_recording", "status": "completed"},
                    {"name": "process_tracks", "status": "running"},
                ],
            },
        }
        row = {
            "id": "t1",
            "events": [
                {"event": "STATUS", "data": {"value": "processing"}},
                first_snapshot,
                second_snapshot,
            ],
        }

        statuses = await self._fetch_with_rows([row], ["t1"])

        assert "t1" in statuses
        assert len(statuses["t1"]) == 2  # Last DAG_STATUS had 2 tasks

    @pytest.mark.asyncio
    async def test_skips_transcripts_without_events(self):
        """A row whose events column is None contributes nothing."""
        row = {"id": "t1", "events": None}

        statuses = await self._fetch_with_rows([row], ["t1"])

        assert statuses == {}

    @pytest.mark.asyncio
    async def test_skips_transcripts_without_dag_status(self):
        """A row with only non-DAG_STATUS events contributes nothing."""
        row = {
            "id": "t1",
            "events": [
                {"event": "STATUS", "data": {"value": "processing"}},
                {"event": "DURATION", "data": {"duration": 1000}},
            ],
        }

        statuses = await self._fetch_with_rows([row], ["t1"])

        assert statuses == {}

    @pytest.mark.asyncio
    async def test_handles_json_string_events(self):
        """Events stored as a JSON string rather than an already-parsed list."""
        import json

        raw_events = json.dumps(
            [
                {
                    "event": "DAG_STATUS",
                    "data": {
                        "workflow_run_id": "r1",
                        "tasks": [{"name": "transcribe", "status": "running"}],
                    },
                },
            ]
        )
        row = {"id": "t1", "events": raw_events}

        statuses = await self._fetch_with_rows([row], ["t1"])

        assert "t1" in statuses
        assert len(statuses["t1"]) == 1
        assert statuses["t1"][0]["name"] == "transcribe"

    @pytest.mark.asyncio
    async def test_multiple_transcripts(self):
        """Several transcripts can be resolved in a single call."""
        rows = [
            {
                "id": "t1",
                "events": [
                    {
                        "event": "DAG_STATUS",
                        "data": {
                            "workflow_run_id": "r1",
                            "tasks": [{"name": "a", "status": "completed"}],
                        },
                    },
                ],
            },
            {
                "id": "t2",
                "events": [
                    {
                        "event": "DAG_STATUS",
                        "data": {
                            "workflow_run_id": "r2",
                            "tasks": [{"name": "b", "status": "running"}],
                        },
                    },
                ],
            },
        ]

        statuses = await self._fetch_with_rows(rows, ["t1", "t2"])

        assert "t1" in statuses
        assert "t2" in statuses
        assert statuses["t1"][0]["name"] == "a"
        assert statuses["t2"][0]["name"] == "b"

    @pytest.mark.asyncio
    async def test_dag_status_without_tasks_key_skipped(self):
        """A DAG_STATUS event whose data has no tasks key should be skipped."""
        row = {
            "id": "t1",
            "events": [
                {"event": "DAG_STATUS", "data": {"workflow_run_id": "r1"}},
            ],
        }

        statuses = await self._fetch_with_rows([row], ["t1"])

        assert statuses == {}
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_dag_status_from_transcript(transcript):
|
||||||
|
"""Replicate the dag_status extraction logic from transcript_get view.
|
||||||
|
|
||||||
|
This mirrors the code in reflector/views/transcripts.py lines 495-500:
|
||||||
|
dag_status = None
|
||||||
|
if transcript.status == "processing" and transcript.events:
|
||||||
|
for ev in reversed(transcript.events):
|
||||||
|
if ev.event == "DAG_STATUS":
|
||||||
|
dag_status = ev.data.get("tasks") if isinstance(ev.data, dict) else None
|
||||||
|
break
|
||||||
|
"""
|
||||||
|
dag_status = None
|
||||||
|
if transcript.status == "processing" and transcript.events:
|
||||||
|
for ev in reversed(transcript.events):
|
||||||
|
if ev.event == "DAG_STATUS":
|
||||||
|
dag_status = ev.data.get("tasks") if isinstance(ev.data, dict) else None
|
||||||
|
break
|
||||||
|
return dag_status
|
||||||
|
|
||||||
|
|
||||||
|
class TestTranscriptGetDagStatusExtraction:
    """Covers the dag_status extraction used by the transcript_get endpoint.

    Standing up the real endpoint is involved, so these tests run the
    module-level copy of the view's extraction logic against lightweight
    stand-in transcripts.
    """

    @staticmethod
    def _dag_event(tasks):
        """Build a DAG_STATUS TranscriptEvent for run "r1" carrying `tasks`."""
        return TranscriptEvent(
            event="DAG_STATUS",
            data={"workflow_run_id": "r1", "tasks": tasks},
        )

    @staticmethod
    def _status_event():
        """Build the plain STATUS=processing TranscriptEvent used as filler."""
        return TranscriptEvent(event="STATUS", data={"value": "processing"})

    def test_processing_transcript_with_dag_status_events(self):
        """A processing transcript yields the tasks of its last DAG_STATUS event."""
        transcript = SimpleNamespace(
            status="processing",
            events=[
                self._status_event(),
                self._dag_event([{"name": "get_recording", "status": "completed"}]),
                self._dag_event(
                    [
                        {"name": "get_recording", "status": "completed"},
                        {"name": "transcribe", "status": "running"},
                    ]
                ),
            ],
        )

        tasks = _extract_dag_status_from_transcript(transcript)

        assert tasks is not None
        assert len(tasks) == 2
        assert tasks[0]["name"] == "get_recording"
        assert tasks[1]["name"] == "transcribe"
        assert tasks[1]["status"] == "running"

    def test_processing_transcript_without_dag_status_events(self):
        """A processing transcript with only non-DAG_STATUS events yields None."""
        transcript = SimpleNamespace(
            status="processing",
            events=[
                self._status_event(),
                TranscriptEvent(event="DURATION", data={"duration": 1000}),
            ],
        )

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_ended_transcript_with_dag_status_events(self):
        """An ended transcript yields None even with DAG_STATUS events (status check)."""
        transcript = SimpleNamespace(
            status="ended",
            events=[
                self._dag_event([{"name": "transcribe", "status": "completed"}]),
            ],
        )

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_processing_transcript_with_empty_events(self):
        """A processing transcript with an empty events list yields None."""
        transcript = SimpleNamespace(status="processing", events=[])

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_processing_transcript_with_none_events(self):
        """A processing transcript whose events is None yields None."""
        transcript = SimpleNamespace(status="processing", events=None)

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_extracts_last_dag_status_not_first(self):
        """The most recent DAG_STATUS event is used, not the earliest one."""
        transcript = SimpleNamespace(
            status="processing",
            events=[
                self._dag_event([{"name": "a", "status": "running"}]),
                self._status_event(),
                self._dag_event(
                    [
                        {"name": "a", "status": "completed"},
                        {"name": "b", "status": "running"},
                    ]
                ),
            ],
        )

        tasks = _extract_dag_status_from_transcript(transcript)

        assert len(tasks) == 2
        assert tasks[0]["status"] == "completed"
        assert tasks[1]["name"] == "b"
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchEnrichmentIntegration:
    """Test DAG status enrichment in search results.

    The search function enriches processing transcripts with dag_status
    by calling _fetch_dag_statuses for processing IDs and assigning results.
    We test this enrichment logic by mocking _fetch_dag_statuses.

    The enrichment steps are replicated once, in ``_enrich``, instead of being
    repeated in every test.
    """

    def _make_search_result(self, id: str, status: str) -> SearchResult:
        """Create a minimal SearchResult for testing."""
        return SearchResult(
            id=id,
            title=f"Transcript {id}",
            user_id="u1",
            room_id=None,
            room_name=None,
            source_kind="live",
            created_at=datetime(2024, 1, 1, tzinfo=timezone.utc),
            status=status,
            rank=1.0,
            duration=60.0,
            search_snippets=[],
            total_match_count=0,
            dag_status=None,
        )

    @staticmethod
    async def _enrich(results) -> None:
        """Replicate the enrichment logic from SearchController.search_transcripts.

        Collects the ids of results whose status is "processing"; if there are
        any, looks them up through search_module._fetch_dag_statuses (resolved
        at call time so a patch on the module is honoured) and assigns
        dag_status in place on every result that has an entry.
        """
        processing_ids = [r.id for r in results if r.status == "processing"]
        if not processing_ids:
            # Nothing is processing: the lookup must not be triggered at all.
            return
        dag_statuses = await search_module._fetch_dag_statuses(processing_ids)
        for r in results:
            if r.id in dag_statuses:
                r.dag_status = dag_statuses[r.id]

    @pytest.mark.asyncio
    async def test_processing_result_gets_dag_status(self):
        """SearchResult with status='processing' and matching DAG_STATUS events
        gets dag_status populated."""
        results = [self._make_search_result("t1", "processing")]
        dag_tasks = [
            {"name": "get_recording", "status": "completed"},
            {"name": "transcribe", "status": "running"},
        ]

        with patch.object(
            search_module,
            "_fetch_dag_statuses",
            new_callable=AsyncMock,
            return_value={"t1": dag_tasks},
        ) as mock_fetch:
            await self._enrich(results)

        mock_fetch.assert_called_once_with(["t1"])
        assert results[0].dag_status == dag_tasks

    @pytest.mark.asyncio
    async def test_ended_result_does_not_trigger_fetch(self):
        """SearchResult with status='ended' does NOT trigger _fetch_dag_statuses."""
        results = [self._make_search_result("t1", "ended")]

        with patch.object(
            search_module,
            "_fetch_dag_statuses",
            new_callable=AsyncMock,
            return_value={},
        ) as mock_fetch:
            await self._enrich(results)

        mock_fetch.assert_not_called()
        assert results[0].dag_status is None

    @pytest.mark.asyncio
    async def test_mixed_processing_and_ended_results(self):
        """Only processing results get enriched; ended results stay None."""
        results = [
            self._make_search_result("t1", "processing"),
            self._make_search_result("t2", "ended"),
            self._make_search_result("t3", "processing"),
        ]
        dag_tasks_t1 = [{"name": "transcribe", "status": "running"}]
        dag_tasks_t3 = [{"name": "diarize", "status": "completed"}]

        with patch.object(
            search_module,
            "_fetch_dag_statuses",
            new_callable=AsyncMock,
            return_value={"t1": dag_tasks_t1, "t3": dag_tasks_t3},
        ) as mock_fetch:
            await self._enrich(results)

        mock_fetch.assert_called_once_with(["t1", "t3"])
        assert results[0].dag_status == dag_tasks_t1
        assert results[1].dag_status is None
        assert results[2].dag_status == dag_tasks_t3

    @pytest.mark.asyncio
    async def test_processing_result_without_dag_events_stays_none(self):
        """Processing result with no DAG_STATUS events in DB stays dag_status=None."""
        results = [self._make_search_result("t1", "processing")]

        with patch.object(
            search_module,
            "_fetch_dag_statuses",
            new_callable=AsyncMock,
            return_value={},
        ) as mock_fetch:
            await self._enrich(results)

        mock_fetch.assert_called_once_with(["t1"])
        assert results[0].dag_status is None
|
||||||
147
server/tests/test_dailyco_instance_id.py
Normal file
147
server/tests/test_dailyco_instance_id.py
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
"""
|
||||||
|
Tests for Daily.co instanceId generation.
|
||||||
|
|
||||||
|
Verifies deterministic behavior and frontend/backend consistency.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from reflector.dailyco_api.instance_id import (
|
||||||
|
RAW_TRACKS_NAMESPACE,
|
||||||
|
generate_cloud_instance_id,
|
||||||
|
generate_raw_tracks_instance_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestInstanceIdDeterminism:
    """instanceId generation must be repeatable and distinct per meeting/kind."""

    # Meeting id shared by the single-meeting tests.
    _MEETING_ID = "550e8400-e29b-41d4-a716-446655440000"

    def test_cloud_instance_id_is_meeting_id(self):
        """The cloud instanceId is the meeting ID itself (which also implies determinism)."""
        first = generate_cloud_instance_id(self._MEETING_ID)
        second = generate_cloud_instance_id(self._MEETING_ID)

        assert str(first) == self._MEETING_ID
        assert first == second

    def test_raw_tracks_instance_id_deterministic(self):
        """Two raw-tracks generations for the same meeting agree."""
        first = generate_raw_tracks_instance_id(self._MEETING_ID)
        second = generate_raw_tracks_instance_id(self._MEETING_ID)

        assert first == second

    def test_raw_tracks_different_from_cloud(self):
        """For one meeting, the raw-tracks id is not the cloud id."""
        cloud_id = generate_cloud_instance_id(self._MEETING_ID)
        raw_tracks_id = generate_raw_tracks_instance_id(self._MEETING_ID)

        assert cloud_id != raw_tracks_id

    def test_different_meetings_different_instance_ids(self):
        """Distinct meetings produce distinct ids, for both cloud and raw-tracks."""
        meeting_a = "550e8400-e29b-41d4-a716-446655440000"
        meeting_b = "6ba7b810-9dad-11d1-80b4-00c04fd430c8"

        cloud_a = generate_cloud_instance_id(meeting_a)
        cloud_b = generate_cloud_instance_id(meeting_b)
        assert cloud_a != cloud_b

        raw_a = generate_raw_tracks_instance_id(meeting_a)
        raw_b = generate_raw_tracks_instance_id(meeting_b)
        assert raw_a != raw_b
|
||||||
|
|
||||||
|
|
||||||
|
class TestFrontendBackendConsistency:
    """Test that backend matches frontend logic."""

    def test_namespace_matches_frontend(self):
        """Namespace UUID matches frontend RAW_TRACKS_NAMESPACE constant."""
        # From www/app/[roomName]/components/DailyRoom.tsx
        frontend_namespace = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
        assert str(RAW_TRACKS_NAMESPACE) == frontend_namespace

    def test_raw_tracks_generation_matches_frontend_logic(self):
        """Backend UUIDv5 generation matches frontend uuidv5() call.

        Besides checking that the result is a well-formed version-5 UUID, this
        recomputes the value the frontend derives and compares it, so a change
        in the backend derivation is actually caught.
        """
        import uuid

        # Example meeting ID
        meeting_id = "550e8400-e29b-41d4-a716-446655440000"

        # Backend result
        backend_result = generate_raw_tracks_instance_id(meeting_id)

        # Expected result from frontend: uuidv5(meeting.id, RAW_TRACKS_NAMESPACE)
        # Python uuid5 uses (namespace, name) argument order
        # JavaScript uuid.v5(name, namespace) - same args, different order
        # Frontend: uuidv5(meeting.id, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
        # Backend: uuid5(UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890"), meeting.id)
        expected = uuid.uuid5(
            uuid.UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890"), meeting_id
        )

        # Verify it's a valid UUID (will raise if not)
        assert len(str(backend_result)) == 36
        assert backend_result.version == 5
        # Verify the value itself, not just its shape.
        assert str(backend_result) == str(expected)
|
||||||
|
|
||||||
|
|
||||||
|
class TestEdgeCases:
    """Boundary behaviour: malformed ids and letter-case handling."""

    # The same meeting id written in lower and upper case.
    _LOWER = "550e8400-e29b-41d4-a716-446655440000"
    _UPPER = "550E8400-E29B-41D4-A716-446655440000"

    def test_invalid_uuid_format_raises(self):
        """A string that is not a UUID makes the cloud generator raise ValueError."""
        with pytest.raises(ValueError):
            generate_cloud_instance_id("not-a-uuid")

    def test_lowercase_uuid_normalized_for_cloud(self):
        """Cloud instanceId: lower- and upper-case spellings give the same result."""
        from_lower = generate_cloud_instance_id(self._LOWER)
        from_upper = generate_cloud_instance_id(self._UPPER)

        assert from_lower == from_upper

    def test_uuid5_is_case_sensitive_warning(self):
        """
        Records that raw-tracks (uuid5) ids depend on the letter case of the input.

        Not a problem: meeting.id is always lowercase from the DB and API.
        The frontend generates the raw-tracks instanceId from the lowercase
        meeting.id, and the backend receives a lowercase meeting_id when matching.

        This test documents the behavior, not a requirement.
        """
        from_lower = generate_raw_tracks_instance_id(self._LOWER)
        from_upper = generate_raw_tracks_instance_id(self._UPPER)

        assert from_lower != from_upper
|
||||||
|
|
||||||
|
|
||||||
|
class TestMtgSessionIdVsInstanceId:
|
||||||
|
"""
|
||||||
|
Documents that Daily.co's mtgSessionId differs from our instanceId.
|
||||||
|
|
||||||
|
Why this matters: We investigated using mtgSessionId for matching but discovered
|
||||||
|
it's Daily.co-generated and unrelated to instanceId we send. This test documents
|
||||||
|
that finding so we don't investigate it again.
|
||||||
|
|
||||||
|
Production data from 2026-01-13:
|
||||||
|
- Meeting ID: 4ad503b6-8189-4910-a8f7-68cdd1b7f990
|
||||||
|
- Cloud instanceId: 4ad503b6-8189-4910-a8f7-68cdd1b7f990 (same as meeting ID)
|
||||||
|
- Raw-tracks instanceId: 784b3af3-c7dd-57f0-ac54-2ee91c6927cb (UUIDv5 derived)
|
||||||
|
- Recording mtgSessionId: f25a2e09-740f-4932-9c0d-b1bebaa669c6 (different!)
|
||||||
|
|
||||||
|
Conclusion: Cannot use mtgSessionId for recording-to-meeting matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_mtg_session_id_differs_from_our_instance_ids(self):
|
||||||
|
"""mtgSessionId (Daily.co) != instanceId (ours) for both cloud and raw-tracks."""
|
||||||
|
meeting_id = "4ad503b6-8189-4910-a8f7-68cdd1b7f990"
|
||||||
|
expected_raw_tracks_id = "784b3af3-c7dd-57f0-ac54-2ee91c6927cb"
|
||||||
|
mtg_session_id = "f25a2e09-740f-4932-9c0d-b1bebaa669c6"
|
||||||
|
|
||||||
|
cloud_instance_id = generate_cloud_instance_id(meeting_id)
|
||||||
|
raw_tracks_instance_id = generate_raw_tracks_instance_id(meeting_id)
|
||||||
|
|
||||||
|
assert str(cloud_instance_id) == meeting_id
|
||||||
|
assert str(raw_tracks_instance_id) == expected_raw_tracks_id
|
||||||
|
assert str(cloud_instance_id) != mtg_session_id
|
||||||
|
assert str(raw_tracks_instance_id) != mtg_session_id
|
||||||
374
server/tests/test_time_based_meeting_matching.py
Normal file
374
server/tests/test_time_based_meeting_matching.py
Normal file
@@ -0,0 +1,374 @@
|
|||||||
|
"""
|
||||||
|
Integration tests for time-based meeting-to-recording matching.
|
||||||
|
|
||||||
|
Tests the critical path for matching Daily.co recordings to meetings when
|
||||||
|
API doesn't return instanceId.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from reflector.db.meetings import meetings_controller
|
||||||
|
from reflector.db.rooms import rooms_controller
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
async def test_room():
|
||||||
|
"""Create a test room for meetings."""
|
||||||
|
room = await rooms_controller.add(
|
||||||
|
name="test-room-time",
|
||||||
|
user_id="test-user-id",
|
||||||
|
zulip_auto_post=False,
|
||||||
|
zulip_stream="",
|
||||||
|
zulip_topic="",
|
||||||
|
is_locked=False,
|
||||||
|
room_mode="normal",
|
||||||
|
recording_type="cloud",
|
||||||
|
recording_trigger="automatic",
|
||||||
|
is_shared=False,
|
||||||
|
platform="daily",
|
||||||
|
)
|
||||||
|
return room
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def base_time():
|
||||||
|
"""Fixed timestamp for deterministic tests."""
|
||||||
|
return datetime(2026, 1, 14, 9, 0, 0, tzinfo=timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTimeBasedMatching:
|
||||||
|
"""Test get_by_room_name_and_time() matching logic."""
|
||||||
|
|
||||||
|
async def test_exact_time_match(self, test_room, base_time):
|
||||||
|
"""Recording timestamp exactly matches meeting start_date."""
|
||||||
|
meeting = await meetings_controller.create(
|
||||||
|
id="meeting-exact",
|
||||||
|
room_name="daily-test-20260114090000",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-20260114090000",
|
||||||
|
recording_start=base_time,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.id == meeting.id
|
||||||
|
|
||||||
|
async def test_recording_slightly_after_meeting_start(self, test_room, base_time):
|
||||||
|
"""Recording started 1 minute after meeting (participants joined late)."""
|
||||||
|
meeting = await meetings_controller.create(
|
||||||
|
id="meeting-late",
|
||||||
|
room_name="daily-test-20260114090100",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
recording_start = base_time + timedelta(minutes=1)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-20260114090100",
|
||||||
|
recording_start=recording_start,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.id == meeting.id
|
||||||
|
|
||||||
|
async def test_duplicate_room_names_picks_closest(self, test_room, base_time):
|
||||||
|
"""
|
||||||
|
Two meetings with same room_name (duplicate/race condition).
|
||||||
|
Should pick closest by timestamp.
|
||||||
|
"""
|
||||||
|
meeting1 = await meetings_controller.create(
|
||||||
|
id="meeting-1-first",
|
||||||
|
room_name="daily-duplicate-room",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
meeting2 = await meetings_controller.create(
|
||||||
|
id="meeting-2-second",
|
||||||
|
room_name="daily-duplicate-room", # Same room_name!
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time + timedelta(seconds=0.99), # 0.99s later
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Recording started 0.5s after meeting1
|
||||||
|
# Distance: meeting1 = 0.5s, meeting2 = 0.49s → meeting2 is closer
|
||||||
|
recording_start = base_time + timedelta(seconds=0.5)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-duplicate-room",
|
||||||
|
recording_start=recording_start,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.id == meeting2.id # meeting2 is closer (0.49s vs 0.5s)
|
||||||
|
|
||||||
|
async def test_outside_time_window_returns_none(self, test_room, base_time):
|
||||||
|
"""Recording outside 1-week window returns None."""
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-old",
|
||||||
|
room_name="daily-test-old",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Recording 8 days later (outside 7-day window)
|
||||||
|
recording_start = base_time + timedelta(days=8)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-old",
|
||||||
|
recording_start=recording_start,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
async def test_tie_breaker_deterministic(self, test_room, base_time):
|
||||||
|
"""When time delta identical, tie-breaker by meeting.id is deterministic."""
|
||||||
|
meeting_z = await meetings_controller.create(
|
||||||
|
id="zzz-last-uuid",
|
||||||
|
room_name="daily-test-tie",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
meeting_a = await meetings_controller.create(
|
||||||
|
id="aaa-first-uuid",
|
||||||
|
room_name="daily-test-tie",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time, # Exact same start_date
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-tie",
|
||||||
|
recording_start=base_time,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
# Tie-breaker: lexicographically first UUID
|
||||||
|
assert result.id == "aaa-first-uuid"
|
||||||
|
|
||||||
|
async def test_timezone_naive_datetime_raises(self, test_room, base_time):
|
||||||
|
"""Timezone-naive datetime raises ValueError."""
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-tz",
|
||||||
|
room_name="daily-test-tz",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Naive datetime (no timezone)
|
||||||
|
naive_dt = datetime(2026, 1, 14, 9, 0, 0)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="timezone-aware"):
|
||||||
|
await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-tz",
|
||||||
|
recording_start=naive_dt,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def test_one_week_boundary_after_included(self, test_room, base_time):
|
||||||
|
"""Meeting 1-week AFTER recording is included (window_end boundary)."""
|
||||||
|
meeting_time = base_time + timedelta(hours=168)
|
||||||
|
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-boundary-after",
|
||||||
|
room_name="daily-test-boundary-after",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=meeting_time,
|
||||||
|
end_date=meeting_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-boundary-after",
|
||||||
|
recording_start=base_time,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.id == "meeting-boundary-after"
|
||||||
|
|
||||||
|
async def test_one_week_boundary_before_included(self, test_room, base_time):
|
||||||
|
"""Meeting 1-week BEFORE recording is included (window_start boundary)."""
|
||||||
|
meeting_time = base_time - timedelta(hours=168)
|
||||||
|
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-boundary-before",
|
||||||
|
room_name="daily-test-boundary-before",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=meeting_time,
|
||||||
|
end_date=meeting_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-boundary-before",
|
||||||
|
recording_start=base_time,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.id == "meeting-boundary-before"
|
||||||
|
|
||||||
|
async def test_recording_before_meeting_start(self, test_room, base_time):
|
||||||
|
"""Recording started before meeting (clock skew or early join)."""
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-early",
|
||||||
|
room_name="daily-test-early",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
recording_start = base_time - timedelta(minutes=2)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-early",
|
||||||
|
recording_start=recording_start,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.id == "meeting-early"
|
||||||
|
|
||||||
|
async def test_mixed_inside_outside_window(self, test_room, base_time):
|
||||||
|
"""Multiple meetings, only one inside window - returns the inside one."""
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-old",
|
||||||
|
room_name="daily-test-mixed",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time - timedelta(days=10),
|
||||||
|
end_date=base_time - timedelta(days=10, hours=-1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-inside",
|
||||||
|
room_name="daily-test-mixed",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time - timedelta(days=2),
|
||||||
|
end_date=base_time - timedelta(days=2, hours=-1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
await meetings_controller.create(
|
||||||
|
id="meeting-future",
|
||||||
|
room_name="daily-test-mixed",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time + timedelta(days=10),
|
||||||
|
end_date=base_time + timedelta(days=10, hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await meetings_controller.get_by_room_name_and_time(
|
||||||
|
room_name="daily-test-mixed",
|
||||||
|
recording_start=base_time,
|
||||||
|
time_window_hours=168,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.id == "meeting-inside"
|
||||||
|
|
||||||
|
|
||||||
|
class TestAtomicCloudRecordingUpdate:
|
||||||
|
"""Test atomic update prevents race conditions."""
|
||||||
|
|
||||||
|
async def test_first_update_succeeds(self, test_room, base_time):
|
||||||
|
"""First call to set_cloud_recording_if_missing succeeds."""
|
||||||
|
meeting = await meetings_controller.create(
|
||||||
|
id="meeting-atomic-1",
|
||||||
|
room_name="daily-test-atomic",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
success = await meetings_controller.set_cloud_recording_if_missing(
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
s3_key="first-s3-key",
|
||||||
|
duration=100,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert success is True
|
||||||
|
|
||||||
|
updated = await meetings_controller.get_by_id(meeting.id)
|
||||||
|
assert updated.daily_composed_video_s3_key == "first-s3-key"
|
||||||
|
assert updated.daily_composed_video_duration == 100
|
||||||
|
|
||||||
|
async def test_second_update_fails_atomically(self, test_room, base_time):
|
||||||
|
"""Second call to update same meeting doesn't overwrite (atomic check)."""
|
||||||
|
meeting = await meetings_controller.create(
|
||||||
|
id="meeting-atomic-2",
|
||||||
|
room_name="daily-test-atomic2",
|
||||||
|
room_url="https://example.daily.co/test",
|
||||||
|
host_room_url="https://example.daily.co/test?t=host",
|
||||||
|
start_date=base_time,
|
||||||
|
end_date=base_time + timedelta(hours=1),
|
||||||
|
room=test_room,
|
||||||
|
)
|
||||||
|
|
||||||
|
success1 = await meetings_controller.set_cloud_recording_if_missing(
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
s3_key="first-s3-key",
|
||||||
|
duration=100,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert success1 is True
|
||||||
|
|
||||||
|
after_first = await meetings_controller.get_by_id(meeting.id)
|
||||||
|
assert after_first.daily_composed_video_s3_key == "first-s3-key"
|
||||||
|
|
||||||
|
success2 = await meetings_controller.set_cloud_recording_if_missing(
|
||||||
|
meeting_id=meeting.id,
|
||||||
|
s3_key="bucket/path/should-not-overwrite",
|
||||||
|
duration=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert success2 is False
|
||||||
|
|
||||||
|
final = await meetings_controller.get_by_id(meeting.id)
|
||||||
|
assert final.daily_composed_video_s3_key == "first-s3-key"
|
||||||
|
assert final.daily_composed_video_duration == 100
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
from unittest.mock import patch
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from httpx import ASGITransport, AsyncClient
|
from httpx import ASGITransport, AsyncClient
|
||||||
@@ -142,17 +142,17 @@ async def test_whereby_recording_uses_file_pipeline(client):
|
|||||||
"reflector.services.transcript_process.task_pipeline_file_process"
|
"reflector.services.transcript_process.task_pipeline_file_process"
|
||||||
) as mock_file_pipeline,
|
) as mock_file_pipeline,
|
||||||
patch(
|
patch(
|
||||||
"reflector.services.transcript_process.task_pipeline_multitrack_process"
|
"reflector.services.transcript_process.HatchetClientManager"
|
||||||
) as mock_multitrack_pipeline,
|
) as mock_hatchet,
|
||||||
):
|
):
|
||||||
response = await client.post(f"/transcripts/{transcript.id}/process")
|
response = await client.post(f"/transcripts/{transcript.id}/process")
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert response.json()["status"] == "ok"
|
assert response.json()["status"] == "ok"
|
||||||
|
|
||||||
# Whereby recordings should use file pipeline
|
# Whereby recordings should use file pipeline, not Hatchet
|
||||||
mock_file_pipeline.delay.assert_called_once_with(transcript_id=transcript.id)
|
mock_file_pipeline.delay.assert_called_once_with(transcript_id=transcript.id)
|
||||||
mock_multitrack_pipeline.delay.assert_not_called()
|
mock_hatchet.start_workflow.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures("setup_database")
|
@pytest.mark.usefixtures("setup_database")
|
||||||
@@ -177,8 +177,6 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
|||||||
recording_trigger="automatic-2nd-participant",
|
recording_trigger="automatic-2nd-participant",
|
||||||
is_shared=False,
|
is_shared=False,
|
||||||
)
|
)
|
||||||
# Force Celery backend for test
|
|
||||||
await rooms_controller.update(room, {"use_celery": True})
|
|
||||||
|
|
||||||
transcript = await transcripts_controller.add(
|
transcript = await transcripts_controller.add(
|
||||||
"",
|
"",
|
||||||
@@ -213,18 +211,23 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
|||||||
"reflector.services.transcript_process.task_pipeline_file_process"
|
"reflector.services.transcript_process.task_pipeline_file_process"
|
||||||
) as mock_file_pipeline,
|
) as mock_file_pipeline,
|
||||||
patch(
|
patch(
|
||||||
"reflector.services.transcript_process.task_pipeline_multitrack_process"
|
"reflector.services.transcript_process.HatchetClientManager"
|
||||||
) as mock_multitrack_pipeline,
|
) as mock_hatchet,
|
||||||
):
|
):
|
||||||
|
mock_hatchet.start_workflow = AsyncMock(return_value="test-workflow-id")
|
||||||
|
|
||||||
response = await client.post(f"/transcripts/{transcript.id}/process")
|
response = await client.post(f"/transcripts/{transcript.id}/process")
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert response.json()["status"] == "ok"
|
assert response.json()["status"] == "ok"
|
||||||
|
|
||||||
# Daily.co multitrack recordings should use multitrack pipeline
|
# Daily.co multitrack recordings should use Hatchet workflow
|
||||||
mock_multitrack_pipeline.delay.assert_called_once_with(
|
mock_hatchet.start_workflow.assert_called_once()
|
||||||
transcript_id=transcript.id,
|
call_kwargs = mock_hatchet.start_workflow.call_args.kwargs
|
||||||
bucket_name="daily-bucket",
|
assert call_kwargs["workflow_name"] == "DiarizationPipeline"
|
||||||
track_keys=track_keys,
|
assert call_kwargs["input_data"]["transcript_id"] == transcript.id
|
||||||
)
|
assert call_kwargs["input_data"]["bucket_name"] == "daily-bucket"
|
||||||
|
assert call_kwargs["input_data"]["tracks"] == [
|
||||||
|
{"s3_key": k} for k in track_keys
|
||||||
|
]
|
||||||
mock_file_pipeline.delay.assert_not_called()
|
mock_file_pipeline.delay.assert_not_called()
|
||||||
|
|||||||
@@ -115,9 +115,7 @@ def appserver(tmpdir, setup_database, celery_session_app, celery_session_worker)
|
|||||||
settings.DATA_DIR = DATA_DIR
|
settings.DATA_DIR = DATA_DIR
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
# Using celery_includes from conftest.py which includes both pipelines
|
||||||
def celery_includes():
|
|
||||||
return ["reflector.pipelines.main_live_pipeline"]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures("setup_database")
|
@pytest.mark.usefixtures("setup_database")
|
||||||
|
|||||||
@@ -56,7 +56,12 @@ def appserver_ws_user(setup_database):
|
|||||||
|
|
||||||
if server_instance:
|
if server_instance:
|
||||||
server_instance.should_exit = True
|
server_instance.should_exit = True
|
||||||
server_thread.join(timeout=30)
|
server_thread.join(timeout=2.0)
|
||||||
|
|
||||||
|
# Reset global singleton for test isolation
|
||||||
|
from reflector.ws_manager import reset_ws_manager
|
||||||
|
|
||||||
|
reset_ws_manager()
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
@@ -133,6 +138,8 @@ async def test_user_ws_accepts_valid_token_and_receives_events(appserver_ws_user
|
|||||||
|
|
||||||
# Connect and then trigger an event via HTTP create
|
# Connect and then trigger an event via HTTP create
|
||||||
async with aconnect_ws(base_ws, subprotocols=subprotocols) as ws:
|
async with aconnect_ws(base_ws, subprotocols=subprotocols) as ws:
|
||||||
|
await asyncio.sleep(0.2)
|
||||||
|
|
||||||
# Emit an event to the user's room via a standard HTTP action
|
# Emit an event to the user's room via a standard HTTP action
|
||||||
from httpx import AsyncClient
|
from httpx import AsyncClient
|
||||||
|
|
||||||
@@ -150,6 +157,7 @@ async def test_user_ws_accepts_valid_token_and_receives_events(appserver_ws_user
|
|||||||
"email": "user-abc@example.com",
|
"email": "user-abc@example.com",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Use in-memory client (global singleton makes it share ws_manager)
|
||||||
async with AsyncClient(app=app, base_url=f"http://{host}:{port}/v1") as ac:
|
async with AsyncClient(app=app, base_url=f"http://{host}:{port}/v1") as ac:
|
||||||
# Create a transcript as this user so that the server publishes TRANSCRIPT_CREATED to user room
|
# Create a transcript as this user so that the server publishes TRANSCRIPT_CREATED to user room
|
||||||
resp = await ac.post("/transcripts", json={"name": "WS Test"})
|
resp = await ac.post("/transcripts", json={"name": "WS Test"})
|
||||||
|
|||||||
331
server/tests/test_ws_dag_broadcast.py
Normal file
331
server/tests/test_ws_dag_broadcast.py
Normal file
@@ -0,0 +1,331 @@
|
|||||||
|
"""WebSocket broadcast delivery tests for STATUS and DAG_STATUS events.
|
||||||
|
|
||||||
|
Tests the full chain identified in DEBUG.md:
|
||||||
|
broadcast_event() → ws_manager.send_json() → Redis/in-memory pub/sub
|
||||||
|
→ _pubsub_data_reader() → socket.send_json() → WebSocket client
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
1. STATUS event delivery to transcript room WS
|
||||||
|
2. DAG_STATUS event delivery to transcript room WS
|
||||||
|
3. Full broadcast_event() chain (requires broadcast.py patching)
|
||||||
|
4. _pubsub_data_reader resilience when a client disconnects
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from httpx import AsyncClient
|
||||||
|
from httpx_ws import aconnect_ws
|
||||||
|
from uvicorn import Config, Server
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def appserver_ws_broadcast(setup_database, monkeypatch):
|
||||||
|
"""Start real uvicorn server for WebSocket broadcast tests.
|
||||||
|
|
||||||
|
Also patches broadcast.py's get_ws_manager (missing from conftest autouse fixture).
|
||||||
|
"""
|
||||||
|
# Patch broadcast.py's get_ws_manager — conftest.py misses this module.
|
||||||
|
# Without this, broadcast_event() creates a real Redis ws_manager.
|
||||||
|
import reflector.ws_manager as ws_mod
|
||||||
|
from reflector.app import app
|
||||||
|
from reflector.db import get_database
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"reflector.hatchet.broadcast.get_ws_manager", ws_mod.get_ws_manager
|
||||||
|
)
|
||||||
|
|
||||||
|
host = "127.0.0.1"
|
||||||
|
port = 1259
|
||||||
|
server_started = threading.Event()
|
||||||
|
server_exception = None
|
||||||
|
server_instance = None
|
||||||
|
|
||||||
|
def run_server():
|
||||||
|
nonlocal server_exception, server_instance
|
||||||
|
try:
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
config = Config(app=app, host=host, port=port, loop=loop)
|
||||||
|
server_instance = Server(config)
|
||||||
|
|
||||||
|
async def start_server():
|
||||||
|
database = get_database()
|
||||||
|
await database.connect()
|
||||||
|
try:
|
||||||
|
await server_instance.serve()
|
||||||
|
finally:
|
||||||
|
await database.disconnect()
|
||||||
|
|
||||||
|
server_started.set()
|
||||||
|
loop.run_until_complete(start_server())
|
||||||
|
except Exception as e:
|
||||||
|
server_exception = e
|
||||||
|
server_started.set()
|
||||||
|
finally:
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
server_thread = threading.Thread(target=run_server, daemon=True)
|
||||||
|
server_thread.start()
|
||||||
|
|
||||||
|
server_started.wait(timeout=30)
|
||||||
|
if server_exception:
|
||||||
|
raise server_exception
|
||||||
|
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
yield host, port
|
||||||
|
|
||||||
|
if server_instance:
|
||||||
|
server_instance.should_exit = True
|
||||||
|
server_thread.join(timeout=2.0)
|
||||||
|
|
||||||
|
from reflector.ws_manager import reset_ws_manager
|
||||||
|
|
||||||
|
reset_ws_manager()
|
||||||
|
|
||||||
|
|
||||||
|
async def _create_transcript(host: str, port: int, name: str) -> str:
|
||||||
|
"""Create a transcript via ASGI transport and return its ID."""
|
||||||
|
from reflector.app import app
|
||||||
|
|
||||||
|
async with AsyncClient(app=app, base_url=f"http://{host}:{port}/v1") as ac:
|
||||||
|
resp = await ac.post("/transcripts", json={"name": name})
|
||||||
|
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||||
|
return resp.json()["id"]
|
||||||
|
|
||||||
|
|
||||||
|
async def _drain_historical_events(ws, timeout: float = 0.5) -> list[dict]:
|
||||||
|
"""Read all historical events sent on WS connect (non-blocking drain)."""
|
||||||
|
events = []
|
||||||
|
deadline = asyncio.get_event_loop().time() + timeout
|
||||||
|
while asyncio.get_event_loop().time() < deadline:
|
||||||
|
try:
|
||||||
|
msg = await asyncio.wait_for(ws.receive_json(), timeout=0.1)
|
||||||
|
events.append(msg)
|
||||||
|
except (asyncio.TimeoutError, Exception):
|
||||||
|
break
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 1: STATUS event delivery via ws_manager.send_json
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_transcript_ws_receives_status_via_send_json(appserver_ws_broadcast):
|
||||||
|
"""STATUS event published via ws_manager.send_json() arrives at transcript room WS."""
|
||||||
|
host, port = appserver_ws_broadcast
|
||||||
|
transcript_id = await _create_transcript(host, port, "Status send_json test")
|
||||||
|
|
||||||
|
ws_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
|
||||||
|
async with aconnect_ws(ws_url) as ws:
|
||||||
|
await _drain_historical_events(ws)
|
||||||
|
|
||||||
|
import reflector.ws_manager as ws_mod
|
||||||
|
|
||||||
|
ws_manager = ws_mod.get_ws_manager()
|
||||||
|
await ws_manager.send_json(
|
||||||
|
room_id=f"ts:{transcript_id}",
|
||||||
|
message={"event": "STATUS", "data": {"value": "processing"}},
|
||||||
|
)
|
||||||
|
|
||||||
|
msg = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
|
||||||
|
assert msg["event"] == "STATUS"
|
||||||
|
assert msg["data"]["value"] == "processing"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 2: DAG_STATUS event delivery via ws_manager.send_json
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
@pytest.mark.asyncio
async def test_transcript_ws_receives_dag_status_via_send_json(appserver_ws_broadcast):
    """A DAG_STATUS payload pushed with ws_manager.send_json() is received on the transcript WS.

    The payload is sent to room ``ts:<transcript_id>`` and the test then reads
    one JSON message from the websocket connected to that transcript's
    ``/events`` endpoint and checks its contents.
    """
    host, port = appserver_ws_broadcast
    transcript_id = await _create_transcript(host, port, "DAG_STATUS send_json test")

    # First task: already finished, no fan-out information.
    finished_task = {
        "name": "get_recording",
        "status": "completed",
        "started_at": "2025-01-01T00:00:00Z",
        "finished_at": "2025-01-01T00:00:05Z",
        "duration_seconds": 5.0,
        "parents": [],
        "error": None,
        "children_total": None,
        "children_completed": None,
        "progress_pct": None,
    }
    # Second task: still running, with child progress counters set.
    in_flight_task = {
        "name": "process_tracks",
        "status": "running",
        "started_at": "2025-01-01T00:00:05Z",
        "finished_at": None,
        "duration_seconds": None,
        "parents": ["get_recording"],
        "error": None,
        "children_total": 3,
        "children_completed": 1,
        "progress_pct": 33.3,
    }
    dag_payload = {
        "event": "DAG_STATUS",
        "data": {
            "workflow_run_id": "test-run-123",
            "tasks": [finished_task, in_flight_task],
        },
    }

    events_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
    async with aconnect_ws(events_url) as ws:
        await _drain_historical_events(ws)

        # Imported only once the connection is open, same as the other tests here.
        import reflector.ws_manager as ws_module

        manager = ws_module.get_ws_manager()
        await manager.send_json(room_id=f"ts:{transcript_id}", message=dag_payload)

        received = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
        assert received["event"] == "DAG_STATUS"

        body = received["data"]
        assert body["workflow_run_id"] == "test-run-123"

        tasks = body["tasks"]
        assert len(tasks) == 2
        assert tasks[0]["name"] == "get_recording"
        assert tasks[0]["status"] == "completed"
        assert tasks[1]["name"] == "process_tracks"
        assert tasks[1]["children_completed"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 3: Full broadcast_event() chain for STATUS
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
@pytest.mark.asyncio
async def test_broadcast_event_delivers_status_to_transcript_ws(appserver_ws_broadcast):
    """A STATUS TranscriptEvent passed to broadcast_event() is received on the transcript WS."""
    host, port = appserver_ws_broadcast
    transcript_id = await _create_transcript(host, port, "broadcast_event STATUS test")

    events_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
    async with aconnect_ws(events_url) as ws:
        await _drain_historical_events(ws)

        # Imported only once the connection is open, same as the other tests here.
        from reflector.db.transcripts import TranscriptEvent
        from reflector.hatchet.broadcast import broadcast_event
        from reflector.logger import logger

        bound_logger = logger.bind(transcript_id=transcript_id)
        status_event = TranscriptEvent(event="STATUS", data={"value": "processing"})
        await broadcast_event(transcript_id, status_event, logger=bound_logger)

        received = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
        assert received["event"] == "STATUS"
        assert received["data"]["value"] == "processing"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 4: Full broadcast_event() chain for DAG_STATUS
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
@pytest.mark.asyncio
async def test_broadcast_event_delivers_dag_status_to_transcript_ws(
    appserver_ws_broadcast,
):
    """A DAG_STATUS TranscriptEvent passed to broadcast_event() is received on the transcript WS."""
    host, port = appserver_ws_broadcast
    transcript_id = await _create_transcript(host, port, "broadcast_event DAG test")

    events_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
    async with aconnect_ws(events_url) as ws:
        await _drain_historical_events(ws)

        # Imported only once the connection is open, same as the other tests here.
        from reflector.db.transcripts import TranscriptEvent
        from reflector.hatchet.broadcast import broadcast_event
        from reflector.logger import logger

        bound_logger = logger.bind(transcript_id=transcript_id)

        # Single running task with every optional field left unset.
        running_task = {
            "name": "get_recording",
            "status": "running",
            "started_at": None,
            "finished_at": None,
            "duration_seconds": None,
            "parents": [],
            "error": None,
            "children_total": None,
            "children_completed": None,
            "progress_pct": None,
        }
        dag_event = TranscriptEvent(
            event="DAG_STATUS",
            data={"workflow_run_id": "test-run-456", "tasks": [running_task]},
        )
        await broadcast_event(transcript_id, dag_event, logger=bound_logger)

        received = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
        assert received["event"] == "DAG_STATUS"
        assert received["data"]["tasks"][0]["name"] == "get_recording"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 5: Multiple rapid events arrive in order
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
@pytest.mark.asyncio
async def test_multiple_events_arrive_in_order(appserver_ws_broadcast):
    """Four back-to-back events (STATUS, DAG_STATUS x2, STATUS) are received in send order."""
    host, port = appserver_ws_broadcast
    transcript_id = await _create_transcript(host, port, "ordering test")

    events_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
    async with aconnect_ws(events_url) as ws:
        await _drain_historical_events(ws)

        # Imported only once the connection is open, same as the other tests here.
        import reflector.ws_manager as ws_module

        manager = ws_module.get_ws_manager()

        # Messages in the exact order they are sent to the transcript room.
        outgoing = [
            {"event": "STATUS", "data": {"value": "processing"}},
            {
                "event": "DAG_STATUS",
                "data": {"workflow_run_id": "r1", "tasks": []},
            },
            {
                "event": "DAG_STATUS",
                "data": {
                    "workflow_run_id": "r1",
                    "tasks": [{"name": "a", "status": "running"}],
                },
            },
            {"event": "STATUS", "data": {"value": "ended"}},
        ]
        for message in outgoing:
            await manager.send_json(room_id=f"ts:{transcript_id}", message=message)

        # Read exactly four messages, each with its own 5 second timeout.
        msgs = [
            await asyncio.wait_for(ws.receive_json(), timeout=5.0) for _ in range(4)
        ]

        first, second, third, fourth = msgs
        assert first["event"] == "STATUS"
        assert first["data"]["value"] == "processing"
        assert second["event"] == "DAG_STATUS"
        assert second["data"]["tasks"] == []
        assert third["event"] == "DAG_STATUS"
        assert len(third["data"]["tasks"]) == 1
        assert fourth["event"] == "STATUS"
        assert fourth["data"]["value"] == "ended"
|
||||||
45
server/uv.lock
generated
45
server/uv.lock
generated
@@ -159,21 +159,20 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aiortc"
|
name = "aiortc"
|
||||||
version = "1.13.0"
|
version = "1.14.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aioice" },
|
{ name = "aioice" },
|
||||||
{ name = "av" },
|
{ name = "av" },
|
||||||
{ name = "cffi" },
|
|
||||||
{ name = "cryptography" },
|
{ name = "cryptography" },
|
||||||
{ name = "google-crc32c" },
|
{ name = "google-crc32c" },
|
||||||
{ name = "pyee" },
|
{ name = "pyee" },
|
||||||
{ name = "pylibsrtp" },
|
{ name = "pylibsrtp" },
|
||||||
{ name = "pyopenssl" },
|
{ name = "pyopenssl" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/62/03/bc947d74c548e0c17cf94e5d5bdacaed0ee9e5b2bb7b8b8cf1ac7a7c01ec/aiortc-1.13.0.tar.gz", hash = "sha256:5d209975c22d0910fb5a0f0e2caa828f2da966c53580f7c7170ac3a16a871620", size = 1179894 }
|
sdist = { url = "https://files.pythonhosted.org/packages/51/9c/4e027bfe0195de0442da301e2389329496745d40ae44d2d7c4571c4290ce/aiortc-1.14.0.tar.gz", hash = "sha256:adc8a67ace10a085721e588e06a00358ed8eaf5f6b62f0a95358ff45628dd762", size = 1180864 }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/87/29/765633cab5f1888890f5f172d1d53009b9b14e079cdfa01a62d9896a9ea9/aiortc-1.13.0-py3-none-any.whl", hash = "sha256:9ccccec98796f6a96bd1c3dd437a06da7e0f57521c96bd56e4b965a91b03a0a0", size = 92910 },
|
{ url = "https://files.pythonhosted.org/packages/57/ab/31646a49209568cde3b97eeade0d28bb78b400e6645c56422c101df68932/aiortc-1.14.0-py3-none-any.whl", hash = "sha256:4b244d7e482f4e1f67e685b3468269628eca1ec91fa5b329ab517738cfca086e", size = 93183 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -327,28 +326,24 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "av"
|
name = "av"
|
||||||
version = "14.4.0"
|
version = "16.1.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/86/f6/0b473dab52dfdea05f28f3578b1c56b6c796ce85e76951bab7c4e38d5a74/av-14.4.0.tar.gz", hash = "sha256:3ecbf803a7fdf67229c0edada0830d6bfaea4d10bfb24f0c3f4e607cd1064b42", size = 3892203 }
|
sdist = { url = "https://files.pythonhosted.org/packages/78/cd/3a83ffbc3cc25b39721d174487fb0d51a76582f4a1703f98e46170ce83d4/av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd", size = 4285203 }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/18/8a/d57418b686ffd05fabd5a0a9cfa97e63b38c35d7101af00e87c51c8cc43c/av-14.4.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b21d5586a88b9fce0ab78e26bd1c38f8642f8e2aad5b35e619f4d202217c701", size = 19965048 },
|
{ url = "https://files.pythonhosted.org/packages/48/d0/b71b65d1b36520dcb8291a2307d98b7fc12329a45614a303ff92ada4d723/av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f", size = 26927747 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f5/aa/3f878b0301efe587e9b07bb773dd6b47ef44ca09a3cffb4af50c08a170f3/av-14.4.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:cf8762d90b0f94a20c9f6e25a94f1757db5a256707964dfd0b1d4403e7a16835", size = 23750064 },
|
{ url = "https://files.pythonhosted.org/packages/2f/79/720a5a6ccdee06eafa211b945b0a450e3a0b8fc3d12922f0f3c454d870d2/av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b", size = 21492232 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/9a/b4/6fe94a31f9ed3a927daa72df67c7151968587106f30f9f8fcd792b186633/av-14.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0ac9f08920c7bbe0795319689d901e27cb3d7870b9a0acae3f26fc9daa801a6", size = 33648775 },
|
{ url = "https://files.pythonhosted.org/packages/8e/4f/a1ba8d922f2f6d1a3d52419463ef26dd6c4d43ee364164a71b424b5ae204/av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879", size = 39291737 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/6c/f3/7f3130753521d779450c935aec3f4beefc8d4645471159f27b54e896470c/av-14.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a56d9ad2afdb638ec0404e962dc570960aae7e08ae331ad7ff70fbe99a6cf40e", size = 32216915 },
|
{ url = "https://files.pythonhosted.org/packages/1a/31/fc62b9fe8738d2693e18d99f040b219e26e8df894c10d065f27c6b4f07e3/av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e", size = 40846822 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f8/9a/8ffabfcafb42154b4b3a67d63f9b69e68fa8c34cb39ddd5cb813dd049ed4/av-14.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bed513cbcb3437d0ae47743edc1f5b4a113c0b66cdd4e1aafc533abf5b2fbf2", size = 35287279 },
|
{ url = "https://files.pythonhosted.org/packages/53/10/ab446583dbce730000e8e6beec6ec3c2753e628c7f78f334a35cad0317f4/av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83", size = 40675604 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ad/11/7023ba0a2ca94a57aedf3114ab8cfcecb0819b50c30982a4c5be4d31df41/av-14.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d030c2d3647931e53d51f2f6e0fcf465263e7acf9ec6e4faa8dbfc77975318c3", size = 36294683 },
|
{ url = "https://files.pythonhosted.org/packages/31/d7/1003be685277005f6d63fd9e64904ee222fe1f7a0ea70af313468bb597db/av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63", size = 42015955 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/3d/fa/b8ac9636bd5034e2b899354468bef9f4dadb067420a16d8a493a514b7817/av-14.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1cc21582a4f606271d8c2036ec7a6247df0831050306c55cf8a905701d0f0474", size = 34552391 },
|
{ url = "https://files.pythonhosted.org/packages/2f/4a/fa2a38ee9306bf4579f556f94ecbc757520652eb91294d2a99c7cf7623b9/av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62", size = 31750339 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/fb/29/0db48079c207d1cba7a2783896db5aec3816e17de55942262c244dffbc0f/av-14.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce7c9cd452153d36f1b1478f904ed5f9ab191d76db873bdd3a597193290805d4", size = 37265250 },
|
{ url = "https://files.pythonhosted.org/packages/9c/84/2535f55edcd426cebec02eb37b811b1b0c163f26b8d3f53b059e2ec32665/av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6", size = 26945785 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/1c/55/715858c3feb7efa4d667ce83a829c8e6ee3862e297fb2b568da3f968639d/av-14.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd261e31cc6b43ca722f80656c39934199d8f2eb391e0147e704b6226acebc29", size = 27925845 },
|
{ url = "https://files.pythonhosted.org/packages/b6/17/ffb940c9e490bf42e86db4db1ff426ee1559cd355a69609ec1efe4d3a9eb/av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35", size = 21481147 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a6/75/b8641653780336c90ba89e5352cac0afa6256a86a150c7703c0b38851c6d/av-14.4.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:a53e682b239dd23b4e3bc9568cfb1168fc629ab01925fdb2e7556eb426339e94", size = 19954125 },
|
{ url = "https://files.pythonhosted.org/packages/15/c1/e0d58003d2d83c3921887d5c8c9b8f5f7de9b58dc2194356a2656a45cfdc/av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86", size = 39517197 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/99/e6/37fe6fa5853a48d54d749526365780a63a4bc530be6abf2115e3a21e292a/av-14.4.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5aa0b901751a32703fa938d2155d56ce3faf3630e4a48d238b35d2f7e49e5395", size = 23751479 },
|
{ url = "https://files.pythonhosted.org/packages/32/77/787797b43475d1b90626af76f80bfb0c12cfec5e11eafcfc4151b8c80218/av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2", size = 41174337 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f7/75/9a5f0e6bda5f513b62bafd1cff2b495441a8b07ab7fb7b8e62f0c0d1683f/av-14.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3b316fed3597675fe2aacfed34e25fc9d5bb0196dc8c0b014ae5ed4adda48de", size = 33801401 },
|
{ url = "https://files.pythonhosted.org/packages/8e/ac/d90df7f1e3b97fc5554cf45076df5045f1e0a6adf13899e10121229b826c/av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a", size = 40817720 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/6a/c9/e4df32a2ad1cb7f3a112d0ed610c5e43c89da80b63c60d60e3dc23793ec0/av-14.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a587b5c5014c3c0e16143a0f8d99874e46b5d0c50db6111aa0b54206b5687c81", size = 32364330 },
|
{ url = "https://files.pythonhosted.org/packages/80/6f/13c3a35f9dbcebafd03fe0c4cbd075d71ac8968ec849a3cfce406c35a9d2/av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829", size = 42267396 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ca/f0/64e7444a41817fde49a07d0239c033f7e9280bec4a4bb4784f5c79af95e6/av-14.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d53f75e8ac1ec8877a551c0db32a83c0aaeae719d05285281eaaba211bbc30", size = 35519508 },
|
{ url = "https://files.pythonhosted.org/packages/c8/b9/275df9607f7fb44317ccb1d4be74827185c0d410f52b6e2cd770fe209118/av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd", size = 31752045 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c2/a8/a370099daa9033a3b6f9b9bd815304b3d8396907a14d09845f27467ba138/av-14.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c8558cfde79dd8fc92d97c70e0f0fa8c94c7a66f68ae73afdf58598f0fe5e10d", size = 36448593 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/27/bb/edb6ceff8fa7259cb6330c51dbfbc98dd1912bd6eb5f7bc05a4bb14a9d6e/av-14.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:455b6410dea0ab2d30234ffb28df7d62ca3cdf10708528e247bec3a4cdcced09", size = 34701485 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a7/8a/957da1f581aa1faa9a5dfa8b47ca955edb47f2b76b949950933b457bfa1d/av-14.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1661efbe9d975f927b8512d654704223d936f39016fad2ddab00aee7c40f412c", size = 37521981 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/28/76/3f1cf0568592f100fd68eb40ed8c491ce95ca3c1378cc2d4c1f6d1bd295d/av-14.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:fbbeef1f421a3461086853d6464ad5526b56ffe8ccb0ab3fd0a1f121dfbf26ad", size = 27925944 },
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3267,7 +3262,7 @@ requires-dist = [
|
|||||||
{ name = "aiohttp-cors", specifier = ">=0.7.0" },
|
{ name = "aiohttp-cors", specifier = ">=0.7.0" },
|
||||||
{ name = "aiortc", specifier = ">=1.5.0" },
|
{ name = "aiortc", specifier = ">=1.5.0" },
|
||||||
{ name = "alembic", specifier = ">=1.11.3" },
|
{ name = "alembic", specifier = ">=1.11.3" },
|
||||||
{ name = "av", specifier = ">=10.0.0" },
|
{ name = "av", specifier = ">=15.0.0" },
|
||||||
{ name = "celery", specifier = ">=5.3.4" },
|
{ name = "celery", specifier = ">=5.3.4" },
|
||||||
{ name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
|
{ name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
|
||||||
{ name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },
|
{ name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },
|
||||||
|
|||||||
61
www/app/(app)/browse/_components/DagProgressDots.tsx
Normal file
61
www/app/(app)/browse/_components/DagProgressDots.tsx
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import React from "react";
|
||||||
|
import { Box, Flex } from "@chakra-ui/react";
|
||||||
|
import type { DagTask } from "../../../lib/UserEventsProvider";
|
||||||
|
|
||||||
|
const pulseKeyframes = `
|
||||||
|
@keyframes dagDotPulse {
|
||||||
|
0%, 100% { opacity: 1; }
|
||||||
|
50% { opacity: 0.3; }
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
/**
 * Turn a snake_case task name into a space-separated label with the first
 * character of every underscore-delimited segment upper-cased,
 * e.g. "get_recording" -> "Get Recording".
 */
function humanizeTaskName(name: string): string {
  const words: string[] = [];
  for (const segment of name.split("_")) {
    const head = segment.charAt(0).toUpperCase();
    const tail = segment.slice(1);
    words.push(head + tail);
  }
  return words.join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Map a task status to the props spread onto its progress dot.
 *
 * - "completed" -> green fill
 * - "running"   -> blue fill plus the dagDotPulse animation
 * - "failed"    -> red fill
 * - "cancelled" -> gray fill
 * - "queued" or any other value -> transparent dot with a gray outline
 */
function dotProps(status: DagTask["status"]): Record<string, unknown> {
  if (status === "completed") {
    return { bg: "green.500" };
  }
  if (status === "running") {
    const pulse = { animation: "dagDotPulse 1.5s ease-in-out infinite" };
    return { bg: "blue.500", style: pulse };
  }
  if (status === "failed") {
    return { bg: "red.500" };
  }
  if (status === "cancelled") {
    return { bg: "gray.400" };
  }
  // "queued" and every unrecognised status share the hollow-dot look.
  return {
    bg: "transparent",
    border: "1px solid",
    borderColor: "gray.400",
  };
}
|
||||||
|
|
||||||
|
/**
 * Render one 4px dot per DAG task in a wrapping row. Each dot is styled by
 * `dotProps(task.status)` and carries the humanized task name as its tooltip
 * title. The keyframes used by the "running" animation are injected through
 * an inline <style> element next to the row.
 */
export default function DagProgressDots({ tasks }: { tasks: DagTask[] }) {
  const dots = tasks.map(({ name, status }) => (
    <Box
      key={name}
      w="4px"
      h="4px"
      borderRadius="full"
      flexShrink={0}
      title={humanizeTaskName(name)}
      {...dotProps(status)}
    />
  ));

  return (
    <>
      <style>{pulseKeyframes}</style>
      <Flex gap="2px" alignItems="center" flexWrap="wrap">
        {dots}
      </Flex>
    </>
  );
}
|
||||||
@@ -19,6 +19,7 @@ import {
|
|||||||
generateTextFragment,
|
generateTextFragment,
|
||||||
} from "../../../lib/textHighlight";
|
} from "../../../lib/textHighlight";
|
||||||
import type { components } from "../../../reflector-api";
|
import type { components } from "../../../reflector-api";
|
||||||
|
import type { DagTask } from "../../../lib/UserEventsProvider";
|
||||||
|
|
||||||
type SearchResult = components["schemas"]["SearchResult"];
|
type SearchResult = components["schemas"]["SearchResult"];
|
||||||
type SourceKind = components["schemas"]["SourceKind"];
|
type SourceKind = components["schemas"]["SourceKind"];
|
||||||
@@ -29,6 +30,7 @@ interface TranscriptCardsProps {
|
|||||||
isLoading?: boolean;
|
isLoading?: boolean;
|
||||||
onDelete: (transcriptId: string) => void;
|
onDelete: (transcriptId: string) => void;
|
||||||
onReprocess: (transcriptId: string) => void;
|
onReprocess: (transcriptId: string) => void;
|
||||||
|
dagStatusMap?: Map<string, DagTask[]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
function highlightText(text: string, query: string): React.ReactNode {
|
function highlightText(text: string, query: string): React.ReactNode {
|
||||||
@@ -102,11 +104,13 @@ function TranscriptCard({
|
|||||||
query,
|
query,
|
||||||
onDelete,
|
onDelete,
|
||||||
onReprocess,
|
onReprocess,
|
||||||
|
dagStatusMap,
|
||||||
}: {
|
}: {
|
||||||
result: SearchResult;
|
result: SearchResult;
|
||||||
query: string;
|
query: string;
|
||||||
onDelete: (transcriptId: string) => void;
|
onDelete: (transcriptId: string) => void;
|
||||||
onReprocess: (transcriptId: string) => void;
|
onReprocess: (transcriptId: string) => void;
|
||||||
|
dagStatusMap?: Map<string, DagTask[]>;
|
||||||
}) {
|
}) {
|
||||||
const [isExpanded, setIsExpanded] = useState(false);
|
const [isExpanded, setIsExpanded] = useState(false);
|
||||||
|
|
||||||
@@ -137,7 +141,16 @@ function TranscriptCard({
|
|||||||
<Box borderWidth={1} p={4} borderRadius="md" fontSize="sm">
|
<Box borderWidth={1} p={4} borderRadius="md" fontSize="sm">
|
||||||
<Flex justify="space-between" alignItems="flex-start" gap="2">
|
<Flex justify="space-between" alignItems="flex-start" gap="2">
|
||||||
<Box>
|
<Box>
|
||||||
<TranscriptStatusIcon status={result.status} />
|
<TranscriptStatusIcon
|
||||||
|
status={result.status}
|
||||||
|
dagStatus={
|
||||||
|
dagStatusMap?.get(result.id) ??
|
||||||
|
((result as Record<string, unknown>).dag_status as
|
||||||
|
| DagTask[]
|
||||||
|
| null) ??
|
||||||
|
null
|
||||||
|
}
|
||||||
|
/>
|
||||||
</Box>
|
</Box>
|
||||||
<Box flex="1">
|
<Box flex="1">
|
||||||
{/* Title with highlighting and text fragment for deep linking */}
|
{/* Title with highlighting and text fragment for deep linking */}
|
||||||
@@ -284,6 +297,7 @@ export default function TranscriptCards({
|
|||||||
isLoading,
|
isLoading,
|
||||||
onDelete,
|
onDelete,
|
||||||
onReprocess,
|
onReprocess,
|
||||||
|
dagStatusMap,
|
||||||
}: TranscriptCardsProps) {
|
}: TranscriptCardsProps) {
|
||||||
return (
|
return (
|
||||||
<Box position="relative">
|
<Box position="relative">
|
||||||
@@ -315,6 +329,7 @@ export default function TranscriptCards({
|
|||||||
query={query}
|
query={query}
|
||||||
onDelete={onDelete}
|
onDelete={onDelete}
|
||||||
onReprocess={onReprocess}
|
onReprocess={onReprocess}
|
||||||
|
dagStatusMap={dagStatusMap}
|
||||||
/>
|
/>
|
||||||
))}
|
))}
|
||||||
</Stack>
|
</Stack>
|
||||||
|
|||||||
@@ -8,13 +8,17 @@ import {
|
|||||||
FaGear,
|
FaGear,
|
||||||
} from "react-icons/fa6";
|
} from "react-icons/fa6";
|
||||||
import { TranscriptStatus } from "../../../lib/transcript";
|
import { TranscriptStatus } from "../../../lib/transcript";
|
||||||
|
import type { DagTask } from "../../../lib/UserEventsProvider";
|
||||||
|
import DagProgressDots from "./DagProgressDots";
|
||||||
|
|
||||||
interface TranscriptStatusIconProps {
|
interface TranscriptStatusIconProps {
|
||||||
status: TranscriptStatus;
|
status: TranscriptStatus;
|
||||||
|
dagStatus?: DagTask[] | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function TranscriptStatusIcon({
|
export default function TranscriptStatusIcon({
|
||||||
status,
|
status,
|
||||||
|
dagStatus,
|
||||||
}: TranscriptStatusIconProps) {
|
}: TranscriptStatusIconProps) {
|
||||||
switch (status) {
|
switch (status) {
|
||||||
case "ended":
|
case "ended":
|
||||||
@@ -36,6 +40,9 @@ export default function TranscriptStatusIcon({
|
|||||||
</Box>
|
</Box>
|
||||||
);
|
);
|
||||||
case "processing":
|
case "processing":
|
||||||
|
if (dagStatus && dagStatus.length > 0) {
|
||||||
|
return <DagProgressDots tasks={dagStatus} />;
|
||||||
|
}
|
||||||
return (
|
return (
|
||||||
<Box as="span" title="Processing in progress">
|
<Box as="span" title="Processing in progress">
|
||||||
<Icon color="gray.500" as={FaGear} />
|
<Icon color="gray.500" as={FaGear} />
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ import DeleteTranscriptDialog from "./_components/DeleteTranscriptDialog";
|
|||||||
import { formatLocalDate } from "../../lib/time";
|
import { formatLocalDate } from "../../lib/time";
|
||||||
import { RECORD_A_MEETING_URL } from "../../api/urls";
|
import { RECORD_A_MEETING_URL } from "../../api/urls";
|
||||||
import { useUserName } from "../../lib/useUserName";
|
import { useUserName } from "../../lib/useUserName";
|
||||||
|
import { useDagStatusMap } from "../../lib/UserEventsProvider";
|
||||||
|
|
||||||
const SEARCH_FORM_QUERY_INPUT_NAME = "query" as const;
|
const SEARCH_FORM_QUERY_INPUT_NAME = "query" as const;
|
||||||
|
|
||||||
@@ -273,6 +274,7 @@ export default function TranscriptBrowser() {
|
|||||||
}, [JSON.stringify(searchFilters)]);
|
}, [JSON.stringify(searchFilters)]);
|
||||||
|
|
||||||
const userName = useUserName();
|
const userName = useUserName();
|
||||||
|
const dagStatusMap = useDagStatusMap();
|
||||||
const [deletionLoading, setDeletionLoading] = useState(false);
|
const [deletionLoading, setDeletionLoading] = useState(false);
|
||||||
const cancelRef = React.useRef(null);
|
const cancelRef = React.useRef(null);
|
||||||
const [transcriptToDeleteId, setTranscriptToDeleteId] =
|
const [transcriptToDeleteId, setTranscriptToDeleteId] =
|
||||||
@@ -408,6 +410,7 @@ export default function TranscriptBrowser() {
|
|||||||
isLoading={searchLoading}
|
isLoading={searchLoading}
|
||||||
onDelete={setTranscriptToDeleteId}
|
onDelete={setTranscriptToDeleteId}
|
||||||
onReprocess={handleProcessTranscript}
|
onReprocess={handleProcessTranscript}
|
||||||
|
dagStatusMap={dagStatusMap}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
{!searchLoading && results.length === 0 && (
|
{!searchLoading && results.length === 0 && (
|
||||||
|
|||||||
@@ -302,10 +302,10 @@ export default function RoomsList() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const platform: "whereby" | "daily" | null =
|
const platform: "whereby" | "daily" =
|
||||||
room.platform === "whereby" || room.platform === "daily"
|
room.platform === "whereby" || room.platform === "daily"
|
||||||
? room.platform
|
? room.platform
|
||||||
: null;
|
: "daily";
|
||||||
|
|
||||||
const roomData = {
|
const roomData = {
|
||||||
name: room.name,
|
name: room.name,
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import {
|
|||||||
import { useError } from "../../../../(errors)/errorContext";
|
import { useError } from "../../../../(errors)/errorContext";
|
||||||
import { useRouter } from "next/navigation";
|
import { useRouter } from "next/navigation";
|
||||||
import { Box, Grid } from "@chakra-ui/react";
|
import { Box, Grid } from "@chakra-ui/react";
|
||||||
|
import { parseNonEmptyString } from "../../../../lib/utils";
|
||||||
|
|
||||||
export type TranscriptCorrect = {
|
export type TranscriptCorrect = {
|
||||||
params: Promise<{
|
params: Promise<{
|
||||||
@@ -25,8 +26,7 @@ export type TranscriptCorrect = {
|
|||||||
|
|
||||||
export default function TranscriptCorrect(props: TranscriptCorrect) {
|
export default function TranscriptCorrect(props: TranscriptCorrect) {
|
||||||
const params = use(props.params);
|
const params = use(props.params);
|
||||||
|
const transcriptId = parseNonEmptyString(params.transcriptId);
|
||||||
const { transcriptId } = params;
|
|
||||||
|
|
||||||
const updateTranscriptMutation = useTranscriptUpdate();
|
const updateTranscriptMutation = useTranscriptUpdate();
|
||||||
const transcript = useTranscriptGet(transcriptId);
|
const transcript = useTranscriptGet(transcriptId);
|
||||||
|
|||||||
@@ -3,7 +3,8 @@ import React from "react";
|
|||||||
import Markdown from "react-markdown";
|
import Markdown from "react-markdown";
|
||||||
import "../../../styles/markdown.css";
|
import "../../../styles/markdown.css";
|
||||||
import type { components } from "../../../reflector-api";
|
import type { components } from "../../../reflector-api";
|
||||||
type GetTranscript = components["schemas"]["GetTranscript"];
|
type GetTranscriptWithParticipants =
|
||||||
|
components["schemas"]["GetTranscriptWithParticipants"];
|
||||||
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
||||||
import { useTranscriptUpdate } from "../../../lib/apiHooks";
|
import { useTranscriptUpdate } from "../../../lib/apiHooks";
|
||||||
import {
|
import {
|
||||||
@@ -18,7 +19,7 @@ import { LuPen } from "react-icons/lu";
|
|||||||
import { useError } from "../../../(errors)/errorContext";
|
import { useError } from "../../../(errors)/errorContext";
|
||||||
|
|
||||||
type FinalSummaryProps = {
|
type FinalSummaryProps = {
|
||||||
transcript: GetTranscript;
|
transcript: GetTranscriptWithParticipants;
|
||||||
topics: GetTranscriptTopic[];
|
topics: GetTranscriptTopic[];
|
||||||
onUpdate: (newSummary: string) => void;
|
onUpdate: (newSummary: string) => void;
|
||||||
finalSummaryRef: React.Dispatch<React.SetStateAction<HTMLDivElement | null>>;
|
finalSummaryRef: React.Dispatch<React.SetStateAction<HTMLDivElement | null>>;
|
||||||
|
|||||||
@@ -9,7 +9,9 @@ import React, { useEffect, useState, use } from "react";
|
|||||||
import FinalSummary from "./finalSummary";
|
import FinalSummary from "./finalSummary";
|
||||||
import TranscriptTitle from "../transcriptTitle";
|
import TranscriptTitle from "../transcriptTitle";
|
||||||
import Player from "../player";
|
import Player from "../player";
|
||||||
|
import { useWebSockets } from "../useWebSockets";
|
||||||
import { useRouter } from "next/navigation";
|
import { useRouter } from "next/navigation";
|
||||||
|
import { parseNonEmptyString } from "../../../lib/utils";
|
||||||
import {
|
import {
|
||||||
Box,
|
Box,
|
||||||
Flex,
|
Flex,
|
||||||
@@ -30,7 +32,7 @@ type TranscriptDetails = {
|
|||||||
|
|
||||||
export default function TranscriptDetails(details: TranscriptDetails) {
|
export default function TranscriptDetails(details: TranscriptDetails) {
|
||||||
const params = use(details.params);
|
const params = use(details.params);
|
||||||
const transcriptId = params.transcriptId;
|
const transcriptId = parseNonEmptyString(params.transcriptId);
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const statusToRedirect = [
|
const statusToRedirect = [
|
||||||
"idle",
|
"idle",
|
||||||
@@ -49,6 +51,7 @@ export default function TranscriptDetails(details: TranscriptDetails) {
|
|||||||
transcriptId,
|
transcriptId,
|
||||||
waiting || mp3.audioDeleted === true,
|
waiting || mp3.audioDeleted === true,
|
||||||
);
|
);
|
||||||
|
useWebSockets(transcriptId);
|
||||||
const useActiveTopic = useState<Topic | null>(null);
|
const useActiveTopic = useState<Topic | null>(null);
|
||||||
const [finalSummaryElement, setFinalSummaryElement] =
|
const [finalSummaryElement, setFinalSummaryElement] =
|
||||||
useState<HTMLDivElement | null>(null);
|
useState<HTMLDivElement | null>(null);
|
||||||
|
|||||||
@@ -0,0 +1,190 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useEffect, useState } from "react";
|
||||||
|
import { Table, Box, Icon, Spinner, Text, Badge } from "@chakra-ui/react";
|
||||||
|
import { FaCheck, FaXmark, FaClock, FaMinus } from "react-icons/fa6";
|
||||||
|
import type { DagTask, DagTaskStatus } from "../../useWebSockets";
|
||||||
|
|
||||||
|
/**
 * Turn an underscore-separated task identifier into a display label.
 *
 * Every segment between underscores gets its first character upper-cased
 * (the rest of the segment is left untouched) and the segments are joined
 * with a single space, e.g. "get_recording" -> "Get Recording".
 *
 * @param name Raw task name as received from the backend.
 * @returns The label to show in the "Task" column.
 */
function humanizeTaskName(name: string): string {
  const labelParts: string[] = [];
  for (const segment of name.split("_")) {
    const initial = segment.charAt(0).toUpperCase();
    labelParts.push(initial + segment.slice(1));
  }
  return labelParts.join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Format a duration given in (possibly fractional) seconds as "Ns" or "Mm Ss".
 *
 * The value is rounded to whole seconds *before* it is split into minutes and
 * seconds. Rounding afterwards produced impossible readings such as "60s"
 * (for 59.6) or "1m 60s" (for 119.7).
 *
 * @param seconds Duration in seconds. Negative values are clamped to 0;
 *   NaN / Infinity yield the "--" placeholder instead of "NaNm NaNs".
 * @returns Human-readable duration string.
 */
function formatDuration(seconds: number): string {
  if (!Number.isFinite(seconds)) {
    return "--";
  }
  // Round once, up front, so the branch below and the remainder agree.
  const totalSeconds = Math.max(0, Math.round(seconds));
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }
  const minutes = Math.floor(totalSeconds / 60);
  const remainingSeconds = totalSeconds % 60;
  return `${minutes}m ${remainingSeconds}s`;
}
|
||||||
|
|
||||||
|
/**
 * Render the glyph for a task's status.
 *
 * "running" is drawn as a small blue spinner; "completed", "failed", "queued"
 * and "cancelled" are drawn as an icon wrapped in a span whose `title` gives
 * the status name on hover. Any other value renders nothing.
 */
function StatusIcon({ status }: { status: DagTaskStatus }) {
  // The spinner is the only state without a titled wrapper.
  if (status === "running") {
    return <Spinner size="sm" color="blue.500" />;
  }

  const glyphs = {
    completed: { label: "Completed", tint: "green.500", glyph: FaCheck },
    failed: { label: "Failed", tint: "red.500", glyph: FaXmark },
    queued: { label: "Queued", tint: "gray.400", glyph: FaClock },
    cancelled: { label: "Cancelled", tint: "gray.400", glyph: FaMinus },
  };

  // Own-property check so unknown statuses fall through to "render nothing".
  if (!Object.prototype.hasOwnProperty.call(glyphs, status)) {
    return null;
  }

  const { label, tint, glyph } = glyphs[status as keyof typeof glyphs];
  return (
    <Box as="span" title={label}>
      <Icon color={tint} as={glyph} />
    </Box>
  );
}
|
||||||
|
|
||||||
|
/**
 * Live "time since start" label that re-renders once per second.
 *
 * Only the current wall-clock time is kept in state; the elapsed value is
 * derived from it during render. Compared with storing the elapsed value
 * itself, this means:
 *  - a change of `startedAt` is reflected on the very next render instead of
 *    showing the previous task's elapsed time until the next tick;
 *  - the interval does not need to be torn down and recreated when
 *    `startedAt` changes.
 *
 * @param startedAt Timestamp string parsed with `new Date(...)`.
 *   NOTE(review): a timestamp without a timezone suffix is parsed as local
 *   time by `Date` — confirm the backend sends an offset or "Z".
 */
function ElapsedTimer({ startedAt }: { startedAt: string }) {
  const [now, setNow] = useState<number>(() => Date.now());

  useEffect(() => {
    const interval = setInterval(() => setNow(Date.now()), 1000);
    return () => clearInterval(interval);
  }, []);

  const startedMs = new Date(startedAt).getTime();
  // Clamp at zero: if the client clock is behind the server, the raw
  // difference is negative and would be rendered as e.g. "-3s".
  const elapsed = Math.max(0, (now - startedMs) / 1000);

  return <Text fontSize="sm">{formatDuration(elapsed)}</Text>;
}
|
||||||
|
|
||||||
|
/**
 * "Duration" column content for one task.
 *
 * - running task with a start timestamp: a live ticking timer;
 * - completed task with a recorded duration: the final formatted duration;
 * - anything else: a grey "--" placeholder.
 */
function DurationCell({ task }: { task: DagTask }) {
  const {
    status,
    duration_seconds: finishedIn,
    started_at: startedAt,
  } = task;

  if (status === "running" && startedAt) {
    return <ElapsedTimer startedAt={startedAt} />;
  }

  if (status === "completed" && finishedIn !== null) {
    return <Text fontSize="sm">{formatDuration(finishedIn)}</Text>;
  }

  return (
    <Text fontSize="sm" color="gray.400">
      --
    </Text>
  );
}
|
||||||
|
|
||||||
|
/**
 * "Progress" column content for one task.
 *
 * Shows a thin progress bar when `progress_pct` is set (fill width clamped to
 * 0-100 %, red for failed tasks, blue otherwise) and a "completed/total"
 * badge when `children_total` is set. Renders nothing when both are null.
 */
function ProgressCell({ task }: { task: DagTask }) {
  const pct = task.progress_pct;
  const childTotal = task.children_total;

  if (pct === null && childTotal === null) {
    return null;
  }

  const bar =
    pct !== null ? (
      <Box
        w="100%"
        h="6px"
        bg="gray.200"
        borderRadius="full"
        overflow="hidden"
      >
        <Box
          h="100%"
          w={`${Math.min(100, Math.max(0, pct))}%`}
          bg={task.status === "failed" ? "red.400" : "blue.400"}
          borderRadius="full"
          transition="width 0.3s ease"
        />
      </Box>
    ) : null;

  const badge =
    childTotal !== null ? (
      // Top margin only when the badge sits underneath a bar.
      <Badge size="sm" colorPalette="gray" mt={pct !== null ? 1 : 0}>
        {task.children_completed ?? 0}/{childTotal}
      </Badge>
    ) : null;

  return (
    <Box>
      {bar}
      {badge}
    </Box>
  );
}
|
||||||
|
|
||||||
|
/**
 * One table row per task: name, status glyph, duration and progress.
 *
 * A failed task that carries an error message becomes clickable; clicking
 * toggles an extra full-width row underneath that shows the error text.
 */
function TaskRow({ task }: { task: DagTask }) {
  const [expanded, setExpanded] = useState(false);
  const hasFailed = task.status === "failed" && task.error;

  const toggleDetails = () => setExpanded((open) => !open);

  // Extra row with the error text, only for an expanded failed task.
  const errorDetails = hasFailed && expanded && (
    <Table.Row>
      <Table.Cell colSpan={4}>
        <Box bg="red.50" p={3} borderRadius="md">
          <Text fontSize="xs" color="red.700" whiteSpace="pre-wrap">
            {task.error}
          </Text>
        </Box>
      </Table.Cell>
    </Table.Row>
  );

  return (
    <>
      <Table.Row
        cursor={hasFailed ? "pointer" : "default"}
        onClick={hasFailed ? toggleDetails : undefined}
        _hover={hasFailed ? { bg: "gray.50" } : undefined}
      >
        <Table.Cell>
          <Text fontSize="sm" fontWeight="medium">
            {humanizeTaskName(task.name)}
          </Text>
        </Table.Cell>
        <Table.Cell>
          <StatusIcon status={task.status} />
        </Table.Cell>
        <Table.Cell>
          <DurationCell task={task} />
        </Table.Cell>
        <Table.Cell>
          <ProgressCell task={task} />
        </Table.Cell>
      </Table.Row>
      {errorDetails}
    </>
  );
}
|
||||||
|
|
||||||
|
/**
 * Table listing every task of a processing run, one row per task, with
 * Task / Status / Duration / Progress columns. The wrapper scrolls
 * horizontally when the table is wider than its container.
 *
 * @param tasks Tasks to display, rendered in the order given; each row is
 *   keyed by the task's `name`.
 */
export default function DagProgressTable({ tasks }: { tasks: DagTask[] }) {
  // The first column has no fixed width and takes the remaining space.
  const columns = [
    { label: "Task", width: undefined },
    { label: "Status", width: "80px" },
    { label: "Duration", width: "100px" },
    { label: "Progress", width: "140px" },
  ];

  const headerCells = columns.map((column) => (
    <Table.ColumnHeader
      key={column.label}
      fontWeight="600"
      width={column.width}
    >
      {column.label}
    </Table.ColumnHeader>
  ));

  const bodyRows = tasks.map((task) => <TaskRow key={task.name} task={task} />);

  return (
    <Box w="100%" overflowX="auto">
      <Table.Root size="sm">
        <Table.Header>
          <Table.Row>{headerCells}</Table.Row>
        </Table.Header>
        <Table.Body>{bodyRows}</Table.Body>
      </Table.Root>
    </Box>
  );
}
|
||||||
@@ -10,6 +10,11 @@ import {
|
|||||||
} from "@chakra-ui/react";
|
} from "@chakra-ui/react";
|
||||||
import { useRouter } from "next/navigation";
|
import { useRouter } from "next/navigation";
|
||||||
import { useTranscriptGet } from "../../../../lib/apiHooks";
|
import { useTranscriptGet } from "../../../../lib/apiHooks";
|
||||||
|
import { parseNonEmptyString } from "../../../../lib/utils";
|
||||||
|
import { useWebSockets } from "../../useWebSockets";
|
||||||
|
import type { DagTask } from "../../useWebSockets";
|
||||||
|
import { useDagStatusMap } from "../../../../lib/UserEventsProvider";
|
||||||
|
import DagProgressTable from "./DagProgressTable";
|
||||||
|
|
||||||
type TranscriptProcessing = {
|
type TranscriptProcessing = {
|
||||||
params: Promise<{
|
params: Promise<{
|
||||||
@@ -19,13 +24,25 @@ type TranscriptProcessing = {
|
|||||||
|
|
||||||
export default function TranscriptProcessing(details: TranscriptProcessing) {
|
export default function TranscriptProcessing(details: TranscriptProcessing) {
|
||||||
const params = use(details.params);
|
const params = use(details.params);
|
||||||
const transcriptId = params.transcriptId;
|
const transcriptId = parseNonEmptyString(params.transcriptId);
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
|
|
||||||
const transcript = useTranscriptGet(transcriptId);
|
const transcript = useTranscriptGet(transcriptId);
|
||||||
|
const { status: wsStatus, dagStatus: wsDagStatus } =
|
||||||
|
useWebSockets(transcriptId);
|
||||||
|
const userDagStatusMap = useDagStatusMap();
|
||||||
|
const userDagStatus = userDagStatusMap.get(transcriptId) ?? null;
|
||||||
|
|
||||||
|
const restDagStatus: DagTask[] | null =
|
||||||
|
((transcript.data as Record<string, unknown>)?.dag_status as
|
||||||
|
| DagTask[]
|
||||||
|
| null) ?? null;
|
||||||
|
|
||||||
|
// Prefer transcript room WS (most granular), then user room WS, then REST
|
||||||
|
const dagStatus = wsDagStatus ?? userDagStatus ?? restDagStatus;
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const status = transcript.data?.status;
|
const status = wsStatus?.value ?? transcript.data?.status;
|
||||||
if (!status) return;
|
if (!status) return;
|
||||||
|
|
||||||
if (status === "ended" || status === "error") {
|
if (status === "ended" || status === "error") {
|
||||||
@@ -40,6 +57,7 @@ export default function TranscriptProcessing(details: TranscriptProcessing) {
|
|||||||
router.replace(dest);
|
router.replace(dest);
|
||||||
}
|
}
|
||||||
}, [
|
}, [
|
||||||
|
wsStatus?.value,
|
||||||
transcript.data?.status,
|
transcript.data?.status,
|
||||||
transcript.data?.source_kind,
|
transcript.data?.source_kind,
|
||||||
router,
|
router,
|
||||||
@@ -73,11 +91,29 @@ export default function TranscriptProcessing(details: TranscriptProcessing) {
|
|||||||
w={{ base: "full", md: "container.xl" }}
|
w={{ base: "full", md: "container.xl" }}
|
||||||
>
|
>
|
||||||
<Center h={"full"} w="full">
|
<Center h={"full"} w="full">
|
||||||
<VStack gap={10} bg="gray.100" p={10} borderRadius="md" maxW="500px">
|
<VStack
|
||||||
<Spinner size="xl" color="blue.500" />
|
gap={10}
|
||||||
<Heading size={"md"} textAlign="center">
|
bg="gray.100"
|
||||||
Processing recording
|
p={10}
|
||||||
</Heading>
|
borderRadius="md"
|
||||||
|
maxW="600px"
|
||||||
|
w="full"
|
||||||
|
>
|
||||||
|
{dagStatus ? (
|
||||||
|
<>
|
||||||
|
<Heading size={"md"} textAlign="center">
|
||||||
|
Processing recording
|
||||||
|
</Heading>
|
||||||
|
<DagProgressTable tasks={dagStatus} />
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<Spinner size="xl" color="blue.500" />
|
||||||
|
<Heading size={"md"} textAlign="center">
|
||||||
|
Processing recording
|
||||||
|
</Heading>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
<Text color="gray.600" textAlign="center">
|
<Text color="gray.600" textAlign="center">
|
||||||
You can safely return to the library while your recording is being
|
You can safely return to the library while your recording is being
|
||||||
processed.
|
processed.
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import { Box, Text, Grid, Heading, VStack, Flex } from "@chakra-ui/react";
|
|||||||
import LiveTrancription from "../../liveTranscription";
|
import LiveTrancription from "../../liveTranscription";
|
||||||
import { useTranscriptGet } from "../../../../lib/apiHooks";
|
import { useTranscriptGet } from "../../../../lib/apiHooks";
|
||||||
import { TranscriptStatus } from "../../../../lib/transcript";
|
import { TranscriptStatus } from "../../../../lib/transcript";
|
||||||
|
import { parseNonEmptyString } from "../../../../lib/utils";
|
||||||
|
|
||||||
type TranscriptDetails = {
|
type TranscriptDetails = {
|
||||||
params: Promise<{
|
params: Promise<{
|
||||||
@@ -21,13 +22,14 @@ type TranscriptDetails = {
|
|||||||
|
|
||||||
const TranscriptRecord = (details: TranscriptDetails) => {
|
const TranscriptRecord = (details: TranscriptDetails) => {
|
||||||
const params = use(details.params);
|
const params = use(details.params);
|
||||||
const transcript = useTranscriptGet(params.transcriptId);
|
const transcriptId = parseNonEmptyString(params.transcriptId);
|
||||||
|
const transcript = useTranscriptGet(transcriptId);
|
||||||
const [transcriptStarted, setTranscriptStarted] = useState(false);
|
const [transcriptStarted, setTranscriptStarted] = useState(false);
|
||||||
const useActiveTopic = useState<Topic | null>(null);
|
const useActiveTopic = useState<Topic | null>(null);
|
||||||
|
|
||||||
const webSockets = useWebSockets(params.transcriptId);
|
const webSockets = useWebSockets(transcriptId);
|
||||||
|
|
||||||
const mp3 = useMp3(params.transcriptId, true);
|
const mp3 = useMp3(transcriptId, true);
|
||||||
|
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import useMp3 from "../../useMp3";
|
|||||||
import { Center, VStack, Text, Heading } from "@chakra-ui/react";
|
import { Center, VStack, Text, Heading } from "@chakra-ui/react";
|
||||||
import FileUploadButton from "../../fileUploadButton";
|
import FileUploadButton from "../../fileUploadButton";
|
||||||
import { useTranscriptGet } from "../../../../lib/apiHooks";
|
import { useTranscriptGet } from "../../../../lib/apiHooks";
|
||||||
|
import { parseNonEmptyString } from "../../../../lib/utils";
|
||||||
|
|
||||||
type TranscriptUpload = {
|
type TranscriptUpload = {
|
||||||
params: Promise<{
|
params: Promise<{
|
||||||
@@ -16,12 +17,13 @@ type TranscriptUpload = {
|
|||||||
|
|
||||||
const TranscriptUpload = (details: TranscriptUpload) => {
|
const TranscriptUpload = (details: TranscriptUpload) => {
|
||||||
const params = use(details.params);
|
const params = use(details.params);
|
||||||
const transcript = useTranscriptGet(params.transcriptId);
|
const transcriptId = parseNonEmptyString(params.transcriptId);
|
||||||
|
const transcript = useTranscriptGet(transcriptId);
|
||||||
const [transcriptStarted, setTranscriptStarted] = useState(false);
|
const [transcriptStarted, setTranscriptStarted] = useState(false);
|
||||||
|
|
||||||
const webSockets = useWebSockets(params.transcriptId);
|
const webSockets = useWebSockets(transcriptId);
|
||||||
|
|
||||||
const mp3 = useMp3(params.transcriptId, true);
|
const mp3 = useMp3(transcriptId, true);
|
||||||
|
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
|
|
||||||
|
|||||||
@@ -2,10 +2,11 @@ import type { components } from "../../reflector-api";
|
|||||||
import { useTranscriptCreate } from "../../lib/apiHooks";
|
import { useTranscriptCreate } from "../../lib/apiHooks";
|
||||||
|
|
||||||
type CreateTranscript = components["schemas"]["CreateTranscript"];
|
type CreateTranscript = components["schemas"]["CreateTranscript"];
|
||||||
type GetTranscript = components["schemas"]["GetTranscript"];
|
type GetTranscriptWithParticipants =
|
||||||
|
components["schemas"]["GetTranscriptWithParticipants"];
|
||||||
|
|
||||||
type UseCreateTranscript = {
|
type UseCreateTranscript = {
|
||||||
transcript: GetTranscript | null;
|
transcript: GetTranscriptWithParticipants | null;
|
||||||
loading: boolean;
|
loading: boolean;
|
||||||
error: Error | null;
|
error: Error | null;
|
||||||
create: (transcriptCreationDetails: CreateTranscript) => Promise<void>;
|
create: (transcriptCreationDetails: CreateTranscript) => Promise<void>;
|
||||||
|
|||||||
@@ -2,7 +2,8 @@ import { useEffect, useState } from "react";
|
|||||||
|
|
||||||
import { ShareMode, toShareMode } from "../../lib/shareMode";
|
import { ShareMode, toShareMode } from "../../lib/shareMode";
|
||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
type GetTranscript = components["schemas"]["GetTranscript"];
|
type GetTranscriptWithParticipants =
|
||||||
|
components["schemas"]["GetTranscriptWithParticipants"];
|
||||||
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
||||||
type UpdateTranscript = components["schemas"]["UpdateTranscript"];
|
type UpdateTranscript = components["schemas"]["UpdateTranscript"];
|
||||||
import {
|
import {
|
||||||
@@ -27,7 +28,7 @@ import { featureEnabled } from "../../lib/features";
|
|||||||
|
|
||||||
type ShareAndPrivacyProps = {
|
type ShareAndPrivacyProps = {
|
||||||
finalSummaryElement: HTMLDivElement | null;
|
finalSummaryElement: HTMLDivElement | null;
|
||||||
transcript: GetTranscript;
|
transcript: GetTranscriptWithParticipants;
|
||||||
topics: GetTranscriptTopic[];
|
topics: GetTranscriptTopic[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import { useState, useEffect, useMemo } from "react";
|
import { useState, useEffect, useMemo } from "react";
|
||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
|
|
||||||
type GetTranscript = components["schemas"]["GetTranscript"];
|
type GetTranscriptWithParticipants =
|
||||||
|
components["schemas"]["GetTranscriptWithParticipants"];
|
||||||
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
||||||
import {
|
import {
|
||||||
BoxProps,
|
BoxProps,
|
||||||
@@ -26,7 +27,7 @@ import {
|
|||||||
import { featureEnabled } from "../../lib/features";
|
import { featureEnabled } from "../../lib/features";
|
||||||
|
|
||||||
type ShareZulipProps = {
|
type ShareZulipProps = {
|
||||||
transcript: GetTranscript;
|
transcript: GetTranscriptWithParticipants;
|
||||||
topics: GetTranscriptTopic[];
|
topics: GetTranscriptTopic[];
|
||||||
disabled: boolean;
|
disabled: boolean;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
|
import { parseMaybeNonEmptyString } from "../../lib/utils";
|
||||||
|
|
||||||
type UpdateTranscript = components["schemas"]["UpdateTranscript"];
|
type UpdateTranscript = components["schemas"]["UpdateTranscript"];
|
||||||
type GetTranscript = components["schemas"]["GetTranscript"];
|
type GetTranscriptWithParticipants =
|
||||||
|
components["schemas"]["GetTranscriptWithParticipants"];
|
||||||
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
||||||
import {
|
import {
|
||||||
useTranscriptUpdate,
|
useTranscriptUpdate,
|
||||||
@@ -20,7 +22,7 @@ type TranscriptTitle = {
|
|||||||
onUpdate: (newTitle: string) => void;
|
onUpdate: (newTitle: string) => void;
|
||||||
|
|
||||||
// share props
|
// share props
|
||||||
transcript: GetTranscript | null;
|
transcript: GetTranscriptWithParticipants | null;
|
||||||
topics: GetTranscriptTopic[] | null;
|
topics: GetTranscriptTopic[] | null;
|
||||||
finalSummaryElement: HTMLDivElement | null;
|
finalSummaryElement: HTMLDivElement | null;
|
||||||
};
|
};
|
||||||
@@ -31,7 +33,7 @@ const TranscriptTitle = (props: TranscriptTitle) => {
|
|||||||
const [isEditing, setIsEditing] = useState(false);
|
const [isEditing, setIsEditing] = useState(false);
|
||||||
const updateTranscriptMutation = useTranscriptUpdate();
|
const updateTranscriptMutation = useTranscriptUpdate();
|
||||||
const participantsQuery = useTranscriptParticipants(
|
const participantsQuery = useTranscriptParticipants(
|
||||||
props.transcript?.id || null,
|
props.transcript?.id ? parseMaybeNonEmptyString(props.transcript.id) : null,
|
||||||
);
|
);
|
||||||
|
|
||||||
const updateTitle = async (newTitle: string, transcriptId: string) => {
|
const updateTitle = async (newTitle: string, transcriptId: string) => {
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { useEffect, useState } from "react";
|
import { useEffect, useState } from "react";
|
||||||
import { useTranscriptGet } from "../../lib/apiHooks";
|
import { useTranscriptGet } from "../../lib/apiHooks";
|
||||||
|
import { parseMaybeNonEmptyString } from "../../lib/utils";
|
||||||
import { useAuth } from "../../lib/AuthProvider";
|
import { useAuth } from "../../lib/AuthProvider";
|
||||||
import { API_URL } from "../../lib/apiClient";
|
import { API_URL } from "../../lib/apiClient";
|
||||||
|
|
||||||
@@ -27,7 +28,7 @@ const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => {
|
|||||||
data: transcript,
|
data: transcript,
|
||||||
isLoading: transcriptMetadataLoading,
|
isLoading: transcriptMetadataLoading,
|
||||||
error: transcriptError,
|
error: transcriptError,
|
||||||
} = useTranscriptGet(later ? null : transcriptId);
|
} = useTranscriptGet(later ? null : parseMaybeNonEmptyString(transcriptId));
|
||||||
|
|
||||||
const [serviceWorker, setServiceWorker] =
|
const [serviceWorker, setServiceWorker] =
|
||||||
useState<ServiceWorkerRegistration | null>(null);
|
useState<ServiceWorkerRegistration | null>(null);
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
type Participant = components["schemas"]["Participant"];
|
type Participant = components["schemas"]["Participant"];
|
||||||
import { useTranscriptParticipants } from "../../lib/apiHooks";
|
import { useTranscriptParticipants } from "../../lib/apiHooks";
|
||||||
|
import { parseMaybeNonEmptyString } from "../../lib/utils";
|
||||||
|
|
||||||
type ErrorParticipants = {
|
type ErrorParticipants = {
|
||||||
error: Error;
|
error: Error;
|
||||||
@@ -32,7 +33,7 @@ const useParticipants = (transcriptId: string): UseParticipants => {
|
|||||||
isLoading: loading,
|
isLoading: loading,
|
||||||
error,
|
error,
|
||||||
refetch,
|
refetch,
|
||||||
} = useTranscriptParticipants(transcriptId || null);
|
} = useTranscriptParticipants(parseMaybeNonEmptyString(transcriptId));
|
||||||
|
|
||||||
// Type-safe return based on state
|
// Type-safe return based on state
|
||||||
if (error) {
|
if (error) {
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
import { useTranscriptTopicsWithWordsPerSpeaker } from "../../lib/apiHooks";
|
import { useTranscriptTopicsWithWordsPerSpeaker } from "../../lib/apiHooks";
|
||||||
|
import { parseMaybeNonEmptyString } from "../../lib/utils";
|
||||||
|
|
||||||
type GetTranscriptTopicWithWordsPerSpeaker =
|
type GetTranscriptTopicWithWordsPerSpeaker =
|
||||||
components["schemas"]["GetTranscriptTopicWithWordsPerSpeaker"];
|
components["schemas"]["GetTranscriptTopicWithWordsPerSpeaker"];
|
||||||
@@ -38,7 +39,7 @@ const useTopicWithWords = (
|
|||||||
error,
|
error,
|
||||||
refetch,
|
refetch,
|
||||||
} = useTranscriptTopicsWithWordsPerSpeaker(
|
} = useTranscriptTopicsWithWordsPerSpeaker(
|
||||||
transcriptId || null,
|
parseMaybeNonEmptyString(transcriptId),
|
||||||
topicId || null,
|
topicId || null,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { useTranscriptTopics } from "../../lib/apiHooks";
|
import { useTranscriptTopics } from "../../lib/apiHooks";
|
||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
|
import { parseMaybeNonEmptyString } from "../../lib/utils";
|
||||||
|
|
||||||
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
|
||||||
|
|
||||||
@@ -10,7 +11,11 @@ type TranscriptTopics = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const useTopics = (id: string): TranscriptTopics => {
|
const useTopics = (id: string): TranscriptTopics => {
|
||||||
const { data: topics, isLoading: loading, error } = useTranscriptTopics(id);
|
const {
|
||||||
|
data: topics,
|
||||||
|
isLoading: loading,
|
||||||
|
error,
|
||||||
|
} = useTranscriptTopics(parseMaybeNonEmptyString(id));
|
||||||
|
|
||||||
return {
|
return {
|
||||||
topics: topics || null,
|
topics: topics || null,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
import { useTranscriptWaveform } from "../../lib/apiHooks";
|
import { useTranscriptWaveform } from "../../lib/apiHooks";
|
||||||
|
import { parseMaybeNonEmptyString } from "../../lib/utils";
|
||||||
|
|
||||||
type AudioWaveform = components["schemas"]["AudioWaveform"];
|
type AudioWaveform = components["schemas"]["AudioWaveform"];
|
||||||
|
|
||||||
@@ -14,7 +15,7 @@ const useWaveform = (id: string, skip: boolean): AudioWaveFormResponse => {
|
|||||||
data: waveform,
|
data: waveform,
|
||||||
isLoading: loading,
|
isLoading: loading,
|
||||||
error,
|
error,
|
||||||
} = useTranscriptWaveform(skip ? null : id);
|
} = useTranscriptWaveform(skip ? null : parseMaybeNonEmptyString(id));
|
||||||
|
|
||||||
return {
|
return {
|
||||||
waveform: waveform || null,
|
waveform: waveform || null,
|
||||||
|
|||||||
@@ -7,6 +7,15 @@ type GetTranscriptSegmentTopic =
|
|||||||
components["schemas"]["GetTranscriptSegmentTopic"];
|
components["schemas"]["GetTranscriptSegmentTopic"];
|
||||||
import { useQueryClient } from "@tanstack/react-query";
|
import { useQueryClient } from "@tanstack/react-query";
|
||||||
import { $api, WEBSOCKET_URL } from "../../lib/apiClient";
|
import { $api, WEBSOCKET_URL } from "../../lib/apiClient";
|
||||||
|
import {
|
||||||
|
invalidateTranscript,
|
||||||
|
invalidateTranscriptTopics,
|
||||||
|
invalidateTranscriptWaveform,
|
||||||
|
} from "../../lib/apiHooks";
|
||||||
|
import { NonEmptyString } from "../../lib/utils";
|
||||||
|
|
||||||
|
import type { DagTask } from "../../lib/dagTypes";
|
||||||
|
export type { DagTask, DagTaskStatus } from "../../lib/dagTypes";
|
||||||
|
|
||||||
export type UseWebSockets = {
|
export type UseWebSockets = {
|
||||||
transcriptTextLive: string;
|
transcriptTextLive: string;
|
||||||
@@ -18,6 +27,7 @@ export type UseWebSockets = {
|
|||||||
status: Status | null;
|
status: Status | null;
|
||||||
waveform: AudioWaveform | null;
|
waveform: AudioWaveform | null;
|
||||||
duration: number | null;
|
duration: number | null;
|
||||||
|
dagStatus: DagTask[] | null;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
||||||
@@ -34,6 +44,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
summary: "",
|
summary: "",
|
||||||
});
|
});
|
||||||
const [status, setStatus] = useState<Status | null>(null);
|
const [status, setStatus] = useState<Status | null>(null);
|
||||||
|
const [dagStatus, setDagStatus] = useState<DagTask[] | null>(null);
|
||||||
const { setError } = useError();
|
const { setError } = useError();
|
||||||
|
|
||||||
const queryClient = useQueryClient();
|
const queryClient = useQueryClient();
|
||||||
@@ -369,15 +380,10 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
});
|
});
|
||||||
console.debug("TOPIC event:", message.data);
|
console.debug("TOPIC event:", message.data);
|
||||||
// Invalidate topics query to sync with WebSocket data
|
// Invalidate topics query to sync with WebSocket data
|
||||||
queryClient.invalidateQueries({
|
invalidateTranscriptTopics(
|
||||||
queryKey: $api.queryOptions(
|
queryClient,
|
||||||
"get",
|
transcriptId as NonEmptyString,
|
||||||
"/v1/transcripts/{transcript_id}/topics",
|
);
|
||||||
{
|
|
||||||
params: { path: { transcript_id: transcriptId } },
|
|
||||||
},
|
|
||||||
).queryKey,
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "FINAL_SHORT_SUMMARY":
|
case "FINAL_SHORT_SUMMARY":
|
||||||
@@ -388,15 +394,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
if (message.data) {
|
if (message.data) {
|
||||||
setFinalSummary(message.data);
|
setFinalSummary(message.data);
|
||||||
// Invalidate transcript query to sync summary
|
// Invalidate transcript query to sync summary
|
||||||
queryClient.invalidateQueries({
|
invalidateTranscript(queryClient, transcriptId as NonEmptyString);
|
||||||
queryKey: $api.queryOptions(
|
|
||||||
"get",
|
|
||||||
"/v1/transcripts/{transcript_id}",
|
|
||||||
{
|
|
||||||
params: { path: { transcript_id: transcriptId } },
|
|
||||||
},
|
|
||||||
).queryKey,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -405,15 +403,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
if (message.data) {
|
if (message.data) {
|
||||||
setTitle(message.data.title);
|
setTitle(message.data.title);
|
||||||
// Invalidate transcript query to sync title
|
// Invalidate transcript query to sync title
|
||||||
queryClient.invalidateQueries({
|
invalidateTranscript(queryClient, transcriptId as NonEmptyString);
|
||||||
queryKey: $api.queryOptions(
|
|
||||||
"get",
|
|
||||||
"/v1/transcripts/{transcript_id}",
|
|
||||||
{
|
|
||||||
params: { path: { transcript_id: transcriptId } },
|
|
||||||
},
|
|
||||||
).queryKey,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -424,6 +414,10 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
);
|
);
|
||||||
if (message.data) {
|
if (message.data) {
|
||||||
setWaveForm(message.data.waveform);
|
setWaveForm(message.data.waveform);
|
||||||
|
invalidateTranscriptWaveform(
|
||||||
|
queryClient,
|
||||||
|
transcriptId as NonEmptyString,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case "DURATION":
|
case "DURATION":
|
||||||
@@ -442,11 +436,31 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
setStatus(message.data);
|
setStatus(message.data);
|
||||||
|
invalidateTranscript(queryClient, transcriptId as NonEmptyString);
|
||||||
if (message.data.value === "ended") {
|
if (message.data.value === "ended") {
|
||||||
ws.close();
|
ws.close();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case "DAG_STATUS":
|
||||||
|
if (message.data?.tasks) {
|
||||||
|
setDagStatus(message.data.tasks);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "DAG_TASK_PROGRESS":
|
||||||
|
if (message.data) {
|
||||||
|
setDagStatus(
|
||||||
|
(prev) =>
|
||||||
|
prev?.map((t) =>
|
||||||
|
t.name === message.data.task_name
|
||||||
|
? { ...t, progress_pct: message.data.progress_pct }
|
||||||
|
: t,
|
||||||
|
) ?? null,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
setError(
|
setError(
|
||||||
new Error(`Received unknown WebSocket event: ${message.event}`),
|
new Error(`Received unknown WebSocket event: ${message.event}`),
|
||||||
@@ -504,5 +518,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
status,
|
status,
|
||||||
waveform,
|
waveform,
|
||||||
duration,
|
duration,
|
||||||
|
dagStatus,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ import { useRouter } from "next/navigation";
|
|||||||
import { formatDateTime, formatStartedAgo } from "../lib/timeUtils";
|
import { formatDateTime, formatStartedAgo } from "../lib/timeUtils";
|
||||||
import MeetingMinimalHeader from "../components/MeetingMinimalHeader";
|
import MeetingMinimalHeader from "../components/MeetingMinimalHeader";
|
||||||
import { NonEmptyString } from "../lib/utils";
|
import { NonEmptyString } from "../lib/utils";
|
||||||
import { MeetingId } from "../lib/types";
|
import { MeetingId, assertMeetingId } from "../lib/types";
|
||||||
|
|
||||||
type Meeting = components["schemas"]["Meeting"];
|
type Meeting = components["schemas"]["Meeting"];
|
||||||
|
|
||||||
@@ -315,7 +315,9 @@ export default function MeetingSelection({
|
|||||||
variant="outline"
|
variant="outline"
|
||||||
colorScheme="red"
|
colorScheme="red"
|
||||||
size="md"
|
size="md"
|
||||||
onClick={() => handleEndMeeting(meeting.id)}
|
onClick={() =>
|
||||||
|
handleEndMeeting(assertMeetingId(meeting.id))
|
||||||
|
}
|
||||||
loading={deactivateMeetingMutation.isPending}
|
loading={deactivateMeetingMutation.isPending}
|
||||||
>
|
>
|
||||||
<Icon as={LuX} me={2} />
|
<Icon as={LuX} me={2} />
|
||||||
@@ -460,7 +462,9 @@ export default function MeetingSelection({
|
|||||||
variant="outline"
|
variant="outline"
|
||||||
colorScheme="red"
|
colorScheme="red"
|
||||||
size="md"
|
size="md"
|
||||||
onClick={() => handleEndMeeting(meeting.id)}
|
onClick={() =>
|
||||||
|
handleEndMeeting(assertMeetingId(meeting.id))
|
||||||
|
}
|
||||||
loading={deactivateMeetingMutation.isPending}
|
loading={deactivateMeetingMutation.isPending}
|
||||||
>
|
>
|
||||||
<Icon as={LuX} me={2} />
|
<Icon as={LuX} me={2} />
|
||||||
|
|||||||
@@ -22,14 +22,29 @@ import DailyIframe, {
|
|||||||
import type { components } from "../../reflector-api";
|
import type { components } from "../../reflector-api";
|
||||||
import { useAuth } from "../../lib/AuthProvider";
|
import { useAuth } from "../../lib/AuthProvider";
|
||||||
import { useConsentDialog } from "../../lib/consent";
|
import { useConsentDialog } from "../../lib/consent";
|
||||||
import { useRoomJoinMeeting } from "../../lib/apiHooks";
|
import {
|
||||||
|
useRoomJoinMeeting,
|
||||||
|
useMeetingStartRecording,
|
||||||
|
} from "../../lib/apiHooks";
|
||||||
import { omit } from "remeda";
|
import { omit } from "remeda";
|
||||||
import { assertExists } from "../../lib/utils";
|
import {
|
||||||
import { assertMeetingId } from "../../lib/types";
|
assertExists,
|
||||||
|
NonEmptyString,
|
||||||
|
parseNonEmptyString,
|
||||||
|
} from "../../lib/utils";
|
||||||
|
import { assertMeetingId, DailyRecordingType } from "../../lib/types";
|
||||||
|
import { useUuidV5 } from "react-uuid-hook";
|
||||||
|
|
||||||
const CONSENT_BUTTON_ID = "recording-consent";
|
const CONSENT_BUTTON_ID = "recording-consent";
|
||||||
const RECORDING_INDICATOR_ID = "recording-indicator";
|
const RECORDING_INDICATOR_ID = "recording-indicator";
|
||||||
|
|
||||||
|
// Namespace UUID for UUIDv5 generation of raw-tracks instanceIds
|
||||||
|
// DO NOT CHANGE: Breaks instanceId determinism across deployments
|
||||||
|
const RAW_TRACKS_NAMESPACE = "a1b2c3d4-e5f6-7890-abcd-ef1234567890";
|
||||||
|
|
||||||
|
const RECORDING_START_DELAY_MS = 2000;
|
||||||
|
const RECORDING_START_MAX_RETRIES = 5;
|
||||||
|
|
||||||
type Meeting = components["schemas"]["Meeting"];
|
type Meeting = components["schemas"]["Meeting"];
|
||||||
type Room = components["schemas"]["RoomDetails"];
|
type Room = components["schemas"]["RoomDetails"];
|
||||||
|
|
||||||
@@ -73,9 +88,7 @@ const useFrame = (
|
|||||||
cbs: {
|
cbs: {
|
||||||
onLeftMeeting: () => void;
|
onLeftMeeting: () => void;
|
||||||
onCustomButtonClick: (ev: DailyEventObjectCustomButtonClick) => void;
|
onCustomButtonClick: (ev: DailyEventObjectCustomButtonClick) => void;
|
||||||
onJoinMeeting: (
|
onJoinMeeting: () => void;
|
||||||
startRecording: (args: { type: "raw-tracks" }) => void,
|
|
||||||
) => void;
|
|
||||||
},
|
},
|
||||||
) => {
|
) => {
|
||||||
const [{ frame, joined }, setState] = useState(USE_FRAME_INIT_STATE);
|
const [{ frame, joined }, setState] = useState(USE_FRAME_INIT_STATE);
|
||||||
@@ -126,7 +139,7 @@ const useFrame = (
|
|||||||
console.error("frame is null in joined-meeting callback");
|
console.error("frame is null in joined-meeting callback");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
cbs.onJoinMeeting(frame.startRecording.bind(frame));
|
cbs.onJoinMeeting();
|
||||||
};
|
};
|
||||||
frame.on("joined-meeting", joinCb);
|
frame.on("joined-meeting", joinCb);
|
||||||
return () => {
|
return () => {
|
||||||
@@ -173,8 +186,15 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) {
|
|||||||
const authLastUserId = auth.lastUserId;
|
const authLastUserId = auth.lastUserId;
|
||||||
const [container, setContainer] = useState<HTMLDivElement | null>(null);
|
const [container, setContainer] = useState<HTMLDivElement | null>(null);
|
||||||
const joinMutation = useRoomJoinMeeting();
|
const joinMutation = useRoomJoinMeeting();
|
||||||
|
const startRecordingMutation = useMeetingStartRecording();
|
||||||
const [joinedMeeting, setJoinedMeeting] = useState<Meeting | null>(null);
|
const [joinedMeeting, setJoinedMeeting] = useState<Meeting | null>(null);
|
||||||
|
|
||||||
|
// Generate deterministic instanceIds so all participants use SAME IDs
|
||||||
|
const cloudInstanceId = parseNonEmptyString(meeting.id);
|
||||||
|
const rawTracksInstanceId = parseNonEmptyString(
|
||||||
|
useUuidV5(meeting.id, RAW_TRACKS_NAMESPACE)[0],
|
||||||
|
);
|
||||||
|
|
||||||
const roomName = params?.roomName as string;
|
const roomName = params?.roomName as string;
|
||||||
|
|
||||||
const {
|
const {
|
||||||
@@ -228,19 +248,72 @@ export default function DailyRoom({ meeting, room }: DailyRoomProps) {
|
|||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
const handleFrameJoinMeeting = useCallback(
|
const handleFrameJoinMeeting = useCallback(() => {
|
||||||
(startRecording: (args: { type: "raw-tracks" }) => void) => {
|
if (meeting.recording_type === "cloud") {
|
||||||
try {
|
console.log("Starting dual recording via REST API", {
|
||||||
if (meeting.recording_type === "cloud") {
|
cloudInstanceId,
|
||||||
console.log("Starting cloud recording");
|
rawTracksInstanceId,
|
||||||
startRecording({ type: "raw-tracks" });
|
});
|
||||||
}
|
|
||||||
} catch (error) {
|
// Start both cloud and raw-tracks via backend REST API (with retry on 404)
|
||||||
console.error("Failed to start recording:", error);
|
// Daily.co needs time to register call as "hosting" for REST API
|
||||||
}
|
const startRecordingWithRetry = (
|
||||||
},
|
type: DailyRecordingType,
|
||||||
[meeting.recording_type],
|
instanceId: NonEmptyString,
|
||||||
);
|
attempt: number = 1,
|
||||||
|
) => {
|
||||||
|
setTimeout(() => {
|
||||||
|
startRecordingMutation.mutate(
|
||||||
|
{
|
||||||
|
params: {
|
||||||
|
path: {
|
||||||
|
meeting_id: meeting.id,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
body: {
|
||||||
|
type,
|
||||||
|
instanceId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
onError: (error: any) => {
|
||||||
|
const errorText = error?.detail || error?.message || "";
|
||||||
|
const is404NotHosting = errorText.includes(
|
||||||
|
"does not seem to be hosting a call",
|
||||||
|
);
|
||||||
|
const isActiveStream = errorText.includes(
|
||||||
|
"has an active stream",
|
||||||
|
);
|
||||||
|
|
||||||
|
if (is404NotHosting && attempt < RECORDING_START_MAX_RETRIES) {
|
||||||
|
console.log(
|
||||||
|
`${type}: Call not hosting yet, retry ${attempt + 1}/${RECORDING_START_MAX_RETRIES} in ${RECORDING_START_DELAY_MS}ms...`,
|
||||||
|
);
|
||||||
|
startRecordingWithRetry(type, instanceId, attempt + 1);
|
||||||
|
} else if (isActiveStream) {
|
||||||
|
console.log(
|
||||||
|
`${type}: Recording already active (started by another participant)`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
console.error(`Failed to start ${type} recording:`, error);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}, RECORDING_START_DELAY_MS);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Start both recordings
|
||||||
|
startRecordingWithRetry("cloud", cloudInstanceId);
|
||||||
|
startRecordingWithRetry("raw-tracks", rawTracksInstanceId);
|
||||||
|
}
|
||||||
|
}, [
|
||||||
|
meeting.recording_type,
|
||||||
|
meeting.id,
|
||||||
|
startRecordingMutation,
|
||||||
|
cloudInstanceId,
|
||||||
|
rawTracksInstanceId,
|
||||||
|
]);
|
||||||
|
|
||||||
const recordingIconUrl = useMemo(
|
const recordingIconUrl = useMemo(
|
||||||
() => new URL("/recording-icon.svg", window.location.origin),
|
() => new URL("/recording-icon.svg", window.location.origin),
|
||||||
|
|||||||
@@ -1,11 +1,25 @@
|
|||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import React, { useEffect, useRef } from "react";
|
import React, { useEffect, useRef, useState } from "react";
|
||||||
import { useQueryClient } from "@tanstack/react-query";
|
import { useQueryClient } from "@tanstack/react-query";
|
||||||
import { WEBSOCKET_URL } from "./apiClient";
|
import { WEBSOCKET_URL } from "./apiClient";
|
||||||
import { useAuth } from "./AuthProvider";
|
import { useAuth } from "./AuthProvider";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
import { invalidateTranscriptLists, TRANSCRIPT_SEARCH_URL } from "./apiHooks";
|
import {
|
||||||
|
invalidateTranscript,
|
||||||
|
invalidateTranscriptLists,
|
||||||
|
TRANSCRIPT_SEARCH_URL,
|
||||||
|
} from "./apiHooks";
|
||||||
|
import type { NonEmptyString } from "./utils";
|
||||||
|
|
||||||
|
import type { DagTask } from "./dagTypes";
|
||||||
|
export type { DagTask, DagTaskStatus } from "./dagTypes";
|
||||||
|
|
||||||
|
const DagStatusContext = React.createContext<Map<string, DagTask[]>>(new Map());
|
||||||
|
|
||||||
|
export function useDagStatusMap() {
|
||||||
|
return React.useContext(DagStatusContext);
|
||||||
|
}
|
||||||
|
|
||||||
const UserEvent = z.object({
|
const UserEvent = z.object({
|
||||||
event: z.string(),
|
event: z.string(),
|
||||||
@@ -95,6 +109,9 @@ export function UserEventsProvider({
|
|||||||
const queryClient = useQueryClient();
|
const queryClient = useQueryClient();
|
||||||
const tokenRef = useRef<string | null>(null);
|
const tokenRef = useRef<string | null>(null);
|
||||||
const detachRef = useRef<(() => void) | null>(null);
|
const detachRef = useRef<(() => void) | null>(null);
|
||||||
|
const [dagStatusMap, setDagStatusMap] = useState<Map<string, DagTask[]>>(
|
||||||
|
new Map(),
|
||||||
|
);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
// Only tear down when the user is truly unauthenticated
|
// Only tear down when the user is truly unauthenticated
|
||||||
@@ -133,20 +150,52 @@ export function UserEventsProvider({
|
|||||||
if (!detachRef.current) {
|
if (!detachRef.current) {
|
||||||
const onMessage = (event: MessageEvent) => {
|
const onMessage = (event: MessageEvent) => {
|
||||||
try {
|
try {
|
||||||
const msg = UserEvent.parse(JSON.parse(event.data));
|
const fullMsg = JSON.parse(event.data);
|
||||||
|
const msg = UserEvent.parse(fullMsg);
|
||||||
const eventName = msg.event;
|
const eventName = msg.event;
|
||||||
|
|
||||||
const invalidateList = () => invalidateTranscriptLists(queryClient);
|
const invalidateList = () => invalidateTranscriptLists(queryClient);
|
||||||
|
|
||||||
switch (eventName) {
|
switch (eventName) {
|
||||||
case "TRANSCRIPT_CREATED":
|
case "TRANSCRIPT_CREATED":
|
||||||
case "TRANSCRIPT_DELETED":
|
case "TRANSCRIPT_DELETED":
|
||||||
case "TRANSCRIPT_STATUS":
|
|
||||||
case "TRANSCRIPT_FINAL_TITLE":
|
case "TRANSCRIPT_FINAL_TITLE":
|
||||||
case "TRANSCRIPT_DURATION":
|
case "TRANSCRIPT_DURATION":
|
||||||
invalidateList().then(() => {});
|
invalidateList().then(() => {});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case "TRANSCRIPT_STATUS": {
|
||||||
|
invalidateList().then(() => {});
|
||||||
|
const transcriptId = fullMsg.data?.id as string | undefined;
|
||||||
|
if (transcriptId) {
|
||||||
|
invalidateTranscript(
|
||||||
|
queryClient,
|
||||||
|
transcriptId as NonEmptyString,
|
||||||
|
).then(() => {});
|
||||||
|
}
|
||||||
|
const status = fullMsg.data?.value as string | undefined;
|
||||||
|
if (transcriptId && status && status !== "processing") {
|
||||||
|
setDagStatusMap((prev) => {
|
||||||
|
const next = new Map(prev);
|
||||||
|
next.delete(transcriptId);
|
||||||
|
return next;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "TRANSCRIPT_DAG_STATUS": {
|
||||||
|
const transcriptId = fullMsg.data?.id as string | undefined;
|
||||||
|
const tasks = fullMsg.data?.tasks as DagTask[] | undefined;
|
||||||
|
if (transcriptId && tasks) {
|
||||||
|
setDagStatusMap((prev) => {
|
||||||
|
const next = new Map(prev);
|
||||||
|
next.set(transcriptId, tasks);
|
||||||
|
return next;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Ignore other content events for list updates
|
// Ignore other content events for list updates
|
||||||
break;
|
break;
|
||||||
@@ -176,5 +225,9 @@ export function UserEventsProvider({
|
|||||||
};
|
};
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
return <>{children}</>;
|
return (
|
||||||
|
<DagStatusContext.Provider value={dagStatusMap}>
|
||||||
|
{children}
|
||||||
|
</DagStatusContext.Provider>
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { QueryClient, useQueryClient } from "@tanstack/react-query";
|
|||||||
import type { components } from "../reflector-api";
|
import type { components } from "../reflector-api";
|
||||||
import { useAuth } from "./AuthProvider";
|
import { useAuth } from "./AuthProvider";
|
||||||
import { MeetingId } from "./types";
|
import { MeetingId } from "./types";
|
||||||
|
import { NonEmptyString } from "./utils";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX error types returned from the hooks are not always correct; declared types are ValidationError but real type could be string or any other
|
* XXX error types returned from the hooks are not always correct; declared types are ValidationError but real type could be string or any other
|
||||||
@@ -103,7 +104,7 @@ export function useTranscriptProcess() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export function useTranscriptGet(transcriptId: string | null) {
|
export function useTranscriptGet(transcriptId: NonEmptyString | null) {
|
||||||
return $api.useQuery(
|
return $api.useQuery(
|
||||||
"get",
|
"get",
|
||||||
"/v1/transcripts/{transcript_id}",
|
"/v1/transcripts/{transcript_id}",
|
||||||
@@ -120,6 +121,16 @@ export function useTranscriptGet(transcriptId: string | null) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const invalidateTranscript = (
|
||||||
|
queryClient: QueryClient,
|
||||||
|
transcriptId: NonEmptyString,
|
||||||
|
) =>
|
||||||
|
queryClient.invalidateQueries({
|
||||||
|
queryKey: $api.queryOptions("get", "/v1/transcripts/{transcript_id}", {
|
||||||
|
params: { path: { transcript_id: transcriptId } },
|
||||||
|
}).queryKey,
|
||||||
|
});
|
||||||
|
|
||||||
export function useRoomGet(roomId: string | null) {
|
export function useRoomGet(roomId: string | null) {
|
||||||
const { isAuthenticated } = useAuthReady();
|
const { isAuthenticated } = useAuthReady();
|
||||||
|
|
||||||
@@ -297,7 +308,7 @@ export function useTranscriptUploadAudio() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function useTranscriptWaveform(transcriptId: string | null) {
|
export function useTranscriptWaveform(transcriptId: NonEmptyString | null) {
|
||||||
return $api.useQuery(
|
return $api.useQuery(
|
||||||
"get",
|
"get",
|
||||||
"/v1/transcripts/{transcript_id}/audio/waveform",
|
"/v1/transcripts/{transcript_id}/audio/waveform",
|
||||||
@@ -312,7 +323,21 @@ export function useTranscriptWaveform(transcriptId: string | null) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function useTranscriptMP3(transcriptId: string | null) {
|
export const invalidateTranscriptWaveform = (
|
||||||
|
queryClient: QueryClient,
|
||||||
|
transcriptId: NonEmptyString,
|
||||||
|
) =>
|
||||||
|
queryClient.invalidateQueries({
|
||||||
|
queryKey: $api.queryOptions(
|
||||||
|
"get",
|
||||||
|
"/v1/transcripts/{transcript_id}/audio/waveform",
|
||||||
|
{
|
||||||
|
params: { path: { transcript_id: transcriptId } },
|
||||||
|
},
|
||||||
|
).queryKey,
|
||||||
|
});
|
||||||
|
|
||||||
|
export function useTranscriptMP3(transcriptId: NonEmptyString | null) {
|
||||||
const { isAuthenticated } = useAuthReady();
|
const { isAuthenticated } = useAuthReady();
|
||||||
|
|
||||||
return $api.useQuery(
|
return $api.useQuery(
|
||||||
@@ -329,7 +354,7 @@ export function useTranscriptMP3(transcriptId: string | null) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function useTranscriptTopics(transcriptId: string | null) {
|
export function useTranscriptTopics(transcriptId: NonEmptyString | null) {
|
||||||
return $api.useQuery(
|
return $api.useQuery(
|
||||||
"get",
|
"get",
|
||||||
"/v1/transcripts/{transcript_id}/topics",
|
"/v1/transcripts/{transcript_id}/topics",
|
||||||
@@ -344,7 +369,23 @@ export function useTranscriptTopics(transcriptId: string | null) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function useTranscriptTopicsWithWords(transcriptId: string | null) {
|
export const invalidateTranscriptTopics = (
|
||||||
|
queryClient: QueryClient,
|
||||||
|
transcriptId: NonEmptyString,
|
||||||
|
) =>
|
||||||
|
queryClient.invalidateQueries({
|
||||||
|
queryKey: $api.queryOptions(
|
||||||
|
"get",
|
||||||
|
"/v1/transcripts/{transcript_id}/topics",
|
||||||
|
{
|
||||||
|
params: { path: { transcript_id: transcriptId } },
|
||||||
|
},
|
||||||
|
).queryKey,
|
||||||
|
});
|
||||||
|
|
||||||
|
export function useTranscriptTopicsWithWords(
|
||||||
|
transcriptId: NonEmptyString | null,
|
||||||
|
) {
|
||||||
const { isAuthenticated } = useAuthReady();
|
const { isAuthenticated } = useAuthReady();
|
||||||
|
|
||||||
return $api.useQuery(
|
return $api.useQuery(
|
||||||
@@ -362,7 +403,7 @@ export function useTranscriptTopicsWithWords(transcriptId: string | null) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function useTranscriptTopicsWithWordsPerSpeaker(
|
export function useTranscriptTopicsWithWordsPerSpeaker(
|
||||||
transcriptId: string | null,
|
transcriptId: NonEmptyString | null,
|
||||||
topicId: string | null,
|
topicId: string | null,
|
||||||
) {
|
) {
|
||||||
const { isAuthenticated } = useAuthReady();
|
const { isAuthenticated } = useAuthReady();
|
||||||
@@ -384,7 +425,7 @@ export function useTranscriptTopicsWithWordsPerSpeaker(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function useTranscriptParticipants(transcriptId: string | null) {
|
export function useTranscriptParticipants(transcriptId: NonEmptyString | null) {
|
||||||
const { isAuthenticated } = useAuthReady();
|
const { isAuthenticated } = useAuthReady();
|
||||||
|
|
||||||
return $api.useQuery(
|
return $api.useQuery(
|
||||||
@@ -567,6 +608,20 @@ export function useTranscriptSpeakerMerge() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function useMeetingStartRecording() {
|
||||||
|
const { setError } = useError();
|
||||||
|
|
||||||
|
return $api.useMutation(
|
||||||
|
"post",
|
||||||
|
"/v1/meetings/{meeting_id}/recordings/start",
|
||||||
|
{
|
||||||
|
onError: (error) => {
|
||||||
|
setError(error as Error, "Failed to start recording");
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
export function useMeetingAudioConsent() {
|
export function useMeetingAudioConsent() {
|
||||||
const { setError } = useError();
|
const { setError } = useError();
|
||||||
|
|
||||||
|
|||||||
19
www/app/lib/dagTypes.ts
Normal file
19
www/app/lib/dagTypes.ts
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
export type DagTaskStatus =
|
||||||
|
| "queued"
|
||||||
|
| "running"
|
||||||
|
| "completed"
|
||||||
|
| "failed"
|
||||||
|
| "cancelled";
|
||||||
|
|
||||||
|
export type DagTask = {
|
||||||
|
name: string;
|
||||||
|
status: DagTaskStatus;
|
||||||
|
started_at: string | null;
|
||||||
|
finished_at: string | null;
|
||||||
|
duration_seconds: number | null;
|
||||||
|
parents: string[];
|
||||||
|
error: string | null;
|
||||||
|
children_total: number | null;
|
||||||
|
children_completed: number | null;
|
||||||
|
progress_pct: number | null;
|
||||||
|
};
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import { components } from "../reflector-api";
|
import { components } from "../reflector-api";
|
||||||
|
|
||||||
type ApiTranscriptStatus = components["schemas"]["GetTranscript"]["status"];
|
type ApiTranscriptStatus =
|
||||||
|
components["schemas"]["GetTranscriptWithParticipants"]["status"];
|
||||||
|
|
||||||
export type TranscriptStatus = ApiTranscriptStatus;
|
export type TranscriptStatus = ApiTranscriptStatus;
|
||||||
|
|||||||
@@ -89,3 +89,5 @@ export const assertMeetingId = (s: string): MeetingId => {
|
|||||||
// just cast for now
|
// just cast for now
|
||||||
return nes as MeetingId;
|
return nes as MeetingId;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export type DailyRecordingType = "cloud" | "raw-tracks";
|
||||||
|
|||||||
79
www/app/reflector-api.d.ts
vendored
79
www/app/reflector-api.d.ts
vendored
@@ -75,6 +75,31 @@ export interface paths {
|
|||||||
patch: operations["v1_meeting_deactivate"];
|
patch: operations["v1_meeting_deactivate"];
|
||||||
trace?: never;
|
trace?: never;
|
||||||
};
|
};
|
||||||
|
"/v1/meetings/{meeting_id}/recordings/start": {
|
||||||
|
parameters: {
|
||||||
|
query?: never;
|
||||||
|
header?: never;
|
||||||
|
path?: never;
|
||||||
|
cookie?: never;
|
||||||
|
};
|
||||||
|
get?: never;
|
||||||
|
put?: never;
|
||||||
|
/**
|
||||||
|
* Start Recording
|
||||||
|
* @description Start cloud or raw-tracks recording via Daily.co REST API.
|
||||||
|
*
|
||||||
|
* Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation of allowing only 1 recording at a time.
|
||||||
|
* Uses different instanceIds for cloud vs raw-tracks (same won't work)
|
||||||
|
*
|
||||||
|
* Note: No authentication required - anonymous users supported. TODO this is a DOS vector
|
||||||
|
*/
|
||||||
|
post: operations["v1_start_recording"];
|
||||||
|
delete?: never;
|
||||||
|
options?: never;
|
||||||
|
head?: never;
|
||||||
|
patch?: never;
|
||||||
|
trace?: never;
|
||||||
|
};
|
||||||
"/v1/rooms": {
|
"/v1/rooms": {
|
||||||
parameters: {
|
parameters: {
|
||||||
query?: never;
|
query?: never;
|
||||||
@@ -1544,6 +1569,10 @@ export interface components {
|
|||||||
* @enum {string}
|
* @enum {string}
|
||||||
*/
|
*/
|
||||||
platform: "whereby" | "daily";
|
platform: "whereby" | "daily";
|
||||||
|
/** Daily Composed Video S3 Key */
|
||||||
|
daily_composed_video_s3_key?: string | null;
|
||||||
|
/** Daily Composed Video Duration */
|
||||||
|
daily_composed_video_duration?: number | null;
|
||||||
};
|
};
|
||||||
/** MeetingConsentRequest */
|
/** MeetingConsentRequest */
|
||||||
MeetingConsentRequest: {
|
MeetingConsentRequest: {
|
||||||
@@ -1818,6 +1847,19 @@ export interface components {
|
|||||||
/** Words */
|
/** Words */
|
||||||
words: components["schemas"]["Word"][];
|
words: components["schemas"]["Word"][];
|
||||||
};
|
};
|
||||||
|
/** StartRecordingRequest */
|
||||||
|
StartRecordingRequest: {
|
||||||
|
/**
|
||||||
|
* Type
|
||||||
|
* @enum {string}
|
||||||
|
*/
|
||||||
|
type: "cloud" | "raw-tracks";
|
||||||
|
/**
|
||||||
|
* Instanceid
|
||||||
|
* Format: uuid
|
||||||
|
*/
|
||||||
|
instanceId: string;
|
||||||
|
};
|
||||||
/** Stream */
|
/** Stream */
|
||||||
Stream: {
|
Stream: {
|
||||||
/** Stream Id */
|
/** Stream Id */
|
||||||
@@ -2126,6 +2168,43 @@ export interface operations {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
v1_start_recording: {
|
||||||
|
parameters: {
|
||||||
|
query?: never;
|
||||||
|
header?: never;
|
||||||
|
path: {
|
||||||
|
meeting_id: string;
|
||||||
|
};
|
||||||
|
cookie?: never;
|
||||||
|
};
|
||||||
|
requestBody: {
|
||||||
|
content: {
|
||||||
|
"application/json": components["schemas"]["StartRecordingRequest"];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
responses: {
|
||||||
|
/** @description Successful Response */
|
||||||
|
200: {
|
||||||
|
headers: {
|
||||||
|
[name: string]: unknown;
|
||||||
|
};
|
||||||
|
content: {
|
||||||
|
"application/json": {
|
||||||
|
[key: string]: unknown;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
/** @description Validation Error */
|
||||||
|
422: {
|
||||||
|
headers: {
|
||||||
|
[name: string]: unknown;
|
||||||
|
};
|
||||||
|
content: {
|
||||||
|
"application/json": components["schemas"]["HTTPValidationError"];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
v1_rooms_list: {
|
v1_rooms_list: {
|
||||||
parameters: {
|
parameters: {
|
||||||
query?: {
|
query?: {
|
||||||
|
|||||||
@@ -46,6 +46,7 @@
|
|||||||
"react-markdown": "^9.0.0",
|
"react-markdown": "^9.0.0",
|
||||||
"react-qr-code": "^2.0.12",
|
"react-qr-code": "^2.0.12",
|
||||||
"react-select-search": "^4.1.7",
|
"react-select-search": "^4.1.7",
|
||||||
|
"react-uuid-hook": "^0.0.6",
|
||||||
"redlock": "5.0.0-beta.2",
|
"redlock": "5.0.0-beta.2",
|
||||||
"remeda": "^2.31.1",
|
"remeda": "^2.31.1",
|
||||||
"sass": "^1.63.6",
|
"sass": "^1.63.6",
|
||||||
|
|||||||
25
www/pnpm-lock.yaml
generated
25
www/pnpm-lock.yaml
generated
@@ -106,6 +106,9 @@ importers:
|
|||||||
react-select-search:
|
react-select-search:
|
||||||
specifier: ^4.1.7
|
specifier: ^4.1.7
|
||||||
version: 4.1.8(prop-types@15.8.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
|
version: 4.1.8(prop-types@15.8.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
|
||||||
|
react-uuid-hook:
|
||||||
|
specifier: ^0.0.6
|
||||||
|
version: 0.0.6(react@18.3.1)
|
||||||
redlock:
|
redlock:
|
||||||
specifier: 5.0.0-beta.2
|
specifier: 5.0.0-beta.2
|
||||||
version: 5.0.0-beta.2
|
version: 5.0.0-beta.2
|
||||||
@@ -7628,6 +7631,14 @@ packages:
|
|||||||
"@types/react":
|
"@types/react":
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
|
react-uuid-hook@0.0.6:
|
||||||
|
resolution:
|
||||||
|
{
|
||||||
|
integrity: sha512-u9+EvFbqpWfLE/ReYFry0vYu1BAg1fY9ekr0XLSDNnfWyrnVFytpurwz5qYsIB0psevuvrpZHIcvu7AjUwqinA==,
|
||||||
|
}
|
||||||
|
peerDependencies:
|
||||||
|
react: ">=16.8.0"
|
||||||
|
|
||||||
react@18.3.1:
|
react@18.3.1:
|
||||||
resolution:
|
resolution:
|
||||||
{
|
{
|
||||||
@@ -8771,6 +8782,13 @@ packages:
|
|||||||
integrity: sha512-Fykw5U4eZESbq739BeLvEBFRuJODfrlmjx5eJux7W817LjRaq4b7/i4t2zxQmhcX+fAj4nMfRdTzO4tmwLKn0w==,
|
integrity: sha512-Fykw5U4eZESbq739BeLvEBFRuJODfrlmjx5eJux7W817LjRaq4b7/i4t2zxQmhcX+fAj4nMfRdTzO4tmwLKn0w==,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uuid@13.0.0:
|
||||||
|
resolution:
|
||||||
|
{
|
||||||
|
integrity: sha512-XQegIaBTVUjSHliKqcnFqYypAd4S+WCYt5NIeRs6w/UAry7z8Y9j5ZwRRL4kzq9U3sD6v+85er9FvkEaBpji2w==,
|
||||||
|
}
|
||||||
|
hasBin: true
|
||||||
|
|
||||||
uuid@8.3.2:
|
uuid@8.3.2:
|
||||||
resolution:
|
resolution:
|
||||||
{
|
{
|
||||||
@@ -14570,6 +14588,11 @@ snapshots:
|
|||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
"@types/react": 18.2.20
|
"@types/react": 18.2.20
|
||||||
|
|
||||||
|
react-uuid-hook@0.0.6(react@18.3.1):
|
||||||
|
dependencies:
|
||||||
|
react: 18.3.1
|
||||||
|
uuid: 13.0.0
|
||||||
|
|
||||||
react@18.3.1:
|
react@18.3.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
loose-envify: 1.4.0
|
loose-envify: 1.4.0
|
||||||
@@ -15401,6 +15424,8 @@ snapshots:
|
|||||||
|
|
||||||
uuid-validate@0.0.3: {}
|
uuid-validate@0.0.3: {}
|
||||||
|
|
||||||
|
uuid@13.0.0: {}
|
||||||
|
|
||||||
uuid@8.3.2: {}
|
uuid@8.3.2: {}
|
||||||
|
|
||||||
uuid@9.0.1: {}
|
uuid@9.0.1: {}
|
||||||
|
|||||||
Reference in New Issue
Block a user