Compare commits

...

32 Commits

Author SHA1 Message Date
Igor Loskutov
b1eeb651f6 fix: send last DAG_STATUS on WebSocket connect instead of skipping all
Previously all DAG_STATUS events were skipped during historical replay
on WS connect, so reconnecting clients (React strict mode remount,
page navigation) lost current DAG state. Now sends only the most
recent DAG_STATUS event on connect.
2026-02-09 15:26:59 -05:00
Igor Loskutov
499de45fdb fix: processing page reads DAG status from user room WS fallback
The processing page only read dagStatus from the transcript room WS,
which loses events during page navigation and React strict mode
double-mounting (WS torn down and reconnected, historical replay
skips DAG_STATUS). Now also consumes useDagStatusMap() from
UserEventsProvider (user room), which uses a singleton WS that
survives remounts.

Priority: transcript room WS > user room WS > REST API.
2026-02-09 15:09:29 -05:00
Igor Loskutov
b4ccbe6928 test: add WebSocket broadcast delivery tests for STATUS and DAG_STATUS
Exercises the full broadcast → pub/sub → WebSocket delivery chain
that DEBUG.md identified as potentially broken. Covers send_json
direct delivery, broadcast_event() end-to-end, and event ordering.
Also patches broadcast.py's get_ws_manager (missing from conftest).
2026-02-09 14:58:13 -05:00
Igor Loskutov
38f100a83e fix: invalidate individual transcript query on TRANSCRIPT_STATUS user event
The UserEventsProvider only invalidated the list query on status changes.
The detail page's useTranscriptGet was never refreshed, so it never
redirected to /processing on reprocess.
2026-02-09 14:28:11 -05:00
Igor Loskutov
faec509a33 fix: invalidate transcript query on STATUS websocket event
Without this, the page never redirects to /processing when a reprocess
changes status from ended->processing, because the redirect logic only
watches the REST query data, not the WebSocket status state.
2026-02-09 14:22:53 -05:00
Igor Loskutov
4d9f5fa4b4 test: remove impossible-scenario tests from DAG REST enrichment
Remove 6 tests that covered malformed data shapes (data=None,
data=string, data=list, non-dict event elements) that cannot
occur since our own code always writes well-formed dicts via
model_dump(mode="json").
2026-02-09 14:10:19 -05:00
Igor Loskutov
455cb3d099 fix: use mode="json" in add_event to serialize datetimes in event data
Prevents 'Object of type datetime is not JSON serializable' when
broadcasting DAG_STATUS events to user room via WebSocket.
2026-02-09 14:06:49 -05:00
Igor Loskutov
2410688559 fix: pass DagStatusData model instead of dict to append_event_and_broadcast
add_event() calls .model_dump() on data, so it needs a Pydantic model not a dict.
2026-02-09 14:00:43 -05:00
Igor Loskutov
6dd96bfa5e test: enhance Task 4 REST enrichment tests — malformed data, GET extraction, search integration 2026-02-09 13:56:04 -05:00
Igor Loskutov
0acaa0de93 test: add with_error_handling decorator tests for broadcast integration 2026-02-09 13:56:04 -05:00
Igor Loskutov
c45d3182ee test: enhance Task 1+3 tests — production DAG, throttling
Add 4 new tests to test_dag_progress.py:

- test_production_dag_shape: Real 15-task pipeline topology with mixed
  statuses, verifying all tasks present, topological order invariant,
  and correct parent relationships (e.g. finalize has 4 parents)
- test_topological_sort_invariant_complex_dag: 7-node DAG with wide
  branching/merging to stress-test that all parents precede children
- test_logging_throttled_by_interval: Mocks time.monotonic to verify
  ctx.log() is throttled by interval while broadcasts are not
- test_uses_broadcast_event_not_append_event_and_broadcast: Verifies
  progress uses transient broadcast_event, not persisted append variant
2026-02-09 13:56:04 -05:00
Igor Loskutov
0c06cdd117 fix: consolidate DagTask types, fix REST fallback shape, fix lint noqa
- Extract shared DagTask/DagTaskStatus types into www/app/lib/dagTypes.ts
- Re-export from useWebSockets.ts and UserEventsProvider.tsx
- Fix browse page REST fallback: dag_status is list[dict] directly, not {tasks: [...]}
- Add missing # noqa: PLC0415 for fork-safe deferred imports
2026-02-09 13:25:40 -05:00
Igor Loskutov
ebae9124b6 feat: add DAG progress dots to browse page via WebSocket events
- Add TRANSCRIPT_DAG_STATUS handler to UserEventsProvider with
  DagStatusContext and useDagStatusMap hook for live DAG task updates
- Clean up dagStatusMap entries when TRANSCRIPT_STATUS transitions
  away from "processing"
- Create DagProgressDots component rendering color-coded dots per
  DAG task (green=completed, blue pulsing=running, hollow=queued,
  red=failed, gray=cancelled) with humanized tooltip names
- Wire dagStatusMap through browse page -> TranscriptCards ->
  TranscriptStatusIcon, falling back to REST dag_status field
2026-02-09 13:22:29 -05:00
Igor Loskutov
a6a5d35e44 feat: add DAG progress WebSocket handlers and processing page table
Add DAG_STATUS and DAG_TASK_PROGRESS event handlers to useWebSockets
hook with exported DagTask/DagTaskStatus types. Create DagProgressTable
component with status icons, live elapsed timers, progress bars, and
expandable error rows. Wire into processing page with REST fallback.
2026-02-09 13:22:25 -05:00
Igor Loskutov
025e6da539 feat: add dag_status REST enrichment to search and transcript GET 2026-02-09 13:12:05 -05:00
Igor Loskutov
4b79b0c989 feat: add broadcast_dag_status, decorator integration, and mixdown progress
- Add broadcast_dag_status() to dag_progress.py: fetches Hatchet run
  details, transforms to DagStatusData, and broadcasts DAG_STATUS event
  via WebSocket. Fire-and-forget with exception swallowing.
- Modify with_error_handling decorator to call broadcast_dag_status on
  both task success and failure.
- Add DAG_STATUS to USER_ROOM_EVENTS (broadcast.py) and reconnect
  filter (transcripts_websocket.py) to avoid replaying stale DAG state.
- Add initial DAG broadcast at workflow dispatch (transcript_process.py).
- Extend make_audio_progress_logger with optional transcript_id param
  for transient DAG_TASK_PROGRESS events during mixdown.
- All deferred imports for fork-safety, all broadcasts fire-and-forget.
2026-02-09 13:12:01 -05:00
Igor Loskutov
a359c845ff feat: add DagTask models and extract_dag_tasks transform
Foundation for DAG progress reporting to frontend. Ported topo sort
and task extraction from render_hatchet_run.py (Zulip worktree) to
produce structured Pydantic models instead of markdown.
2026-02-09 12:50:53 -05:00
cd2255cfbc chore(main): release 0.33.0 (#847) 2026-02-06 18:12:06 -05:00
15ab2e306e feat: Daily+hatchet default (#846)
* feat: set Daily as default video platform

Daily.co has been battle-tested and is ready to be the default.
Whereby remains available for rooms that explicitly set it.

* feat: enforce Hatchet for all multitrack processing

Remove use_celery option from rooms - multitrack (Daily) recordings
now always use Hatchet workflows. Celery remains for single-track
(Whereby) file processing only.

- Remove use_celery column from room table
- Simplify dispatch logic to always use Hatchet for multitracks
- Update tests to mock Hatchet instead of Celery

* fix: update whereby test to patch Hatchet instead of removed Celery import

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-02-05 18:38:08 -05:00
1ce1c7a910 fix: websocket tests (#825)
* fix websocket tests

* fix: restore timeout and fix celery test infrastructure

- Re-add timeout=1.0 to ws_manager pubsub loop (prevents CPU spin?)
- Use Redis for Celery tests (memory:// broker doesn't support chords)
- Add timeout param to in-memory subscriber mock
- Remove duplicate celery_includes fixture from rtc_ws tests

* fix: remove redundant inline imports in test files

* fix: update gitleaks ignore for moved s3_key line

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-02-05 14:23:31 -05:00
Rémi Pauchet
984795357e - fix nvidia repo blocked by apt (sha1) (#845)
- use build cache for apt and uv
- limit concurrency for uv to prevent crashes with too many cores
2026-02-05 13:59:34 -05:00
fa3cf5da0f chore(main): release 0.32.2 (#842) 2026-02-03 22:05:22 -05:00
8707c6694a fix: use Daily API recording.duration as master source for transcript duration (#844)
Set duration early in get_participants from Daily API (seconds -> ms),
ensuring post_zulip has the value before mixdown_tracks completes.

Removes redundant duration update from mixdown_tracks.

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-02-03 17:15:03 -05:00
4acde4b7fd fix: increase TIMEOUT_MEDIUM from 2m to 5m for LLM tasks (#843)
Topic detection was timing out on longer transcripts when LLM
responses are slow. This affects detect_chunk_topic and other
LLM-calling tasks that use TIMEOUT_MEDIUM.

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-02-03 16:05:16 -05:00
a2ed7d60d5 fix: make caddy optional (#841) 2026-02-03 00:18:47 +01:00
a08f94a5bf chore(main): release 0.32.1 (#840) 2026-01-30 17:34:48 -05:00
Igor Loskutov
c05d1f03cd fix: match httpx pad with hatchet audio timeout 2026-01-30 15:56:18 -05:00
Igor Loskutov
23eb1371cb fix: daily multitrack pipeline finalize dependency fix 2026-01-30 15:19:27 -05:00
2592e369f6 chore(main): release 0.32.0 (#838) 2026-01-30 13:13:59 -05:00
7fde64e252 feat: modal padding (#837)
* Add Modal backend for audio padding

- Create reflector_padding.py Modal deployment (CPU-based)
- Add PaddingWorkflow with conditional Modal/local backend
- Update deploy-all.sh to include padding deployment

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-01-30 13:11:51 -05:00
2ca624f052 chore(main): release 0.31.0 (#835) 2026-01-26 13:07:29 -05:00
fc3ef6c893 feat: mixdown optional (#834)
* optional mixdown

* optional mixdown

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2026-01-23 15:51:18 -05:00
61 changed files with 3811 additions and 474 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.DS_Store
server/.env
server/.env.production
.env
Caddyfile
server/exportdanswer

View File

@@ -4,3 +4,4 @@ docs/docs/installation/daily-setup.md:curl-auth-header:277
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:74
gpu/self_hosted/DEV_SETUP.md:curl-auth-header:83
server/reflector/worker/process.py:generic-api-key:465
server/reflector/worker/process.py:generic-api-key:594

View File

@@ -1,5 +1,48 @@
# Changelog
## [0.33.0](https://github.com/Monadical-SAS/reflector/compare/v0.32.2...v0.33.0) (2026-02-05)
### Features
* Daily+hatchet default ([#846](https://github.com/Monadical-SAS/reflector/issues/846)) ([15ab2e3](https://github.com/Monadical-SAS/reflector/commit/15ab2e306eacf575494b4b5d2b2ad779d44a1c7f))
### Bug Fixes
* websocket tests ([#825](https://github.com/Monadical-SAS/reflector/issues/825)) ([1ce1c7a](https://github.com/Monadical-SAS/reflector/commit/1ce1c7a910b6c374115d2437b17f9d288ef094dc))
## [0.32.2](https://github.com/Monadical-SAS/reflector/compare/v0.32.1...v0.32.2) (2026-02-03)
### Bug Fixes
* increase TIMEOUT_MEDIUM from 2m to 5m for LLM tasks ([#843](https://github.com/Monadical-SAS/reflector/issues/843)) ([4acde4b](https://github.com/Monadical-SAS/reflector/commit/4acde4b7fdef88cc02ca12cf38c9020b05ed96ac))
* make caddy optional ([#841](https://github.com/Monadical-SAS/reflector/issues/841)) ([a2ed7d6](https://github.com/Monadical-SAS/reflector/commit/a2ed7d60d557b551a5b64e4dfd909b63a791d9fc))
* use Daily API recording.duration as master source for transcript duration ([#844](https://github.com/Monadical-SAS/reflector/issues/844)) ([8707c66](https://github.com/Monadical-SAS/reflector/commit/8707c6694a80c939b6214bbc13331741f192e082))
## [0.32.1](https://github.com/Monadical-SAS/reflector/compare/v0.32.0...v0.32.1) (2026-01-30)
### Bug Fixes
* daily multitrack pipeline finalize dependency fix ([23eb137](https://github.com/Monadical-SAS/reflector/commit/23eb1371cb9348c4b81eb12ad506b582f8a4799e))
* match httpx pad with hatchet audio timeout ([c05d1f0](https://github.com/Monadical-SAS/reflector/commit/c05d1f03cd8369fc06efd455527e50246887efd0))
## [0.32.0](https://github.com/Monadical-SAS/reflector/compare/v0.31.0...v0.32.0) (2026-01-30)
### Features
* modal padding ([#837](https://github.com/Monadical-SAS/reflector/issues/837)) ([7fde64e](https://github.com/Monadical-SAS/reflector/commit/7fde64e2529a1d37b0f7507c62d983a7bd0b5b89))
## [0.31.0](https://github.com/Monadical-SAS/reflector/compare/v0.30.0...v0.31.0) (2026-01-23)
### Features
* mixdown optional ([#834](https://github.com/Monadical-SAS/reflector/issues/834)) ([fc3ef6c](https://github.com/Monadical-SAS/reflector/commit/fc3ef6c8933231c731fad84e7477a476a6220a5e))
## [0.30.0](https://github.com/Monadical-SAS/reflector/compare/v0.29.0...v0.30.0) (2026-01-23)

View File

@@ -1,6 +1,8 @@
# Reflector Caddyfile
# Replace example.com with your actual domains
# CORS is handled by the backend - Caddy just proxies
# Reflector Caddyfile (optional reverse proxy)
# Use this only when you run Caddy via: docker compose -f docker-compose.prod.yml --profile caddy up -d
# If Coolify, Traefik, or nginx already use ports 80/443, do NOT start Caddy; point your proxy at web:3000 and server:1250.
#
# Replace example.com with your actual domains. CORS is handled by the backend - Caddy just proxies.
#
# For environment variable substitution, set:
# FRONTEND_DOMAIN=app.example.com

View File

@@ -1,9 +1,14 @@
# Production Docker Compose configuration
# Usage: docker compose -f docker-compose.prod.yml up -d
#
# Caddy (reverse proxy on ports 80/443) is OPTIONAL and behind the "caddy" profile:
# - With Caddy (self-hosted, you manage SSL): docker compose -f docker-compose.prod.yml --profile caddy up -d
# - Without Caddy (Coolify/Traefik/nginx already on 80/443): docker compose -f docker-compose.prod.yml up -d
# Then point your proxy at web:3000 (frontend) and server:1250 (API).
#
# Prerequisites:
# 1. Copy .env.example to .env and configure for both server/ and www/
# 2. Copy Caddyfile.example to Caddyfile and edit with your domains
# 2. If using Caddy: copy Caddyfile.example to Caddyfile and edit it with your domains
# 3. Deploy Modal GPU functions (see gpu/modal_deployments/deploy-all.sh)
services:
@@ -84,6 +89,8 @@ services:
retries: 3
caddy:
profiles:
- caddy
image: caddy:2-alpine
restart: unless-stopped
ports:

View File

@@ -11,15 +11,15 @@ This page documents the Docker Compose configuration for Reflector. For the comp
The `docker-compose.prod.yml` includes these services:
| Service | Image | Purpose |
|---------|-------|---------|
| `web` | `monadicalsas/reflector-frontend` | Next.js frontend |
| `server` | `monadicalsas/reflector-backend` | FastAPI backend |
| `worker` | `monadicalsas/reflector-backend` | Celery worker for background tasks |
| `beat` | `monadicalsas/reflector-backend` | Celery beat scheduler |
| `redis` | `redis:7.2-alpine` | Message broker and cache |
| `postgres` | `postgres:17-alpine` | Primary database |
| `caddy` | `caddy:2-alpine` | Reverse proxy with auto-SSL |
| Service | Image | Purpose |
| ---------- | --------------------------------- | --------------------------------------------------------------------------- |
| `web` | `monadicalsas/reflector-frontend` | Next.js frontend |
| `server` | `monadicalsas/reflector-backend` | FastAPI backend |
| `worker` | `monadicalsas/reflector-backend` | Celery worker for background tasks |
| `beat` | `monadicalsas/reflector-backend` | Celery beat scheduler |
| `redis` | `redis:7.2-alpine` | Message broker and cache |
| `postgres` | `postgres:17-alpine` | Primary database |
| `caddy` | `caddy:2-alpine` | Reverse proxy with auto-SSL (optional; see [Caddy profile](#caddy-profile)) |
## Environment Files
@@ -30,6 +30,7 @@ Reflector uses two separate environment files:
Used by: `server`, `worker`, `beat`
Key variables:
```env
# Database connection
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
@@ -54,6 +55,7 @@ TRANSCRIPT_MODAL_API_KEY=...
Used by: `web`
Key variables:
```env
# Domain configuration
SITE_URL=https://app.example.com
@@ -70,26 +72,42 @@ Note: `API_URL` is used client-side (browser), `SERVER_API_URL` is used server-s
## Volumes
| Volume | Purpose |
|--------|---------|
| `redis_data` | Redis persistence |
| `postgres_data` | PostgreSQL data |
| `server_data` | Uploaded files, local storage |
| `caddy_data` | SSL certificates |
| `caddy_config` | Caddy configuration |
| Volume | Purpose |
| --------------- | ----------------------------- |
| `redis_data` | Redis persistence |
| `postgres_data` | PostgreSQL data |
| `server_data` | Uploaded files, local storage |
| `caddy_data` | SSL certificates |
| `caddy_config` | Caddy configuration |
## Network
All services share the default network. The network is marked `attachable: true` to allow external containers (like Authentik) to join.
## Caddy profile
Caddy (ports 80 and 443) is **optional** and behind the `caddy` profile so it does not conflict with an existing reverse proxy (e.g. Coolify, Traefik, nginx).
- **With Caddy** (you want Reflector to handle SSL):
`docker compose -f docker-compose.prod.yml --profile caddy up -d`
- **Without Caddy** (Coolify or another proxy already on 80/443):
`docker compose -f docker-compose.prod.yml up -d`
Then configure your proxy to send traffic to `web:3000` (frontend) and `server:1250` (API).
## Common Commands
### Start all services
```bash
# Without Caddy (e.g. when using Coolify)
docker compose -f docker-compose.prod.yml up -d
# With Caddy as reverse proxy
docker compose -f docker-compose.prod.yml --profile caddy up -d
```
### View logs
```bash
# All services
docker compose -f docker-compose.prod.yml logs -f
@@ -99,6 +117,7 @@ docker compose -f docker-compose.prod.yml logs server --tail 50
```
### Restart a service
```bash
# Quick restart (doesn't reload .env changes)
docker compose -f docker-compose.prod.yml restart server
@@ -108,27 +127,32 @@ docker compose -f docker-compose.prod.yml up -d server
```
### Run database migrations
```bash
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
```
### Access database
```bash
docker compose -f docker-compose.prod.yml exec postgres psql -U reflector
```
### Pull latest images
```bash
docker compose -f docker-compose.prod.yml pull
docker compose -f docker-compose.prod.yml up -d
```
### Stop all services
```bash
docker compose -f docker-compose.prod.yml down
```
### Full reset (WARNING: deletes data)
```bash
docker compose -f docker-compose.prod.yml down -v
```
@@ -187,6 +211,7 @@ The Caddyfile supports environment variable substitution:
Set `FRONTEND_DOMAIN` and `API_DOMAIN` environment variables, or edit the file directly.
### Reload Caddy after changes
```bash
docker compose -f docker-compose.prod.yml exec caddy caddy reload --config /etc/caddy/Caddyfile
```

View File

@@ -26,7 +26,7 @@ flowchart LR
Before starting, you need:
- **Production server** - 4+ cores, 8GB+ RAM, public IP
- **Production server** - 4+ cores, 8GB+ RAM, public IP
- **Two domain names** - e.g., `app.example.com` (frontend) and `api.example.com` (backend)
- **GPU processing** - Choose one:
- Modal.com account, OR
@@ -60,16 +60,17 @@ Type: A Name: api Value: <your-server-ip>
Reflector requires GPU processing for transcription and speaker diarization. Choose one option:
| | **Modal.com (Cloud)** | **Self-Hosted GPU** |
|---|---|---|
| | **Modal.com (Cloud)** | **Self-Hosted GPU** |
| ------------ | --------------------------------- | ---------------------------- |
| **Best for** | No GPU hardware, zero maintenance | Own GPU server, full control |
| **Pricing** | Pay-per-use | Fixed infrastructure cost |
| **Pricing** | Pay-per-use | Fixed infrastructure cost |
### Option A: Modal.com (Serverless Cloud GPU)
#### Accept HuggingFace Licenses
Visit both pages and click "Accept":
- https://huggingface.co/pyannote/speaker-diarization-3.1
- https://huggingface.co/pyannote/segmentation-3.0
@@ -179,6 +180,7 @@ Save these credentials - you'll need them in the next step.
## Configure Environment
Reflector has two env files:
- `server/.env` - Backend configuration
- `www/.env` - Frontend configuration
@@ -190,6 +192,7 @@ nano server/.env
```
**Required settings:**
```env
# Database (defaults work with docker-compose.prod.yml)
DATABASE_URL=postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
@@ -249,6 +252,7 @@ nano www/.env
```
**Required settings:**
```env
# Your domains
SITE_URL=https://app.example.com
@@ -266,7 +270,11 @@ FEATURE_REQUIRE_LOGIN=false
---
## Configure Caddy
## Reverse proxy (Caddy or existing)
**If Coolify, Traefik, or nginx already use ports 80/443** (e.g. Coolify on your host): skip Caddy. Start the stack without the Caddy profile (see [Start Services](#start-services) below), then point your proxy at `web:3000` (frontend) and `server:1250` (API).
**If you want Reflector to provide the reverse proxy and SSL:**
```bash
cp Caddyfile.example Caddyfile
@@ -289,10 +297,18 @@ Replace `example.com` with your domains. The `{$VAR:default}` syntax uses Caddy'
## Start Services
**Without Caddy** (e.g. Coolify already on 80/443):
```bash
docker compose -f docker-compose.prod.yml up -d
```
**With Caddy** (Reflector handles SSL):
```bash
docker compose -f docker-compose.prod.yml --profile caddy up -d
```
Wait for containers to start (first run may take 1-2 minutes to pull images and initialize).
---
@@ -300,18 +316,21 @@ Wait for containers to start (first run may take 1-2 minutes to pull images and
## Verify Deployment
### Check services
```bash
docker compose -f docker-compose.prod.yml ps
# All should show "Up"
```
### Test API
```bash
curl https://api.example.com/health
# Should return: {"status":"healthy"}
```
### Test Frontend
- Visit https://app.example.com
- You should see the Reflector interface
- Try uploading an audio file to test transcription
@@ -327,6 +346,7 @@ By default, Reflector is open (no login required). **Authentication is required
See [Authentication Setup](./auth-setup) for full Authentik OAuth configuration.
Quick summary:
1. Deploy Authentik on your server
2. Create OAuth provider in Authentik
3. Extract public key for JWT verification
@@ -358,6 +378,7 @@ DAILYCO_STORAGE_AWS_ROLE_ARN=<arn:aws:iam::ACCOUNT:role/DailyCo>
```
Reload env and restart:
```bash
docker compose -f docker-compose.prod.yml up -d server worker
```
@@ -367,35 +388,43 @@ docker compose -f docker-compose.prod.yml up -d server worker
## Troubleshooting
### Check logs for errors
```bash
docker compose -f docker-compose.prod.yml logs server --tail 20
docker compose -f docker-compose.prod.yml logs worker --tail 20
```
### Services won't start
```bash
docker compose -f docker-compose.prod.yml logs
```
### CORS errors in browser
- Verify `CORS_ORIGIN` in `server/.env` matches your frontend domain exactly (including `https://`)
- Reload env: `docker compose -f docker-compose.prod.yml up -d server`
### SSL certificate errors
### SSL certificate errors (when using Caddy)
- Caddy auto-provisions Let's Encrypt certificates
- Ensure ports 80 and 443 are open
- Ensure ports 80 and 443 are open and not used by another proxy
- Check: `docker compose -f docker-compose.prod.yml logs caddy`
- If port 80 is already in use (e.g. by Coolify), run without Caddy: `docker compose -f docker-compose.prod.yml up -d` and use your existing proxy
### Transcription not working
- Check Modal dashboard: https://modal.com/apps
- Verify URLs in `server/.env` match deployed functions
- Check worker logs: `docker compose -f docker-compose.prod.yml logs worker`
### "Login required" but auth not configured
- Set `FEATURE_REQUIRE_LOGIN=false` in `www/.env`
- Rebuild frontend: `docker compose -f docker-compose.prod.yml up -d --force-recreate web`
### Database migrations or connectivity issues
Migrations run automatically on server startup. To check database connectivity or debug migration failures:
```bash
@@ -408,4 +437,3 @@ docker compose -f docker-compose.prod.yml exec server uv run python -c "from ref
# Manually run migrations (if needed)
docker compose -f docker-compose.prod.yml exec server uv run alembic upgrade head
```

View File

@@ -131,6 +131,15 @@ if [ -z "$DIARIZER_URL" ]; then
fi
echo " -> $DIARIZER_URL"
echo ""
echo "Deploying padding (CPU audio processing via Modal SDK)..."
modal deploy reflector_padding.py
if [ $? -ne 0 ]; then
echo "Error: Failed to deploy padding. Check Modal dashboard for details."
exit 1
fi
echo " -> reflector-padding.pad_track (Modal SDK function)"
# --- Output Configuration ---
echo ""
echo "=========================================="
@@ -147,4 +156,6 @@ echo ""
echo "DIARIZATION_BACKEND=modal"
echo "DIARIZATION_URL=$DIARIZER_URL"
echo "DIARIZATION_MODAL_API_KEY=$API_KEY"
echo ""
echo "# Padding uses Modal SDK (requires MODAL_TOKEN_ID/SECRET in worker containers)"
echo "# --- End Modal Configuration ---"

View File

@@ -0,0 +1,277 @@
"""
Reflector GPU backend - audio padding
======================================
CPU-intensive audio padding service for adding silence to audio tracks.
Uses PyAV filter graph (adelay) for precise track synchronization.
IMPORTANT: This padding logic is duplicated from server/reflector/utils/audio_padding.py
for Modal deployment isolation (Modal can't import from server/reflector/). If you modify
the PyAV filter graph or padding algorithm, you MUST update both:
- gpu/modal_deployments/reflector_padding.py (this file)
- server/reflector/utils/audio_padding.py
Constants duplicated from server/reflector/utils/audio_constants.py for same reason.
"""
import os
import tempfile
from fractions import Fraction
import math
import asyncio
import modal
# Timeout (seconds) passed to each `requests` call against the presigned URLs.
# It is applied twice per job: once for the download and once for the upload.
S3_TIMEOUT = 60
# Overall Modal function timeout: 600 for the padding work itself plus the two
# S3 transfers above. (Presumably seconds, like S3_TIMEOUT - confirm against
# the Modal `timeout` parameter.)
PADDING_TIMEOUT = 600 + (S3_TIMEOUT * 2)
# The maximum duration (in seconds) that individual containers can remain idle
# when scaling down.
SCALEDOWN_WINDOW = 60
# Interval (seconds) between client-disconnect checks; used both by the async
# disconnect poller and as the minimum spacing of cancellation checks inside
# the blocking download/processing loops.
DISCONNECT_CHECK_INTERVAL = 2
app = modal.App("reflector-padding")
# CPU-based image (no GPU requested).
# NOTE(review): `pydantic` is imported inside web() but is not pinned here;
# presumably it is installed as a FastAPI dependency - confirm.
image = (
modal.Image.debian_slim(python_version="3.12")
.apt_install("ffmpeg") # Required by PyAV
.pip_install(
"av==13.1.0", # PyAV for audio processing
"requests==2.32.3", # HTTP for presigned URL downloads/uploads
"fastapi==0.115.12", # API framework
)
)
# The two constants below are duplicated from
# server/reflector/utils/audio_constants.py (see module docstring); the "ref"
# id is a grep-able marker linking the copies - keep them in sync.
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
# Sample rate (Hz) used for the filter graph, the resampler and the Opus output stream.
OPUS_STANDARD_SAMPLE_RATE = 48000
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
# Assigned to the output stream's `bit_rate` (presumably bits per second - confirm).
OPUS_DEFAULT_BIT_RATE = 128000
@app.function(
    cpu=2.0,
    timeout=PADDING_TIMEOUT,
    scaledown_window=SCALEDOWN_WINDOW,
    image=image,
)
@modal.asgi_app()
def web():
    """Build the FastAPI application served by this Modal function.

    The app exposes a single endpoint, ``POST /pad``. It downloads an audio
    track from ``track_url``, prepends ``start_time_seconds`` of delay using a
    PyAV filter graph (``aresample`` -> ``adelay``), re-encodes the result as
    Opus in a WebM container and uploads it to ``output_url`` with an HTTP PUT.

    NOTE: per the module docstring, the padding logic is duplicated in
    server/reflector/utils/audio_padding.py; keep both copies in sync.

    Returns:
        The configured ``FastAPI`` instance.
    """
    from fastapi import FastAPI, Request, HTTPException
    from pydantic import BaseModel

    class PaddingRequest(BaseModel):
        # URL the source track is downloaded from (HTTP GET).
        track_url: str
        # URL the padded track is uploaded to (HTTP PUT).
        output_url: str
        # Delay to prepend, in seconds; must be > 0 and <= 18000.
        start_time_seconds: float
        # Only used for log messages in this service.
        track_index: int

    class PaddingResponse(BaseModel):
        # Size in bytes of the padded file; 0 when the job was cancelled.
        size: int
        # True when the client disconnected before the job finished.
        cancelled: bool = False

    web_app = FastAPI()

    @web_app.post("/pad")
    async def pad_track_endpoint(request: Request, req: PaddingRequest) -> PaddingResponse:
        """Modal web endpoint for padding audio tracks with disconnect detection.

        The blocking padding work runs in the default executor while a
        background task polls ``request.is_disconnected()`` and sets a
        cancellation flag the worker thread checks periodically.

        Raises:
            HTTPException: 400 when a URL is empty or ``start_time_seconds``
                is not in the range (0, 18000].
        """
        import logging
        import threading

        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
        )
        logger = logging.getLogger(__name__)

        if not req.track_url:
            raise HTTPException(status_code=400, detail="track_url cannot be empty")
        if not req.output_url:
            raise HTTPException(status_code=400, detail="output_url cannot be empty")
        if req.start_time_seconds <= 0:
            raise HTTPException(
                status_code=400,
                detail=f"start_time_seconds must be positive, got {req.start_time_seconds}",
            )
        if req.start_time_seconds > 18000:
            raise HTTPException(
                status_code=400,
                detail="start_time_seconds exceeds maximum 18000s (5 hours)",
            )

        logger.info(f"Padding request: track {req.track_index}, delay={req.start_time_seconds}s")

        # Thread-safe cancellation flag shared between the async disconnect
        # checker and the blocking worker thread.
        cancelled = threading.Event()

        async def check_disconnect():
            """Poll for client disconnect every DISCONNECT_CHECK_INTERVAL seconds."""
            while not cancelled.is_set():
                await asyncio.sleep(DISCONNECT_CHECK_INTERVAL)
                if await request.is_disconnected():
                    logger.warning("Client disconnected, setting cancellation flag")
                    cancelled.set()
                    break

        # Start disconnect checker in background
        disconnect_task = asyncio.create_task(check_disconnect())
        try:
            # We are inside a coroutine, so the running loop is the right one
            # to schedule the executor job on.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None, _pad_track_blocking, req, cancelled, logger
            )
            return PaddingResponse(**result)
        finally:
            # Setting the flag stops the poller loop; cancel() interrupts a
            # pending sleep so the task finishes promptly.
            cancelled.set()
            disconnect_task.cancel()
            try:
                await disconnect_task
            except asyncio.CancelledError:
                pass

    def _pad_track_blocking(req, cancelled, logger) -> dict:
        """Blocking CPU-bound padding work with periodic cancellation checks.

        Args:
            req: request object providing ``track_url``, ``output_url``,
                ``start_time_seconds`` and ``track_index``.
            cancelled: threading.Event for thread-safe cancellation signaling.
            logger: logger used for progress and cleanup messages.

        Returns:
            ``{"size": <bytes>}`` on success, or
            ``{"size": 0, "cancelled": True}`` when cancellation was observed.

        Raises:
            ValueError: the downloaded file contains no audio stream.
            requests.HTTPError: the download or upload returned an error status.
        """
        import time

        import av
        import requests
        from av.audio.resampler import AudioResampler

        def _mux_filtered_frames(sink, out_stream, out_container) -> None:
            """Pull every frame currently available from ``sink``, encode and mux it."""
            while True:
                try:
                    f_out = sink.pull()
                except (av.error.BlockingIOError, av.error.EOFError):
                    # EAGAIN: the graph needs more input; EOF: graph fully
                    # flushed. Any other error is a real failure and must
                    # propagate instead of silently truncating the output.
                    break
                f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                for packet in out_stream.encode(f_out):
                    out_container.mux(packet)

        cancelled_result = {"size": 0, "cancelled": True}
        temp_dir = tempfile.mkdtemp()
        input_path = None
        output_path = None
        last_check = time.time()
        try:
            logger.info("Downloading track for padding")
            input_path = os.path.join(temp_dir, "track.webm")
            total_bytes = 0
            chunk_count = 0
            # Context manager releases the streamed connection on every exit
            # path, including early cancellation.
            with requests.get(req.track_url, stream=True, timeout=S3_TIMEOUT) as response:
                response.raise_for_status()
                with open(input_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if not chunk:
                            continue
                        f.write(chunk)
                        total_bytes += len(chunk)
                        chunk_count += 1
                        # Check for cancellation every arbitrary amount of chunks
                        if chunk_count % 12 == 0:
                            now = time.time()
                            if now - last_check >= DISCONNECT_CHECK_INTERVAL:
                                if cancelled.is_set():
                                    logger.info("Cancelled during download, exiting early")
                                    return dict(cancelled_result)
                                last_check = now
            logger.info(f"Track downloaded: {total_bytes} bytes")

            if cancelled.is_set():
                logger.info("Cancelled after download, exiting early")
                return dict(cancelled_result)

            # Apply padding using PyAV
            output_path = os.path.join(temp_dir, "padded.webm")
            delay_ms = math.floor(req.start_time_seconds * 1000)
            logger.info(f"Padding track {req.track_index} with {delay_ms}ms delay using PyAV")

            # Both containers are context-managed so they are closed even when
            # decoding/filtering/encoding raises or the job is cancelled.
            with av.open(input_path) as in_container:
                in_stream = next(
                    (s for s in in_container.streams if s.type == "audio"), None
                )
                if in_stream is None:
                    raise ValueError("No audio stream in input")

                with av.open(output_path, "w", format="webm") as out_container:
                    out_stream = out_container.add_stream(
                        "libopus", rate=OPUS_STANDARD_SAMPLE_RATE
                    )
                    out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE

                    graph = av.filter.Graph()
                    abuf_args = (
                        f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
                        f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
                        f"sample_fmt=s16:"
                        f"channel_layout=stereo"
                    )
                    src = graph.add("abuffer", args=abuf_args, name="src")
                    aresample_f = graph.add("aresample", args="async=1", name="ares")
                    # Same delay for both stereo channels.
                    delays_arg = f"{delay_ms}|{delay_ms}"
                    adelay_f = graph.add(
                        "adelay", args=f"delays={delays_arg}:all=1", name="delay"
                    )
                    sink = graph.add("abuffersink", name="sink")
                    src.link_to(aresample_f)
                    aresample_f.link_to(adelay_f)
                    adelay_f.link_to(sink)
                    graph.configure()

                    # Normalise input frames to the format declared on the
                    # abuffer source above.
                    resampler = AudioResampler(
                        format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
                    )

                    for frame in in_container.decode(in_stream):
                        # Check for cancellation periodically
                        now = time.time()
                        if now - last_check >= DISCONNECT_CHECK_INTERVAL:
                            if cancelled.is_set():
                                logger.info("Cancelled during processing, exiting early")
                                return dict(cancelled_result)
                            last_check = now

                        for rframe in resampler.resample(frame) or []:
                            rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                            rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                            src.push(rframe)
                        _mux_filtered_frames(sink, out_stream, out_container)

                    # Flush filter graph
                    src.push(None)
                    _mux_filtered_frames(sink, out_stream, out_container)

                    # Flush encoder
                    for packet in out_stream.encode(None):
                        out_container.mux(packet)

            file_size = os.path.getsize(output_path)
            logger.info(f"Padding complete: {file_size} bytes")

            logger.info("Uploading padded track to S3")
            with open(output_path, "rb") as f:
                upload_response = requests.put(req.output_url, data=f, timeout=S3_TIMEOUT)
            upload_response.raise_for_status()
            logger.info(f"Upload complete: {file_size} bytes")

            return {"size": file_size}
        finally:
            # Best-effort cleanup: failures are logged, never raised, so they
            # cannot mask the real outcome of the job.
            if input_path and os.path.exists(input_path):
                try:
                    os.unlink(input_path)
                except Exception as e:
                    logger.warning(f"Failed to cleanup input file: {e}")
            if output_path and os.path.exists(output_path):
                try:
                    os.unlink(output_path)
                except Exception as e:
                    logger.warning(f"Failed to cleanup output file: {e}")
            try:
                os.rmdir(temp_dir)
            except Exception as e:
                logger.warning(f"Failed to cleanup temp directory: {e}")

    return web_app

View File

@@ -4,27 +4,31 @@ ENV PYTHONUNBUFFERED=1 \
UV_LINK_MODE=copy \
UV_NO_CACHE=1
# patch until nvidia updates the sha1 repo
ADD sequoia.config /etc/crypto-policies/back-ends/sequoia.config
WORKDIR /tmp
RUN apt-get update \
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update \
&& apt-get install -y \
ffmpeg \
curl \
ca-certificates \
gnupg \
wget \
&& apt-get clean
wget
# Add NVIDIA CUDA repo for Debian 12 (bookworm) and install cuDNN 9 for CUDA 12
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb /cuda-keyring.deb
RUN dpkg -i /cuda-keyring.deb \
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
dpkg -i /cuda-keyring.deb \
&& rm /cuda-keyring.deb \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
cuda-cudart-12-6 \
libcublas-12-6 \
libcudnn9-cuda-12 \
libcudnn9-dev-cuda-12 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
libcudnn9-dev-cuda-12
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"
@@ -39,6 +43,13 @@ COPY ./app /app/app
COPY ./main.py /app/
COPY ./runserver.sh /app/
# prevent uv failing with too many open files on big cpus
ENV UV_CONCURRENT_INSTALLS=16
# first install
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --compile-bytecode --locked
EXPOSE 8000
CMD ["sh", "/app/runserver.sh"]

View File

@@ -0,0 +1,2 @@
[hash_algorithms]
sha1 = "always"

View File

@@ -0,0 +1,35 @@
"""drop_use_celery_column
Revision ID: 3aa20b96d963
Revises: e69f08ead8ea
Create Date: 2026-02-05 10:12:44.065279
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "3aa20b96d963"
down_revision: Union[str, None] = "e69f08ead8ea"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Drop the ``use_celery`` column from the ``room`` table."""
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.drop_column("use_celery")
def downgrade() -> None:
    """Re-add ``room.use_celery`` as a non-nullable boolean with server default false."""
    use_celery_column = sa.Column(
        "use_celery",
        sa.Boolean(),
        server_default=sa.text("false"),
        nullable=False,
    )
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.add_column(use_celery_column)

View File

@@ -8,7 +8,7 @@ readme = "README.md"
dependencies = [
"aiohttp>=3.9.0",
"aiohttp-cors>=0.7.0",
"av>=10.0.0",
"av>=15.0.0",
"requests>=2.31.0",
"aiortc>=1.5.0",
"sortedcontainers>=2.4.0",

View File

@@ -57,12 +57,6 @@ rooms = sqlalchemy.Table(
sqlalchemy.String,
nullable=False,
),
sqlalchemy.Column(
"use_celery",
sqlalchemy.Boolean,
nullable=False,
server_default=false(),
),
sqlalchemy.Column(
"skip_consent",
sqlalchemy.Boolean,
@@ -97,7 +91,6 @@ class Room(BaseModel):
ics_last_sync: datetime | None = None
ics_last_etag: str | None = None
platform: Platform = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)
use_celery: bool = False
skip_consent: bool = False

View File

@@ -1,6 +1,7 @@
"""Search functionality for transcripts and other entities."""
import itertools
import json
from dataclasses import dataclass
from datetime import datetime
from io import StringIO
@@ -172,6 +173,9 @@ class SearchResult(BaseModel):
total_match_count: NonNegativeInt = Field(
default=0, description="Total number of matches found in the transcript"
)
dag_status: list[dict] | None = Field(
default=None, description="Latest DAG task status for processing transcripts"
)
@field_serializer("created_at", when_used="json")
def serialize_datetime(self, dt: datetime) -> str:
@@ -328,6 +332,42 @@ class SnippetGenerator:
return summary_snippets + webvtt_snippets, total_matches
async def _fetch_dag_statuses(transcript_ids: list[str]) -> dict[str, list[dict]]:
    """Fetch latest DAG_STATUS event data for given transcript IDs.

    Returns dict mapping transcript_id -> tasks list from the last DAG_STATUS event.
    Transcripts with no events, no DAG_STATUS event, or an empty tasks list are
    simply absent from the returned dict. An empty input list short-circuits
    without touching the database.
    """
    if not transcript_ids:
        return {}
    db = get_database()
    # Only the id and the raw events column are needed; everything else is skipped.
    query = sqlalchemy.select(
        [
            transcripts.c.id,
            transcripts.c.events,
        ]
    ).where(transcripts.c.id.in_(transcript_ids))
    rows = await db.fetch_all(query)
    result: dict[str, list[dict]] = {}
    for row in rows:
        events_raw = row["events"]
        if not events_raw:
            continue
        # events is stored as JSON list
        # (the driver may hand it back already decoded or as a JSON string)
        events = events_raw if isinstance(events_raw, list) else json.loads(events_raw)
        # Find last DAG_STATUS event
        for ev in reversed(events):
            if isinstance(ev, dict) and ev.get("event") == "DAG_STATUS":
                tasks = ev.get("data", {}).get("tasks")
                if tasks:
                    result[row["id"]] = tasks
                # NOTE(review): scanning stops at the newest DAG_STATUS event even
                # when it carries no tasks — confirm this is the intended nesting.
                break
    return result
class SearchController:
"""Controller for search operations across different entities."""
@@ -470,6 +510,14 @@ class SearchController:
logger.error(f"Error processing search results: {e}", exc_info=True)
raise
# Enrich processing transcripts with DAG status
processing_ids = [r.id for r in results if r.status == "processing"]
if processing_ids:
dag_statuses = await _fetch_dag_statuses(processing_ids)
for r in results:
if r.id in dag_statuses:
r.dag_status = dag_statuses[r.id]
return results, total

View File

@@ -234,7 +234,7 @@ class Transcript(BaseModel):
return dt.isoformat()
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
ev = TranscriptEvent(event=event, data=data.model_dump())
ev = TranscriptEvent(event=event, data=data.model_dump(mode="json"))
self.events.append(ev)
return ev

View File

@@ -15,7 +15,7 @@ from reflector.utils.string import NonEmptyString
from reflector.ws_manager import get_ws_manager
# Events that should also be sent to user room (matches Celery behavior)
USER_ROOM_EVENTS = {"STATUS", "FINAL_TITLE", "DURATION"}
USER_ROOM_EVENTS = {"STATUS", "FINAL_TITLE", "DURATION", "DAG_STATUS"}
async def broadcast_event(

View File

@@ -35,7 +35,9 @@ LLM_RATE_LIMIT_PER_SECOND = 10
# Task execution timeouts (seconds)
TIMEOUT_SHORT = 60 # Quick operations: API calls, DB updates
TIMEOUT_MEDIUM = 120 # Single LLM calls, waveform generation
TIMEOUT_MEDIUM = (
300 # Single LLM calls, waveform generation (5m for slow LLM responses)
)
TIMEOUT_LONG = 180 # Action items (larger context LLM)
TIMEOUT_AUDIO = 300 # Audio processing: padding, mixdown
TIMEOUT_AUDIO = 720 # Audio processing: padding, mixdown
TIMEOUT_HEAVY = 600 # Transcription, fan-out LLM tasks

View File

@@ -0,0 +1,230 @@
"""
DAG Progress Reporting — models and transform.
Converts Hatchet V1WorkflowRunDetails into structured DagTask list
for frontend WebSocket/REST consumption.
Ported from render_hatchet_run.py (feat-dag-zulip) which renders markdown;
this module produces structured Pydantic models instead.
"""
from datetime import datetime
from enum import StrEnum
from hatchet_sdk.clients.rest.models import (
V1TaskStatus,
V1WorkflowRunDetails,
WorkflowRunShapeItemForWorkflowRunDetails,
)
from pydantic import BaseModel
class DagTaskStatus(StrEnum):
    """Status of a single DAG task, serialized as a lowercase string."""

    QUEUED = "queued"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
# Translation table from Hatchet's V1TaskStatus to DagTaskStatus.
# extract_dag_tasks falls back to QUEUED for any status missing from this table.
_HATCHET_TO_DAG_STATUS: dict[V1TaskStatus, DagTaskStatus] = {
    V1TaskStatus.QUEUED: DagTaskStatus.QUEUED,
    V1TaskStatus.RUNNING: DagTaskStatus.RUNNING,
    V1TaskStatus.COMPLETED: DagTaskStatus.COMPLETED,
    V1TaskStatus.FAILED: DagTaskStatus.FAILED,
    V1TaskStatus.CANCELLED: DagTaskStatus.CANCELLED,
}
class DagTask(BaseModel):
    """One step of a workflow DAG, flattened for frontend consumption."""

    # Task name taken from the workflow shape entry.
    name: str
    status: DagTaskStatus
    # Timestamps copied from the Hatchet task summary; None when unavailable.
    started_at: datetime | None
    finished_at: datetime | None
    # Hatchet's duration divided by 1000 (presumably ms -> s; confirm against the SDK).
    duration_seconds: float | None
    # Names of the tasks that list this one among their children.
    parents: list[str]
    # Condensed error text derived from the task's error message, if any.
    error: str | None
    # Fan-out bookkeeping: number of spawned children and how many of them completed.
    children_total: int | None
    children_completed: int | None
    # Left as None by extract_dag_tasks.
    progress_pct: float | None
class DagStatusData(BaseModel):
    """Payload of a DAG_STATUS event: the workflow run id and its task list."""

    workflow_run_id: str
    # Tasks in the order produced by extract_dag_tasks (topological).
    tasks: list[DagTask]
def _topo_sort(
    shape: list[WorkflowRunShapeItemForWorkflowRunDetails],
) -> list[str]:
    """Return step ids ordered so that each step follows all of its parents.

    Implements Kahn's algorithm over the edges declared in ``children_step_ids``.
    Edges that point at step ids not present in ``shape`` are ignored. Whenever
    several steps are ready at once, the lexicographically smallest id is emitted
    first, which makes the output deterministic. Steps that never reach an
    in-degree of zero (i.e. part of a cycle) are left out of the result.

    Ported from render_hatchet_run.py.
    """
    known_ids = {item.step_id for item in shape}
    pending_parents: dict[str, int] = dict.fromkeys(known_ids, 0)
    successors: dict[str, list[str]] = {}

    for item in shape:
        kept = [cid for cid in (item.children_step_ids or []) if cid in known_ids]
        successors[item.step_id] = kept
        for cid in kept:
            pending_parents[cid] += 1

    # Seed with every step that has no parent inside the shape.
    ready = sorted(sid for sid, count in pending_parents.items() if count == 0)
    ordered: list[str] = []
    while ready:
        current = ready.pop(0)
        ordered.append(current)
        for cid in successors.get(current, []):
            pending_parents[cid] -= 1
            if pending_parents[cid] == 0:
                ready.append(cid)
        # Keep the frontier sorted so the next pop is the smallest ready id.
        ready.sort()
    return ordered
def _extract_error_summary(error_message: str | None) -> str | None:
"""Extract first meaningful line from error message, skipping traceback frames."""
if not error_message or not error_message.strip():
return None
err_lines = error_message.strip().split("\n")
err_summary = err_lines[0]
for line in err_lines:
stripped = line.strip()
if stripped and not stripped.startswith(("Traceback", "File ", "{", ")")):
err_summary = stripped
return err_summary
def extract_dag_tasks(details: V1WorkflowRunDetails) -> list[DagTask]:
    """Extract structured DagTask list from Hatchet workflow run details.

    Args:
        details: Workflow run details; ``shape`` supplies the DAG structure and
            ``tasks`` supplies the per-step execution records.

    Returns:
        Tasks in topological order with status, timestamps, parent names,
        error summaries, and fan-out children counts. Steps that appear in the
        shape but have no task record yet are reported as QUEUED with all
        optional fields set to None. An empty or missing shape yields ``[]``.
    """
    shape = details.shape or []
    tasks = details.tasks or []
    if not shape:
        return []

    step_to_name: dict[str, str] = {s.step_id: s.task_name for s in shape}

    # Reverse edges: child -> parent names (children outside the shape are ignored)
    parents_by_step: dict[str, list[str]] = {s.step_id: [] for s in shape}
    for s in shape:
        for child_id in s.children_step_ids or []:
            if child_id in parents_by_step:
                parents_by_step[child_id].append(step_to_name[s.step_id])

    # Join tasks by step_id; a later record for the same step replaces an earlier one
    from hatchet_sdk.clients.rest.models import V1TaskSummary  # noqa: PLC0415

    task_by_step: dict[str, V1TaskSummary] = {
        t.step_id: t for t in tasks if t.step_id and t.step_id in step_to_name
    }

    result: list[DagTask] = []
    for step_id in _topo_sort(shape):
        t = task_by_step.get(step_id)

        # Defaults describe a step that Hatchet has not created a task for yet.
        status = DagTaskStatus.QUEUED
        started_at = None
        finished_at = None
        duration_seconds: float | None = None
        error: str | None = None
        children_total: int | None = None
        children_completed: int | None = None

        if t:
            # Statuses missing from the translation table are reported as QUEUED.
            status = _HATCHET_TO_DAG_STATUS.get(t.status, DagTaskStatus.QUEUED)
            started_at = t.started_at
            finished_at = t.finished_at
            if t.duration is not None:
                # presumably milliseconds -> seconds; confirm against the Hatchet SDK
                duration_seconds = t.duration / 1000.0
            error = _extract_error_summary(t.error_message)

            # Fan-out children
            if t.num_spawned_children and t.num_spawned_children > 0:
                children_total = t.num_spawned_children
                children_completed = sum(
                    1 for c in (t.children or []) if c.status == V1TaskStatus.COMPLETED
                )

        result.append(
            DagTask(
                name=step_to_name[step_id],
                status=status,
                started_at=started_at,
                finished_at=finished_at,
                duration_seconds=duration_seconds,
                parents=parents_by_step.get(step_id, []),
                error=error,
                children_total=children_total,
                children_completed=children_completed,
                progress_pct=None,
            )
        )
    return result
async def broadcast_dag_status(transcript_id: str, workflow_run_id: str) -> None:
    """Fetch current DAG state from Hatchet and broadcast via WebSocket.

    Fire-and-forget: exceptions are logged but never raised.
    All imports are deferred for fork-safety (Hatchet workers fork processes).

    Args:
        transcript_id: Transcript that receives the DAG_STATUS event.
        workflow_run_id: Hatchet run to inspect; also echoed in the event payload.
    """
    try:
        from reflector.db.transcripts import transcripts_controller  # noqa: I001, PLC0415
        from reflector.hatchet.broadcast import append_event_and_broadcast  # noqa: PLC0415
        from reflector.hatchet.client import HatchetClientManager  # noqa: PLC0415
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (  # noqa: PLC0415
            fresh_db_connection,
        )
        from reflector.logger import logger  # noqa: PLC0415

        async with fresh_db_connection():
            # Ask Hatchet for the run and reduce it to the structured task list.
            client = HatchetClientManager.get_client()
            details = await client.runs.aio_get(workflow_run_id)
            dag_tasks = extract_dag_tasks(details)
            dag_status = DagStatusData(workflow_run_id=workflow_run_id, tasks=dag_tasks)
            transcript = await transcripts_controller.get_by_id(transcript_id)
            # Nothing is emitted when the transcript cannot be found.
            if transcript:
                await append_event_and_broadcast(
                    transcript_id,
                    transcript,
                    "DAG_STATUS",
                    dag_status,
                    logger,
                )
    except Exception:
        # Re-imported here because the failure may have occurred before the
        # logger import inside the try body was reached.
        from reflector.logger import logger  # noqa: PLC0415

        logger.warning(
            "[DAG Progress] Failed to broadcast DAG status",
            transcript_id=transcript_id,
            workflow_run_id=workflow_run_id,
            exc_info=True,
        )

View File

@@ -184,7 +184,10 @@ class Loggable(Protocol):
def make_audio_progress_logger(
ctx: Loggable, task_name: TaskName, interval: float = 5.0
ctx: Loggable,
task_name: TaskName,
interval: float = 5.0,
transcript_id: str | None = None,
) -> Callable[[float | None, float], None]:
"""Create a throttled progress logger callback for audio processing.
@@ -192,6 +195,7 @@ def make_audio_progress_logger(
ctx: Object with .log() method (e.g., Hatchet Context).
task_name: Name to prefix in log messages.
interval: Minimum seconds between log messages.
transcript_id: If provided, broadcasts transient DAG_TASK_PROGRESS events.
Returns:
Callback(progress_pct, audio_position) that logs at most every `interval` seconds.
@@ -213,6 +217,27 @@ def make_audio_progress_logger(
)
last_log_time[0] = now
if transcript_id and progress_pct is not None:
try:
import asyncio # noqa: PLC0415
from reflector.db.transcripts import TranscriptEvent # noqa: PLC0415
from reflector.hatchet.broadcast import broadcast_event # noqa: PLC0415
loop = asyncio.get_event_loop()
loop.create_task(
broadcast_event(
transcript_id,
TranscriptEvent(
event="DAG_TASK_PROGRESS",
data={"task_name": task_name, "progress_pct": progress_pct},
),
logger=logger,
)
)
except Exception:
pass # transient, never fail the callback
return callback
@@ -237,8 +262,15 @@ def with_error_handling(
) -> Callable[[PipelineInput, Context], Coroutine[Any, Any, R]]:
@functools.wraps(func)
async def wrapper(input: PipelineInput, ctx: Context) -> R:
from reflector.hatchet.dag_progress import broadcast_dag_status # noqa: I001, PLC0415
try:
return await func(input, ctx)
result = await func(input, ctx)
try:
await broadcast_dag_status(input.transcript_id, ctx.workflow_run_id)
except Exception:
pass
return result
except Exception as e:
logger.error(
f"[Hatchet] {step_name} failed",
@@ -246,6 +278,10 @@ def with_error_handling(
error=str(e),
exc_info=True,
)
try:
await broadcast_dag_status(input.transcript_id, ctx.workflow_run_id)
except Exception:
pass
if set_error_status:
await set_workflow_error_status(input.transcript_id)
raise
@@ -322,6 +358,7 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
mtg_session_id = recording.mtg_session_id
async with fresh_db_connection():
from reflector.db.transcripts import ( # noqa: PLC0415
TranscriptDuration,
TranscriptParticipant,
transcripts_controller,
)
@@ -330,15 +367,26 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
if not transcript:
raise ValueError(f"Transcript {input.transcript_id} not found")
# Note: title NOT cleared - preserves existing titles
# Duration from Daily API (seconds -> milliseconds) - master source
duration_ms = recording.duration * 1000 if recording.duration else 0
await transcripts_controller.update(
transcript,
{
"events": [],
"topics": [],
"participants": [],
"duration": duration_ms,
},
)
await append_event_and_broadcast(
input.transcript_id,
transcript,
"DURATION",
TranscriptDuration(duration=duration_ms),
logger=logger,
)
mtg_session_id = assert_non_none_and_non_empty(
mtg_session_id, "mtg_session_id is required"
)
@@ -548,7 +596,9 @@ async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
target_sample_rate,
offsets_seconds=None,
logger=logger,
progress_callback=make_audio_progress_logger(ctx, TaskName.MIXDOWN_TRACKS),
progress_callback=make_audio_progress_logger(
ctx, TaskName.MIXDOWN_TRACKS, transcript_id=input.transcript_id
),
expected_duration_sec=recording_duration if recording_duration > 0 else None,
)
await writer.flush()
@@ -1095,7 +1145,7 @@ async def identify_action_items(
@daily_multitrack_pipeline.task(
parents=[generate_waveform, generate_title, generate_recap, identify_action_items],
parents=[process_tracks, generate_title, generate_recap, identify_action_items],
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
retries=3,
)
@@ -1108,12 +1158,8 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
"""
ctx.log("finalize: saving transcript and setting status to 'ended'")
mixdown_result = ctx.task_output(mixdown_tracks)
track_result = ctx.task_output(process_tracks)
duration = mixdown_result.duration
all_words = track_result.all_words
# Cleanup temporary padded S3 files (deferred until finalize for semantic parity with Celery)
created_padded_files = track_result.created_padded_files
if created_padded_files:
@@ -1133,7 +1179,6 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
async with fresh_db_connection():
from reflector.db.transcripts import ( # noqa: PLC0415
TranscriptDuration,
TranscriptText,
transcripts_controller,
)
@@ -1142,8 +1187,6 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
if transcript is None:
raise ValueError(f"Transcript {input.transcript_id} not found in database")
merged_transcript = TranscriptType(words=all_words, translation=None)
await append_event_and_broadcast(
input.transcript_id,
transcript,
@@ -1155,21 +1198,15 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
logger=logger,
)
# Save duration and clear workflow_run_id (workflow completed successfully)
# Note: title/long_summary/short_summary already saved by their callbacks
# Clear workflow_run_id (workflow completed successfully)
# Note: title/long_summary/short_summary/duration already saved by their callbacks
await transcripts_controller.update(
transcript,
{
"duration": duration,
"workflow_run_id": None, # Clear on success - no need to resume
},
)
duration_data = TranscriptDuration(duration=duration)
await append_event_and_broadcast(
input.transcript_id, transcript, "DURATION", duration_data, logger=logger
)
await set_status_and_broadcast(input.transcript_id, "ended", logger=logger)
ctx.log(

View File

@@ -0,0 +1,165 @@
"""
Hatchet child workflow: PaddingWorkflow
Handles individual audio track padding via Modal.com backend.
"""
from datetime import timedelta
import av
from hatchet_sdk import Context
from pydantic import BaseModel
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.constants import TIMEOUT_AUDIO
from reflector.hatchet.workflows.models import PadTrackResult
from reflector.logger import logger
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
from reflector.utils.audio_padding import extract_stream_start_time_from_container
class PaddingInput(BaseModel):
    """Input for individual track padding."""

    # Index of the track; used for stream lookup, logging and the output file name.
    track_index: int
    # Object key of the source track, resolved inside bucket_name.
    s3_key: str
    bucket_name: str
    # Used to build the storage path of the padded output and for log context.
    transcript_id: str
# Module-level Hatchet client and workflow declaration; the pad_track task
# defined below registers itself on this workflow via its decorator.
hatchet = HatchetClientManager.get_client()
padding_workflow = hatchet.workflow(
    name="PaddingWorkflow", input_validator=PaddingInput
)
@padding_workflow.task(execution_timeout=timedelta(seconds=TIMEOUT_AUDIO), retries=3)
async def pad_track(input: PaddingInput, ctx: Context) -> PadTrackResult:
    """Pad audio track with silence based on WebM container start_time.

    The source track is opened through a presigned GET URL to read its stream
    start time. When that start time is not positive, the original key and
    bucket are returned with ``size=0`` and nothing is uploaded. Otherwise a
    presigned PUT URL is created and the padding itself is delegated to
    AudioPaddingModalProcessor, which receives both URLs.

    Args:
        input: Track location (bucket/key), track index and owning transcript id.
        ctx: Hatchet task context, used here only for ``ctx.log``.

    Returns:
        PadTrackResult pointing either at the untouched source object or at the
        padded object (``bucket_name=None`` meaning the default transcript bucket).

    Raises:
        Exception: Any failure is logged and re-raised; httpx status and timeout
            errors caught around the Modal call are rewrapped as plain Exception.
    """
    ctx.log(f"pad_track: track {input.track_index}, s3_key={input.s3_key}")
    logger.info(
        "[Hatchet] pad_track",
        track_index=input.track_index,
        s3_key=input.s3_key,
        transcript_id=input.transcript_id,
    )
    try:
        # Create fresh storage instance to avoid aioboto3 fork issues
        from reflector.settings import settings  # noqa: PLC0415
        from reflector.storage.storage_aws import AwsStorage  # noqa: PLC0415

        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )
        source_url = await storage.get_file_url(
            input.s3_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=input.bucket_name,
        )
        # Extract start_time to determine if padding needed
        with av.open(source_url) as in_container:
            if in_container.duration:
                # Duration is logged for diagnostics only; a conversion failure is
                # logged and must not fail the task.
                try:
                    # presumably container.duration is in microseconds — confirm against PyAV
                    duration = timedelta(seconds=in_container.duration // 1_000_000)
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration={duration}"
                    )
                except (ValueError, TypeError, OverflowError) as e:
                    ctx.log(
                        f"pad_track: track {input.track_index}, duration error: {str(e)}"
                    )
            start_time_seconds = extract_stream_start_time_from_container(
                in_container, input.track_index, logger=logger
            )
        if start_time_seconds <= 0:
            # Nothing to prepend: hand back the source object unchanged.
            logger.info(
                f"Track {input.track_index} requires no padding",
                track_index=input.track_index,
            )
            return PadTrackResult(
                padded_key=input.s3_key,
                bucket_name=input.bucket_name,
                size=0,
                track_index=input.track_index,
            )
        storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
        # Presign PUT URL for output (Modal will upload directly)
        output_url = await storage.get_file_url(
            storage_path,
            operation="put_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
        )
        import httpx  # noqa: PLC0415

        from reflector.processors.audio_padding_modal import (  # noqa: PLC0415
            AudioPaddingModalProcessor,
        )

        # NOTE(review): if the processor is the AudioPaddingModalProcessor shown in
        # this change set, it already rewraps httpx status/timeout errors as plain
        # Exception, so the two httpx handlers below may be unreachable — confirm.
        try:
            processor = AudioPaddingModalProcessor()
            result = await processor.pad_track(
                track_url=source_url,
                output_url=output_url,
                start_time_seconds=start_time_seconds,
                track_index=input.track_index,
            )
            file_size = result.size
            ctx.log(f"pad_track: Modal returned size={file_size}")
        except httpx.HTTPStatusError as e:
            error_detail = e.response.text if hasattr(e.response, "text") else str(e)
            logger.error(
                "[Hatchet] Modal padding HTTP error",
                transcript_id=input.transcript_id,
                track_index=input.track_index,
                status_code=e.response.status_code if hasattr(e, "response") else None,
                error=error_detail,
                exc_info=True,
            )
            raise Exception(
                f"Modal padding failed: HTTP {e.response.status_code}"
            ) from e
        except httpx.TimeoutException as e:
            logger.error(
                "[Hatchet] Modal padding timeout",
                transcript_id=input.transcript_id,
                track_index=input.track_index,
                error=str(e),
                exc_info=True,
            )
            raise Exception("Modal padding timeout") from e
        logger.info(
            "[Hatchet] pad_track complete",
            track_index=input.track_index,
            padded_key=storage_path,
        )
        return PadTrackResult(
            padded_key=storage_path,
            bucket_name=None,  # None = use default transcript storage bucket
            size=file_size,
            track_index=input.track_index,
        )
    except Exception as e:
        # Catch-all: log with task context, then re-raise unchanged.
        logger.error(
            "[Hatchet] pad_track failed",
            transcript_id=input.transcript_id,
            track_index=input.track_index,
            error=str(e),
            exc_info=True,
        )
        raise

View File

@@ -14,9 +14,7 @@ Hatchet workers run in forked processes; fresh imports per task ensure
storage/DB connections are not shared across forks.
"""
import tempfile
from datetime import timedelta
from pathlib import Path
import av
from hatchet_sdk import Context
@@ -27,10 +25,7 @@ from reflector.hatchet.constants import TIMEOUT_AUDIO, TIMEOUT_HEAVY
from reflector.hatchet.workflows.models import PadTrackResult, TranscribeTrackResult
from reflector.logger import logger
from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
from reflector.utils.audio_padding import (
apply_audio_padding_to_file,
extract_stream_start_time_from_container,
)
from reflector.utils.audio_padding import extract_stream_start_time_from_container
class TrackInput(BaseModel):
@@ -83,63 +78,44 @@ async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
)
with av.open(source_url) as in_container:
if in_container.duration:
try:
duration = timedelta(seconds=in_container.duration // 1_000_000)
ctx.log(
f"pad_track: track {input.track_index}, duration={duration}"
)
except Exception:
ctx.log(f"pad_track: track {input.track_index}, duration=ERROR")
start_time_seconds = extract_stream_start_time_from_container(
in_container, input.track_index, logger=logger
)
# If no padding needed, return original S3 key
if start_time_seconds <= 0:
logger.info(
f"Track {input.track_index} requires no padding",
track_index=input.track_index,
)
return PadTrackResult(
padded_key=input.s3_key,
bucket_name=input.bucket_name,
size=0,
track_index=input.track_index,
)
# If no padding needed, return original S3 key
if start_time_seconds <= 0:
logger.info(
f"Track {input.track_index} requires no padding",
track_index=input.track_index,
)
return PadTrackResult(
padded_key=input.s3_key,
bucket_name=input.bucket_name,
size=0,
track_index=input.track_index,
)
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:
temp_path = temp_file.name
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
try:
apply_audio_padding_to_file(
in_container,
temp_path,
start_time_seconds,
input.track_index,
logger=logger,
)
# Presign PUT URL for output (Modal uploads directly)
output_url = await storage.get_file_url(
storage_path,
operation="put_object",
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
)
file_size = Path(temp_path).stat().st_size
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
from reflector.processors.audio_padding_modal import ( # noqa: PLC0415
AudioPaddingModalProcessor,
)
logger.info(
f"About to upload padded track",
key=storage_path,
size=file_size,
)
with open(temp_path, "rb") as padded_file:
await storage.put_file(storage_path, padded_file)
logger.info(
f"Uploaded padded track to S3",
key=storage_path,
size=file_size,
)
finally:
Path(temp_path).unlink(missing_ok=True)
processor = AudioPaddingModalProcessor()
result = await processor.pad_track(
track_url=source_url,
output_url=output_url,
start_time_seconds=start_time_seconds,
track_index=input.track_index,
)
file_size = result.size
ctx.log(f"pad_track complete: track {input.track_index} -> {storage_path}")
logger.info(

View File

@@ -0,0 +1,113 @@
"""
Modal.com backend for audio padding.
"""
import asyncio
import os
import httpx
from pydantic import BaseModel
from reflector.hatchet.constants import TIMEOUT_AUDIO
from reflector.logger import logger
class PaddingResponse(BaseModel):
    """Parsed JSON body returned by the padding endpoint."""

    # Size reported by the backend (presumably bytes of the padded file — confirm).
    size: int
    # Set by the backend when it aborted the work; defaults to False when omitted.
    cancelled: bool = False
class AudioPaddingModalProcessor:
    """Audio padding processor using Modal.com CPU backend via HTTP."""

    def __init__(
        self, padding_url: str | None = None, modal_api_key: str | None = None
    ):
        """Resolve endpoint and credentials from arguments or the environment.

        Args:
            padding_url: Base URL of the padding service; falls back to the
                ``PADDING_URL`` environment variable.
            modal_api_key: Optional bearer token; falls back to ``MODAL_API_KEY``.

        Raises:
            ValueError: If no padding URL is available from either source.
        """
        self.padding_url = padding_url or os.getenv("PADDING_URL")
        if not self.padding_url:
            raise ValueError(
                "PADDING_URL required to use AudioPaddingModalProcessor. "
                "Set PADDING_URL environment variable or pass padding_url parameter."
            )
        self.modal_api_key = modal_api_key or os.getenv("MODAL_API_KEY")

    async def pad_track(
        self,
        track_url: str,
        output_url: str,
        start_time_seconds: float,
        track_index: int,
    ) -> PaddingResponse:
        """Pad audio track with silence via Modal backend.

        Args:
            track_url: Presigned GET URL for source audio track
            output_url: Presigned PUT URL for output WebM
            start_time_seconds: Amount of silence to prepend
            track_index: Track index for logging

        Returns:
            PaddingResponse built from the JSON body of a successful response.

        Raises:
            ValueError: If ``track_url`` is empty or ``start_time_seconds`` is not positive.
            asyncio.CancelledError: If the response body reports ``cancelled``.
            Exception: httpx timeouts and HTTP status errors are rewrapped as plain
                Exception (chained to the original); other errors propagate as-is.
        """
        if not track_url:
            raise ValueError("track_url cannot be empty")
        if start_time_seconds <= 0:
            raise ValueError(
                f"start_time_seconds must be positive, got {start_time_seconds}"
            )
        log = logger.bind(track_index=track_index, padding_seconds=start_time_seconds)
        log.info("Sending Modal padding HTTP request")
        url = f"{self.padding_url}/pad"
        headers = {}
        # The Authorization header is only sent when an API key is configured.
        if self.modal_api_key:
            headers["Authorization"] = f"Bearer {self.modal_api_key}"
        try:
            async with httpx.AsyncClient(timeout=TIMEOUT_AUDIO) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json={
                        "track_url": track_url,
                        "output_url": output_url,
                        "start_time_seconds": start_time_seconds,
                        "track_index": track_index,
                    },
                    follow_redirects=True,
                )
                # Log the response body first: the exception raised by
                # raise_for_status() is only logged via str(e) further down.
                if response.status_code != 200:
                    error_body = response.text
                    log.error(
                        "Modal padding API error",
                        status_code=response.status_code,
                        error_body=error_body,
                    )
                response.raise_for_status()
                result = response.json()
                # Check if work was cancelled
                # NOTE(review): CancelledError is raised manually here rather than by
                # asyncio itself — confirm callers expect task-cancellation semantics.
                if result.get("cancelled"):
                    log.warning("Modal padding was cancelled by disconnect detection")
                    raise asyncio.CancelledError(
                        "Padding cancelled due to client disconnect"
                    )
                log.info("Modal padding complete", size=result["size"])
                return PaddingResponse(**result)
        except asyncio.CancelledError:
            log.warning(
                "Modal padding cancelled (Hatchet timeout, disconnect detected on Modal side)"
            )
            raise
        except httpx.TimeoutException as e:
            log.error("Modal padding timeout", error=str(e), exc_info=True)
            raise Exception(f"Modal padding timeout: {e}") from e
        except httpx.HTTPStatusError as e:
            log.error("Modal padding HTTP error", error=str(e), exc_info=True)
            raise Exception(f"Modal padding HTTP error: {e}") from e
        except Exception as e:
            # Anything else (e.g. a malformed body) is logged and re-raised unchanged.
            log.error("Modal padding unexpected error", error=str(e), exc_info=True)
            raise

View File

@@ -15,14 +15,10 @@ from hatchet_sdk.clients.rest.exceptions import ApiException, NotFoundException
from hatchet_sdk.clients.rest.models import V1TaskStatus
from reflector.db.recordings import recordings_controller
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import Transcript, transcripts_controller
from reflector.hatchet.client import HatchetClientManager
from reflector.logger import logger
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
from reflector.pipelines.main_multitrack_pipeline import (
task_pipeline_multitrack_process,
)
from reflector.utils.string import NonEmptyString
@@ -181,124 +177,111 @@ async def dispatch_transcript_processing(
Returns AsyncResult for Celery tasks, None for Hatchet workflows.
"""
if isinstance(config, MultitrackProcessingConfig):
use_celery = False
if config.room_id:
room = await rooms_controller.get_by_id(config.room_id)
use_celery = room.use_celery if room else False
use_hatchet = not use_celery
if use_celery:
logger.info(
"Room uses legacy Celery processing",
room_id=config.room_id,
transcript_id=config.transcript_id,
# Multitrack processing always uses Hatchet (no Celery fallback)
# First check if we can replay (outside transaction since it's read-only)
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id and not force:
can_replay = await HatchetClientManager.can_replay(
transcript.workflow_run_id
)
if use_hatchet:
# First check if we can replay (outside transaction since it's read-only)
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id and not force:
can_replay = await HatchetClientManager.can_replay(
transcript.workflow_run_id
if can_replay:
await HatchetClientManager.replay_workflow(transcript.workflow_run_id)
logger.info(
"Replaying Hatchet workflow",
workflow_id=transcript.workflow_run_id,
)
if can_replay:
await HatchetClientManager.replay_workflow(
transcript.workflow_run_id
)
logger.info(
"Replaying Hatchet workflow",
workflow_id=transcript.workflow_run_id,
)
return None
else:
# Workflow can't replay (CANCELLED, COMPLETED, or 404 deleted)
# Log and proceed to start new workflow
try:
status = await HatchetClientManager.get_workflow_run_status(
transcript.workflow_run_id
)
logger.info(
"Old workflow not replayable, starting new",
old_workflow_id=transcript.workflow_run_id,
old_status=status.value,
)
except NotFoundException:
# Workflow deleted from Hatchet but ID still in DB
logger.info(
"Old workflow not found in Hatchet, starting new",
old_workflow_id=transcript.workflow_run_id,
)
# Force: cancel old workflow if exists
if force and transcript and transcript.workflow_run_id:
try:
await HatchetClientManager.cancel_workflow(
transcript.workflow_run_id
)
logger.info(
"Cancelled old workflow (--force)",
workflow_id=transcript.workflow_run_id,
)
except NotFoundException:
logger.info(
"Old workflow already deleted (--force)",
workflow_id=transcript.workflow_run_id,
)
await transcripts_controller.update(
transcript, {"workflow_run_id": None}
)
# Re-fetch and check for concurrent dispatch (optimistic approach).
# No database lock - worst case is duplicate dispatch, but Hatchet
# workflows are idempotent so this is acceptable.
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id:
# Another process started a workflow between validation and now
return None
else:
# Workflow can't replay (CANCELLED, COMPLETED, or 404 deleted)
# Log and proceed to start new workflow
try:
status = await HatchetClientManager.get_workflow_run_status(
transcript.workflow_run_id
)
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
logger.info(
"Concurrent workflow detected, skipping dispatch",
workflow_id=transcript.workflow_run_id,
)
return None
except ApiException:
# Workflow might be gone (404) or API issue - proceed with new workflow
pass
logger.info(
"Old workflow not replayable, starting new",
old_workflow_id=transcript.workflow_run_id,
old_status=status.value,
)
except NotFoundException:
# Workflow deleted from Hatchet but ID still in DB
logger.info(
"Old workflow not found in Hatchet, starting new",
old_workflow_id=transcript.workflow_run_id,
)
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": config.recording_id,
"tracks": [{"s3_key": k} for k in config.track_keys],
"bucket_name": config.bucket_name,
"transcript_id": config.transcript_id,
"room_id": config.room_id,
},
additional_metadata={
"transcript_id": config.transcript_id,
"recording_id": config.recording_id,
"daily_recording_id": config.recording_id,
},
# Force: cancel old workflow if exists
if force and transcript and transcript.workflow_run_id:
try:
await HatchetClientManager.cancel_workflow(transcript.workflow_run_id)
logger.info(
"Cancelled old workflow (--force)",
workflow_id=transcript.workflow_run_id,
)
except NotFoundException:
logger.info(
"Old workflow already deleted (--force)",
workflow_id=transcript.workflow_run_id,
)
await transcripts_controller.update(transcript, {"workflow_run_id": None})
# Re-fetch and check for concurrent dispatch (optimistic approach).
# No database lock - worst case is duplicate dispatch, but Hatchet
# workflows are idempotent so this is acceptable.
transcript = await transcripts_controller.get_by_id(config.transcript_id)
if transcript and transcript.workflow_run_id:
# Another process started a workflow between validation and now
try:
status = await HatchetClientManager.get_workflow_run_status(
transcript.workflow_run_id
)
if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
logger.info(
"Concurrent workflow detected, skipping dispatch",
workflow_id=transcript.workflow_run_id,
)
return None
except ApiException:
# Workflow might be gone (404) or API issue - proceed with new workflow
pass
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": config.recording_id,
"tracks": [{"s3_key": k} for k in config.track_keys],
"bucket_name": config.bucket_name,
"transcript_id": config.transcript_id,
"room_id": config.room_id,
},
additional_metadata={
"transcript_id": config.transcript_id,
"recording_id": config.recording_id,
"daily_recording_id": config.recording_id,
},
)
if transcript:
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
if transcript:
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
return None
try:
from reflector.hatchet.dag_progress import broadcast_dag_status # noqa: I001, PLC0415
await broadcast_dag_status(config.transcript_id, workflow_id)
except Exception:
logger.warning(
"[DAG Progress] Failed initial broadcast",
transcript_id=config.transcript_id,
workflow_id=workflow_id,
exc_info=True,
)
return None
# Celery pipeline (durable workflows disabled)
return task_pipeline_multitrack_process.delay(
transcript_id=config.transcript_id,
bucket_name=config.bucket_name,
track_keys=config.track_keys,
)
elif isinstance(config, FileProcessingConfig):
return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
else:

View File

@@ -1,7 +1,7 @@
from pydantic.types import PositiveInt
from pydantic_settings import BaseSettings, SettingsConfigDict
from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
from reflector.schemas.platform import DAILY_PLATFORM, Platform
from reflector.utils.string import NonEmptyString
@@ -98,6 +98,10 @@ class Settings(BaseSettings):
# Diarization: local pyannote.audio
DIARIZATION_PYANNOTE_AUTH_TOKEN: str | None = None
# Audio Padding (Modal.com backend)
PADDING_URL: str | None = None
PADDING_MODAL_API_KEY: str | None = None
# Sentry
SENTRY_DSN: str | None = None
@@ -151,7 +155,7 @@ class Settings(BaseSettings):
None # Webhook UUID for this environment. Not used by production code
)
# Platform Configuration
DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM
DEFAULT_VIDEO_PLATFORM: Platform = DAILY_PLATFORM
# Zulip integration
ZULIP_REALM: str | None = None

View File

@@ -5,7 +5,9 @@ Used by both Hatchet workflows and Celery pipelines for consistent audio encodin
"""
# Opus codec settings
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
OPUS_STANDARD_SAMPLE_RATE = 48000
# ref B0F71CE8-FC59-4AA5-8414-DAFB836DB711
OPUS_DEFAULT_BIT_RATE = 128000 # 128kbps for good speech quality
# S3 presigned URL expiration

View File

@@ -111,6 +111,7 @@ class GetTranscriptMinimal(BaseModel):
room_id: str | None = None
room_name: str | None = None
audio_deleted: bool | None = None
dag_status: list[dict] | None = None
class TranscriptParticipantWithEmail(TranscriptParticipant):
@@ -491,6 +492,13 @@ async def transcript_get(
)
)
dag_status = None
if transcript.status == "processing" and transcript.events:
for ev in reversed(transcript.events):
if ev.event == "DAG_STATUS":
dag_status = ev.data.get("tasks") if isinstance(ev.data, dict) else None
break
base_data = {
"id": transcript.id,
"user_id": transcript.user_id,
@@ -512,6 +520,7 @@ async def transcript_get(
"room_id": transcript.room_id,
"room_name": room_name,
"audio_deleted": transcript.audio_deleted,
"dag_status": dag_status,
"participants": participants,
}

View File

@@ -41,13 +41,19 @@ async def transcript_events_websocket(
try:
# on first connection, send all events only to the current user
# Find the last DAG_STATUS to send after other historical events
last_dag_status = None
for event in transcript.events:
# for now, do not send TRANSCRIPT or STATUS events - these are live events
# that do not need to be replayed to the client; but keep the rest
name = event.event
if name in ("TRANSCRIPT", "STATUS"):
continue
if name == "DAG_STATUS":
last_dag_status = event
continue
await websocket.send_json(event.model_dump(mode="json"))
# Send only the most recent DAG_STATUS so reconnecting clients get current state
if last_dag_status is not None:
await websocket.send_json(last_dag_status.model_dump(mode="json"))
# XXX if transcript is final (locked=True and status=ended)
# XXX send a final event to the client and close the connection

View File

@@ -27,9 +27,6 @@ from reflector.db.transcripts import (
from reflector.hatchet.client import HatchetClientManager
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
from reflector.pipelines.main_live_pipeline import asynctask
from reflector.pipelines.main_multitrack_pipeline import (
task_pipeline_multitrack_process,
)
from reflector.pipelines.topic_processing import EmptyPipeline
from reflector.processors import AudioFileWriterProcessor
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
@@ -351,49 +348,29 @@ async def _process_multitrack_recording_inner(
room_id=room.id,
)
use_celery = room and room.use_celery
use_hatchet = not use_celery
if use_celery:
logger.info(
"Room uses legacy Celery processing",
room_id=room.id,
transcript_id=transcript.id,
)
if use_hatchet:
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording_id,
"tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording_id,
"daily_recording_id": recording_id,
},
)
logger.info(
"Started Hatchet workflow",
workflow_id=workflow_id,
transcript_id=transcript.id,
)
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
return
# Celery pipeline (runs when durable workflows disabled)
task_pipeline_multitrack_process.delay(
transcript_id=transcript.id,
bucket_name=bucket_name,
track_keys=filter_cam_audio_tracks(track_keys),
# Multitrack processing always uses Hatchet (no Celery fallback)
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording_id,
"tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording_id,
"daily_recording_id": recording_id,
},
)
logger.info(
"Started Hatchet workflow",
workflow_id=workflow_id,
transcript_id=transcript.id,
)
await transcripts_controller.update(transcript, {"workflow_run_id": workflow_id})
@shared_task
@@ -1072,66 +1049,43 @@ async def reprocess_failed_daily_recordings():
)
continue
use_celery = room and room.use_celery
use_hatchet = not use_celery
if use_hatchet:
if not transcript:
logger.warning(
"No transcript for Hatchet reprocessing, skipping",
recording_id=recording.id,
)
continue
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording.id,
"tracks": [
{"s3_key": k}
for k in filter_cam_audio_tracks(recording.track_keys)
],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id if room else None,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording.id,
"reprocess": True,
},
)
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
logger.info(
"Queued Daily recording for Hatchet reprocessing",
# Multitrack reprocessing always uses Hatchet (no Celery fallback)
if not transcript:
logger.warning(
"No transcript for Hatchet reprocessing, skipping",
recording_id=recording.id,
workflow_id=workflow_id,
room_name=meeting.room_name,
track_count=len(recording.track_keys),
)
else:
logger.info(
"Queueing Daily recording for Celery reprocessing",
recording_id=recording.id,
room_name=meeting.room_name,
track_count=len(recording.track_keys),
transcript_status=transcript.status if transcript else None,
)
continue
# For reprocessing, pass actual recording time (though it's ignored - see _process_multitrack_recording_inner)
# Reprocessing uses recording.meeting_id directly instead of time-based matching
recording_start_ts = int(recording.recorded_at.timestamp())
workflow_id = await HatchetClientManager.start_workflow(
workflow_name="DiarizationPipeline",
input_data={
"recording_id": recording.id,
"tracks": [
{"s3_key": k}
for k in filter_cam_audio_tracks(recording.track_keys)
],
"bucket_name": bucket_name,
"transcript_id": transcript.id,
"room_id": room.id if room else None,
},
additional_metadata={
"transcript_id": transcript.id,
"recording_id": recording.id,
"reprocess": True,
},
)
await transcripts_controller.update(
transcript, {"workflow_run_id": workflow_id}
)
process_multitrack_recording.delay(
bucket_name=bucket_name,
daily_room_name=meeting.room_name,
recording_id=recording.id,
track_keys=recording.track_keys,
recording_start_ts=recording_start_ts,
)
logger.info(
"Queued Daily recording for Hatchet reprocessing",
recording_id=recording.id,
workflow_id=workflow_id,
room_name=meeting.room_name,
track_count=len(recording.track_keys),
)
reprocessed_count += 1

View File

@@ -11,7 +11,6 @@ broadcast messages to all connected websockets.
import asyncio
import json
import threading
import redis.asyncio as redis
from fastapi import WebSocket
@@ -98,6 +97,7 @@ class WebsocketManager:
async def _pubsub_data_reader(self, pubsub_subscriber):
while True:
# timeout=1.0 prevents tight CPU loop when no messages available
message = await pubsub_subscriber.get_message(
ignore_subscribe_messages=True
)
@@ -109,29 +109,38 @@ class WebsocketManager:
await socket.send_json(data)
# Process-global singleton to ensure only one WebsocketManager instance exists.
# Multiple instances would cause resource leaks and CPU issues.
_ws_manager: WebsocketManager | None = None
def get_ws_manager() -> WebsocketManager:
"""
Returns the WebsocketManager instance for managing websockets.
Returns the global WebsocketManager singleton.
This function initializes and returns the WebsocketManager instance,
which is responsible for managing websockets and handling websocket
connections.
Creates instance on first call, subsequent calls return cached instance.
Thread-safe via GIL. Concurrent initialization may create duplicate
instances but last write wins (acceptable for this use case).
Returns:
WebsocketManager: The initialized WebsocketManager instance.
Raises:
ImportError: If the 'reflector.settings' module cannot be imported.
RedisConnectionError: If there is an error connecting to the Redis server.
WebsocketManager: The global WebsocketManager instance.
"""
local = threading.local()
if hasattr(local, "ws_manager"):
return local.ws_manager
global _ws_manager
if _ws_manager is not None:
return _ws_manager
# No lock needed - GIL makes this safe enough
# Worst case: race creates two instances, last assignment wins
pubsub_client = RedisPubSubManager(
host=settings.REDIS_HOST,
port=settings.REDIS_PORT,
)
ws_manager = WebsocketManager(pubsub_client=pubsub_client)
local.ws_manager = ws_manager
return ws_manager
_ws_manager = WebsocketManager(pubsub_client=pubsub_client)
return _ws_manager
def reset_ws_manager() -> None:
"""Reset singleton for testing. DO NOT use in production."""
global _ws_manager
_ws_manager = None

View File

@@ -1,11 +1,10 @@
import os
from contextlib import asynccontextmanager
from tempfile import NamedTemporaryFile
from unittest.mock import patch
import pytest
from reflector.schemas.platform import WHEREBY_PLATFORM
from reflector.schemas.platform import DAILY_PLATFORM, WHEREBY_PLATFORM
@pytest.fixture(scope="session", autouse=True)
@@ -15,6 +14,7 @@ def register_mock_platform():
from reflector.video_platforms.registry import register_platform
register_platform(WHEREBY_PLATFORM, MockPlatformClient)
register_platform(DAILY_PLATFORM, MockPlatformClient)
yield
@@ -333,11 +333,14 @@ def celery_enable_logging():
@pytest.fixture(scope="session")
def celery_config():
with NamedTemporaryFile() as f:
yield {
"broker_url": "memory://",
"result_backend": f"db+sqlite:///{f.name}",
}
redis_host = os.environ.get("REDIS_HOST", "localhost")
redis_port = os.environ.get("REDIS_PORT", "6379")
# Use db 2 to avoid conflicts with main app
redis_url = f"redis://{redis_host}:{redis_port}/2"
yield {
"broker_url": redis_url,
"result_backend": redis_url,
}
@pytest.fixture(scope="session")
@@ -370,9 +373,12 @@ async def ws_manager_in_memory(monkeypatch):
def __init__(self, queue: asyncio.Queue):
self.queue = queue
async def get_message(self, ignore_subscribe_messages: bool = True):
async def get_message(
self, ignore_subscribe_messages: bool = True, timeout: float | None = None
):
wait_timeout = timeout if timeout is not None else 0.05
try:
return await asyncio.wait_for(self.queue.get(), timeout=0.05)
return await asyncio.wait_for(self.queue.get(), timeout=wait_timeout)
except Exception:
return None

View File

@@ -0,0 +1,959 @@
"""Tests for DAG progress models and transform function.
Tests the extract_dag_tasks function that converts Hatchet V1WorkflowRunDetails
into structured DagTask list for frontend consumption.
"""
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from reflector.hatchet.constants import TaskName
from reflector.hatchet.dag_progress import (
DagStatusData,
DagTask,
DagTaskStatus,
extract_dag_tasks,
)
def _make_shape_item(
step_id: str,
task_name: str,
children_step_ids: list[str] | None = None,
) -> MagicMock:
"""Create a mock WorkflowRunShapeItemForWorkflowRunDetails."""
item = MagicMock()
item.step_id = step_id
item.task_name = task_name
item.children_step_ids = children_step_ids or []
return item
def _make_task_summary(
step_id: str,
status: str = "QUEUED",
started_at: datetime | None = None,
finished_at: datetime | None = None,
duration: int | None = None,
error_message: str | None = None,
task_external_id: str | None = None,
num_spawned_children: int | None = None,
children: list | None = None,
) -> MagicMock:
"""Create a mock V1TaskSummary."""
from hatchet_sdk.clients.rest.models import V1TaskStatus
task = MagicMock()
task.step_id = step_id
task.status = V1TaskStatus(status)
task.started_at = started_at
task.finished_at = finished_at
task.duration = duration
task.error_message = error_message
task.task_external_id = task_external_id or f"ext-{step_id}"
task.num_spawned_children = num_spawned_children
task.children = children or []
return task
def _make_details(
shape: list,
tasks: list,
run_id: str = "test-run-id",
) -> MagicMock:
"""Create a mock V1WorkflowRunDetails."""
details = MagicMock()
details.shape = shape
details.tasks = tasks
details.task_events = []
details.run = MagicMock()
details.run.metadata = MagicMock()
details.run.metadata.id = run_id
return details
class TestExtractDagTasksBasic:
    """Single-node extraction: status, timing and error fields of the result."""

    @staticmethod
    def _extract_single(task_name, **summary_fields):
        """Run extract_dag_tasks on a one-node DAG whose only step id is "s1"."""
        run = _make_details(
            [_make_shape_item("s1", task_name)],
            [_make_task_summary("s1", **summary_fields)],
        )
        return extract_dag_tasks(run)

    def test_empty_shape_returns_empty_list(self):
        assert extract_dag_tasks(_make_details(shape=[], tasks=[])) == []

    def test_single_task_queued(self):
        extracted = self._extract_single("get_recording", status="QUEUED")

        assert len(extracted) == 1
        only = extracted[0]
        assert only.name == "get_recording"
        assert only.status == DagTaskStatus.QUEUED
        assert only.parents == []
        # Nothing has run yet, so every optional field must still be unset.
        assert only.started_at is None
        assert only.finished_at is None
        assert only.duration_seconds is None
        assert only.error is None
        assert only.children_total is None
        assert only.children_completed is None
        assert only.progress_pct is None

    def test_completed_task_with_duration(self):
        now = datetime.now(timezone.utc)
        extracted = self._extract_single(
            "get_recording",
            status="COMPLETED",
            started_at=now,
            finished_at=now,
            duration=1500,  # milliseconds
        )

        first = extracted[0]
        assert first.status == DagTaskStatus.COMPLETED
        assert first.duration_seconds == 1.5
        assert first.started_at == now
        assert first.finished_at == now

    def test_failed_task_with_error(self):
        extracted = self._extract_single(
            "get_recording",
            status="FAILED",
            error_message="Traceback (most recent call last):\n File something\nConnectionError: connection refused",
        )

        first = extracted[0]
        assert first.status == DagTaskStatus.FAILED
        assert first.error == "ConnectionError: connection refused"

    def test_running_task(self):
        now = datetime.now(timezone.utc)
        extracted = self._extract_single(
            "mixdown_tracks",
            status="RUNNING",
            started_at=now,
            duration=5000,
        )

        first = extracted[0]
        assert first.status == DagTaskStatus.RUNNING
        assert first.started_at == now
        assert first.duration_seconds == 5.0

    def test_cancelled_task(self):
        extracted = self._extract_single("post_zulip", status="CANCELLED")
        assert extracted[0].status == DagTaskStatus.CANCELLED
class TestExtractDagTasksTopology:
    """Parent derivation and topological ordering of extracted DAG tasks."""

    @staticmethod
    def _extract(topology, statuses):
        """Build mock run details from plain tuples and run extract_dag_tasks.

        topology: sequence of (step_id, task_name, children_step_ids or None)
        statuses: sequence of (step_id, status string)
        """
        run = _make_details(
            [_make_shape_item(sid, name, kids) for sid, name, kids in topology],
            [_make_task_summary(sid, status=state) for sid, state in statuses],
        )
        return extract_dag_tasks(run)

    @staticmethod
    def _task_named(result, wanted):
        """Return the first extracted task whose name equals ``wanted``."""
        return next(t for t in result if t.name == wanted)

    @staticmethod
    def _assert_parents_precede_children(result):
        """Topological invariant: no task is listed before any of its parents."""
        name_to_index = {t.name: i for i, t in enumerate(result)}
        for task in result:
            for parent_name in task.parents:
                assert name_to_index[parent_name] < name_to_index[task.name], (
                    f"Parent {parent_name} (idx {name_to_index[parent_name]}) "
                    f"must appear before {task.name} (idx {name_to_index[task.name]})"
                )

    def test_linear_chain_parents(self):
        """A -> B -> C should produce correct parents."""
        ordered = self._extract(
            topology=[
                ("s1", "get_recording", ["s2"]),
                ("s2", "get_participants", ["s3"]),
                ("s3", "process_tracks", None),
            ],
            statuses=[("s1", "COMPLETED"), ("s2", "COMPLETED"), ("s3", "QUEUED")],
        )

        assert [t.name for t in ordered] == [
            "get_recording",
            "get_participants",
            "process_tracks",
        ]
        assert ordered[0].parents == []
        assert ordered[1].parents == ["get_recording"]
        assert ordered[2].parents == ["get_participants"]

    def test_diamond_dag(self):
        """
        A -> B, A -> C, B -> D, C -> D
        D should have parents [B, C] (or [C, B] depending on sort).
        """
        ordered = self._extract(
            topology=[
                ("s1", "get_recording", ["s2", "s3"]),
                ("s2", "mixdown_tracks", ["s4"]),
                ("s3", "detect_topics", ["s4"]),
                ("s4", "finalize", None),
            ],
            statuses=[
                ("s1", "COMPLETED"),
                ("s2", "RUNNING"),
                ("s3", "RUNNING"),
                ("s4", "QUEUED"),
            ],
        )

        # Topological: s1 first, s2/s3 in some order, s4 last
        root, sink = ordered[0], ordered[-1]
        assert root.name == "get_recording"
        assert sink.name == "finalize"
        assert set(sink.parents) == {"mixdown_tracks", "detect_topics"}

    def test_topological_order_is_stable(self):
        """Verify deterministic ordering (sorted queue in Kahn's)."""
        ordered = self._extract(
            topology=[
                ("s_c", "task_c", None),
                ("s_a", "task_a", ["s_c"]),
                ("s_b", "task_b", ["s_c"]),
            ],
            statuses=[("s_c", "QUEUED"), ("s_a", "COMPLETED"), ("s_b", "COMPLETED")],
        )

        # s_a and s_b both roots with in-degree 0; sorted alphabetically by step_id
        names = [t.name for t in ordered]
        assert names[-1] == "task_c"
        # First two should be task_a, task_b (sorted by step_id: s_a < s_b)
        assert names[0] == "task_a"
        assert names[1] == "task_b"

    def test_production_dag_shape(self):
        """Test the real 15-task pipeline topology with mixed statuses.

        Simulates a mid-pipeline state where early tasks completed,
        middle tasks running, and later tasks still queued.
        """
        # Production DAG as (step_id, task_name, children_step_ids) rows.
        topology = [
            ("s_get_recording", TaskName.GET_RECORDING, ["s_get_participants"]),
            ("s_get_participants", TaskName.GET_PARTICIPANTS, ["s_process_tracks"]),
            (
                "s_process_tracks",
                TaskName.PROCESS_TRACKS,
                ["s_mixdown_tracks", "s_detect_topics", "s_finalize"],
            ),
            ("s_mixdown_tracks", TaskName.MIXDOWN_TRACKS, ["s_generate_waveform"]),
            ("s_generate_waveform", TaskName.GENERATE_WAVEFORM, None),
            (
                "s_detect_topics",
                TaskName.DETECT_TOPICS,
                ["s_generate_title", "s_extract_subjects"],
            ),
            ("s_generate_title", TaskName.GENERATE_TITLE, ["s_finalize"]),
            (
                "s_extract_subjects",
                TaskName.EXTRACT_SUBJECTS,
                ["s_process_subjects", "s_identify_action_items"],
            ),
            ("s_process_subjects", TaskName.PROCESS_SUBJECTS, ["s_generate_recap"]),
            ("s_generate_recap", TaskName.GENERATE_RECAP, ["s_finalize"]),
            ("s_identify_action_items", TaskName.IDENTIFY_ACTION_ITEMS, ["s_finalize"]),
            ("s_finalize", TaskName.FINALIZE, ["s_cleanup_consent"]),
            (
                "s_cleanup_consent",
                TaskName.CLEANUP_CONSENT,
                ["s_post_zulip", "s_send_webhook"],
            ),
            ("s_post_zulip", TaskName.POST_ZULIP, None),
            ("s_send_webhook", TaskName.SEND_WEBHOOK, None),
        ]

        # Mid-pipeline: early tasks done, middle running, later queued
        finished_steps = {"s_get_recording", "s_get_participants", "s_process_tracks"}
        active_steps = {"s_mixdown_tracks", "s_detect_topics"}
        statuses = []
        for sid, _name, _kids in topology:
            if sid in finished_steps:
                statuses.append((sid, "COMPLETED"))
            elif sid in active_steps:
                statuses.append((sid, "RUNNING"))
            else:
                statuses.append((sid, "QUEUED"))

        result = self._extract(topology, statuses)

        # All 15 tasks present
        assert len(result) == 15
        assert {t.name for t in result} == {
            TaskName.GET_RECORDING,
            TaskName.GET_PARTICIPANTS,
            TaskName.PROCESS_TRACKS,
            TaskName.MIXDOWN_TRACKS,
            TaskName.GENERATE_WAVEFORM,
            TaskName.DETECT_TOPICS,
            TaskName.GENERATE_TITLE,
            TaskName.EXTRACT_SUBJECTS,
            TaskName.PROCESS_SUBJECTS,
            TaskName.GENERATE_RECAP,
            TaskName.IDENTIFY_ACTION_ITEMS,
            TaskName.FINALIZE,
            TaskName.CLEANUP_CONSENT,
            TaskName.POST_ZULIP,
            TaskName.SEND_WEBHOOK,
        }

        # Topological order invariant: no task appears before its parents
        self._assert_parents_precede_children(result)

        # finalize has exactly 4 parents
        finalize = self._task_named(result, TaskName.FINALIZE)
        assert set(finalize.parents) == {
            TaskName.PROCESS_TRACKS,
            TaskName.GENERATE_TITLE,
            TaskName.GENERATE_RECAP,
            TaskName.IDENTIFY_ACTION_ITEMS,
        }

        # cleanup_consent has 1 parent (finalize)
        cleanup = self._task_named(result, TaskName.CLEANUP_CONSENT)
        assert cleanup.parents == [TaskName.FINALIZE]

        # post_zulip and send_webhook both have cleanup_consent as parent
        post_zulip = self._task_named(result, TaskName.POST_ZULIP)
        send_webhook = self._task_named(result, TaskName.SEND_WEBHOOK)
        assert post_zulip.parents == [TaskName.CLEANUP_CONSENT]
        assert send_webhook.parents == [TaskName.CLEANUP_CONSENT]

        # Verify statuses propagated correctly
        expected_statuses = (
            (TaskName.GET_RECORDING, DagTaskStatus.COMPLETED),
            (TaskName.MIXDOWN_TRACKS, DagTaskStatus.RUNNING),
            (TaskName.FINALIZE, DagTaskStatus.QUEUED),
        )
        for wanted_name, wanted_status in expected_statuses:
            assert self._task_named(result, wanted_name).status == wanted_status

    def test_topological_sort_invariant_complex_dag(self):
        """For a complex DAG, every task's parents appear earlier in the list.

        Uses a wider branching/merging DAG than diamond to stress the invariant.
        """
        # DAG: A -> B, A -> C, A -> D, B -> E, C -> E, C -> F, D -> F, E -> G, F -> G
        result = self._extract(
            topology=[
                ("s_a", "task_a", ["s_b", "s_c", "s_d"]),
                ("s_b", "task_b", ["s_e"]),
                ("s_c", "task_c", ["s_e", "s_f"]),
                ("s_d", "task_d", ["s_f"]),
                ("s_e", "task_e", ["s_g"]),
                ("s_f", "task_f", ["s_g"]),
                ("s_g", "task_g", None),
            ],
            statuses=[
                ("s_a", "COMPLETED"),
                ("s_b", "COMPLETED"),
                ("s_c", "RUNNING"),
                ("s_d", "COMPLETED"),
                ("s_e", "QUEUED"),
                ("s_f", "QUEUED"),
                ("s_g", "QUEUED"),
            ],
        )

        assert len(result) == 7

        # Verify invariant: every parent appears before its child
        self._assert_parents_precede_children(result)

        # task_g has 2 parents
        assert set(self._task_named(result, "task_g").parents) == {"task_e", "task_f"}

        # task_e has 2 parents
        assert set(self._task_named(result, "task_e").parents) == {"task_b", "task_c"}

        # task_a is root (first in topological order)
        root = result[0]
        assert root.name == "task_a"
        assert root.parents == []
class TestExtractDagTasksFanOut:
    """Child counters reported for tasks that spawn (or do not spawn) children."""

    @staticmethod
    def _extract_single(task_name, **summary_fields):
        """Run extract_dag_tasks on a one-node DAG and return its only task."""
        run = _make_details(
            [_make_shape_item("s1", task_name)],
            [_make_task_summary("s1", **summary_fields)],
        )
        return extract_dag_tasks(run)[0]

    def test_fan_out_children_counts(self):
        from hatchet_sdk.clients.rest.models import V1TaskStatus

        # Four spawned children, two of which have already completed.
        spawned = [
            MagicMock(status=V1TaskStatus(state))
            for state in ("COMPLETED", "COMPLETED", "RUNNING", "QUEUED")
        ]

        fan_out = self._extract_single(
            "process_tracks",
            status="RUNNING",
            num_spawned_children=4,
            children=spawned,
        )

        assert fan_out.children_total == 4
        assert fan_out.children_completed == 2

    def test_no_children_when_no_spawn(self):
        plain = self._extract_single(
            "get_recording", status="COMPLETED", num_spawned_children=None
        )

        assert plain.children_total is None
        assert plain.children_completed is None

    def test_zero_spawned_children(self):
        childless = self._extract_single(
            "process_tracks", status="COMPLETED", num_spawned_children=0
        )

        assert childless.children_total is None
        assert childless.children_completed is None
class TestExtractDagTasksErrorExtraction:
    """How extract_dag_tasks maps a task's error_message onto the result's error."""

    @staticmethod
    def _extracted_error(task_name, **summary_kwargs):
        """Run extract_dag_tasks on a single step "s1" and return its error field."""
        shape = [_make_shape_item("s1", task_name)]
        tasks = [_make_task_summary("s1", **summary_kwargs)]
        details = _make_details(shape, tasks)
        return extract_dag_tasks(details)[0].error

    def test_simple_error(self):
        """A one-line message is passed through unchanged."""
        extracted = self._extracted_error(
            "mixdown_tracks", status="FAILED", error_message="ValueError: no tracks"
        )
        assert extracted == "ValueError: no tracks"

    def test_traceback_extracts_meaningful_line(self):
        """For a multi-line traceback only the final exception line is kept."""
        traceback_text = (
            "Traceback (most recent call last):\n"
            ' File "/app/something.py", line 42\n'
            "RuntimeError: out of memory"
        )
        extracted = self._extracted_error(
            "mixdown_tracks", status="FAILED", error_message=traceback_text
        )
        assert extracted == "RuntimeError: out of memory"

    def test_no_error_when_none(self):
        """error_message=None yields error None."""
        extracted = self._extracted_error(
            "get_recording", status="COMPLETED", error_message=None
        )
        assert extracted is None

    def test_empty_error_message(self):
        """An empty error_message is reported as None, even for a FAILED task."""
        extracted = self._extracted_error(
            "get_recording", status="FAILED", error_message=""
        )
        assert extracted is None
class TestExtractDagTasksMissingData:
    """extract_dag_tasks behaviour when parts of the run details are absent."""

    def test_shape_without_matching_task(self):
        """A step present in the shape but absent from the task list is still listed."""
        only_step = _make_shape_item("s1", "get_recording")
        details = _make_details([only_step], [])  # task list deliberately empty
        extracted = extract_dag_tasks(details)

        assert len(extracted) == 1
        placeholder = extracted[0]
        assert placeholder.name == "get_recording"
        # With no task data the status falls back to QUEUED.
        assert placeholder.status == DagTaskStatus.QUEUED
        assert placeholder.started_at is None

    def test_none_shape_returns_empty(self):
        """A run whose shape attribute is None produces an empty list."""
        details = _make_details(shape=[], tasks=[])
        details.shape = None
        assert extract_dag_tasks(details) == []
class TestDagStatusData:
    """JSON-mode serialization of the DagStatusData model."""

    def test_serialization(self):
        """model_dump(mode="json") exposes the run id and each task's fields."""
        task_fields = dict(
            name="get_recording",
            status=DagTaskStatus.COMPLETED,
            started_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
            finished_at=datetime(2025, 1, 1, 0, 0, 1, tzinfo=timezone.utc),
            duration_seconds=1.0,
            parents=[],
            error=None,
            children_total=None,
            children_completed=None,
            progress_pct=None,
        )
        payload = DagStatusData(
            workflow_run_id="test-123", tasks=[DagTask(**task_fields)]
        )

        dumped = payload.model_dump(mode="json")

        assert dumped["workflow_run_id"] == "test-123"
        assert len(dumped["tasks"]) == 1
        first = dumped["tasks"][0]
        assert first["name"] == "get_recording"
        # The status enum is dumped as the string "completed".
        assert first["status"] == "completed"
        assert first["duration_seconds"] == 1.0
class AsyncContextManager:
    """Do-nothing async context manager, used to mock fresh_db_connection."""

    async def __aenter__(self):
        # ``async with AsyncContextManager() as x`` binds x to None.
        return

    async def __aexit__(self, *exc_details):
        # Returning None (falsy) means exceptions raised in the body propagate.
        return
class TestBroadcastDagStatus:
    """Tests for broadcast_dag_status.

    broadcast_dag_status resolves its collaborators through deferred imports
    inside the function body, so the tests patch the source modules/objects
    before calling it.
    Importing daily_multitrack_pipeline triggers an import cascade
    (subject_processing -> HatchetClientManager.get_client at module level),
    so _instance is set before the import to prevent real SDK init.
    """

    @pytest.fixture(autouse=True)
    def _setup_hatchet_mock(self):
        """Swap HatchetClientManager._instance for a mock during each test.

        Module-level code in workflow files calls get_client() during import;
        having _instance set beforehand avoids ClientConfig validation.
        The previous value is put back after the test.
        """
        from reflector.hatchet.client import HatchetClientManager

        saved_instance = HatchetClientManager._instance
        HatchetClientManager._instance = MagicMock()
        yield
        HatchetClientManager._instance = saved_instance

    @pytest.mark.asyncio
    async def test_broadcasts_dag_status(self):
        """Happy path: the run is fetched, transformed and broadcast once."""
        transcript_stub = MagicMock()
        transcript_stub.id = "t-123"

        run_details = _make_details(
            shape=[_make_shape_item("s1", "get_recording")],
            tasks=[_make_task_summary("s1", status="COMPLETED")],
            run_id="wf-abc",
        )
        hatchet = MagicMock()
        hatchet.runs.aio_get = AsyncMock(return_value=run_details)

        patch_client = patch(
            "reflector.hatchet.client.HatchetClientManager.get_client",
            return_value=hatchet,
        )
        patch_append = patch(
            "reflector.hatchet.broadcast.append_event_and_broadcast",
            new_callable=AsyncMock,
        )
        patch_lookup = patch(
            "reflector.db.transcripts.transcripts_controller.get_by_id",
            new_callable=AsyncMock,
            return_value=transcript_stub,
        )
        patch_db = patch(
            "reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
            return_value=AsyncContextManager(),
        )

        with patch_client, patch_append as mock_broadcast, patch_lookup, patch_db:
            from reflector.hatchet.dag_progress import broadcast_dag_status

            await broadcast_dag_status("t-123", "wf-abc")

        hatchet.runs.aio_get.assert_called_once_with("wf-abc")
        mock_broadcast.assert_called_once()

        # Positional arguments: (transcript_id, transcript, event_name, data).
        positional = mock_broadcast.call_args[0]
        assert positional[0] == "t-123"
        assert positional[1] is transcript_stub
        assert positional[2] == "DAG_STATUS"
        payload = positional[3]
        assert isinstance(payload, DagStatusData)
        assert payload.workflow_run_id == "wf-abc"
        assert len(payload.tasks) == 1

    @pytest.mark.asyncio
    async def test_swallows_exceptions(self):
        """A failure inside broadcast_dag_status must not reach the caller."""
        from reflector.hatchet.dag_progress import broadcast_dag_status

        failing_db = patch(
            "reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
            side_effect=RuntimeError("db exploded"),
        )
        with failing_db:
            # The test passes as long as this await does not raise.
            await broadcast_dag_status("t-123", "wf-abc")

    @pytest.mark.asyncio
    async def test_no_broadcast_when_transcript_not_found(self):
        """When the transcript lookup returns None nothing is broadcast."""
        run_details = _make_details(
            shape=[_make_shape_item("s1", "get_recording")],
            tasks=[_make_task_summary("s1", status="COMPLETED")],
        )
        hatchet = MagicMock()
        hatchet.runs.aio_get = AsyncMock(return_value=run_details)

        patch_client = patch(
            "reflector.hatchet.client.HatchetClientManager.get_client",
            return_value=hatchet,
        )
        patch_db = patch(
            "reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
            return_value=AsyncContextManager(),
        )
        patch_lookup = patch(
            "reflector.db.transcripts.transcripts_controller.get_by_id",
            new_callable=AsyncMock,
            return_value=None,
        )
        patch_append = patch(
            "reflector.hatchet.broadcast.append_event_and_broadcast",
            new_callable=AsyncMock,
        )

        with patch_client, patch_db, patch_lookup, patch_append as mock_broadcast:
            from reflector.hatchet.dag_progress import broadcast_dag_status

            await broadcast_dag_status("t-123", "wf-abc")

        mock_broadcast.assert_not_called()
class TestMakeAudioProgressLoggerWithBroadcast:
    """Test make_audio_progress_logger with transcript_id for transient broadcasts.

    The callback under test is synchronous, so the tests that expect a
    broadcast install their own event loop, record every task created on it,
    and drive those tasks to completion before asserting.
    """

    @pytest.fixture(autouse=True)
    def _setup_hatchet_mock(self):
        """Set HatchetClientManager._instance to prevent real SDK init on import.

        A mock is installed only when no instance is currently set; the
        previous value is restored after the test either way.
        """
        from reflector.hatchet.client import HatchetClientManager

        original = HatchetClientManager._instance
        if original is None:
            HatchetClientManager._instance = MagicMock()
        yield
        HatchetClientManager._instance = original

    @staticmethod
    def _install_capturing_loop():
        """Create a new event loop, make it current, and prepare task capture.

        Returns:
            (loop, tasks_created, capture_create_task): the loop, the list that
            collects created tasks, and a wrapper meant to be used as the
            ``side_effect`` when patching ``loop.create_task``. The wrapper
            delegates to the loop's real ``create_task``.
        """
        import asyncio

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        tasks_created = []
        # Bound before any patching so the wrapper reaches the real method.
        original_create_task = loop.create_task

        def capture_create_task(coro):
            task = original_create_task(coro)
            tasks_created.append(task)
            return task

        return loop, tasks_created, capture_create_task

    @staticmethod
    def _dispose_loop(loop):
        """Close the test loop and clear it as the current event loop.

        Without the reset, a closed loop would stay installed as the thread's
        current loop and leak into whatever test runs next.
        """
        import asyncio

        loop.close()
        asyncio.set_event_loop(None)

    def test_broadcasts_transient_progress_event(self):
        """When transcript_id provided and progress_pct not None, broadcasts event."""
        import asyncio

        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            make_audio_progress_logger,
        )

        ctx = MagicMock()
        ctx.log = MagicMock()
        mock_broadcast = AsyncMock()
        loop, tasks_created, capture_create_task = self._install_capturing_loop()

        try:
            with (
                patch(
                    "reflector.hatchet.broadcast.broadcast_event",
                    mock_broadcast,
                ),
                patch.object(loop, "create_task", side_effect=capture_create_task),
            ):
                callback = make_audio_progress_logger(
                    ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id="t-123"
                )
                callback(50.0, 100.0)

                # Run pending tasks
                if tasks_created:
                    loop.run_until_complete(asyncio.gather(*tasks_created))

            mock_broadcast.assert_called_once()
            # The event object is the second positional argument.
            event_arg = mock_broadcast.call_args[0][1]
            assert event_arg.event == "DAG_TASK_PROGRESS"
            assert event_arg.data["task_name"] == TaskName.MIXDOWN_TRACKS
            assert event_arg.data["progress_pct"] == 50.0
        finally:
            self._dispose_loop(loop)

    def test_no_broadcast_without_transcript_id(self):
        """When transcript_id is None, no broadcast happens."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            make_audio_progress_logger,
        )

        ctx = MagicMock()

        with patch(
            "reflector.hatchet.broadcast.broadcast_event",
            new_callable=AsyncMock,
        ) as mock_broadcast:
            callback = make_audio_progress_logger(
                ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id=None
            )
            callback(50.0, 100.0)

        mock_broadcast.assert_not_called()

    def test_no_broadcast_when_progress_pct_is_none(self):
        """When progress_pct is None, no broadcast happens even with transcript_id."""
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            make_audio_progress_logger,
        )

        ctx = MagicMock()

        with patch(
            "reflector.hatchet.broadcast.broadcast_event",
            new_callable=AsyncMock,
        ) as mock_broadcast:
            callback = make_audio_progress_logger(
                ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id="t-123"
            )
            callback(None, 100.0)

        mock_broadcast.assert_not_called()

    def test_logging_throttled_by_interval(self):
        """With interval=5.0, rapid calls only log once until interval elapses.

        The throttle applies to ctx.log() calls. Broadcasts (fire-and-forget)
        are not throttled — they occur every call when transcript_id + progress_pct set.
        """
        import asyncio
        import time as time_mod

        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            make_audio_progress_logger,
        )

        ctx = MagicMock()
        ctx.log = MagicMock()
        mock_broadcast = AsyncMock()
        loop, tasks_created, capture_create_task = self._install_capturing_loop()

        # Scripted clock readings: the first is consumed when the logger is
        # created, the next three by callback calls 1-3. Once exhausted, the
        # real time.monotonic() is used so asyncio internals keep working.
        scripted_values = iter([100.0, 100.0, 101.0, 106.0])
        real_monotonic = time_mod.monotonic

        def mock_monotonic():
            try:
                return next(scripted_values)
            except StopIteration:
                return real_monotonic()

        try:
            with (
                patch(
                    "reflector.hatchet.workflows.daily_multitrack_pipeline.time.monotonic",
                    side_effect=mock_monotonic,
                ),
                patch(
                    "reflector.hatchet.broadcast.broadcast_event",
                    mock_broadcast,
                ),
                patch.object(loop, "create_task", side_effect=capture_create_task),
            ):
                callback = make_audio_progress_logger(
                    ctx, TaskName.MIXDOWN_TRACKS, interval=5.0, transcript_id="t-123"
                )

                # Call 1 at t=100.0: 100.0 - 100.0 = 0.0 < 5.0 => no log
                callback(25.0, 50.0)
                assert ctx.log.call_count == 0

                # Call 2 at t=101.0: 101.0 - 100.0 = 1.0 < 5.0 => no log
                callback(50.0, 100.0)
                assert ctx.log.call_count == 0

                # Call 3 at t=106.0: 106.0 - 100.0 = 6.0 >= 5.0 => logs
                callback(75.0, 150.0)
                assert ctx.log.call_count == 1

                # Run pending broadcast tasks
                if tasks_created:
                    loop.run_until_complete(asyncio.gather(*tasks_created))

            # Broadcasts happen on every call (not throttled) — 3 calls total
            assert mock_broadcast.call_count == 3
        finally:
            self._dispose_loop(loop)

    def test_uses_broadcast_event_not_append_event_and_broadcast(self):
        """Progress events use broadcast_event (transient), not append_event_and_broadcast (persisted)."""
        import asyncio

        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            make_audio_progress_logger,
        )

        ctx = MagicMock()
        ctx.log = MagicMock()
        mock_broadcast_event = AsyncMock()
        mock_append = AsyncMock()
        loop, tasks_created, capture_create_task = self._install_capturing_loop()

        try:
            with (
                patch(
                    "reflector.hatchet.broadcast.broadcast_event",
                    mock_broadcast_event,
                ),
                patch(
                    "reflector.hatchet.broadcast.append_event_and_broadcast",
                    mock_append,
                ),
                patch.object(loop, "create_task", side_effect=capture_create_task),
            ):
                callback = make_audio_progress_logger(
                    ctx, TaskName.MIXDOWN_TRACKS, interval=0.0, transcript_id="t-123"
                )
                callback(50.0, 100.0)

                if tasks_created:
                    loop.run_until_complete(asyncio.gather(*tasks_created))

            # broadcast_event (transient) IS called
            mock_broadcast_event.assert_called_once()
            # append_event_and_broadcast (persisted) is NOT called
            mock_append.assert_not_called()
        finally:
            self._dispose_loop(loop)

View File

@@ -0,0 +1,181 @@
"""Tests for with_error_handling decorator integration with broadcast_dag_status.
The decorator wraps each pipeline task and calls broadcast_dag_status on both
success and failure paths. These tests verify that integration rather than
testing broadcast_dag_status in isolation (which test_dag_progress.py covers).
"""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from reflector.hatchet.constants import TaskName
class TestWithErrorHandlingBroadcast:
    """Checks how the with_error_handling decorator uses broadcast_dag_status."""

    # Patch targets shared by the tests below.
    _BROADCAST_TARGET = "reflector.hatchet.dag_progress.broadcast_dag_status"
    _SET_ERROR_TARGET = (
        "reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status"
    )

    @pytest.fixture(autouse=True)
    def _setup_hatchet_mock(self):
        """Swap HatchetClientManager._instance for a mock during each test.

        Module-level code in workflow files calls get_client() during import;
        having _instance set beforehand avoids ClientConfig validation.
        The previous value is put back after the test.
        """
        from reflector.hatchet.client import HatchetClientManager

        saved_instance = HatchetClientManager._instance
        HatchetClientManager._instance = MagicMock()
        yield
        HatchetClientManager._instance = saved_instance

    def _make_input(self, transcript_id: str = "t-123") -> MagicMock:
        """Return a mock pipeline input carrying the given transcript_id."""
        pipeline_input = MagicMock()
        pipeline_input.transcript_id = transcript_id
        return pipeline_input

    def _make_ctx(self, workflow_run_id: str = "wf-abc") -> MagicMock:
        """Return a mock context carrying the given workflow_run_id."""
        context = MagicMock()
        context.workflow_run_id = workflow_run_id
        return context

    @staticmethod
    def _decorate(task_fn, **decorator_options):
        """Wrap task_fn with with_error_handling for TaskName.GET_RECORDING.

        The decorator is imported here rather than at module level so the
        import happens while the autouse fixture's mock is installed.
        """
        from reflector.hatchet.workflows.daily_multitrack_pipeline import (
            with_error_handling,
        )

        return with_error_handling(TaskName.GET_RECORDING, **decorator_options)(
            task_fn
        )

    @pytest.mark.asyncio
    async def test_calls_broadcast_on_success(self):
        """A successful task triggers exactly one broadcast_dag_status call."""
        decorated = self._decorate(AsyncMock(return_value="ok"))

        with patch(self._BROADCAST_TARGET, new_callable=AsyncMock) as mock_broadcast:
            outcome = await decorated(self._make_input(), self._make_ctx())

        assert outcome == "ok"
        mock_broadcast.assert_called_once_with("t-123", "wf-abc")

    @pytest.mark.asyncio
    async def test_calls_broadcast_on_failure(self):
        """A raising task still triggers exactly one broadcast_dag_status call."""
        decorated = self._decorate(AsyncMock(side_effect=RuntimeError("boom")))

        broadcast_patch = patch(self._BROADCAST_TARGET, new_callable=AsyncMock)
        set_error_patch = patch(self._SET_ERROR_TARGET, new_callable=AsyncMock)
        with broadcast_patch as mock_broadcast, set_error_patch:
            with pytest.raises(RuntimeError, match="boom"):
                await decorated(self._make_input(), self._make_ctx())

        mock_broadcast.assert_called_once_with("t-123", "wf-abc")

    @pytest.mark.asyncio
    async def test_swallows_broadcast_exception_on_success(self):
        """A failing broadcast does not break an otherwise successful task."""
        decorated = self._decorate(AsyncMock(return_value="ok"))

        exploding_broadcast = patch(
            self._BROADCAST_TARGET,
            new_callable=AsyncMock,
            side_effect=RuntimeError("broadcast exploded"),
        )
        with exploding_broadcast:
            outcome = await decorated(self._make_input(), self._make_ctx())

        assert outcome == "ok"

    @pytest.mark.asyncio
    async def test_swallows_broadcast_exception_on_failure(self):
        """The task's own exception wins even when the broadcast fails too."""
        decorated = self._decorate(AsyncMock(side_effect=ValueError("original error")))

        exploding_broadcast = patch(
            self._BROADCAST_TARGET,
            new_callable=AsyncMock,
            side_effect=RuntimeError("broadcast exploded"),
        )
        set_error_patch = patch(self._SET_ERROR_TARGET, new_callable=AsyncMock)
        with exploding_broadcast, set_error_patch:
            with pytest.raises(ValueError, match="original error"):
                await decorated(self._make_input(), self._make_ctx())

    @pytest.mark.asyncio
    async def test_calls_set_workflow_error_status_on_failure(self):
        """With default options a failing task calls set_workflow_error_status."""
        decorated = self._decorate(AsyncMock(side_effect=RuntimeError("boom")))

        broadcast_patch = patch(self._BROADCAST_TARGET, new_callable=AsyncMock)
        set_error_patch = patch(self._SET_ERROR_TARGET, new_callable=AsyncMock)
        with broadcast_patch, set_error_patch as mock_set_error:
            with pytest.raises(RuntimeError, match="boom"):
                await decorated(self._make_input(), self._make_ctx())

        mock_set_error.assert_called_once_with("t-123")

    @pytest.mark.asyncio
    async def test_no_set_workflow_error_status_when_disabled(self):
        """With set_error_status=False a failure does NOT call set_workflow_error_status."""
        decorated = self._decorate(
            AsyncMock(side_effect=RuntimeError("boom")), set_error_status=False
        )

        broadcast_patch = patch(self._BROADCAST_TARGET, new_callable=AsyncMock)
        set_error_patch = patch(self._SET_ERROR_TARGET, new_callable=AsyncMock)
        with broadcast_patch, set_error_patch as mock_set_error:
            with pytest.raises(RuntimeError, match="boom"):
                await decorated(self._make_input(), self._make_ctx())

        mock_set_error.assert_not_called()

View File

@@ -0,0 +1,421 @@
"""Tests for DAG status REST enrichment on search and transcript GET endpoints."""
from datetime import datetime, timezone
from types import SimpleNamespace
from unittest.mock import AsyncMock, patch
import pytest
import reflector.db.search as search_module
from reflector.db.search import SearchResult, _fetch_dag_statuses
from reflector.db.transcripts import TranscriptEvent
class TestFetchDagStatuses:
    """Tests for the _fetch_dag_statuses helper in reflector.db.search."""

    @staticmethod
    async def _fetch_with_rows(rows, transcript_ids):
        """Call _fetch_dag_statuses with get_database().fetch_all returning rows."""
        with patch("reflector.db.search.get_database") as mock_db:
            mock_db.return_value.fetch_all = AsyncMock(return_value=rows)
            return await _fetch_dag_statuses(transcript_ids)

    @pytest.mark.asyncio
    async def test_returns_empty_for_empty_ids(self):
        """An empty id list yields an empty mapping."""
        assert await _fetch_dag_statuses([]) == {}

    @pytest.mark.asyncio
    async def test_extracts_last_dag_status(self):
        """Of several DAG_STATUS events, the tasks of the last one are returned."""
        first_snapshot = {
            "event": "DAG_STATUS",
            "data": {
                "workflow_run_id": "r1",
                "tasks": [{"name": "get_recording", "status": "completed"}],
            },
        }
        second_snapshot = {
            "event": "DAG_STATUS",
            "data": {
                "workflow_run_id": "r1",
                "tasks": [
                    {"name": "get_recording", "status": "completed"},
                    {"name": "process_tracks", "status": "running"},
                ],
            },
        }
        history = [
            {"event": "STATUS", "data": {"value": "processing"}},
            first_snapshot,
            second_snapshot,
        ]

        statuses = await self._fetch_with_rows(
            [{"id": "t1", "events": history}], ["t1"]
        )

        assert "t1" in statuses
        assert len(statuses["t1"]) == 2  # Last DAG_STATUS had 2 tasks

    @pytest.mark.asyncio
    async def test_skips_transcripts_without_events(self):
        """A row whose events column is None contributes nothing."""
        statuses = await self._fetch_with_rows(
            [{"id": "t1", "events": None}], ["t1"]
        )
        assert statuses == {}

    @pytest.mark.asyncio
    async def test_skips_transcripts_without_dag_status(self):
        """A row with only non-DAG_STATUS events contributes nothing."""
        history = [
            {"event": "STATUS", "data": {"value": "processing"}},
            {"event": "DURATION", "data": {"duration": 1000}},
        ]
        statuses = await self._fetch_with_rows(
            [{"id": "t1", "events": history}], ["t1"]
        )
        assert statuses == {}

    @pytest.mark.asyncio
    async def test_handles_json_string_events(self):
        """Events given as a JSON string (not a parsed list) are still handled."""
        import json

        history = [
            {
                "event": "DAG_STATUS",
                "data": {
                    "workflow_run_id": "r1",
                    "tasks": [{"name": "transcribe", "status": "running"}],
                },
            },
        ]
        encoded_row = {"id": "t1", "events": json.dumps(history)}

        statuses = await self._fetch_with_rows([encoded_row], ["t1"])

        assert "t1" in statuses
        assert len(statuses["t1"]) == 1
        assert statuses["t1"][0]["name"] == "transcribe"

    @pytest.mark.asyncio
    async def test_multiple_transcripts(self):
        """Several rows in one call each get their own entry."""
        history_t1 = [
            {
                "event": "DAG_STATUS",
                "data": {
                    "workflow_run_id": "r1",
                    "tasks": [{"name": "a", "status": "completed"}],
                },
            },
        ]
        history_t2 = [
            {
                "event": "DAG_STATUS",
                "data": {
                    "workflow_run_id": "r2",
                    "tasks": [{"name": "b", "status": "running"}],
                },
            },
        ]
        rows = [
            {"id": "t1", "events": history_t1},
            {"id": "t2", "events": history_t2},
        ]

        statuses = await self._fetch_with_rows(rows, ["t1", "t2"])

        assert "t1" in statuses
        assert "t2" in statuses
        assert statuses["t1"][0]["name"] == "a"
        assert statuses["t2"][0]["name"] == "b"

    @pytest.mark.asyncio
    async def test_dag_status_without_tasks_key_skipped(self):
        """A DAG_STATUS event whose data lacks "tasks" is skipped."""
        history = [
            {"event": "DAG_STATUS", "data": {"workflow_run_id": "r1"}},
        ]
        statuses = await self._fetch_with_rows(
            [{"id": "t1", "events": history}], ["t1"]
        )
        assert statuses == {}
def _extract_dag_status_from_transcript(transcript):
"""Replicate the dag_status extraction logic from transcript_get view.
This mirrors the code in reflector/views/transcripts.py lines 495-500:
dag_status = None
if transcript.status == "processing" and transcript.events:
for ev in reversed(transcript.events):
if ev.event == "DAG_STATUS":
dag_status = ev.data.get("tasks") if isinstance(ev.data, dict) else None
break
"""
dag_status = None
if transcript.status == "processing" and transcript.events:
for ev in reversed(transcript.events):
if ev.event == "DAG_STATUS":
dag_status = ev.data.get("tasks") if isinstance(ev.data, dict) else None
break
return dag_status
class TestTranscriptGetDagStatusExtraction:
    """dag_status extraction as done for the transcript_get endpoint.

    The actual endpoint is complex to set up, so these tests exercise
    _extract_dag_status_from_transcript, which follows the same code pattern
    as the view.
    """

    @staticmethod
    def _dag_event(tasks):
        """Build a DAG_STATUS TranscriptEvent for run "r1" carrying tasks."""
        return TranscriptEvent(
            event="DAG_STATUS",
            data={"workflow_run_id": "r1", "tasks": tasks},
        )

    @staticmethod
    def _status_event():
        """Build a STATUS TranscriptEvent with value "processing"."""
        return TranscriptEvent(event="STATUS", data={"value": "processing"})

    def test_processing_transcript_with_dag_status_events(self):
        """A processing transcript yields the tasks of its last DAG_STATUS event."""
        history = [
            self._status_event(),
            self._dag_event([{"name": "get_recording", "status": "completed"}]),
            self._dag_event(
                [
                    {"name": "get_recording", "status": "completed"},
                    {"name": "transcribe", "status": "running"},
                ]
            ),
        ]
        transcript = SimpleNamespace(status="processing", events=history)

        extracted = _extract_dag_status_from_transcript(transcript)

        assert extracted is not None
        assert len(extracted) == 2
        assert extracted[0]["name"] == "get_recording"
        assert extracted[1]["name"] == "transcribe"
        assert extracted[1]["status"] == "running"

    def test_processing_transcript_without_dag_status_events(self):
        """A processing transcript with no DAG_STATUS event yields None."""
        history = [
            self._status_event(),
            TranscriptEvent(event="DURATION", data={"duration": 1000}),
        ]
        transcript = SimpleNamespace(status="processing", events=history)

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_ended_transcript_with_dag_status_events(self):
        """An ended transcript yields None even if DAG_STATUS events exist."""
        history = [
            self._dag_event([{"name": "transcribe", "status": "completed"}]),
        ]
        transcript = SimpleNamespace(status="ended", events=history)

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_processing_transcript_with_empty_events(self):
        """A processing transcript with an empty events list yields None."""
        transcript = SimpleNamespace(status="processing", events=[])

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_processing_transcript_with_none_events(self):
        """A processing transcript whose events attribute is None yields None."""
        transcript = SimpleNamespace(status="processing", events=None)

        assert _extract_dag_status_from_transcript(transcript) is None

    def test_extracts_last_dag_status_not_first(self):
        """The most recent DAG_STATUS event is used, not the earliest one."""
        history = [
            self._dag_event([{"name": "a", "status": "running"}]),
            self._status_event(),
            self._dag_event(
                [
                    {"name": "a", "status": "completed"},
                    {"name": "b", "status": "running"},
                ]
            ),
        ]
        transcript = SimpleNamespace(status="processing", events=history)

        extracted = _extract_dag_status_from_transcript(transcript)

        assert len(extracted) == 2
        assert extracted[0]["status"] == "completed"
        assert extracted[1]["name"] == "b"
class TestSearchEnrichmentIntegration:
    """DAG status enrichment of search results.

    The search function enriches processing transcripts with dag_status by
    calling _fetch_dag_statuses for the processing ids and assigning the
    results. The tests replay that enrichment step against a mocked
    _fetch_dag_statuses.
    """

    def _make_search_result(self, id: str, status: str) -> SearchResult:
        """Build a minimal SearchResult with the given id and status."""
        fields = dict(
            id=id,
            title=f"Transcript {id}",
            user_id="u1",
            room_id=None,
            room_name=None,
            source_kind="live",
            created_at=datetime(2024, 1, 1, tzinfo=timezone.utc),
            status=status,
            rank=1.0,
            duration=60.0,
            search_snippets=[],
            total_match_count=0,
            dag_status=None,
        )
        return SearchResult(**fields)

    @staticmethod
    async def _apply_enrichment(results):
        """Replicate the enrichment logic from SearchController.search_transcripts.

        Fetches DAG statuses only when at least one result is "processing",
        then assigns dag_status on every result whose id was returned.
        """
        processing_ids = [r.id for r in results if r.status == "processing"]
        if not processing_ids:
            return
        dag_statuses = await search_module._fetch_dag_statuses(processing_ids)
        for r in results:
            if r.id in dag_statuses:
                r.dag_status = dag_statuses[r.id]

    @staticmethod
    def _patched_fetch(fetched):
        """Patch search_module._fetch_dag_statuses with an AsyncMock returning fetched."""
        return patch.object(
            search_module,
            "_fetch_dag_statuses",
            new_callable=AsyncMock,
            return_value=fetched,
        )

    @pytest.mark.asyncio
    async def test_processing_result_gets_dag_status(self):
        """A processing result with a matching fetched status gets dag_status set."""
        results = [self._make_search_result("t1", "processing")]
        dag_tasks = [
            {"name": "get_recording", "status": "completed"},
            {"name": "transcribe", "status": "running"},
        ]

        with self._patched_fetch({"t1": dag_tasks}) as mock_fetch:
            await self._apply_enrichment(results)

        mock_fetch.assert_called_once_with(["t1"])
        assert results[0].dag_status == dag_tasks

    @pytest.mark.asyncio
    async def test_ended_result_does_not_trigger_fetch(self):
        """An ended result does NOT trigger _fetch_dag_statuses."""
        results = [self._make_search_result("t1", "ended")]

        with self._patched_fetch({}) as mock_fetch:
            await self._apply_enrichment(results)

        mock_fetch.assert_not_called()
        assert results[0].dag_status is None

    @pytest.mark.asyncio
    async def test_mixed_processing_and_ended_results(self):
        """Only the processing results are enriched; the ended one stays None."""
        results = [
            self._make_search_result(tid, state)
            for tid, state in (
                ("t1", "processing"),
                ("t2", "ended"),
                ("t3", "processing"),
            )
        ]
        dag_tasks_t1 = [{"name": "transcribe", "status": "running"}]
        dag_tasks_t3 = [{"name": "diarize", "status": "completed"}]

        with self._patched_fetch(
            {"t1": dag_tasks_t1, "t3": dag_tasks_t3}
        ) as mock_fetch:
            await self._apply_enrichment(results)

        mock_fetch.assert_called_once_with(["t1", "t3"])
        assert results[0].dag_status == dag_tasks_t1
        assert results[1].dag_status is None
        assert results[2].dag_status == dag_tasks_t3

    @pytest.mark.asyncio
    async def test_processing_result_without_dag_events_stays_none(self):
        """A processing result with nothing fetched keeps dag_status=None."""
        results = [self._make_search_result("t1", "processing")]

        with self._patched_fetch({}) as mock_fetch:
            await self._apply_enrichment(results)

        mock_fetch.assert_called_once_with(["t1"])
        assert results[0].dag_status is None

View File

@@ -1,6 +1,6 @@
import asyncio
import time
from unittest.mock import patch
from unittest.mock import AsyncMock, patch
import pytest
from httpx import ASGITransport, AsyncClient
@@ -142,17 +142,17 @@ async def test_whereby_recording_uses_file_pipeline(client):
"reflector.services.transcript_process.task_pipeline_file_process"
) as mock_file_pipeline,
patch(
"reflector.services.transcript_process.task_pipeline_multitrack_process"
) as mock_multitrack_pipeline,
"reflector.services.transcript_process.HatchetClientManager"
) as mock_hatchet,
):
response = await client.post(f"/transcripts/{transcript.id}/process")
assert response.status_code == 200
assert response.json()["status"] == "ok"
# Whereby recordings should use file pipeline
# Whereby recordings should use file pipeline, not Hatchet
mock_file_pipeline.delay.assert_called_once_with(transcript_id=transcript.id)
mock_multitrack_pipeline.delay.assert_not_called()
mock_hatchet.start_workflow.assert_not_called()
@pytest.mark.usefixtures("setup_database")
@@ -177,8 +177,6 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
recording_trigger="automatic-2nd-participant",
is_shared=False,
)
# Force Celery backend for test
await rooms_controller.update(room, {"use_celery": True})
transcript = await transcripts_controller.add(
"",
@@ -213,18 +211,23 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
"reflector.services.transcript_process.task_pipeline_file_process"
) as mock_file_pipeline,
patch(
"reflector.services.transcript_process.task_pipeline_multitrack_process"
) as mock_multitrack_pipeline,
"reflector.services.transcript_process.HatchetClientManager"
) as mock_hatchet,
):
mock_hatchet.start_workflow = AsyncMock(return_value="test-workflow-id")
response = await client.post(f"/transcripts/{transcript.id}/process")
assert response.status_code == 200
assert response.json()["status"] == "ok"
# Daily.co multitrack recordings should use multitrack pipeline
mock_multitrack_pipeline.delay.assert_called_once_with(
transcript_id=transcript.id,
bucket_name="daily-bucket",
track_keys=track_keys,
)
# Daily.co multitrack recordings should use Hatchet workflow
mock_hatchet.start_workflow.assert_called_once()
call_kwargs = mock_hatchet.start_workflow.call_args.kwargs
assert call_kwargs["workflow_name"] == "DiarizationPipeline"
assert call_kwargs["input_data"]["transcript_id"] == transcript.id
assert call_kwargs["input_data"]["bucket_name"] == "daily-bucket"
assert call_kwargs["input_data"]["tracks"] == [
{"s3_key": k} for k in track_keys
]
mock_file_pipeline.delay.assert_not_called()

View File

@@ -115,9 +115,7 @@ def appserver(tmpdir, setup_database, celery_session_app, celery_session_worker)
settings.DATA_DIR = DATA_DIR
@pytest.fixture(scope="session")
def celery_includes():
return ["reflector.pipelines.main_live_pipeline"]
# Using celery_includes from conftest.py which includes both pipelines
@pytest.mark.usefixtures("setup_database")

View File

@@ -56,7 +56,12 @@ def appserver_ws_user(setup_database):
if server_instance:
server_instance.should_exit = True
server_thread.join(timeout=30)
server_thread.join(timeout=2.0)
# Reset global singleton for test isolation
from reflector.ws_manager import reset_ws_manager
reset_ws_manager()
@pytest.fixture(autouse=True)
@@ -133,6 +138,8 @@ async def test_user_ws_accepts_valid_token_and_receives_events(appserver_ws_user
# Connect and then trigger an event via HTTP create
async with aconnect_ws(base_ws, subprotocols=subprotocols) as ws:
await asyncio.sleep(0.2)
# Emit an event to the user's room via a standard HTTP action
from httpx import AsyncClient
@@ -150,6 +157,7 @@ async def test_user_ws_accepts_valid_token_and_receives_events(appserver_ws_user
"email": "user-abc@example.com",
}
# Use in-memory client (global singleton makes it share ws_manager)
async with AsyncClient(app=app, base_url=f"http://{host}:{port}/v1") as ac:
# Create a transcript as this user so that the server publishes TRANSCRIPT_CREATED to user room
resp = await ac.post("/transcripts", json={"name": "WS Test"})

View File

@@ -0,0 +1,331 @@
"""WebSocket broadcast delivery tests for STATUS and DAG_STATUS events.
Tests the full chain identified in DEBUG.md:
broadcast_event() → ws_manager.send_json() → Redis/in-memory pub/sub
→ _pubsub_data_reader() → socket.send_json() → WebSocket client
Covers:
1. STATUS event delivery to transcript room WS via ws_manager.send_json()
2. DAG_STATUS event delivery to transcript room WS via ws_manager.send_json()
3. Full broadcast_event() chain for STATUS (requires broadcast.py patching)
4. Full broadcast_event() chain for DAG_STATUS
5. Ordering of multiple rapid STATUS / DAG_STATUS events
"""
import asyncio
import threading
import time
import pytest
from httpx import AsyncClient
from httpx_ws import aconnect_ws
from uvicorn import Config, Server
@pytest.fixture
def appserver_ws_broadcast(setup_database, monkeypatch):
    """Start real uvicorn server for WebSocket broadcast tests.

    The server runs on its own event loop in a daemon thread, listening on
    127.0.0.1:1259. The fixture blocks until uvicorn reports that startup
    finished, so tests never race the listening socket.

    Also patches broadcast.py's get_ws_manager (missing from conftest autouse fixture).

    Yields:
        ``(host, port)`` of the running server.

    Raises:
        Exception: whatever the server thread raised during startup.
        RuntimeError: if the server thread exits, or does not become ready
            within the startup timeout, without raising an ``Exception``.
    """
    # Patch broadcast.py's get_ws_manager — conftest.py misses this module.
    # Without this, broadcast_event() creates a real Redis ws_manager.
    import reflector.ws_manager as ws_mod
    from reflector.app import app
    from reflector.db import get_database

    monkeypatch.setattr(
        "reflector.hatchet.broadcast.get_ws_manager", ws_mod.get_ws_manager
    )

    host = "127.0.0.1"
    port = 1259
    startup_timeout = 30.0
    server_exception = None
    server_instance = None

    def run_server():
        nonlocal server_exception, server_instance
        # Bound before the try so the finally clause never hits an unbound name
        # when loop creation itself fails.
        loop = None
        try:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            config = Config(app=app, host=host, port=port, loop=loop)
            server_instance = Server(config)

            async def start_server():
                # The database connection lives exactly as long as the server.
                database = get_database()
                await database.connect()
                try:
                    await server_instance.serve()
                finally:
                    await database.disconnect()

            loop.run_until_complete(start_server())
        except Exception as e:
            server_exception = e
        finally:
            if loop is not None:
                loop.close()

    server_thread = threading.Thread(target=run_server, daemon=True)
    server_thread.start()

    # Wait for uvicorn's own readiness flag instead of sleeping a fixed time.
    deadline = time.monotonic() + startup_timeout
    while not (server_instance is not None and server_instance.started):
        if server_exception is not None:
            raise server_exception
        if not server_thread.is_alive():
            # Thread ended without becoming ready (e.g. SystemExit on a bind
            # failure, which `except Exception` above does not capture).
            raise server_exception or RuntimeError(
                "uvicorn server thread exited before startup completed"
            )
        if time.monotonic() >= deadline:
            if server_instance is not None:
                server_instance.should_exit = True
            raise RuntimeError(
                f"uvicorn server not ready after {startup_timeout:.0f}s"
            )
        time.sleep(0.05)

    yield host, port

    if server_instance:
        server_instance.should_exit = True
        server_thread.join(timeout=2.0)

    # Reset the global ws_manager singleton so later tests start clean.
    from reflector.ws_manager import reset_ws_manager

    reset_ws_manager()
async def _create_transcript(host: str, port: int, name: str) -> str:
    """Create a transcript via ASGI transport and return its ID.

    The request is dispatched in-process to the app object through an explicit
    ``httpx.ASGITransport`` (the ``AsyncClient(app=...)`` shortcut is deprecated
    in httpx 0.27 and removed in 0.28); ``host``/``port`` only shape the base URL.

    Args:
        host: Host used to build the base URL.
        port: Port used to build the base URL.
        name: Name given to the new transcript.

    Returns:
        The ``id`` field of the JSON response.

    Raises:
        AssertionError: if the endpoint does not answer with HTTP 200.
    """
    from httpx import ASGITransport

    from reflector.app import app

    async with AsyncClient(
        transport=ASGITransport(app=app),
        base_url=f"http://{host}:{port}/v1",
    ) as ac:
        resp = await ac.post("/transcripts", json={"name": name})
    assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
    return resp.json()["id"]
async def _drain_historical_events(ws, timeout: float = 0.5) -> list[dict]:
"""Read all historical events sent on WS connect (non-blocking drain)."""
events = []
deadline = asyncio.get_event_loop().time() + timeout
while asyncio.get_event_loop().time() < deadline:
try:
msg = await asyncio.wait_for(ws.receive_json(), timeout=0.1)
events.append(msg)
except (asyncio.TimeoutError, Exception):
break
return events
# ---------------------------------------------------------------------------
# Test 1: STATUS event delivery via ws_manager.send_json
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_transcript_ws_receives_status_via_send_json(appserver_ws_broadcast):
"""STATUS event published via ws_manager.send_json() arrives at transcript room WS."""
host, port = appserver_ws_broadcast
transcript_id = await _create_transcript(host, port, "Status send_json test")
# NOTE(review): the events URL uses an http:// scheme; presumably httpx_ws
# performs the WebSocket upgrade on it — confirm.
ws_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
async with aconnect_ws(ws_url) as ws:
# Discard events replayed on connect so the next message read is the live one.
await _drain_historical_events(ws)
import reflector.ws_manager as ws_mod
ws_manager = ws_mod.get_ws_manager()
# Publish straight into the transcript's room ("ts:<transcript_id>"),
# without going through broadcast_event().
await ws_manager.send_json(
room_id=f"ts:{transcript_id}",
message={"event": "STATUS", "data": {"value": "processing"}},
)
# Bounded wait: fail after 5 s instead of hanging if nothing is delivered.
msg = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
assert msg["event"] == "STATUS"
assert msg["data"]["value"] == "processing"
# ---------------------------------------------------------------------------
# Test 2: DAG_STATUS event delivery via ws_manager.send_json
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_transcript_ws_receives_dag_status_via_send_json(appserver_ws_broadcast):
"""DAG_STATUS event published via ws_manager.send_json() arrives at transcript room WS."""
host, port = appserver_ws_broadcast
transcript_id = await _create_transcript(host, port, "DAG_STATUS send_json test")
# Two-task DAG snapshot: one completed task with no parents, and one running
# task that depends on it and carries child-progress counters.
dag_payload = {
"event": "DAG_STATUS",
"data": {
"workflow_run_id": "test-run-123",
"tasks": [
{
"name": "get_recording",
"status": "completed",
"started_at": "2025-01-01T00:00:00Z",
"finished_at": "2025-01-01T00:00:05Z",
"duration_seconds": 5.0,
"parents": [],
"error": None,
"children_total": None,
"children_completed": None,
"progress_pct": None,
},
{
"name": "process_tracks",
"status": "running",
"started_at": "2025-01-01T00:00:05Z",
"finished_at": None,
"duration_seconds": None,
"parents": ["get_recording"],
"error": None,
"children_total": 3,
"children_completed": 1,
"progress_pct": 33.3,
},
],
},
}
ws_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
async with aconnect_ws(ws_url) as ws:
# Discard events replayed on connect so the next message read is the live one.
await _drain_historical_events(ws)
import reflector.ws_manager as ws_mod
ws_manager = ws_mod.get_ws_manager()
# Publish straight into the transcript's room ("ts:<transcript_id>").
await ws_manager.send_json(
room_id=f"ts:{transcript_id}",
message=dag_payload,
)
# Bounded wait: fail after 5 s instead of hanging if nothing is delivered.
msg = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
# The payload must arrive with task order and nested fields intact.
assert msg["event"] == "DAG_STATUS"
assert msg["data"]["workflow_run_id"] == "test-run-123"
assert len(msg["data"]["tasks"]) == 2
assert msg["data"]["tasks"][0]["name"] == "get_recording"
assert msg["data"]["tasks"][0]["status"] == "completed"
assert msg["data"]["tasks"][1]["name"] == "process_tracks"
assert msg["data"]["tasks"][1]["children_completed"] == 1
# ---------------------------------------------------------------------------
# Test 3: Full broadcast_event() chain for STATUS
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_broadcast_event_delivers_status_to_transcript_ws(appserver_ws_broadcast):
"""broadcast_event() end-to-end: STATUS event reaches transcript room WS."""
host, port = appserver_ws_broadcast
transcript_id = await _create_transcript(host, port, "broadcast_event STATUS test")
ws_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
async with aconnect_ws(ws_url) as ws:
# Discard events replayed on connect so the next message read is the live one.
await _drain_historical_events(ws)
from reflector.db.transcripts import TranscriptEvent
from reflector.hatchet.broadcast import broadcast_event
from reflector.logger import logger
log = logger.bind(transcript_id=transcript_id)
event = TranscriptEvent(event="STATUS", data={"value": "processing"})
# Unlike the send_json tests, this goes through broadcast_event() itself.
await broadcast_event(transcript_id, event, logger=log)
# Bounded wait: fail after 5 s instead of hanging if nothing is delivered.
msg = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
assert msg["event"] == "STATUS"
assert msg["data"]["value"] == "processing"
# ---------------------------------------------------------------------------
# Test 4: Full broadcast_event() chain for DAG_STATUS
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_broadcast_event_delivers_dag_status_to_transcript_ws(
appserver_ws_broadcast,
):
"""broadcast_event() end-to-end: DAG_STATUS event reaches transcript room WS."""
host, port = appserver_ws_broadcast
transcript_id = await _create_transcript(host, port, "broadcast_event DAG test")
ws_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
async with aconnect_ws(ws_url) as ws:
# Discard events replayed on connect so the next message read is the live one.
await _drain_historical_events(ws)
from reflector.db.transcripts import TranscriptEvent
from reflector.hatchet.broadcast import broadcast_event
from reflector.logger import logger
log = logger.bind(transcript_id=transcript_id)
# DAG snapshot containing a single running task with no timing data yet.
event = TranscriptEvent(
event="DAG_STATUS",
data={
"workflow_run_id": "test-run-456",
"tasks": [
{
"name": "get_recording",
"status": "running",
"started_at": None,
"finished_at": None,
"duration_seconds": None,
"parents": [],
"error": None,
"children_total": None,
"children_completed": None,
"progress_pct": None,
}
],
},
)
# Unlike the send_json tests, this goes through broadcast_event() itself.
await broadcast_event(transcript_id, event, logger=log)
# Bounded wait: fail after 5 s instead of hanging if nothing is delivered.
msg = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
assert msg["event"] == "DAG_STATUS"
assert msg["data"]["tasks"][0]["name"] == "get_recording"
# ---------------------------------------------------------------------------
# Test 5: Multiple rapid events arrive in order
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_multiple_events_arrive_in_order(appserver_ws_broadcast):
"""Multiple STATUS then DAG_STATUS events arrive in correct order."""
host, port = appserver_ws_broadcast
transcript_id = await _create_transcript(host, port, "ordering test")
ws_url = f"http://{host}:{port}/v1/transcripts/{transcript_id}/events"
async with aconnect_ws(ws_url) as ws:
# Discard events replayed on connect so only the four live messages are read below.
await _drain_historical_events(ws)
import reflector.ws_manager as ws_mod
ws_manager = ws_mod.get_ws_manager()
# Publish four messages back to back with no reads in between:
# STATUS(processing) -> DAG_STATUS(no tasks) -> DAG_STATUS(one task) -> STATUS(ended).
await ws_manager.send_json(
room_id=f"ts:{transcript_id}",
message={"event": "STATUS", "data": {"value": "processing"}},
)
await ws_manager.send_json(
room_id=f"ts:{transcript_id}",
message={
"event": "DAG_STATUS",
"data": {"workflow_run_id": "r1", "tasks": []},
},
)
# NOTE(review): this task dict carries only "name" and "status", unlike the
# fuller task payloads in the other tests — confirm consumers tolerate that.
await ws_manager.send_json(
room_id=f"ts:{transcript_id}",
message={
"event": "DAG_STATUS",
"data": {
"workflow_run_id": "r1",
"tasks": [{"name": "a", "status": "running"}],
},
},
)
await ws_manager.send_json(
room_id=f"ts:{transcript_id}",
message={"event": "STATUS", "data": {"value": "ended"}},
)
# Read exactly four messages, each with a 5 s bound so a lost message
# fails the test instead of hanging it.
msgs = []
for _ in range(4):
msg = await asyncio.wait_for(ws.receive_json(), timeout=5.0)
msgs.append(msg)
# Arrival order must match publish order.
assert msgs[0]["event"] == "STATUS"
assert msgs[0]["data"]["value"] == "processing"
assert msgs[1]["event"] == "DAG_STATUS"
assert msgs[1]["data"]["tasks"] == []
assert msgs[2]["event"] == "DAG_STATUS"
assert len(msgs[2]["data"]["tasks"]) == 1
assert msgs[3]["event"] == "STATUS"
assert msgs[3]["data"]["value"] == "ended"

45
server/uv.lock generated
View File

@@ -159,21 +159,20 @@ wheels = [
[[package]]
name = "aiortc"
version = "1.13.0"
version = "1.14.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aioice" },
{ name = "av" },
{ name = "cffi" },
{ name = "cryptography" },
{ name = "google-crc32c" },
{ name = "pyee" },
{ name = "pylibsrtp" },
{ name = "pyopenssl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/62/03/bc947d74c548e0c17cf94e5d5bdacaed0ee9e5b2bb7b8b8cf1ac7a7c01ec/aiortc-1.13.0.tar.gz", hash = "sha256:5d209975c22d0910fb5a0f0e2caa828f2da966c53580f7c7170ac3a16a871620", size = 1179894 }
sdist = { url = "https://files.pythonhosted.org/packages/51/9c/4e027bfe0195de0442da301e2389329496745d40ae44d2d7c4571c4290ce/aiortc-1.14.0.tar.gz", hash = "sha256:adc8a67ace10a085721e588e06a00358ed8eaf5f6b62f0a95358ff45628dd762", size = 1180864 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/87/29/765633cab5f1888890f5f172d1d53009b9b14e079cdfa01a62d9896a9ea9/aiortc-1.13.0-py3-none-any.whl", hash = "sha256:9ccccec98796f6a96bd1c3dd437a06da7e0f57521c96bd56e4b965a91b03a0a0", size = 92910 },
{ url = "https://files.pythonhosted.org/packages/57/ab/31646a49209568cde3b97eeade0d28bb78b400e6645c56422c101df68932/aiortc-1.14.0-py3-none-any.whl", hash = "sha256:4b244d7e482f4e1f67e685b3468269628eca1ec91fa5b329ab517738cfca086e", size = 93183 },
]
[[package]]
@@ -327,28 +326,24 @@ wheels = [
[[package]]
name = "av"
version = "14.4.0"
version = "16.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/86/f6/0b473dab52dfdea05f28f3578b1c56b6c796ce85e76951bab7c4e38d5a74/av-14.4.0.tar.gz", hash = "sha256:3ecbf803a7fdf67229c0edada0830d6bfaea4d10bfb24f0c3f4e607cd1064b42", size = 3892203 }
sdist = { url = "https://files.pythonhosted.org/packages/78/cd/3a83ffbc3cc25b39721d174487fb0d51a76582f4a1703f98e46170ce83d4/av-16.1.0.tar.gz", hash = "sha256:a094b4fd87a3721dacf02794d3d2c82b8d712c85b9534437e82a8a978c175ffd", size = 4285203 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/18/8a/d57418b686ffd05fabd5a0a9cfa97e63b38c35d7101af00e87c51c8cc43c/av-14.4.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b21d5586a88b9fce0ab78e26bd1c38f8642f8e2aad5b35e619f4d202217c701", size = 19965048 },
{ url = "https://files.pythonhosted.org/packages/f5/aa/3f878b0301efe587e9b07bb773dd6b47ef44ca09a3cffb4af50c08a170f3/av-14.4.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:cf8762d90b0f94a20c9f6e25a94f1757db5a256707964dfd0b1d4403e7a16835", size = 23750064 },
{ url = "https://files.pythonhosted.org/packages/9a/b4/6fe94a31f9ed3a927daa72df67c7151968587106f30f9f8fcd792b186633/av-14.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0ac9f08920c7bbe0795319689d901e27cb3d7870b9a0acae3f26fc9daa801a6", size = 33648775 },
{ url = "https://files.pythonhosted.org/packages/6c/f3/7f3130753521d779450c935aec3f4beefc8d4645471159f27b54e896470c/av-14.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a56d9ad2afdb638ec0404e962dc570960aae7e08ae331ad7ff70fbe99a6cf40e", size = 32216915 },
{ url = "https://files.pythonhosted.org/packages/f8/9a/8ffabfcafb42154b4b3a67d63f9b69e68fa8c34cb39ddd5cb813dd049ed4/av-14.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bed513cbcb3437d0ae47743edc1f5b4a113c0b66cdd4e1aafc533abf5b2fbf2", size = 35287279 },
{ url = "https://files.pythonhosted.org/packages/ad/11/7023ba0a2ca94a57aedf3114ab8cfcecb0819b50c30982a4c5be4d31df41/av-14.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d030c2d3647931e53d51f2f6e0fcf465263e7acf9ec6e4faa8dbfc77975318c3", size = 36294683 },
{ url = "https://files.pythonhosted.org/packages/3d/fa/b8ac9636bd5034e2b899354468bef9f4dadb067420a16d8a493a514b7817/av-14.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1cc21582a4f606271d8c2036ec7a6247df0831050306c55cf8a905701d0f0474", size = 34552391 },
{ url = "https://files.pythonhosted.org/packages/fb/29/0db48079c207d1cba7a2783896db5aec3816e17de55942262c244dffbc0f/av-14.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce7c9cd452153d36f1b1478f904ed5f9ab191d76db873bdd3a597193290805d4", size = 37265250 },
{ url = "https://files.pythonhosted.org/packages/1c/55/715858c3feb7efa4d667ce83a829c8e6ee3862e297fb2b568da3f968639d/av-14.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd261e31cc6b43ca722f80656c39934199d8f2eb391e0147e704b6226acebc29", size = 27925845 },
{ url = "https://files.pythonhosted.org/packages/a6/75/b8641653780336c90ba89e5352cac0afa6256a86a150c7703c0b38851c6d/av-14.4.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:a53e682b239dd23b4e3bc9568cfb1168fc629ab01925fdb2e7556eb426339e94", size = 19954125 },
{ url = "https://files.pythonhosted.org/packages/99/e6/37fe6fa5853a48d54d749526365780a63a4bc530be6abf2115e3a21e292a/av-14.4.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5aa0b901751a32703fa938d2155d56ce3faf3630e4a48d238b35d2f7e49e5395", size = 23751479 },
{ url = "https://files.pythonhosted.org/packages/f7/75/9a5f0e6bda5f513b62bafd1cff2b495441a8b07ab7fb7b8e62f0c0d1683f/av-14.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3b316fed3597675fe2aacfed34e25fc9d5bb0196dc8c0b014ae5ed4adda48de", size = 33801401 },
{ url = "https://files.pythonhosted.org/packages/6a/c9/e4df32a2ad1cb7f3a112d0ed610c5e43c89da80b63c60d60e3dc23793ec0/av-14.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a587b5c5014c3c0e16143a0f8d99874e46b5d0c50db6111aa0b54206b5687c81", size = 32364330 },
{ url = "https://files.pythonhosted.org/packages/ca/f0/64e7444a41817fde49a07d0239c033f7e9280bec4a4bb4784f5c79af95e6/av-14.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d53f75e8ac1ec8877a551c0db32a83c0aaeae719d05285281eaaba211bbc30", size = 35519508 },
{ url = "https://files.pythonhosted.org/packages/c2/a8/a370099daa9033a3b6f9b9bd815304b3d8396907a14d09845f27467ba138/av-14.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c8558cfde79dd8fc92d97c70e0f0fa8c94c7a66f68ae73afdf58598f0fe5e10d", size = 36448593 },
{ url = "https://files.pythonhosted.org/packages/27/bb/edb6ceff8fa7259cb6330c51dbfbc98dd1912bd6eb5f7bc05a4bb14a9d6e/av-14.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:455b6410dea0ab2d30234ffb28df7d62ca3cdf10708528e247bec3a4cdcced09", size = 34701485 },
{ url = "https://files.pythonhosted.org/packages/a7/8a/957da1f581aa1faa9a5dfa8b47ca955edb47f2b76b949950933b457bfa1d/av-14.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1661efbe9d975f927b8512d654704223d936f39016fad2ddab00aee7c40f412c", size = 37521981 },
{ url = "https://files.pythonhosted.org/packages/28/76/3f1cf0568592f100fd68eb40ed8c491ce95ca3c1378cc2d4c1f6d1bd295d/av-14.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:fbbeef1f421a3461086853d6464ad5526b56ffe8ccb0ab3fd0a1f121dfbf26ad", size = 27925944 },
{ url = "https://files.pythonhosted.org/packages/48/d0/b71b65d1b36520dcb8291a2307d98b7fc12329a45614a303ff92ada4d723/av-16.1.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:e88ad64ee9d2b9c4c5d891f16c22ae78e725188b8926eb88187538d9dd0b232f", size = 26927747 },
{ url = "https://files.pythonhosted.org/packages/2f/79/720a5a6ccdee06eafa211b945b0a450e3a0b8fc3d12922f0f3c454d870d2/av-16.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cb296073fa6935724de72593800ba86ae49ed48af03960a4aee34f8a611f442b", size = 21492232 },
{ url = "https://files.pythonhosted.org/packages/8e/4f/a1ba8d922f2f6d1a3d52419463ef26dd6c4d43ee364164a71b424b5ae204/av-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:720edd4d25aa73723c1532bb0597806d7b9af5ee34fc02358782c358cfe2f879", size = 39291737 },
{ url = "https://files.pythonhosted.org/packages/1a/31/fc62b9fe8738d2693e18d99f040b219e26e8df894c10d065f27c6b4f07e3/av-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c7f2bc703d0df260a1fdf4de4253c7f5500ca9fc57772ea241b0cb241bcf972e", size = 40846822 },
{ url = "https://files.pythonhosted.org/packages/53/10/ab446583dbce730000e8e6beec6ec3c2753e628c7f78f334a35cad0317f4/av-16.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d69c393809babada7d54964d56099e4b30a3e1f8b5736ca5e27bd7be0e0f3c83", size = 40675604 },
{ url = "https://files.pythonhosted.org/packages/31/d7/1003be685277005f6d63fd9e64904ee222fe1f7a0ea70af313468bb597db/av-16.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:441892be28582356d53f282873c5a951592daaf71642c7f20165e3ddcb0b4c63", size = 42015955 },
{ url = "https://files.pythonhosted.org/packages/2f/4a/fa2a38ee9306bf4579f556f94ecbc757520652eb91294d2a99c7cf7623b9/av-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:273a3e32de64819e4a1cd96341824299fe06f70c46f2288b5dc4173944f0fd62", size = 31750339 },
{ url = "https://files.pythonhosted.org/packages/9c/84/2535f55edcd426cebec02eb37b811b1b0c163f26b8d3f53b059e2ec32665/av-16.1.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:640f57b93f927fba8689f6966c956737ee95388a91bd0b8c8b5e0481f73513d6", size = 26945785 },
{ url = "https://files.pythonhosted.org/packages/b6/17/ffb940c9e490bf42e86db4db1ff426ee1559cd355a69609ec1efe4d3a9eb/av-16.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:ae3fb658eec00852ebd7412fdc141f17f3ddce8afee2d2e1cf366263ad2a3b35", size = 21481147 },
{ url = "https://files.pythonhosted.org/packages/15/c1/e0d58003d2d83c3921887d5c8c9b8f5f7de9b58dc2194356a2656a45cfdc/av-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ee558d9c02a142eebcbe55578a6d817fedfde42ff5676275504e16d07a7f86", size = 39517197 },
{ url = "https://files.pythonhosted.org/packages/32/77/787797b43475d1b90626af76f80bfb0c12cfec5e11eafcfc4151b8c80218/av-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7ae547f6d5fa31763f73900d43901e8c5fa6367bb9a9840978d57b5a7ae14ed2", size = 41174337 },
{ url = "https://files.pythonhosted.org/packages/8e/ac/d90df7f1e3b97fc5554cf45076df5045f1e0a6adf13899e10121229b826c/av-16.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8cf065f9d438e1921dc31fc7aa045790b58aee71736897866420d80b5450f62a", size = 40817720 },
{ url = "https://files.pythonhosted.org/packages/80/6f/13c3a35f9dbcebafd03fe0c4cbd075d71ac8968ec849a3cfce406c35a9d2/av-16.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a345877a9d3cc0f08e2bc4ec163ee83176864b92587afb9d08dff50f37a9a829", size = 42267396 },
{ url = "https://files.pythonhosted.org/packages/c8/b9/275df9607f7fb44317ccb1d4be74827185c0d410f52b6e2cd770fe209118/av-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f49243b1d27c91cd8c66fdba90a674e344eb8eb917264f36117bf2b6879118fd", size = 31752045 },
]
[[package]]
@@ -3267,7 +3262,7 @@ requires-dist = [
{ name = "aiohttp-cors", specifier = ">=0.7.0" },
{ name = "aiortc", specifier = ">=1.5.0" },
{ name = "alembic", specifier = ">=1.11.3" },
{ name = "av", specifier = ">=10.0.0" },
{ name = "av", specifier = ">=15.0.0" },
{ name = "celery", specifier = ">=5.3.4" },
{ name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
{ name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },

View File

@@ -0,0 +1,61 @@
import React from "react";
import { Box, Flex } from "@chakra-ui/react";
import type { DagTask } from "../../../lib/UserEventsProvider";
/**
 * CSS for the `dagDotPulse` keyframes animation: opacity goes 1 → 0.3 → 1.
 * Rendered inside a `<style>` element by the component in this file and
 * referenced by name from the inline `animation` style of running dots.
 */
const pulseKeyframes = `
@keyframes dagDotPulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.3; }
}
`;
/**
 * Converts a snake_case task name into a display label by upper-casing the
 * first character of every underscore-separated part and joining the parts
 * with spaces, e.g. `"get_recording"` → `"Get Recording"`.
 *
 * @param name - Raw task name.
 * @returns The label; the rest of each part is left unchanged.
 */
function humanizeTaskName(name: string): string {
  const labelParts: string[] = [];
  for (const part of name.split("_")) {
    const initial = part.charAt(0).toUpperCase();
    labelParts.push(initial + part.slice(1));
  }
  return labelParts.join(" ");
}
/**
 * Maps a task status to the style props of its progress dot.
 *
 * - `completed` → solid green
 * - `running` → solid blue with the `dagDotPulse` animation
 * - `failed` → solid red
 * - `cancelled` → solid gray
 * - `queued` or any other value → transparent with a gray outline
 *
 * @param status - Status of the task the dot represents.
 * @returns A fresh props object meant to be spread onto the dot element.
 */
function dotProps(status: DagTask["status"]): Record<string, unknown> {
  if (status === "completed") {
    return { bg: "green.500" };
  }
  if (status === "running") {
    const pulse = { animation: "dagDotPulse 1.5s ease-in-out infinite" };
    return { bg: "blue.500", style: pulse };
  }
  if (status === "failed") {
    return { bg: "red.500" };
  }
  if (status === "cancelled") {
    return { bg: "gray.400" };
  }
  // "queued" and every unrecognised status share the hollow outline look.
  return {
    bg: "transparent",
    border: "1px solid",
    borderColor: "gray.400",
  };
}
/**
 * Renders one 4px dot per DAG task in a wrapping row. Each dot is styled by
 * `dotProps(task.status)` and carries the humanized task name as its `title`.
 * The keyframes used by running dots are emitted in a `<style>` element
 * alongside the row.
 *
 * @param tasks - Tasks to render, in display order; `task.name` is the React key.
 */
export default function DagProgressDots({ tasks }: { tasks: DagTask[] }) {
  const dots = tasks.map((task) => {
    const label = humanizeTaskName(task.name);
    const appearance = dotProps(task.status);
    return (
      <Box
        key={task.name}
        w="4px"
        h="4px"
        borderRadius="full"
        flexShrink={0}
        title={label}
        {...appearance}
      />
    );
  });

  return (
    <>
      <style>{pulseKeyframes}</style>
      <Flex gap="2px" alignItems="center" flexWrap="wrap">
        {dots}
      </Flex>
    </>
  );
}

View File

@@ -19,6 +19,7 @@ import {
generateTextFragment,
} from "../../../lib/textHighlight";
import type { components } from "../../../reflector-api";
import type { DagTask } from "../../../lib/UserEventsProvider";
type SearchResult = components["schemas"]["SearchResult"];
type SourceKind = components["schemas"]["SourceKind"];
@@ -29,6 +30,7 @@ interface TranscriptCardsProps {
isLoading?: boolean;
onDelete: (transcriptId: string) => void;
onReprocess: (transcriptId: string) => void;
dagStatusMap?: Map<string, DagTask[]>;
}
function highlightText(text: string, query: string): React.ReactNode {
@@ -102,11 +104,13 @@ function TranscriptCard({
query,
onDelete,
onReprocess,
dagStatusMap,
}: {
result: SearchResult;
query: string;
onDelete: (transcriptId: string) => void;
onReprocess: (transcriptId: string) => void;
dagStatusMap?: Map<string, DagTask[]>;
}) {
const [isExpanded, setIsExpanded] = useState(false);
@@ -137,7 +141,16 @@ function TranscriptCard({
<Box borderWidth={1} p={4} borderRadius="md" fontSize="sm">
<Flex justify="space-between" alignItems="flex-start" gap="2">
<Box>
<TranscriptStatusIcon status={result.status} />
<TranscriptStatusIcon
status={result.status}
dagStatus={
dagStatusMap?.get(result.id) ??
((result as Record<string, unknown>).dag_status as
| DagTask[]
| null) ??
null
}
/>
</Box>
<Box flex="1">
{/* Title with highlighting and text fragment for deep linking */}
@@ -284,6 +297,7 @@ export default function TranscriptCards({
isLoading,
onDelete,
onReprocess,
dagStatusMap,
}: TranscriptCardsProps) {
return (
<Box position="relative">
@@ -315,6 +329,7 @@ export default function TranscriptCards({
query={query}
onDelete={onDelete}
onReprocess={onReprocess}
dagStatusMap={dagStatusMap}
/>
))}
</Stack>

View File

@@ -8,13 +8,17 @@ import {
FaGear,
} from "react-icons/fa6";
import { TranscriptStatus } from "../../../lib/transcript";
import type { DagTask } from "../../../lib/UserEventsProvider";
import DagProgressDots from "./DagProgressDots";
interface TranscriptStatusIconProps {
status: TranscriptStatus;
dagStatus?: DagTask[] | null;
}
export default function TranscriptStatusIcon({
status,
dagStatus,
}: TranscriptStatusIconProps) {
switch (status) {
case "ended":
@@ -36,6 +40,9 @@ export default function TranscriptStatusIcon({
</Box>
);
case "processing":
if (dagStatus && dagStatus.length > 0) {
return <DagProgressDots tasks={dagStatus} />;
}
return (
<Box as="span" title="Processing in progress">
<Icon color="gray.500" as={FaGear} />

View File

@@ -43,6 +43,7 @@ import DeleteTranscriptDialog from "./_components/DeleteTranscriptDialog";
import { formatLocalDate } from "../../lib/time";
import { RECORD_A_MEETING_URL } from "../../api/urls";
import { useUserName } from "../../lib/useUserName";
import { useDagStatusMap } from "../../lib/UserEventsProvider";
const SEARCH_FORM_QUERY_INPUT_NAME = "query" as const;
@@ -273,6 +274,7 @@ export default function TranscriptBrowser() {
}, [JSON.stringify(searchFilters)]);
const userName = useUserName();
const dagStatusMap = useDagStatusMap();
const [deletionLoading, setDeletionLoading] = useState(false);
const cancelRef = React.useRef(null);
const [transcriptToDeleteId, setTranscriptToDeleteId] =
@@ -408,6 +410,7 @@ export default function TranscriptBrowser() {
isLoading={searchLoading}
onDelete={setTranscriptToDeleteId}
onReprocess={handleProcessTranscript}
dagStatusMap={dagStatusMap}
/>
{!searchLoading && results.length === 0 && (

View File

@@ -302,10 +302,10 @@ export default function RoomsList() {
return;
}
const platform: "whereby" | "daily" | null =
const platform: "whereby" | "daily" =
room.platform === "whereby" || room.platform === "daily"
? room.platform
: null;
: "daily";
const roomData = {
name: room.name,

View File

@@ -16,6 +16,7 @@ import {
import { useError } from "../../../../(errors)/errorContext";
import { useRouter } from "next/navigation";
import { Box, Grid } from "@chakra-ui/react";
import { parseNonEmptyString } from "../../../../lib/utils";
export type TranscriptCorrect = {
params: Promise<{
@@ -25,8 +26,7 @@ export type TranscriptCorrect = {
export default function TranscriptCorrect(props: TranscriptCorrect) {
const params = use(props.params);
const { transcriptId } = params;
const transcriptId = parseNonEmptyString(params.transcriptId);
const updateTranscriptMutation = useTranscriptUpdate();
const transcript = useTranscriptGet(transcriptId);

View File

@@ -9,7 +9,9 @@ import React, { useEffect, useState, use } from "react";
import FinalSummary from "./finalSummary";
import TranscriptTitle from "../transcriptTitle";
import Player from "../player";
import { useWebSockets } from "../useWebSockets";
import { useRouter } from "next/navigation";
import { parseNonEmptyString } from "../../../lib/utils";
import {
Box,
Flex,
@@ -30,7 +32,7 @@ type TranscriptDetails = {
export default function TranscriptDetails(details: TranscriptDetails) {
const params = use(details.params);
const transcriptId = params.transcriptId;
const transcriptId = parseNonEmptyString(params.transcriptId);
const router = useRouter();
const statusToRedirect = [
"idle",
@@ -49,6 +51,7 @@ export default function TranscriptDetails(details: TranscriptDetails) {
transcriptId,
waiting || mp3.audioDeleted === true,
);
useWebSockets(transcriptId);
const useActiveTopic = useState<Topic | null>(null);
const [finalSummaryElement, setFinalSummaryElement] =
useState<HTMLDivElement | null>(null);

View File

@@ -0,0 +1,190 @@
"use client";
import { useEffect, useState } from "react";
import { Table, Box, Icon, Spinner, Text, Badge } from "@chakra-ui/react";
import { FaCheck, FaXmark, FaClock, FaMinus } from "react-icons/fa6";
import type { DagTask, DagTaskStatus } from "../../useWebSockets";
/**
 * Builds a display label from a snake_case task name: the name is split on
 * underscores, the first character of each piece is upper-cased, and the
 * pieces are joined with single spaces (`"process_tracks"` → `"Process Tracks"`).
 *
 * @param name - Raw task name.
 * @returns The label; characters after the first of each piece are untouched.
 */
function humanizeTaskName(name: string): string {
  const capitalize = (piece: string): string =>
    `${piece.charAt(0).toUpperCase()}${piece.slice(1)}`;
  const pieces = name.split("_");
  return pieces.map((piece) => capitalize(piece)).join(" ");
}
/**
 * Formats a duration in seconds as `"<s>s"` (under a minute) or `"<m>m <s>s"`.
 *
 * The value is rounded to whole seconds once, before it is split into minutes
 * and seconds, so the seconds part can never read `60` (59.6 → `"1m 0s"`,
 * 119.6 → `"2m 0s"`). Negative and non-finite inputs are shown as `"0s"`.
 *
 * @param seconds - Duration in seconds; may be fractional.
 * @returns The formatted duration.
 */
function formatDuration(seconds: number): string {
  const totalSeconds = Number.isFinite(seconds)
    ? Math.max(0, Math.round(seconds))
    : 0;
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }
  const minutes = Math.floor(totalSeconds / 60);
  const remainingSeconds = totalSeconds % 60;
  return `${minutes}m ${remainingSeconds}s`;
}
/**
 * Icon for a DAG task status. Every recognised status is wrapped in a span
 * whose `title` names the status, so hovering shows a tooltip; the running
 * spinner gets the same wrapper as the static icons.
 *
 * @param status - Status to depict.
 * @returns The icon element, or `null` for an unrecognised status.
 */
function StatusIcon({ status }: { status: DagTaskStatus }) {
  switch (status) {
    case "completed":
      return (
        <Box as="span" title="Completed">
          <Icon color="green.500" as={FaCheck} />
        </Box>
      );
    case "running":
      return (
        <Box as="span" title="Running">
          <Spinner size="sm" color="blue.500" />
        </Box>
      );
    case "failed":
      return (
        <Box as="span" title="Failed">
          <Icon color="red.500" as={FaXmark} />
        </Box>
      );
    case "queued":
      return (
        <Box as="span" title="Queued">
          <Icon color="gray.400" as={FaClock} />
        </Box>
      );
    case "cancelled":
      return (
        <Box as="span" title="Cancelled">
          <Icon color="gray.400" as={FaMinus} />
        </Box>
      );
    default:
      return null;
  }
}
/**
 * Live "time since start" label for a task that is still running.
 *
 * The elapsed value is computed from the current time and `startedAt` when
 * the component mounts, then recomputed once per second. The interval is
 * cleared on unmount and re-created whenever `startedAt` changes.
 */
function ElapsedTimer({ startedAt }: { startedAt: string }) {
  // Lazy initialiser: evaluated only for the first render.
  const initialElapsed = () =>
    (Date.now() - new Date(startedAt).getTime()) / 1000;
  const [elapsed, setElapsed] = useState<number>(initialElapsed);

  useEffect(() => {
    const refresh = () => {
      const startedMs = new Date(startedAt).getTime();
      setElapsed((Date.now() - startedMs) / 1000);
    };
    const timerId = setInterval(refresh, 1000);
    return () => {
      clearInterval(timerId);
    };
  }, [startedAt]);

  return <Text fontSize="sm">{formatDuration(elapsed)}</Text>;
}
/**
 * Content of the "Duration" column for one task.
 *
 * - running with a start timestamp: a ticking elapsed timer;
 * - completed with a recorded duration: that duration, formatted;
 * - anything else: a muted "--" placeholder.
 */
function DurationCell({ task }: { task: DagTask }) {
  const { status, started_at: startedAt, duration_seconds: recorded } = task;

  if (status === "running" && startedAt) {
    return <ElapsedTimer startedAt={startedAt} />;
  }

  if (status === "completed" && recorded !== null) {
    return <Text fontSize="sm">{formatDuration(recorded)}</Text>;
  }

  return (
    <Text fontSize="sm" color="gray.400">
      --
    </Text>
  );
}
/**
 * Content of the "Progress" column for one task.
 *
 * Shows a horizontal bar when the task reports a percentage (clamped to the
 * 0–100 range, red for failed tasks and blue otherwise) and a
 * "completed/total" badge when it reports a child count. Renders nothing
 * when neither piece of information is present.
 */
function ProgressCell({ task }: { task: DagTask }) {
  const { progress_pct: pct, children_total: childTotal } = task;

  if (pct === null && childTotal === null) {
    return null;
  }

  // Width of the filled part of the bar, or null when there is no bar.
  const barWidth =
    pct !== null ? `${Math.min(100, Math.max(0, pct))}%` : null;
  const barColor = task.status === "failed" ? "red.400" : "blue.400";

  return (
    <Box>
      {barWidth !== null && (
        <Box
          w="100%"
          h="6px"
          bg="gray.200"
          borderRadius="full"
          overflow="hidden"
        >
          <Box
            h="100%"
            w={barWidth}
            bg={barColor}
            borderRadius="full"
            transition="width 0.3s ease"
          />
        </Box>
      )}
      {childTotal !== null && (
        <Badge
          size="sm"
          colorPalette="gray"
          // Only add spacing when the badge sits below a bar.
          mt={pct !== null ? 1 : 0}
        >
          {task.children_completed ?? 0}/{childTotal}
        </Badge>
      )}
    </Box>
  );
}
/**
 * One table row per task: name, status icon, duration and progress.
 *
 * A failed task that carries an error message becomes clickable; clicking
 * toggles a second, full-width row that shows the error text with its
 * whitespace preserved.
 */
function TaskRow({ task }: { task: DagTask }) {
  const [expanded, setExpanded] = useState(false);

  // Truthy only for a failed task with a non-empty error message.
  const failureDetails = task.status === "failed" && task.error;
  const toggleExpanded = () => setExpanded((isOpen) => !isOpen);

  return (
    <>
      <Table.Row
        cursor={failureDetails ? "pointer" : "default"}
        onClick={failureDetails ? toggleExpanded : undefined}
        _hover={failureDetails ? { bg: "gray.50" } : undefined}
      >
        <Table.Cell>
          <Text fontSize="sm" fontWeight="medium">
            {humanizeTaskName(task.name)}
          </Text>
        </Table.Cell>
        <Table.Cell>
          <StatusIcon status={task.status} />
        </Table.Cell>
        <Table.Cell>
          <DurationCell task={task} />
        </Table.Cell>
        <Table.Cell>
          <ProgressCell task={task} />
        </Table.Cell>
      </Table.Row>
      {failureDetails && expanded && (
        <Table.Row>
          <Table.Cell colSpan={4}>
            <Box bg="red.50" p={3} borderRadius="md">
              <Text fontSize="xs" color="red.700" whiteSpace="pre-wrap">
                {task.error}
              </Text>
            </Box>
          </Table.Cell>
        </Table.Row>
      )}
    </>
  );
}
/**
 * Table listing every task of a processing DAG with its status, duration
 * and progress. Rows are keyed by task name and rendered in the order the
 * `tasks` array provides. The wrapper scrolls horizontally when the table
 * is wider than its container.
 */
export default function DagProgressTable({ tasks }: { tasks: DagTask[] }) {
  // Header cells in display order; the first column takes the remaining width.
  const columns: { label: string; width?: string }[] = [
    { label: "Task" },
    { label: "Status", width: "80px" },
    { label: "Duration", width: "100px" },
    { label: "Progress", width: "140px" },
  ];

  const headerCells = columns.map(({ label, width }) => (
    <Table.ColumnHeader key={label} fontWeight="600" width={width}>
      {label}
    </Table.ColumnHeader>
  ));

  const bodyRows = tasks.map((task) => <TaskRow key={task.name} task={task} />);

  return (
    <Box w="100%" overflowX="auto">
      <Table.Root size="sm">
        <Table.Header>
          <Table.Row>{headerCells}</Table.Row>
        </Table.Header>
        <Table.Body>{bodyRows}</Table.Body>
      </Table.Root>
    </Box>
  );
}

View File

@@ -10,6 +10,11 @@ import {
} from "@chakra-ui/react";
import { useRouter } from "next/navigation";
import { useTranscriptGet } from "../../../../lib/apiHooks";
import { parseNonEmptyString } from "../../../../lib/utils";
import { useWebSockets } from "../../useWebSockets";
import type { DagTask } from "../../useWebSockets";
import { useDagStatusMap } from "../../../../lib/UserEventsProvider";
import DagProgressTable from "./DagProgressTable";
type TranscriptProcessing = {
params: Promise<{
@@ -19,13 +24,25 @@ type TranscriptProcessing = {
export default function TranscriptProcessing(details: TranscriptProcessing) {
const params = use(details.params);
const transcriptId = params.transcriptId;
const transcriptId = parseNonEmptyString(params.transcriptId);
const router = useRouter();
const transcript = useTranscriptGet(transcriptId);
const { status: wsStatus, dagStatus: wsDagStatus } =
useWebSockets(transcriptId);
const userDagStatusMap = useDagStatusMap();
const userDagStatus = userDagStatusMap.get(transcriptId) ?? null;
const restDagStatus: DagTask[] | null =
((transcript.data as Record<string, unknown>)?.dag_status as
| DagTask[]
| null) ?? null;
// Prefer transcript room WS (most granular), then user room WS, then REST
const dagStatus = wsDagStatus ?? userDagStatus ?? restDagStatus;
useEffect(() => {
const status = transcript.data?.status;
const status = wsStatus?.value ?? transcript.data?.status;
if (!status) return;
if (status === "ended" || status === "error") {
@@ -40,6 +57,7 @@ export default function TranscriptProcessing(details: TranscriptProcessing) {
router.replace(dest);
}
}, [
wsStatus?.value,
transcript.data?.status,
transcript.data?.source_kind,
router,
@@ -73,11 +91,29 @@ export default function TranscriptProcessing(details: TranscriptProcessing) {
w={{ base: "full", md: "container.xl" }}
>
<Center h={"full"} w="full">
<VStack gap={10} bg="gray.100" p={10} borderRadius="md" maxW="500px">
<Spinner size="xl" color="blue.500" />
<Heading size={"md"} textAlign="center">
Processing recording
</Heading>
<VStack
gap={10}
bg="gray.100"
p={10}
borderRadius="md"
maxW="600px"
w="full"
>
{dagStatus ? (
<>
<Heading size={"md"} textAlign="center">
Processing recording
</Heading>
<DagProgressTable tasks={dagStatus} />
</>
) : (
<>
<Spinner size="xl" color="blue.500" />
<Heading size={"md"} textAlign="center">
Processing recording
</Heading>
</>
)}
<Text color="gray.600" textAlign="center">
You can safely return to the library while your recording is being
processed.

View File

@@ -12,6 +12,7 @@ import { Box, Text, Grid, Heading, VStack, Flex } from "@chakra-ui/react";
import LiveTrancription from "../../liveTranscription";
import { useTranscriptGet } from "../../../../lib/apiHooks";
import { TranscriptStatus } from "../../../../lib/transcript";
import { parseNonEmptyString } from "../../../../lib/utils";
type TranscriptDetails = {
params: Promise<{
@@ -21,13 +22,14 @@ type TranscriptDetails = {
const TranscriptRecord = (details: TranscriptDetails) => {
const params = use(details.params);
const transcript = useTranscriptGet(params.transcriptId);
const transcriptId = parseNonEmptyString(params.transcriptId);
const transcript = useTranscriptGet(transcriptId);
const [transcriptStarted, setTranscriptStarted] = useState(false);
const useActiveTopic = useState<Topic | null>(null);
const webSockets = useWebSockets(params.transcriptId);
const webSockets = useWebSockets(transcriptId);
const mp3 = useMp3(params.transcriptId, true);
const mp3 = useMp3(transcriptId, true);
const router = useRouter();

View File

@@ -7,6 +7,7 @@ import useMp3 from "../../useMp3";
import { Center, VStack, Text, Heading } from "@chakra-ui/react";
import FileUploadButton from "../../fileUploadButton";
import { useTranscriptGet } from "../../../../lib/apiHooks";
import { parseNonEmptyString } from "../../../../lib/utils";
type TranscriptUpload = {
params: Promise<{
@@ -16,12 +17,13 @@ type TranscriptUpload = {
const TranscriptUpload = (details: TranscriptUpload) => {
const params = use(details.params);
const transcript = useTranscriptGet(params.transcriptId);
const transcriptId = parseNonEmptyString(params.transcriptId);
const transcript = useTranscriptGet(transcriptId);
const [transcriptStarted, setTranscriptStarted] = useState(false);
const webSockets = useWebSockets(params.transcriptId);
const webSockets = useWebSockets(transcriptId);
const mp3 = useMp3(params.transcriptId, true);
const mp3 = useMp3(transcriptId, true);
const router = useRouter();

View File

@@ -1,5 +1,6 @@
import { useState } from "react";
import type { components } from "../../reflector-api";
import { parseMaybeNonEmptyString } from "../../lib/utils";
type UpdateTranscript = components["schemas"]["UpdateTranscript"];
type GetTranscriptWithParticipants =
@@ -32,7 +33,7 @@ const TranscriptTitle = (props: TranscriptTitle) => {
const [isEditing, setIsEditing] = useState(false);
const updateTranscriptMutation = useTranscriptUpdate();
const participantsQuery = useTranscriptParticipants(
props.transcript?.id || null,
props.transcript?.id ? parseMaybeNonEmptyString(props.transcript.id) : null,
);
const updateTitle = async (newTitle: string, transcriptId: string) => {

View File

@@ -1,5 +1,6 @@
import { useEffect, useState } from "react";
import { useTranscriptGet } from "../../lib/apiHooks";
import { parseMaybeNonEmptyString } from "../../lib/utils";
import { useAuth } from "../../lib/AuthProvider";
import { API_URL } from "../../lib/apiClient";
@@ -27,7 +28,7 @@ const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => {
data: transcript,
isLoading: transcriptMetadataLoading,
error: transcriptError,
} = useTranscriptGet(later ? null : transcriptId);
} = useTranscriptGet(later ? null : parseMaybeNonEmptyString(transcriptId));
const [serviceWorker, setServiceWorker] =
useState<ServiceWorkerRegistration | null>(null);

View File

@@ -1,6 +1,7 @@
import type { components } from "../../reflector-api";
type Participant = components["schemas"]["Participant"];
import { useTranscriptParticipants } from "../../lib/apiHooks";
import { parseMaybeNonEmptyString } from "../../lib/utils";
type ErrorParticipants = {
error: Error;
@@ -32,7 +33,7 @@ const useParticipants = (transcriptId: string): UseParticipants => {
isLoading: loading,
error,
refetch,
} = useTranscriptParticipants(transcriptId || null);
} = useTranscriptParticipants(parseMaybeNonEmptyString(transcriptId));
// Type-safe return based on state
if (error) {

View File

@@ -1,5 +1,6 @@
import type { components } from "../../reflector-api";
import { useTranscriptTopicsWithWordsPerSpeaker } from "../../lib/apiHooks";
import { parseMaybeNonEmptyString } from "../../lib/utils";
type GetTranscriptTopicWithWordsPerSpeaker =
components["schemas"]["GetTranscriptTopicWithWordsPerSpeaker"];
@@ -38,7 +39,7 @@ const useTopicWithWords = (
error,
refetch,
} = useTranscriptTopicsWithWordsPerSpeaker(
transcriptId || null,
parseMaybeNonEmptyString(transcriptId),
topicId || null,
);

View File

@@ -1,5 +1,6 @@
import { useTranscriptTopics } from "../../lib/apiHooks";
import type { components } from "../../reflector-api";
import { parseMaybeNonEmptyString } from "../../lib/utils";
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
@@ -10,7 +11,11 @@ type TranscriptTopics = {
};
const useTopics = (id: string): TranscriptTopics => {
const { data: topics, isLoading: loading, error } = useTranscriptTopics(id);
const {
data: topics,
isLoading: loading,
error,
} = useTranscriptTopics(parseMaybeNonEmptyString(id));
return {
topics: topics || null,

View File

@@ -1,5 +1,6 @@
import type { components } from "../../reflector-api";
import { useTranscriptWaveform } from "../../lib/apiHooks";
import { parseMaybeNonEmptyString } from "../../lib/utils";
type AudioWaveform = components["schemas"]["AudioWaveform"];
@@ -14,7 +15,7 @@ const useWaveform = (id: string, skip: boolean): AudioWaveFormResponse => {
data: waveform,
isLoading: loading,
error,
} = useTranscriptWaveform(skip ? null : id);
} = useTranscriptWaveform(skip ? null : parseMaybeNonEmptyString(id));
return {
waveform: waveform || null,

View File

@@ -7,6 +7,15 @@ type GetTranscriptSegmentTopic =
components["schemas"]["GetTranscriptSegmentTopic"];
import { useQueryClient } from "@tanstack/react-query";
import { $api, WEBSOCKET_URL } from "../../lib/apiClient";
import {
invalidateTranscript,
invalidateTranscriptTopics,
invalidateTranscriptWaveform,
} from "../../lib/apiHooks";
import { NonEmptyString } from "../../lib/utils";
import type { DagTask } from "../../lib/dagTypes";
export type { DagTask, DagTaskStatus } from "../../lib/dagTypes";
export type UseWebSockets = {
transcriptTextLive: string;
@@ -18,6 +27,7 @@ export type UseWebSockets = {
status: Status | null;
waveform: AudioWaveform | null;
duration: number | null;
dagStatus: DagTask[] | null;
};
export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
@@ -34,6 +44,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
summary: "",
});
const [status, setStatus] = useState<Status | null>(null);
const [dagStatus, setDagStatus] = useState<DagTask[] | null>(null);
const { setError } = useError();
const queryClient = useQueryClient();
@@ -369,15 +380,10 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
});
console.debug("TOPIC event:", message.data);
// Invalidate topics query to sync with WebSocket data
queryClient.invalidateQueries({
queryKey: $api.queryOptions(
"get",
"/v1/transcripts/{transcript_id}/topics",
{
params: { path: { transcript_id: transcriptId } },
},
).queryKey,
});
invalidateTranscriptTopics(
queryClient,
transcriptId as NonEmptyString,
);
break;
case "FINAL_SHORT_SUMMARY":
@@ -388,15 +394,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
if (message.data) {
setFinalSummary(message.data);
// Invalidate transcript query to sync summary
queryClient.invalidateQueries({
queryKey: $api.queryOptions(
"get",
"/v1/transcripts/{transcript_id}",
{
params: { path: { transcript_id: transcriptId } },
},
).queryKey,
});
invalidateTranscript(queryClient, transcriptId as NonEmptyString);
}
break;
@@ -405,15 +403,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
if (message.data) {
setTitle(message.data.title);
// Invalidate transcript query to sync title
queryClient.invalidateQueries({
queryKey: $api.queryOptions(
"get",
"/v1/transcripts/{transcript_id}",
{
params: { path: { transcript_id: transcriptId } },
},
).queryKey,
});
invalidateTranscript(queryClient, transcriptId as NonEmptyString);
}
break;
@@ -424,6 +414,10 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
);
if (message.data) {
setWaveForm(message.data.waveform);
invalidateTranscriptWaveform(
queryClient,
transcriptId as NonEmptyString,
);
}
break;
case "DURATION":
@@ -442,11 +436,31 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
);
}
setStatus(message.data);
invalidateTranscript(queryClient, transcriptId as NonEmptyString);
if (message.data.value === "ended") {
ws.close();
}
break;
case "DAG_STATUS":
if (message.data?.tasks) {
setDagStatus(message.data.tasks);
}
break;
case "DAG_TASK_PROGRESS":
if (message.data) {
setDagStatus(
(prev) =>
prev?.map((t) =>
t.name === message.data.task_name
? { ...t, progress_pct: message.data.progress_pct }
: t,
) ?? null,
);
}
break;
default:
setError(
new Error(`Received unknown WebSocket event: ${message.event}`),
@@ -504,5 +518,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
status,
waveform,
duration,
dagStatus,
};
};

View File

@@ -26,7 +26,7 @@ import { useRouter } from "next/navigation";
import { formatDateTime, formatStartedAgo } from "../lib/timeUtils";
import MeetingMinimalHeader from "../components/MeetingMinimalHeader";
import { NonEmptyString } from "../lib/utils";
import { MeetingId } from "../lib/types";
import { MeetingId, assertMeetingId } from "../lib/types";
type Meeting = components["schemas"]["Meeting"];
@@ -315,7 +315,9 @@ export default function MeetingSelection({
variant="outline"
colorScheme="red"
size="md"
onClick={() => handleEndMeeting(meeting.id)}
onClick={() =>
handleEndMeeting(assertMeetingId(meeting.id))
}
loading={deactivateMeetingMutation.isPending}
>
<Icon as={LuX} me={2} />
@@ -460,7 +462,9 @@ export default function MeetingSelection({
variant="outline"
colorScheme="red"
size="md"
onClick={() => handleEndMeeting(meeting.id)}
onClick={() =>
handleEndMeeting(assertMeetingId(meeting.id))
}
loading={deactivateMeetingMutation.isPending}
>
<Icon as={LuX} me={2} />

View File

@@ -1,11 +1,25 @@
"use client";
import React, { useEffect, useRef } from "react";
import React, { useEffect, useRef, useState } from "react";
import { useQueryClient } from "@tanstack/react-query";
import { WEBSOCKET_URL } from "./apiClient";
import { useAuth } from "./AuthProvider";
import { z } from "zod";
import { invalidateTranscriptLists, TRANSCRIPT_SEARCH_URL } from "./apiHooks";
import {
invalidateTranscript,
invalidateTranscriptLists,
TRANSCRIPT_SEARCH_URL,
} from "./apiHooks";
import type { NonEmptyString } from "./utils";
import type { DagTask } from "./dagTypes";
export type { DagTask, DagTaskStatus } from "./dagTypes";
/**
 * Context carrying the latest known DAG task list per transcript, keyed by
 * transcript id. Defaults to an empty map when no provider is mounted.
 */
const DagStatusContext = React.createContext<Map<string, DagTask[]>>(
  new Map<string, DagTask[]>(),
);

/**
 * Reads the DAG status map from the nearest `DagStatusContext` provider.
 *
 * @returns A map from transcript id to that transcript's task list.
 */
export function useDagStatusMap(): Map<string, DagTask[]> {
  const dagStatusByTranscriptId = React.useContext(DagStatusContext);
  return dagStatusByTranscriptId;
}
const UserEvent = z.object({
event: z.string(),
@@ -95,6 +109,9 @@ export function UserEventsProvider({
const queryClient = useQueryClient();
const tokenRef = useRef<string | null>(null);
const detachRef = useRef<(() => void) | null>(null);
const [dagStatusMap, setDagStatusMap] = useState<Map<string, DagTask[]>>(
new Map(),
);
useEffect(() => {
// Only tear down when the user is truly unauthenticated
@@ -133,20 +150,52 @@ export function UserEventsProvider({
if (!detachRef.current) {
const onMessage = (event: MessageEvent) => {
try {
const msg = UserEvent.parse(JSON.parse(event.data));
const fullMsg = JSON.parse(event.data);
const msg = UserEvent.parse(fullMsg);
const eventName = msg.event;
const invalidateList = () => invalidateTranscriptLists(queryClient);
switch (eventName) {
case "TRANSCRIPT_CREATED":
case "TRANSCRIPT_DELETED":
case "TRANSCRIPT_STATUS":
case "TRANSCRIPT_FINAL_TITLE":
case "TRANSCRIPT_DURATION":
invalidateList().then(() => {});
break;
case "TRANSCRIPT_STATUS": {
invalidateList().then(() => {});
const transcriptId = fullMsg.data?.id as string | undefined;
if (transcriptId) {
invalidateTranscript(
queryClient,
transcriptId as NonEmptyString,
).then(() => {});
}
const status = fullMsg.data?.value as string | undefined;
if (transcriptId && status && status !== "processing") {
setDagStatusMap((prev) => {
const next = new Map(prev);
next.delete(transcriptId);
return next;
});
}
break;
}
case "TRANSCRIPT_DAG_STATUS": {
const transcriptId = fullMsg.data?.id as string | undefined;
const tasks = fullMsg.data?.tasks as DagTask[] | undefined;
if (transcriptId && tasks) {
setDagStatusMap((prev) => {
const next = new Map(prev);
next.set(transcriptId, tasks);
return next;
});
}
break;
}
default:
// Ignore other content events for list updates
break;
@@ -176,5 +225,9 @@ export function UserEventsProvider({
};
}, []);
return <>{children}</>;
return (
<DagStatusContext.Provider value={dagStatusMap}>
{children}
</DagStatusContext.Provider>
);
}

View File

@@ -6,6 +6,7 @@ import { QueryClient, useQueryClient } from "@tanstack/react-query";
import type { components } from "../reflector-api";
import { useAuth } from "./AuthProvider";
import { MeetingId } from "./types";
import { NonEmptyString } from "./utils";
/*
* XXX error types returned from the hooks are not always correct; declared types are ValidationError but real type could be string or any other
@@ -103,7 +104,7 @@ export function useTranscriptProcess() {
});
}
export function useTranscriptGet(transcriptId: string | null) {
export function useTranscriptGet(transcriptId: NonEmptyString | null) {
return $api.useQuery(
"get",
"/v1/transcripts/{transcript_id}",
@@ -120,6 +121,16 @@ export function useTranscriptGet(transcriptId: string | null) {
);
}
/**
 * Marks the cached "get transcript" query for one transcript as stale.
 *
 * The query key is derived from the same `$api.queryOptions` call shape
 * (method, path template, path params) used to build the key here, so the
 * invalidation targets the entry for exactly this transcript id.
 *
 * @param queryClient - The React Query client holding the cache.
 * @param transcriptId - Id of the transcript whose detail query to invalidate.
 * @returns The promise returned by `queryClient.invalidateQueries`.
 */
export const invalidateTranscript = (
  queryClient: QueryClient,
  transcriptId: NonEmptyString,
) => {
  const { queryKey } = $api.queryOptions(
    "get",
    "/v1/transcripts/{transcript_id}",
    {
      params: { path: { transcript_id: transcriptId } },
    },
  );
  return queryClient.invalidateQueries({ queryKey });
};
export function useRoomGet(roomId: string | null) {
const { isAuthenticated } = useAuthReady();
@@ -297,7 +308,7 @@ export function useTranscriptUploadAudio() {
);
}
export function useTranscriptWaveform(transcriptId: string | null) {
export function useTranscriptWaveform(transcriptId: NonEmptyString | null) {
return $api.useQuery(
"get",
"/v1/transcripts/{transcript_id}/audio/waveform",
@@ -312,7 +323,21 @@ export function useTranscriptWaveform(transcriptId: string | null) {
);
}
export function useTranscriptMP3(transcriptId: string | null) {
/**
 * Marks the cached audio-waveform query for one transcript as stale.
 *
 * @param queryClient - The React Query client holding the cache.
 * @param transcriptId - Id of the transcript whose waveform query to invalidate.
 * @returns The promise returned by `queryClient.invalidateQueries`.
 */
export const invalidateTranscriptWaveform = (
  queryClient: QueryClient,
  transcriptId: NonEmptyString,
) => {
  const { queryKey } = $api.queryOptions(
    "get",
    "/v1/transcripts/{transcript_id}/audio/waveform",
    {
      params: { path: { transcript_id: transcriptId } },
    },
  );
  return queryClient.invalidateQueries({ queryKey });
};
export function useTranscriptMP3(transcriptId: NonEmptyString | null) {
const { isAuthenticated } = useAuthReady();
return $api.useQuery(
@@ -329,7 +354,7 @@ export function useTranscriptMP3(transcriptId: string | null) {
);
}
export function useTranscriptTopics(transcriptId: string | null) {
export function useTranscriptTopics(transcriptId: NonEmptyString | null) {
return $api.useQuery(
"get",
"/v1/transcripts/{transcript_id}/topics",
@@ -344,7 +369,23 @@ export function useTranscriptTopics(transcriptId: string | null) {
);
}
export function useTranscriptTopicsWithWords(transcriptId: string | null) {
/**
 * Marks the cached topics query for one transcript as stale.
 *
 * @param queryClient - The React Query client holding the cache.
 * @param transcriptId - Id of the transcript whose topics query to invalidate.
 * @returns The promise returned by `queryClient.invalidateQueries`.
 */
export const invalidateTranscriptTopics = (
  queryClient: QueryClient,
  transcriptId: NonEmptyString,
) => {
  const { queryKey } = $api.queryOptions(
    "get",
    "/v1/transcripts/{transcript_id}/topics",
    {
      params: { path: { transcript_id: transcriptId } },
    },
  );
  return queryClient.invalidateQueries({ queryKey });
};
export function useTranscriptTopicsWithWords(
transcriptId: NonEmptyString | null,
) {
const { isAuthenticated } = useAuthReady();
return $api.useQuery(
@@ -362,7 +403,7 @@ export function useTranscriptTopicsWithWords(transcriptId: string | null) {
}
export function useTranscriptTopicsWithWordsPerSpeaker(
transcriptId: string | null,
transcriptId: NonEmptyString | null,
topicId: string | null,
) {
const { isAuthenticated } = useAuthReady();
@@ -384,7 +425,7 @@ export function useTranscriptTopicsWithWordsPerSpeaker(
);
}
export function useTranscriptParticipants(transcriptId: string | null) {
export function useTranscriptParticipants(transcriptId: NonEmptyString | null) {
const { isAuthenticated } = useAuthReady();
return $api.useQuery(

19
www/app/lib/dagTypes.ts Normal file
View File

@@ -0,0 +1,19 @@
/**
 * Lifecycle state of a single task in the processing DAG.
 *
 * The progress table maps each value to its own indicator: a spinner for
 * "running" and a titled icon for the other four.
 */
export type DagTaskStatus =
| "queued"
| "running"
| "completed"
| "failed"
| "cancelled";
/**
 * One task of the processing DAG as delivered to the client.
 *
 * Field names are snake_case, presumably mirroring the backend payload —
 * confirm against the API schema.
 */
export type DagTask = {
// Task identifier; used as the row key, humanized for display, and matched
// against `task_name` when a progress event updates a single task.
name: string;
// Current lifecycle state (see DagTaskStatus).
status: DagTaskStatus;
// Start timestamp as a string parseable by `new Date(...)`; the elapsed
// timer is only shown when this is set. Presumably null before the task
// starts — confirm.
started_at: string | null;
// NOTE(review): presumably the completion timestamp in the same format as
// `started_at`; not read by the progress table — confirm.
finished_at: string | null;
// Total run time in seconds; displayed for completed tasks when non-null.
duration_seconds: number | null;
// NOTE(review): presumably the names of the tasks this one depends on;
// not read by the progress table — confirm.
parents: string[];
// Error text shown in an expandable row when the task has failed.
error: string | null;
// Child-task counters rendered as a "completed/total" badge; the badge is
// shown only when `children_total` is non-null, and a null
// `children_completed` is displayed as 0.
children_total: number | null;
children_completed: number | null;
// Progress percentage; the table clamps it to 0–100 for the bar width.
progress_pct: number | null;
};