mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-03-29 02:16:46 +00:00
Compare commits
28 Commits
v0.38.1
...
juan/fixes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
deefb63a95 | ||
|
|
8c9435d8ca | ||
|
|
bfaf4f403b | ||
|
|
0258754a4c | ||
|
|
ea89fa5261 | ||
|
|
1f98790e7b | ||
|
|
7b8d190c52 | ||
|
|
f19113a3cf | ||
|
|
e2ba502697 | ||
|
|
74b9b97453 | ||
|
|
9e37d60b3f | ||
|
|
55222ecc47 | ||
|
|
41e7b3e84f | ||
|
|
e5712a4168 | ||
|
|
a76f114378 | ||
|
|
cb1beae90d | ||
|
|
1e396ca0ca | ||
|
|
9a2f973a2e | ||
|
|
a9200d35bf | ||
|
|
5646319e96 | ||
|
|
d0472ebf5f | ||
|
|
628a6d735c | ||
|
|
37a1f01850 | ||
|
|
72dca7cacc | ||
|
|
4ae56b730a | ||
|
|
cf6e867cf1 | ||
|
|
183601a121 | ||
|
|
b53c8da398 |
139
.github/workflows/integration_tests.yml
vendored
Normal file
139
.github/workflows/integration_tests.yml
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
name: Integration Tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
llm_model:
|
||||
description: "LLM model name (overrides LLM_MODEL secret)"
|
||||
required: false
|
||||
default: ""
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
integration:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Start infrastructure services
|
||||
working-directory: server/tests
|
||||
env:
|
||||
LLM_URL: ${{ secrets.LLM_URL }}
|
||||
LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
run: |
|
||||
docker compose -f docker-compose.integration.yml up -d --build postgres redis garage hatchet mock-daily
|
||||
|
||||
- name: Set up Garage bucket and keys
|
||||
working-directory: server/tests
|
||||
run: |
|
||||
GARAGE="docker compose -f docker-compose.integration.yml exec -T garage /garage"
|
||||
GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
|
||||
GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
|
||||
|
||||
echo "Waiting for Garage to be healthy..."
|
||||
for i in $(seq 1 60); do
|
||||
if $GARAGE stats &>/dev/null; then break; fi
|
||||
sleep 2
|
||||
done
|
||||
|
||||
echo "Setting up Garage..."
|
||||
NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
|
||||
LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
|
||||
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
|
||||
$GARAGE layout assign "$NODE_ID" -c 1G -z dc1
|
||||
$GARAGE layout apply --version 1
|
||||
fi
|
||||
|
||||
$GARAGE bucket info reflector-media &>/dev/null || $GARAGE bucket create reflector-media
|
||||
if ! $GARAGE key info reflector-test &>/dev/null; then
|
||||
$GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
|
||||
$GARAGE key rename "$GARAGE_KEY_ID" reflector-test
|
||||
fi
|
||||
$GARAGE bucket allow reflector-media --read --write --key reflector-test
|
||||
|
||||
- name: Wait for Hatchet and generate API token
|
||||
working-directory: server/tests
|
||||
run: |
|
||||
echo "Waiting for Hatchet to be healthy..."
|
||||
for i in $(seq 1 90); do
|
||||
if docker compose -f docker-compose.integration.yml exec -T hatchet curl -sf http://localhost:8888/api/live &>/dev/null; then
|
||||
echo "Hatchet is ready."
|
||||
break
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
|
||||
echo "Generating Hatchet API token..."
|
||||
HATCHET_OUTPUT=$(docker compose -f docker-compose.integration.yml exec -T hatchet \
|
||||
/hatchet-admin token create --config /config --name integration-test 2>&1)
|
||||
HATCHET_TOKEN=$(echo "$HATCHET_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
|
||||
if [ -z "$HATCHET_TOKEN" ]; then
|
||||
echo "ERROR: Failed to extract Hatchet JWT token"
|
||||
exit 1
|
||||
fi
|
||||
echo "HATCHET_CLIENT_TOKEN=${HATCHET_TOKEN}" >> $GITHUB_ENV
|
||||
|
||||
- name: Start backend services
|
||||
working-directory: server/tests
|
||||
env:
|
||||
LLM_URL: ${{ secrets.LLM_URL }}
|
||||
LLM_MODEL: ${{ inputs.llm_model || secrets.LLM_MODEL }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
run: |
|
||||
# Export garage and hatchet credentials for backend services
|
||||
export GARAGE_KEY_ID="${{ env.GARAGE_KEY_ID }}"
|
||||
export GARAGE_KEY_SECRET="${{ env.GARAGE_KEY_SECRET }}"
|
||||
export HATCHET_CLIENT_TOKEN="${{ env.HATCHET_CLIENT_TOKEN }}"
|
||||
|
||||
docker compose -f docker-compose.integration.yml up -d \
|
||||
server worker hatchet-worker-cpu hatchet-worker-llm test-runner
|
||||
|
||||
- name: Wait for server health check
|
||||
working-directory: server/tests
|
||||
run: |
|
||||
echo "Waiting for server to be healthy..."
|
||||
for i in $(seq 1 60); do
|
||||
if docker compose -f docker-compose.integration.yml exec -T test-runner \
|
||||
curl -sf http://server:1250/health &>/dev/null; then
|
||||
echo "Server is ready."
|
||||
break
|
||||
fi
|
||||
sleep 3
|
||||
done
|
||||
|
||||
- name: Run DB migrations
|
||||
working-directory: server/tests
|
||||
run: |
|
||||
docker compose -f docker-compose.integration.yml exec -T server \
|
||||
uv run alembic upgrade head
|
||||
|
||||
- name: Run integration tests
|
||||
working-directory: server/tests
|
||||
run: |
|
||||
docker compose -f docker-compose.integration.yml exec -T test-runner \
|
||||
uv run pytest tests/integration/ -v -x
|
||||
|
||||
- name: Collect logs on failure
|
||||
if: failure()
|
||||
working-directory: server/tests
|
||||
run: |
|
||||
docker compose -f docker-compose.integration.yml logs --tail=500 > integration-logs.txt 2>&1
|
||||
|
||||
- name: Upload logs artifact
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: integration-logs
|
||||
path: server/tests/integration-logs.txt
|
||||
retention-days: 7
|
||||
|
||||
- name: Teardown
|
||||
if: always()
|
||||
working-directory: server/tests
|
||||
run: |
|
||||
docker compose -f docker-compose.integration.yml down -v --remove-orphans
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -24,4 +24,10 @@ www/.env.production
|
||||
.secrets
|
||||
opencode.json
|
||||
|
||||
certs/
|
||||
docker-compose.ca.yml
|
||||
docker-compose.gpu-ca.yml
|
||||
Caddyfile.gpu-host
|
||||
.env.gpu-host
|
||||
vibedocs/
|
||||
server/tests/integration/logs/
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# See https://pre-commit.com for more information
|
||||
# See https://pre-commit.com/hooks.html for more hooks
|
||||
exclude: '(^uv\.lock$|pnpm-lock\.yaml$)'
|
||||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
|
||||
38
CHANGELOG.md
38
CHANGELOG.md
@@ -1,5 +1,43 @@
|
||||
# Changelog
|
||||
|
||||
## [0.41.0](https://github.com/GreyhavenHQ/reflector/compare/v0.40.0...v0.41.0) (2026-03-25)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* add auto-generated captions, speaker-colored progress bar with sync controls, and speaker tooltip to cloud video player ([#926](https://github.com/GreyhavenHQ/reflector/issues/926)) ([f19113a](https://github.com/GreyhavenHQ/reflector/commit/f19113a3cfa27797a70b9496bfcf1baff9d89f0d))
|
||||
* send email in share transcript and add email sending in room ([#924](https://github.com/GreyhavenHQ/reflector/issues/924)) ([e2ba502](https://github.com/GreyhavenHQ/reflector/commit/e2ba502697ce331c4d87fb019648fcbe4e7cca73))
|
||||
* zulip dag monitor for failed runs ([#928](https://github.com/GreyhavenHQ/reflector/issues/928)) ([1f98790](https://github.com/GreyhavenHQ/reflector/commit/1f98790e7bc58013690ec81aefa051da5e36e93e))
|
||||
|
||||
## [0.40.0](https://github.com/GreyhavenHQ/reflector/compare/v0.39.0...v0.40.0) (2026-03-20)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* allow participants to ask for email transcript ([#923](https://github.com/GreyhavenHQ/reflector/issues/923)) ([55222ec](https://github.com/GreyhavenHQ/reflector/commit/55222ecc4736f99ad461f03a006c8d97b5876142))
|
||||
* download files, show cloud video, solf deletion with no reprocessing ([#920](https://github.com/GreyhavenHQ/reflector/issues/920)) ([a76f114](https://github.com/GreyhavenHQ/reflector/commit/a76f1143783d3cf137a8847a851b72302e04445b))
|
||||
|
||||
## [0.39.0](https://github.com/GreyhavenHQ/reflector/compare/v0.38.2...v0.39.0) (2026-03-18)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* migrate file and live post-processing pipelines from Celery to Hatchet workflow engine ([#911](https://github.com/GreyhavenHQ/reflector/issues/911)) ([37a1f01](https://github.com/GreyhavenHQ/reflector/commit/37a1f0185057dd43b68df2b12bb08d3b18e28d34))
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* integration tests runner in CI ([#919](https://github.com/GreyhavenHQ/reflector/issues/919)) ([1e396ca](https://github.com/GreyhavenHQ/reflector/commit/1e396ca0ca91bc9d2645ddfc63a1576469491faa))
|
||||
* latest vulns ([#915](https://github.com/GreyhavenHQ/reflector/issues/915)) ([a9200d3](https://github.com/GreyhavenHQ/reflector/commit/a9200d35bf856f65f24a4f34931ebe0d75ad0382))
|
||||
|
||||
## [0.38.2](https://github.com/GreyhavenHQ/reflector/compare/v0.38.1...v0.38.2) (2026-03-12)
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* add auth guards to prevent anonymous access to write endpoints in non-public mode ([#907](https://github.com/GreyhavenHQ/reflector/issues/907)) ([cf6e867](https://github.com/GreyhavenHQ/reflector/commit/cf6e867cf12c42411e5a7412f6ec44eee8351665))
|
||||
* add tests that check some of the issues are already fixed ([#905](https://github.com/GreyhavenHQ/reflector/issues/905)) ([b53c8da](https://github.com/GreyhavenHQ/reflector/commit/b53c8da3981c394bdab08504b45d25f62c35495a))
|
||||
|
||||
## [0.38.1](https://github.com/GreyhavenHQ/reflector/compare/v0.38.0...v0.38.1) (2026-03-06)
|
||||
|
||||
|
||||
|
||||
33
CLAUDE.md
33
CLAUDE.md
@@ -41,14 +41,14 @@ uv run celery -A reflector.worker.app beat
|
||||
|
||||
**Testing:**
|
||||
```bash
|
||||
# Run all tests with coverage
|
||||
uv run pytest
|
||||
# Run all tests with coverage (requires Redis on localhost)
|
||||
REDIS_HOST=localhost REDIS_PORT=6379 uv run pytest
|
||||
|
||||
# Run specific test file
|
||||
uv run pytest tests/test_transcripts.py
|
||||
REDIS_HOST=localhost REDIS_PORT=6379 uv run pytest tests/test_transcripts.py
|
||||
|
||||
# Run tests with verbose output
|
||||
uv run pytest -v
|
||||
REDIS_HOST=localhost REDIS_PORT=6379 uv run pytest -v
|
||||
```
|
||||
|
||||
**Process Audio Files:**
|
||||
@@ -160,6 +160,21 @@ All endpoints prefixed `/v1/`:
|
||||
- **Frontend**: No current test suite - opportunities for Jest/React Testing Library
|
||||
- **Coverage**: Backend maintains test coverage reports in `htmlcov/`
|
||||
|
||||
### Integration Tests (DO NOT run unless explicitly asked)
|
||||
|
||||
There are end-to-end integration tests in `server/tests/integration/` that spin up the full stack (PostgreSQL, Redis, Hatchet, Garage, mock-daily, server, workers) via Docker Compose and exercise real processing pipelines. These tests are:
|
||||
|
||||
- `test_file_pipeline.py` — File upload → FilePipeline
|
||||
- `test_live_pipeline.py` — WebRTC stream → LivePostPipeline
|
||||
- `test_multitrack_pipeline.py` — Multitrack → DailyMultitrackPipeline
|
||||
|
||||
**Important:**
|
||||
- These tests are **excluded** from normal `uv run pytest` runs via `--ignore=tests/integration` in pyproject.toml.
|
||||
- Do **NOT** run them as part of verification, code review, or general testing unless the user explicitly asks.
|
||||
- They require Docker, external LLM credentials, and HuggingFace token — they cannot run in a regular test environment.
|
||||
- To run locally: `./scripts/run-integration-tests.sh` (requires env vars: `LLM_URL`, `LLM_API_KEY`, `HF_TOKEN`).
|
||||
- In CI: triggered manually via the "Integration Tests" GitHub Actions workflow (`workflow_dispatch`).
|
||||
|
||||
## GPU Processing
|
||||
|
||||
Modal.com integration for scalable ML processing:
|
||||
@@ -177,3 +192,13 @@ Modal.com integration for scalable ML processing:
|
||||
## Pipeline/worker related info
|
||||
|
||||
If you need to do any worker/pipeline related work, search for "Pipeline" classes and their "create" or "build" methods to find the main processor sequence. Look for task orchestration patterns (like "chord", "group", or "chain") to identify the post-processing flow with parallel execution chains. This will give you abstract vision on how processing pipeling is organized.
|
||||
|
||||
## Documentation
|
||||
|
||||
- New documentation files go in `docsv2/`, not in `docs/docs/`.
|
||||
- Existing `docs/` directory contains legacy Docusaurus docs.
|
||||
|
||||
## Code Style
|
||||
|
||||
- Always put imports at the top of the file. Let ruff/pre-commit handle sorting and formatting of imports.
|
||||
- Exception: In Hatchet pipeline task functions, DB controller imports (e.g., `transcripts_controller`, `meetings_controller`) stay as deferred/inline imports inside `fresh_db_connection()` blocks — this is intentional to avoid sharing DB connections across forked processes. Non-DB imports (utilities, services) should still go at the top of the file.
|
||||
|
||||
106
docker-compose.gpu-host.yml
Normal file
106
docker-compose.gpu-host.yml
Normal file
@@ -0,0 +1,106 @@
|
||||
# Standalone GPU host for Reflector — transcription, diarization, translation.
|
||||
#
|
||||
# Usage: ./scripts/setup-gpu-host.sh [--domain DOMAIN] [--custom-ca PATH] [--api-key KEY] [--cpu]
|
||||
# or: docker compose -f docker-compose.gpu-host.yml --profile gpu [--profile caddy] up -d
|
||||
#
|
||||
# Processing mode (pick ONE — mutually exclusive, both bind port 8000):
|
||||
# --profile gpu NVIDIA GPU container (requires nvidia-container-toolkit)
|
||||
# --profile cpu CPU-only container (no GPU required, slower)
|
||||
#
|
||||
# Optional:
|
||||
# --profile caddy Caddy reverse proxy with HTTPS
|
||||
#
|
||||
# This file is checked into the repo. The setup script generates:
|
||||
# - .env.gpu-host (HF_TOKEN, API key, port config)
|
||||
# - Caddyfile.gpu-host (Caddy config, only with --domain)
|
||||
# - docker-compose.gpu-ca.yml (CA cert mounts, only with --custom-ca)
|
||||
|
||||
services:
|
||||
# ===========================================================
|
||||
# GPU service — NVIDIA GPU accelerated
|
||||
# Activated with: --profile gpu
|
||||
# ===========================================================
|
||||
|
||||
gpu:
|
||||
build:
|
||||
context: ./gpu/self_hosted
|
||||
dockerfile: Dockerfile
|
||||
profiles: [gpu]
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "${GPU_HOST_PORT:-8000}:8000"
|
||||
environment:
|
||||
HF_TOKEN: ${HF_TOKEN:-}
|
||||
REFLECTOR_GPU_APIKEY: ${REFLECTOR_GPU_APIKEY:-}
|
||||
volumes:
|
||||
- gpu_cache:/root/.cache
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
start_period: 120s
|
||||
networks:
|
||||
default:
|
||||
aliases:
|
||||
- transcription
|
||||
|
||||
# ===========================================================
|
||||
# CPU service — no GPU required, uses Dockerfile.cpu
|
||||
# Activated with: --profile cpu
|
||||
# Mutually exclusive with gpu (both bind port 8000)
|
||||
# ===========================================================
|
||||
|
||||
cpu:
|
||||
build:
|
||||
context: ./gpu/self_hosted
|
||||
dockerfile: Dockerfile.cpu
|
||||
profiles: [cpu]
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "${GPU_HOST_PORT:-8000}:8000"
|
||||
environment:
|
||||
HF_TOKEN: ${HF_TOKEN:-}
|
||||
REFLECTOR_GPU_APIKEY: ${REFLECTOR_GPU_APIKEY:-}
|
||||
volumes:
|
||||
- gpu_cache:/root/.cache
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
start_period: 120s
|
||||
networks:
|
||||
default:
|
||||
aliases:
|
||||
- transcription
|
||||
|
||||
# ===========================================================
|
||||
# Caddy — reverse proxy with HTTPS (optional)
|
||||
# Activated with: --profile caddy
|
||||
# Proxies to "transcription" network alias (works for both gpu and cpu)
|
||||
# ===========================================================
|
||||
|
||||
caddy:
|
||||
image: caddy:2-alpine
|
||||
profiles: [caddy]
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "80:80"
|
||||
- "${CADDY_HTTPS_PORT:-443}:443"
|
||||
volumes:
|
||||
- ./Caddyfile.gpu-host:/etc/caddy/Caddyfile:ro
|
||||
- caddy_data:/data
|
||||
- caddy_config:/config
|
||||
|
||||
volumes:
|
||||
gpu_cache:
|
||||
caddy_data:
|
||||
caddy_config:
|
||||
@@ -36,7 +36,7 @@ services:
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "127.0.0.1:1250:1250"
|
||||
- "51000-51100:51000-51100/udp"
|
||||
- "40000-40100:40000-40100/udp"
|
||||
env_file:
|
||||
- ./server/.env
|
||||
environment:
|
||||
@@ -50,7 +50,10 @@ services:
|
||||
# HF_TOKEN needed for in-process pyannote diarization (--cpu mode)
|
||||
HF_TOKEN: ${HF_TOKEN:-}
|
||||
# WebRTC: fixed UDP port range for ICE candidates (mapped above)
|
||||
WEBRTC_PORT_RANGE: "51000-51100"
|
||||
WEBRTC_PORT_RANGE: "40000-40100"
|
||||
# Hatchet workflow engine (always-on for processing pipelines)
|
||||
HATCHET_CLIENT_SERVER_URL: ${HATCHET_CLIENT_SERVER_URL:-http://hatchet:8888}
|
||||
HATCHET_CLIENT_HOST_PORT: ${HATCHET_CLIENT_HOST_PORT:-hatchet:7077}
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
@@ -75,6 +78,9 @@ services:
|
||||
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
||||
# ML backend config comes from env_file (server/.env), set per-mode by setup script
|
||||
HF_TOKEN: ${HF_TOKEN:-}
|
||||
# Hatchet workflow engine (always-on for processing pipelines)
|
||||
HATCHET_CLIENT_SERVER_URL: ${HATCHET_CLIENT_SERVER_URL:-http://hatchet:8888}
|
||||
HATCHET_CLIENT_HOST_PORT: ${HATCHET_CLIENT_HOST_PORT:-hatchet:7077}
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
@@ -126,6 +132,8 @@ services:
|
||||
redis:
|
||||
image: redis:7.2-alpine
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "6379:6379"
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 30s
|
||||
@@ -301,20 +309,38 @@ services:
|
||||
- server
|
||||
|
||||
# ===========================================================
|
||||
# Hatchet + Daily.co workers (optional — for Daily.co multitrack processing)
|
||||
# Auto-enabled when DAILY_API_KEY is configured in server/r
|
||||
# Mailpit — local SMTP sink for testing email transcript notifications
|
||||
# Start with: --profile mailpit
|
||||
# Web UI at http://localhost:8025
|
||||
# ===========================================================
|
||||
|
||||
mailpit:
|
||||
image: axllent/mailpit:latest
|
||||
profiles: [mailpit]
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "127.0.0.1:8025:8025" # Web UI
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8025/api/v1/messages"]
|
||||
interval: 10s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
# ===========================================================
|
||||
# Hatchet workflow engine + workers
|
||||
# Required for all processing pipelines (file, live, Daily.co multitrack).
|
||||
# Always-on — every selfhosted deployment needs Hatchet.
|
||||
# ===========================================================
|
||||
|
||||
hatchet:
|
||||
image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
|
||||
profiles: [dailyco]
|
||||
restart: on-failure
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "8888:8888"
|
||||
- "7078:7077"
|
||||
- "127.0.0.1:8888:8888"
|
||||
- "127.0.0.1:7078:7077"
|
||||
env_file:
|
||||
- ./.env.hatchet
|
||||
environment:
|
||||
@@ -363,7 +389,6 @@ services:
|
||||
context: ./server
|
||||
dockerfile: Dockerfile
|
||||
image: monadicalsas/reflector-backend:latest
|
||||
profiles: [dailyco]
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- ./server/.env
|
||||
|
||||
52
docs/pnpm-lock.yaml
generated
52
docs/pnpm-lock.yaml
generated
@@ -701,6 +701,10 @@ packages:
|
||||
resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==}
|
||||
engines: {node: '>=6.9.0'}
|
||||
|
||||
'@babel/runtime@7.29.2':
|
||||
resolution: {integrity: sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==}
|
||||
engines: {node: '>=6.9.0'}
|
||||
|
||||
'@babel/template@7.28.6':
|
||||
resolution: {integrity: sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==}
|
||||
engines: {node: '>=6.9.0'}
|
||||
@@ -1490,42 +1494,36 @@ packages:
|
||||
engines: {node: '>= 10.0.0'}
|
||||
cpu: [arm]
|
||||
os: [linux]
|
||||
libc: [glibc]
|
||||
|
||||
'@parcel/watcher-linux-arm-musl@2.5.6':
|
||||
resolution: {integrity: sha512-Ve3gUCG57nuUUSyjBq/MAM0CzArtuIOxsBdQ+ftz6ho8n7s1i9E1Nmk/xmP323r2YL0SONs1EuwqBp2u1k5fxg==}
|
||||
engines: {node: '>= 10.0.0'}
|
||||
cpu: [arm]
|
||||
os: [linux]
|
||||
libc: [musl]
|
||||
|
||||
'@parcel/watcher-linux-arm64-glibc@2.5.6':
|
||||
resolution: {integrity: sha512-f2g/DT3NhGPdBmMWYoxixqYr3v/UXcmLOYy16Bx0TM20Tchduwr4EaCbmxh1321TABqPGDpS8D/ggOTaljijOA==}
|
||||
engines: {node: '>= 10.0.0'}
|
||||
cpu: [arm64]
|
||||
os: [linux]
|
||||
libc: [glibc]
|
||||
|
||||
'@parcel/watcher-linux-arm64-musl@2.5.6':
|
||||
resolution: {integrity: sha512-qb6naMDGlbCwdhLj6hgoVKJl2odL34z2sqkC7Z6kzir8b5W65WYDpLB6R06KabvZdgoHI/zxke4b3zR0wAbDTA==}
|
||||
engines: {node: '>= 10.0.0'}
|
||||
cpu: [arm64]
|
||||
os: [linux]
|
||||
libc: [musl]
|
||||
|
||||
'@parcel/watcher-linux-x64-glibc@2.5.6':
|
||||
resolution: {integrity: sha512-kbT5wvNQlx7NaGjzPFu8nVIW1rWqV780O7ZtkjuWaPUgpv2NMFpjYERVi0UYj1msZNyCzGlaCWEtzc+exjMGbQ==}
|
||||
engines: {node: '>= 10.0.0'}
|
||||
cpu: [x64]
|
||||
os: [linux]
|
||||
libc: [glibc]
|
||||
|
||||
'@parcel/watcher-linux-x64-musl@2.5.6':
|
||||
resolution: {integrity: sha512-1JRFeC+h7RdXwldHzTsmdtYR/Ku8SylLgTU/reMuqdVD7CtLwf0VR1FqeprZ0eHQkO0vqsbvFLXUmYm/uNKJBg==}
|
||||
engines: {node: '>= 10.0.0'}
|
||||
cpu: [x64]
|
||||
os: [linux]
|
||||
libc: [musl]
|
||||
|
||||
'@parcel/watcher-win32-arm64@2.5.6':
|
||||
resolution: {integrity: sha512-3ukyebjc6eGlw9yRt678DxVF7rjXatWiHvTXqphZLvo7aC5NdEgFufVwjFfY51ijYEWpXbqF5jtrK275z52D4Q==}
|
||||
@@ -3410,8 +3408,8 @@ packages:
|
||||
graphlib@2.1.8:
|
||||
resolution: {integrity: sha512-jcLLfkpoVGmH7/InMC/1hIvOPSUh38oJtGhvrOFGzioE1DZ+0YW16RgmOJhHiuWTvGiJQ9Z1Ik43JvkRPRvE+A==}
|
||||
|
||||
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec:
|
||||
resolution: {tarball: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec}
|
||||
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e:
|
||||
resolution: {tarball: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e}
|
||||
version: 4.0.3
|
||||
engines: {node: '>=6.0'}
|
||||
|
||||
@@ -4555,12 +4553,12 @@ packages:
|
||||
picocolors@1.1.1:
|
||||
resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==}
|
||||
|
||||
picomatch@2.3.1:
|
||||
resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==}
|
||||
picomatch@2.3.2:
|
||||
resolution: {integrity: sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==}
|
||||
engines: {node: '>=8.6'}
|
||||
|
||||
picomatch@4.0.3:
|
||||
resolution: {integrity: sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==}
|
||||
picomatch@4.0.4:
|
||||
resolution: {integrity: sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
pirates@4.0.7:
|
||||
@@ -7024,6 +7022,8 @@ snapshots:
|
||||
|
||||
'@babel/runtime@7.28.6': {}
|
||||
|
||||
'@babel/runtime@7.29.2': {}
|
||||
|
||||
'@babel/template@7.28.6':
|
||||
dependencies:
|
||||
'@babel/code-frame': 7.29.0
|
||||
@@ -8162,7 +8162,7 @@ snapshots:
|
||||
fs-extra: 11.3.3
|
||||
github-slugger: 1.5.0
|
||||
globby: 11.1.0
|
||||
gray-matter: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec
|
||||
gray-matter: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e
|
||||
jiti: 1.21.7
|
||||
js-yaml: 4.1.1
|
||||
lodash: 4.17.23
|
||||
@@ -8473,7 +8473,7 @@ snapshots:
|
||||
detect-libc: 2.1.2
|
||||
is-glob: 4.0.3
|
||||
node-addon-api: 7.1.1
|
||||
picomatch: 4.0.3
|
||||
picomatch: 4.0.4
|
||||
optionalDependencies:
|
||||
'@parcel/watcher-android-arm64': 2.5.6
|
||||
'@parcel/watcher-darwin-arm64': 2.5.6
|
||||
@@ -8645,7 +8645,7 @@ snapshots:
|
||||
|
||||
'@slorber/react-helmet-async@1.3.0(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
|
||||
dependencies:
|
||||
'@babel/runtime': 7.28.6
|
||||
'@babel/runtime': 7.29.2
|
||||
invariant: 2.2.4
|
||||
prop-types: 15.8.1
|
||||
react: 19.2.4
|
||||
@@ -9244,7 +9244,7 @@ snapshots:
|
||||
anymatch@3.1.3:
|
||||
dependencies:
|
||||
normalize-path: 3.0.0
|
||||
picomatch: 2.3.1
|
||||
picomatch: 2.3.2
|
||||
|
||||
arg@5.0.2: {}
|
||||
|
||||
@@ -10485,9 +10485,9 @@ snapshots:
|
||||
dependencies:
|
||||
websocket-driver: 0.7.4
|
||||
|
||||
fdir@6.5.0(picomatch@4.0.3):
|
||||
fdir@6.5.0(picomatch@4.0.4):
|
||||
optionalDependencies:
|
||||
picomatch: 4.0.3
|
||||
picomatch: 4.0.4
|
||||
|
||||
feed@4.2.2:
|
||||
dependencies:
|
||||
@@ -10658,7 +10658,7 @@ snapshots:
|
||||
dependencies:
|
||||
lodash: 4.17.23
|
||||
|
||||
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec:
|
||||
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e:
|
||||
dependencies:
|
||||
js-yaml: 4.1.1
|
||||
kind-of: 6.0.3
|
||||
@@ -11080,7 +11080,7 @@ snapshots:
|
||||
chalk: 4.1.2
|
||||
ci-info: 3.9.0
|
||||
graceful-fs: 4.2.11
|
||||
picomatch: 2.3.1
|
||||
picomatch: 2.3.2
|
||||
|
||||
jest-worker@27.5.1:
|
||||
dependencies:
|
||||
@@ -11780,7 +11780,7 @@ snapshots:
|
||||
micromatch@4.0.8:
|
||||
dependencies:
|
||||
braces: 3.0.3
|
||||
picomatch: 2.3.1
|
||||
picomatch: 2.3.2
|
||||
|
||||
mime-db@1.33.0: {}
|
||||
|
||||
@@ -12146,9 +12146,9 @@ snapshots:
|
||||
|
||||
picocolors@1.1.1: {}
|
||||
|
||||
picomatch@2.3.1: {}
|
||||
picomatch@2.3.2: {}
|
||||
|
||||
picomatch@4.0.3: {}
|
||||
picomatch@4.0.4: {}
|
||||
|
||||
pirates@4.0.7: {}
|
||||
|
||||
@@ -12852,7 +12852,7 @@ snapshots:
|
||||
|
||||
readdirp@3.6.0:
|
||||
dependencies:
|
||||
picomatch: 2.3.1
|
||||
picomatch: 2.3.2
|
||||
|
||||
readdirp@4.1.2: {}
|
||||
|
||||
@@ -13510,8 +13510,8 @@ snapshots:
|
||||
|
||||
tinyglobby@0.2.15:
|
||||
dependencies:
|
||||
fdir: 6.5.0(picomatch@4.0.3)
|
||||
picomatch: 4.0.3
|
||||
fdir: 6.5.0(picomatch@4.0.4)
|
||||
picomatch: 4.0.4
|
||||
|
||||
tinypool@1.1.1: {}
|
||||
|
||||
|
||||
337
docsv2/custom-ca-setup.md
Normal file
337
docsv2/custom-ca-setup.md
Normal file
@@ -0,0 +1,337 @@
|
||||
# Custom CA Certificate Setup
|
||||
|
||||
Use a private Certificate Authority (CA) with Reflector self-hosted deployments. This covers two scenarios:
|
||||
|
||||
1. **Custom local domain** — Serve Reflector over HTTPS on an internal domain (e.g., `reflector.local`) using certs signed by your own CA
|
||||
2. **Backend CA trust** — Let Reflector's backend services (server, workers, GPU) make HTTPS calls to GPU, LLM, or other internal services behind your private CA
|
||||
|
||||
Both can be used independently or together.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Generate test certificates
|
||||
|
||||
```bash
|
||||
./scripts/generate-certs.sh reflector.local
|
||||
```
|
||||
|
||||
This creates `certs/` with:
|
||||
- `ca.key` + `ca.crt` — Root CA (10-year validity)
|
||||
- `server-key.pem` + `server.pem` — Server certificate (1-year, SAN: domain + localhost + 127.0.0.1)
|
||||
|
||||
### Deploy with custom CA + domain
|
||||
|
||||
```bash
|
||||
# Add domain to /etc/hosts on the server (use 127.0.0.1 for local, or server LAN IP for network access)
|
||||
echo "127.0.0.1 reflector.local" | sudo tee -a /etc/hosts
|
||||
|
||||
# Run setup — pass the certs directory
|
||||
./scripts/setup-selfhosted.sh --gpu --caddy --domain reflector.local --custom-ca certs/
|
||||
|
||||
# Trust the CA on your machine (see "Trust the CA" section below)
|
||||
```
|
||||
|
||||
### Deploy with CA trust only (GPU/LLM behind private CA)
|
||||
|
||||
```bash
|
||||
# Only need the CA cert file — no Caddy TLS certs needed
|
||||
./scripts/setup-selfhosted.sh --hosted --custom-ca /path/to/corporate-ca.crt
|
||||
```
|
||||
|
||||
## How `--custom-ca` Works
|
||||
|
||||
The flag accepts a **directory** or a **single file**:
|
||||
|
||||
### Directory mode
|
||||
|
||||
```bash
|
||||
--custom-ca certs/
|
||||
```
|
||||
|
||||
Looks for these files by convention:
|
||||
- `ca.crt` (required) — CA certificate to trust
|
||||
- `server.pem` + `server-key.pem` (optional) — TLS certificate/key for Caddy
|
||||
|
||||
If `server.pem` + `server-key.pem` are found AND `--domain` is provided:
|
||||
- Caddy serves HTTPS using those certs
|
||||
- Backend containers trust the CA for outbound calls
|
||||
|
||||
If only `ca.crt` is found:
|
||||
- Backend containers trust the CA for outbound calls
|
||||
- Caddy is unaffected (uses Let's Encrypt, self-signed, or no Caddy)
|
||||
|
||||
### Single file mode
|
||||
|
||||
```bash
|
||||
--custom-ca /path/to/corporate-ca.crt
|
||||
```
|
||||
|
||||
Only injects CA trust into backend containers. No Caddy TLS changes.
|
||||
|
||||
## Scenarios
|
||||
|
||||
### Scenario 1: Custom local domain
|
||||
|
||||
Your Reflector instance runs on an internal network. You want `https://reflector.local` with proper TLS (no browser warnings).
|
||||
|
||||
```bash
|
||||
# 1. Generate certs
|
||||
./scripts/generate-certs.sh reflector.local
|
||||
|
||||
# 2. Add to /etc/hosts on the server
|
||||
echo "127.0.0.1 reflector.local" | sudo tee -a /etc/hosts
|
||||
|
||||
# 3. Deploy
|
||||
./scripts/setup-selfhosted.sh --gpu --garage --caddy --domain reflector.local --custom-ca certs/
|
||||
|
||||
# 4. Trust the CA on your machine (see "Trust the CA" section below)
|
||||
```
|
||||
|
||||
If other machines on the network need to access it, add the server's LAN IP to `/etc/hosts` on those machines instead:
|
||||
```bash
|
||||
echo "192.168.1.100 reflector.local" | sudo tee -a /etc/hosts
|
||||
```
|
||||
|
||||
And include that IP as an extra SAN when generating certs:
|
||||
```bash
|
||||
./scripts/generate-certs.sh reflector.local "IP:192.168.1.100"
|
||||
```
|
||||
|
||||
### Scenario 2: GPU/LLM behind corporate CA
|
||||
|
||||
Your GPU or LLM server (e.g., `https://gpu.internal.corp`) uses certificates signed by your corporate CA. Reflector's backend needs to trust that CA for outbound HTTPS calls.
|
||||
|
||||
```bash
|
||||
# Get the CA certificate from your IT team (PEM format)
|
||||
# Then deploy — Caddy can still use Let's Encrypt or self-signed
|
||||
./scripts/setup-selfhosted.sh --hosted --garage --caddy --custom-ca /path/to/corporate-ca.crt
|
||||
```
|
||||
|
||||
This works because:
|
||||
- **TLS cert/key** = "this is my identity" — for Caddy to serve HTTPS to browsers
|
||||
- **CA cert** = "I trust this authority" — for backend containers to verify outbound connections
|
||||
|
||||
Your Reflector frontend can use Let's Encrypt (public domain) or self-signed certs, while the backend trusts a completely different CA for GPU/LLM calls.
|
||||
|
||||
### Scenario 3: Both combined (same CA)
|
||||
|
||||
Custom domain + GPU/LLM all behind the same CA:
|
||||
|
||||
```bash
|
||||
./scripts/generate-certs.sh reflector.local "DNS:gpu.local"
|
||||
./scripts/setup-selfhosted.sh --gpu --garage --caddy --domain reflector.local --custom-ca certs/
|
||||
```
|
||||
|
||||
### Scenario 4: Multiple CAs (local domain + remote GPU on different CA)
|
||||
|
||||
Your Reflector uses one CA for `reflector.local`, but the GPU host uses a different CA:
|
||||
|
||||
```bash
|
||||
# Your local domain setup
|
||||
./scripts/generate-certs.sh reflector.local
|
||||
|
||||
# Deploy with your CA + trust the GPU host's CA too
|
||||
./scripts/setup-selfhosted.sh --hosted --garage --caddy \
|
||||
--domain reflector.local \
|
||||
--custom-ca certs/ \
|
||||
--extra-ca /path/to/gpu-machine-ca.crt
|
||||
```
|
||||
|
||||
`--extra-ca` appends additional CA certs to the trust bundle. Backend containers trust ALL CAs — your local domain AND the GPU host's certs both work.
|
||||
|
||||
You can repeat `--extra-ca` for multiple remote services:
|
||||
```bash
|
||||
--extra-ca /path/to/gpu-ca.crt --extra-ca /path/to/llm-ca.crt
|
||||
```
|
||||
|
||||
For setting up a dedicated GPU host, see [Standalone GPU Host Setup](gpu-host-setup.md).
|
||||
|
||||
## Trust the CA on Client Machines
|
||||
|
||||
After deploying, clients need to trust the CA to avoid browser warnings.
|
||||
|
||||
### macOS
|
||||
|
||||
```bash
|
||||
sudo security add-trusted-cert -d -r trustRoot \
|
||||
-k /Library/Keychains/System.keychain certs/ca.crt
|
||||
```
|
||||
|
||||
### Linux (Ubuntu/Debian)
|
||||
|
||||
```bash
|
||||
sudo cp certs/ca.crt /usr/local/share/ca-certificates/reflector-ca.crt
|
||||
sudo update-ca-certificates
|
||||
```
|
||||
|
||||
### Linux (RHEL/Fedora)
|
||||
|
||||
```bash
|
||||
sudo cp certs/ca.crt /etc/pki/ca-trust/source/anchors/reflector-ca.crt
|
||||
sudo update-ca-trust
|
||||
```
|
||||
|
||||
### Windows (PowerShell as admin)
|
||||
|
||||
```powershell
|
||||
Import-Certificate -FilePath .\certs\ca.crt -CertStoreLocation Cert:\LocalMachine\Root
|
||||
```
|
||||
|
||||
### Firefox (all platforms)
|
||||
|
||||
Firefox uses its own certificate store:
|
||||
1. Settings > Privacy & Security > View Certificates
|
||||
2. Authorities tab > Import
|
||||
3. Select `ca.crt` and check "Trust this CA to identify websites"
|
||||
|
||||
## How It Works Internally
|
||||
|
||||
### Docker entrypoint CA injection
|
||||
|
||||
Each backend container (server, worker, beat, hatchet workers, GPU) has an entrypoint script (`docker-entrypoint.sh`) that:
|
||||
|
||||
1. Checks if a CA cert is mounted at `/usr/local/share/ca-certificates/custom-ca.crt`
|
||||
2. If present, runs `update-ca-certificates` to create a **combined bundle** (system CAs + custom CA)
|
||||
3. Sets environment variables so all Python/gRPC libraries use the combined bundle:
|
||||
|
||||
| Env var | Covers |
|
||||
|---------|--------|
|
||||
| `SSL_CERT_FILE` | httpx, OpenAI SDK, llama-index, Python ssl module |
|
||||
| `REQUESTS_CA_BUNDLE` | requests library (transitive dependencies) |
|
||||
| `CURL_CA_BUNDLE` | curl CLI (container healthchecks) |
|
||||
| `GRPC_DEFAULT_SSL_ROOTS_FILE_PATH` | grpcio (Hatchet gRPC client) |
|
||||
|
||||
When no CA cert is mounted, the entrypoint is a no-op — containers behave exactly as before.
|
||||
|
||||
### Why this replaces manual certifi patching
|
||||
|
||||
Previously, the workaround for trusting a private CA in Python was to patch certifi's bundle directly:
|
||||
|
||||
```bash
|
||||
# OLD approach — fragile, do NOT use
|
||||
cat custom-ca.crt >> $(python -c "import certifi; print(certifi.where())")
|
||||
```
|
||||
|
||||
This breaks whenever certifi is updated (any `pip install`/`uv sync` overwrites the bundle and the CA is lost).
|
||||
|
||||
Our entrypoint approach is permanent because:
|
||||
|
||||
1. `SSL_CERT_FILE` is checked by Python's `ssl.create_default_context()` **before** falling back to `certifi.where()`. When set, certifi's bundle is never read.
|
||||
2. `REQUESTS_CA_BUNDLE` similarly overrides certifi for the `requests` library.
|
||||
3. The CA is injected at container startup (runtime), not baked into the Python environment. It survives image rebuilds, dependency updates, and `uv sync`.
|
||||
|
||||
```
|
||||
Python SSL lookup chain:
|
||||
ssl.create_default_context()
|
||||
→ SSL_CERT_FILE env var? → YES → use combined bundle (system + custom CA) ✓
|
||||
→ (certifi.where() is never reached)
|
||||
```
|
||||
|
||||
This covers all outbound HTTPS calls: httpx (transcription, diarization, translation, webhooks), OpenAI SDK (transcription), llama-index (LLM/summarization), and requests (transitive dependencies).
|
||||
|
||||
### Compose override
|
||||
|
||||
The setup script generates `docker-compose.ca.yml` which mounts the CA cert into every backend container as a read-only bind mount. This file is:
|
||||
- Only generated when `--custom-ca` is passed
|
||||
- Deleted on re-runs without `--custom-ca` (prevents stale overrides)
|
||||
- Added to `.gitignore`
|
||||
|
||||
### Node.js (frontend)
|
||||
|
||||
The web container uses `NODE_EXTRA_CA_CERTS` which **adds** to Node's trust store (unlike Python's `SSL_CERT_FILE` which replaces it). This is set via the compose override.
|
||||
|
||||
## Generate Your Own CA (Manual)
|
||||
|
||||
If you prefer not to use `generate-certs.sh`:
|
||||
|
||||
```bash
|
||||
# 1. Create CA
|
||||
openssl genrsa -out ca.key 4096
|
||||
openssl req -x509 -new -nodes -key ca.key -sha256 -days 3650 \
|
||||
-out ca.crt -subj "/CN=My CA/O=My Organization"
|
||||
|
||||
# 2. Create server key
|
||||
openssl genrsa -out server-key.pem 2048
|
||||
|
||||
# 3. Create CSR with SANs
|
||||
openssl req -new -key server-key.pem -out server.csr \
|
||||
-subj "/CN=reflector.local" \
|
||||
-addext "subjectAltName=DNS:reflector.local,DNS:localhost,IP:127.0.0.1"
|
||||
|
||||
# 4. Sign with CA
|
||||
openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key \
|
||||
-CAcreateserial -out server.pem -days 365 -sha256 \
|
||||
-copy_extensions copyall
|
||||
|
||||
# 5. Clean up
|
||||
rm server.csr ca.srl
|
||||
```
|
||||
|
||||
## Using Existing Corporate Certificates
|
||||
|
||||
If your organization already has a CA:
|
||||
|
||||
1. Get the CA certificate in PEM format from your IT team
|
||||
2. If you have a PKCS#12 (.p12/.pfx) bundle, extract the CA cert:
|
||||
```bash
|
||||
openssl pkcs12 -in bundle.p12 -cacerts -nokeys -out ca.crt
|
||||
```
|
||||
3. If you have multiple intermediate CAs, concatenate them into one PEM file:
|
||||
```bash
|
||||
cat intermediate-ca.crt root-ca.crt > ca.crt
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Browser: "Your connection is not private"
|
||||
|
||||
The CA is not trusted on the client machine. See "Trust the CA" section above.
|
||||
|
||||
Check certificate expiry:
|
||||
```bash
|
||||
openssl x509 -noout -dates -in certs/server.pem
|
||||
```
|
||||
|
||||
### Backend: `SSL: CERTIFICATE_VERIFY_FAILED`
|
||||
|
||||
CA cert not mounted or not loaded. Check inside the container:
|
||||
```bash
|
||||
docker compose exec server env | grep SSL_CERT_FILE
|
||||
docker compose exec server python -c "
|
||||
import ssl, os
|
||||
print('SSL_CERT_FILE:', os.environ.get('SSL_CERT_FILE', 'not set'))
|
||||
ctx = ssl.create_default_context()
|
||||
print('CA certs loaded:', ctx.cert_store_stats())
|
||||
"
|
||||
```
|
||||
|
||||
### Caddy: "certificate is not valid for any names"
|
||||
|
||||
Domain in Caddyfile doesn't match the certificate's SAN/CN. Check:
|
||||
```bash
|
||||
openssl x509 -noout -text -in certs/server.pem | grep -A1 "Subject Alternative Name"
|
||||
```
|
||||
|
||||
### Certificate chain issues
|
||||
|
||||
If you have intermediate CAs, concatenate them into `server.pem`:
|
||||
```bash
|
||||
cat server-cert.pem intermediate-ca.pem > certs/server.pem
|
||||
```
|
||||
|
||||
Verify the chain:
|
||||
```bash
|
||||
openssl verify -CAfile certs/ca.crt certs/server.pem
|
||||
```
|
||||
|
||||
### Certificate renewal
|
||||
|
||||
Custom CA certs are NOT auto-renewed (unlike Let's Encrypt). Replace cert files and restart:
|
||||
```bash
|
||||
# Replace certs
|
||||
cp new-server.pem certs/server.pem
|
||||
cp new-server-key.pem certs/server-key.pem
|
||||
|
||||
# Restart Caddy to pick up new certs
|
||||
docker compose restart caddy
|
||||
```
|
||||
294
docsv2/gpu-host-setup.md
Normal file
294
docsv2/gpu-host-setup.md
Normal file
@@ -0,0 +1,294 @@
|
||||
# Standalone GPU Host Setup
|
||||
|
||||
Deploy Reflector's GPU transcription/diarization/translation service on a dedicated machine, separate from the main Reflector instance. Useful when:
|
||||
|
||||
- Your GPU machine is on a different network than the Reflector server
|
||||
- You want to share one GPU service across multiple Reflector instances
|
||||
- The GPU machine has special hardware/drivers that can't run the full stack
|
||||
- You need to scale GPU processing independently
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────┐ HTTPS ┌────────────────────┐
|
||||
│ Reflector Server │ ────────────────────── │ GPU Host │
|
||||
│ (server, worker, │ TRANSCRIPT_URL │ (transcription, │
|
||||
│ web, postgres, │ DIARIZATION_URL │ diarization, │
|
||||
│ redis, hatchet) │ TRANSLATE_URL │ translation) │
|
||||
│ │ │ │
|
||||
│ setup-selfhosted.sh │ │ setup-gpu-host.sh │
|
||||
│ --hosted │ │ │
|
||||
└─────────────────────┘ └────────────────────┘
|
||||
```
|
||||
|
||||
The GPU service is a standalone FastAPI app that exposes transcription, diarization, translation, and audio padding endpoints. It has **no dependencies** on PostgreSQL, Redis, Hatchet, or any other Reflector service.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### On the GPU machine
|
||||
|
||||
```bash
|
||||
git clone <reflector-repo>
|
||||
cd reflector
|
||||
|
||||
# Set HuggingFace token (required for diarization models)
|
||||
export HF_TOKEN=your-huggingface-token
|
||||
|
||||
# Deploy with HTTPS (Let's Encrypt)
|
||||
./scripts/setup-gpu-host.sh --domain gpu.example.com --api-key my-secret-key
|
||||
|
||||
# Or deploy with custom CA
|
||||
./scripts/generate-certs.sh gpu.local
|
||||
./scripts/setup-gpu-host.sh --domain gpu.local --custom-ca certs/ --api-key my-secret-key
|
||||
```
|
||||
|
||||
### On the Reflector machine
|
||||
|
||||
```bash
|
||||
# If the GPU host uses a custom CA, trust it
|
||||
./scripts/setup-selfhosted.sh --hosted --garage --caddy \
|
||||
--extra-ca /path/to/gpu-machine-ca.crt
|
||||
|
||||
# Or if you already have --custom-ca for your local domain
|
||||
./scripts/setup-selfhosted.sh --hosted --garage --caddy \
|
||||
--domain reflector.local --custom-ca certs/ \
|
||||
--extra-ca /path/to/gpu-machine-ca.crt
|
||||
```
|
||||
|
||||
Then configure `server/.env` to point to the GPU host:
|
||||
|
||||
```bash
|
||||
TRANSCRIPT_BACKEND=modal
|
||||
TRANSCRIPT_URL=https://gpu.example.com
|
||||
TRANSCRIPT_MODAL_API_KEY=my-secret-key
|
||||
|
||||
DIARIZATION_BACKEND=modal
|
||||
DIARIZATION_URL=https://gpu.example.com
|
||||
DIARIZATION_MODAL_API_KEY=my-secret-key
|
||||
|
||||
TRANSLATION_BACKEND=modal
|
||||
TRANSLATE_URL=https://gpu.example.com
|
||||
TRANSLATION_MODAL_API_KEY=my-secret-key
|
||||
```
|
||||
|
||||
## Script Options
|
||||
|
||||
```
|
||||
./scripts/setup-gpu-host.sh [OPTIONS]
|
||||
|
||||
Options:
|
||||
--domain DOMAIN Domain name for HTTPS (Let's Encrypt or custom cert)
|
||||
--custom-ca PATH Custom CA (directory or single PEM file)
|
||||
--extra-ca FILE Additional CA cert to trust (repeatable)
|
||||
--api-key KEY API key to protect the service (strongly recommended)
|
||||
--cpu CPU-only mode (no NVIDIA GPU required)
|
||||
--port PORT Host port (default: 443 with Caddy, 8000 without)
|
||||
```
|
||||
|
||||
## Deployment Scenarios
|
||||
|
||||
### Public internet with Let's Encrypt
|
||||
|
||||
GPU machine has a public IP and domain:
|
||||
|
||||
```bash
|
||||
./scripts/setup-gpu-host.sh --domain gpu.example.com --api-key my-secret-key
|
||||
```
|
||||
|
||||
Requirements:
|
||||
- DNS A record: `gpu.example.com` → GPU machine's public IP
|
||||
- Ports 80 and 443 open
|
||||
- Caddy auto-provisions Let's Encrypt certificate
|
||||
|
||||
### Internal network with custom CA
|
||||
|
||||
GPU machine on a private network:
|
||||
|
||||
```bash
|
||||
# Generate certs on the GPU machine
|
||||
./scripts/generate-certs.sh gpu.internal "IP:192.168.1.200"
|
||||
|
||||
# Deploy
|
||||
./scripts/setup-gpu-host.sh --domain gpu.internal --custom-ca certs/ --api-key my-secret-key
|
||||
```
|
||||
|
||||
On each machine that connects (including the Reflector server), add DNS:
|
||||
```bash
|
||||
echo "192.168.1.200 gpu.internal" | sudo tee -a /etc/hosts
|
||||
```
|
||||
|
||||
### IP-only (no domain)
|
||||
|
||||
No domain needed — just use the machine's IP:
|
||||
|
||||
```bash
|
||||
./scripts/setup-gpu-host.sh --api-key my-secret-key
|
||||
```
|
||||
|
||||
Caddy is not used; the GPU service runs directly on port 8000 (HTTP). For HTTPS without a domain, the Reflector machine connects via `http://<GPU_IP>:8000`.
|
||||
|
||||
### CPU-only (no NVIDIA GPU)
|
||||
|
||||
Works on any machine — transcription will be slower:
|
||||
|
||||
```bash
|
||||
./scripts/setup-gpu-host.sh --cpu --domain gpu.example.com --api-key my-secret-key
|
||||
```
|
||||
|
||||
## DNS Resolution
|
||||
|
||||
The Reflector server must be able to reach the GPU host by name or IP.
|
||||
|
||||
| Setup | DNS Method | TRANSCRIPT_URL example |
|
||||
|-------|------------|----------------------|
|
||||
| Public domain | DNS A record | `https://gpu.example.com` |
|
||||
| Internal domain | `/etc/hosts` on both machines | `https://gpu.internal` |
|
||||
| IP only | No DNS needed | `http://192.168.1.200:8000` |
|
||||
|
||||
For internal domains, add the GPU machine's IP to `/etc/hosts` on the Reflector machine:
|
||||
```bash
|
||||
echo "192.168.1.200 gpu.internal" | sudo tee -a /etc/hosts
|
||||
```
|
||||
|
||||
If the Reflector server runs in Docker, the containers resolve DNS from the host (Docker's default DNS behavior). So adding to the host's `/etc/hosts` is sufficient.
|
||||
|
||||
## Multi-CA Setup
|
||||
|
||||
When your Reflector instance has its own CA (for `reflector.local`) and the GPU host has a different CA:
|
||||
|
||||
**On the GPU machine:**
|
||||
```bash
|
||||
./scripts/generate-certs.sh gpu.local
|
||||
./scripts/setup-gpu-host.sh --domain gpu.local --custom-ca certs/ --api-key my-key
|
||||
```
|
||||
|
||||
**On the Reflector machine:**
|
||||
```bash
|
||||
# Your local CA for reflector.local + the GPU host's CA
|
||||
./scripts/setup-selfhosted.sh --hosted --garage --caddy \
|
||||
--domain reflector.local \
|
||||
--custom-ca certs/ \
|
||||
--extra-ca /path/to/gpu-machine-ca.crt
|
||||
```
|
||||
|
||||
The `--extra-ca` flag appends the GPU host's CA to the trust bundle. Backend containers trust both CAs — your local domain works AND outbound calls to the GPU host succeed.
|
||||
|
||||
You can repeat `--extra-ca` for multiple remote services:
|
||||
```bash
|
||||
--extra-ca /path/to/gpu-ca.crt --extra-ca /path/to/llm-ca.crt
|
||||
```
|
||||
|
||||
## API Key Authentication
|
||||
|
||||
The GPU service uses Bearer token authentication via `REFLECTOR_GPU_APIKEY`:
|
||||
|
||||
```bash
|
||||
# Test from the Reflector machine
|
||||
curl -s https://gpu.example.com/docs # No auth needed for docs
|
||||
curl -s -X POST https://gpu.example.com/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer <my-secret-key>" \ #gitleaks:allow
|
||||
-F "file=@audio.wav"
|
||||
```
|
||||
|
||||
If `REFLECTOR_GPU_APIKEY` is not set, the service accepts all requests (open access). Always use `--api-key` for internet-facing deployments.
|
||||
|
||||
The same key goes in Reflector's `server/.env` as `TRANSCRIPT_MODAL_API_KEY` and `DIARIZATION_MODAL_API_KEY`.
|
||||
|
||||
## Files
|
||||
|
||||
| File | Checked in? | Purpose |
|
||||
|------|-------------|---------|
|
||||
| `docker-compose.gpu-host.yml` | Yes | Static compose file with profiles (`gpu`, `cpu`, `caddy`) |
|
||||
| `.env.gpu-host` | No (generated) | Environment variables (HF_TOKEN, API key, ports) |
|
||||
| `Caddyfile.gpu-host` | No (generated) | Caddy config (only when using HTTPS) |
|
||||
| `docker-compose.gpu-ca.yml` | No (generated) | CA cert mounts override (only with --custom-ca) |
|
||||
| `certs/` | No (generated) | Staged certificates (when using --custom-ca) |
|
||||
|
||||
The compose file is checked into the repo — you can read it to understand exactly what runs. The script only generates env vars, Caddyfile, and CA overrides. Profiles control which service starts:
|
||||
|
||||
```bash
|
||||
# What the script does under the hood:
|
||||
docker compose -f docker-compose.gpu-host.yml --profile gpu --profile caddy \
|
||||
--env-file .env.gpu-host up -d
|
||||
|
||||
# CPU mode:
|
||||
docker compose -f docker-compose.gpu-host.yml --profile cpu --profile caddy \
|
||||
--env-file .env.gpu-host up -d
|
||||
```
|
||||
|
||||
Both `gpu` and `cpu` services get the network alias `transcription`, so Caddy's config works with either.
|
||||
|
||||
## Management
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker compose -f docker-compose.gpu-host.yml --profile gpu logs -f gpu
|
||||
|
||||
# Restart
|
||||
docker compose -f docker-compose.gpu-host.yml --profile gpu restart gpu
|
||||
|
||||
# Stop
|
||||
docker compose -f docker-compose.gpu-host.yml --profile gpu --profile caddy down
|
||||
|
||||
# Re-run setup
|
||||
./scripts/setup-gpu-host.sh [same flags]
|
||||
|
||||
# Rebuild after code changes
|
||||
docker compose -f docker-compose.gpu-host.yml --profile gpu build gpu
|
||||
docker compose -f docker-compose.gpu-host.yml --profile gpu up -d gpu
|
||||
```
|
||||
|
||||
If you deployed with `--custom-ca`, include the CA override in manual commands:
|
||||
```bash
|
||||
docker compose -f docker-compose.gpu-host.yml -f docker-compose.gpu-ca.yml \
|
||||
--profile gpu logs -f gpu
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### GPU service won't start
|
||||
|
||||
Check logs:
|
||||
```bash
|
||||
docker compose -f docker-compose.gpu-host.yml logs gpu
|
||||
```
|
||||
|
||||
Common causes:
|
||||
- NVIDIA driver not installed or `nvidia-container-toolkit` missing
|
||||
- `HF_TOKEN` not set (diarization model download fails)
|
||||
- Port already in use
|
||||
|
||||
### Reflector can't connect to GPU host
|
||||
|
||||
From the Reflector machine:
|
||||
```bash
|
||||
# Test HTTPS connectivity
|
||||
curl -v https://gpu.example.com/docs
|
||||
|
||||
# If using custom CA, test with explicit CA
|
||||
curl --cacert /path/to/gpu-ca.crt https://gpu.internal/docs
|
||||
```
|
||||
|
||||
From inside the Reflector container:
|
||||
```bash
|
||||
docker compose exec server python -c "
|
||||
import httpx
|
||||
r = httpx.get('https://gpu.internal/docs')
|
||||
print(r.status_code)
|
||||
"
|
||||
```
|
||||
|
||||
### SSL: CERTIFICATE_VERIFY_FAILED
|
||||
|
||||
The Reflector backend doesn't trust the GPU host's CA. Fix:
|
||||
```bash
|
||||
# Re-run Reflector setup with the GPU host's CA
|
||||
./scripts/setup-selfhosted.sh --hosted --extra-ca /path/to/gpu-ca.crt
|
||||
```
|
||||
|
||||
### Diarization returns errors
|
||||
|
||||
- Accept pyannote model licenses on HuggingFace:
|
||||
- https://huggingface.co/pyannote/speaker-diarization-3.1
|
||||
- https://huggingface.co/pyannote/segmentation-3.0
|
||||
- Verify `HF_TOKEN` is set in `.env.gpu-host`
|
||||
@@ -199,6 +199,11 @@ Without `--caddy` or `--domain`, no ports are exposed. Point your own reverse pr
|
||||
| `DAILY_SUBDOMAIN` | Daily.co subdomain | *(unset)* |
|
||||
| `DAILYCO_STORAGE_AWS_ACCESS_KEY_ID` | AWS access key for reading Daily's recording bucket | *(unset)* |
|
||||
| `DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY` | AWS secret key for reading Daily's recording bucket | *(unset)* |
|
||||
| `ZULIP_REALM` | Zulip server hostname (e.g. `zulip.example.com`) | *(unset)* |
|
||||
| `ZULIP_API_KEY` | Zulip bot API key | *(unset)* |
|
||||
| `ZULIP_BOT_EMAIL` | Zulip bot email address | *(unset)* |
|
||||
| `ZULIP_DAG_STREAM` | Zulip stream for pipeline failure alerts | *(unset)* |
|
||||
| `ZULIP_DAG_TOPIC` | Zulip topic for pipeline failure alerts | *(unset)* |
|
||||
| `HATCHET_CLIENT_TOKEN` | Hatchet API token (auto-generated) | *(unset)* |
|
||||
| `HATCHET_CLIENT_SERVER_URL` | Hatchet server URL | Auto-set when Daily.co configured |
|
||||
| `HATCHET_CLIENT_HOST_PORT` | Hatchet gRPC address | Auto-set when Daily.co configured |
|
||||
|
||||
@@ -42,6 +42,7 @@ COPY pyproject.toml uv.lock /app/
|
||||
COPY ./app /app/app
|
||||
COPY ./main.py /app/
|
||||
COPY ./runserver.sh /app/
|
||||
COPY ./docker-entrypoint.sh /app/
|
||||
|
||||
# prevent uv failing with too many open files on big cpus
|
||||
ENV UV_CONCURRENT_INSTALLS=16
|
||||
@@ -52,6 +53,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD ["sh", "/app/runserver.sh"]
|
||||
RUN chmod +x /app/docker-entrypoint.sh
|
||||
|
||||
CMD ["sh", "/app/docker-entrypoint.sh"]
|
||||
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ COPY pyproject.toml uv.lock /app/
|
||||
COPY ./app /app/app
|
||||
COPY ./main.py /app/
|
||||
COPY ./runserver.sh /app/
|
||||
COPY ./docker-entrypoint.sh /app/
|
||||
|
||||
# prevent uv failing with too many open files on big cpus
|
||||
ENV UV_CONCURRENT_INSTALLS=16
|
||||
@@ -36,4 +37,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD ["sh", "/app/runserver.sh"]
|
||||
RUN chmod +x /app/docker-entrypoint.sh
|
||||
|
||||
CMD ["sh", "/app/docker-entrypoint.sh"]
|
||||
|
||||
22
gpu/self_hosted/docker-entrypoint.sh
Normal file
22
gpu/self_hosted/docker-entrypoint.sh
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
# Custom CA certificate injection
|
||||
# If a CA cert is mounted at this path (via docker-compose.ca.yml),
|
||||
# add it to the system trust store and configure all Python SSL libraries.
|
||||
CUSTOM_CA_PATH="/usr/local/share/ca-certificates/custom-ca.crt"
|
||||
|
||||
if [ -s "$CUSTOM_CA_PATH" ]; then
|
||||
echo "[entrypoint] Custom CA certificate detected, updating trust store..."
|
||||
update-ca-certificates 2>/dev/null
|
||||
|
||||
# update-ca-certificates creates a combined bundle (system + custom CAs)
|
||||
COMBINED_BUNDLE="/etc/ssl/certs/ca-certificates.crt"
|
||||
export SSL_CERT_FILE="$COMBINED_BUNDLE"
|
||||
export REQUESTS_CA_BUNDLE="$COMBINED_BUNDLE"
|
||||
export CURL_CA_BUNDLE="$COMBINED_BUNDLE"
|
||||
export GRPC_DEFAULT_SSL_ROOTS_FILE_PATH="$COMBINED_BUNDLE"
|
||||
echo "[entrypoint] CA trust store updated (SSL_CERT_FILE=$COMBINED_BUNDLE)"
|
||||
fi
|
||||
|
||||
exec sh /app/runserver.sh
|
||||
6
gpu/self_hosted/uv.lock
generated
6
gpu/self_hosted/uv.lock
generated
@@ -2153,7 +2153,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.32.5"
|
||||
version = "2.33.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
@@ -2161,9 +2161,9 @@ dependencies = [
|
||||
{ name = "idna" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/34/64/8860370b167a9721e8956ae116825caff829224fbca0ca6e7bf8ddef8430/requests-2.33.0.tar.gz", hash = "sha256:c7ebc5e8b0f21837386ad0e1c8fe8b829fa5f544d8df3b2253bff14ef29d7652", size = 134232, upload-time = "2026-03-25T15:10:41.586Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
130
scripts/generate-certs.sh
Executable file
130
scripts/generate-certs.sh
Executable file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Generate a local CA and server certificate for Reflector self-hosted deployments.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/generate-certs.sh DOMAIN [EXTRA_SANS...]
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/generate-certs.sh reflector.local
|
||||
# ./scripts/generate-certs.sh reflector.local "DNS:gpu.local,IP:192.168.1.100"
|
||||
#
|
||||
# Generates in certs/:
|
||||
# ca.key — CA private key (keep secret)
|
||||
# ca.crt — CA certificate (distribute to clients)
|
||||
# server-key.pem — Server private key
|
||||
# server.pem — Server certificate (signed by CA)
|
||||
#
|
||||
# Then use with setup-selfhosted.sh:
|
||||
# ./scripts/setup-selfhosted.sh --gpu --caddy --domain DOMAIN --custom-ca certs/
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
DOMAIN="${1:?Usage: $0 DOMAIN [EXTRA_SANS...]}"
|
||||
EXTRA_SANS="${2:-}"
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
CERTS_DIR="$(cd "$SCRIPT_DIR/.." && pwd)/certs"
|
||||
|
||||
# Colors
|
||||
GREEN='\033[0;32m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
info() { echo -e "${CYAN}==>${NC} $*"; }
|
||||
ok() { echo -e "${GREEN} ✓${NC} $*"; }
|
||||
|
||||
# Check for openssl
|
||||
if ! command -v openssl &>/dev/null; then
|
||||
echo "Error: openssl is required but not found. Install it first." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$CERTS_DIR"
|
||||
|
||||
# Build SAN list
|
||||
SAN_LIST="DNS:$DOMAIN,DNS:localhost,IP:127.0.0.1"
|
||||
if [[ -n "$EXTRA_SANS" ]]; then
|
||||
SAN_LIST="$SAN_LIST,$EXTRA_SANS"
|
||||
fi
|
||||
|
||||
info "Generating CA and server certificate for: $DOMAIN"
|
||||
echo " SANs: $SAN_LIST"
|
||||
echo ""
|
||||
|
||||
# --- Step 1: Generate CA ---
|
||||
if [[ -f "$CERTS_DIR/ca.key" ]] && [[ -f "$CERTS_DIR/ca.crt" ]]; then
|
||||
ok "CA already exists at certs/ca.key + certs/ca.crt — reusing"
|
||||
else
|
||||
info "Generating CA key and certificate..."
|
||||
openssl genrsa -out "$CERTS_DIR/ca.key" 4096 2>/dev/null
|
||||
openssl req -x509 -new -nodes \
|
||||
-key "$CERTS_DIR/ca.key" \
|
||||
-sha256 -days 3650 \
|
||||
-out "$CERTS_DIR/ca.crt" \
|
||||
-subj "/CN=Reflector Local CA/O=Reflector Self-Hosted"
|
||||
ok "CA certificate generated (valid for 10 years)"
|
||||
fi
|
||||
|
||||
# --- Step 2: Generate server key ---
|
||||
info "Generating server key..."
|
||||
openssl genrsa -out "$CERTS_DIR/server-key.pem" 2048 2>/dev/null
|
||||
ok "Server key generated"
|
||||
|
||||
# --- Step 3: Create CSR with SANs ---
|
||||
info "Creating certificate signing request..."
|
||||
openssl req -new \
|
||||
-key "$CERTS_DIR/server-key.pem" \
|
||||
-out "$CERTS_DIR/server.csr" \
|
||||
-subj "/CN=$DOMAIN" \
|
||||
-addext "subjectAltName=$SAN_LIST"
|
||||
ok "CSR created"
|
||||
|
||||
# --- Step 4: Sign with CA ---
|
||||
info "Signing server certificate with CA..."
|
||||
openssl x509 -req \
|
||||
-in "$CERTS_DIR/server.csr" \
|
||||
-CA "$CERTS_DIR/ca.crt" \
|
||||
-CAkey "$CERTS_DIR/ca.key" \
|
||||
-CAcreateserial \
|
||||
-out "$CERTS_DIR/server.pem" \
|
||||
-days 365 -sha256 \
|
||||
-copy_extensions copyall \
|
||||
2>/dev/null
|
||||
ok "Server certificate signed (valid for 1 year)"
|
||||
|
||||
# --- Cleanup ---
|
||||
rm -f "$CERTS_DIR/server.csr" "$CERTS_DIR/ca.srl"
|
||||
|
||||
# --- Set permissions ---
|
||||
chmod 644 "$CERTS_DIR/ca.crt" "$CERTS_DIR/server.pem"
|
||||
chmod 600 "$CERTS_DIR/ca.key" "$CERTS_DIR/server-key.pem"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo -e " ${GREEN}Certificates generated in certs/${NC}"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo " certs/ca.key CA private key (keep secret)"
|
||||
echo " certs/ca.crt CA certificate (distribute to clients)"
|
||||
echo " certs/server-key.pem Server private key"
|
||||
echo " certs/server.pem Server certificate for $DOMAIN"
|
||||
echo ""
|
||||
echo " SANs: $SAN_LIST"
|
||||
echo ""
|
||||
echo "Use with setup-selfhosted.sh:"
|
||||
echo " ./scripts/setup-selfhosted.sh --gpu --caddy --domain $DOMAIN --custom-ca certs/"
|
||||
echo ""
|
||||
echo "Trust the CA on your machine:"
|
||||
case "$(uname -s)" in
|
||||
Darwin)
|
||||
echo " sudo security add-trusted-cert -d -r trustRoot -k /Library/Keychains/System.keychain certs/ca.crt"
|
||||
;;
|
||||
Linux)
|
||||
echo " sudo cp certs/ca.crt /usr/local/share/ca-certificates/reflector-ca.crt"
|
||||
echo " sudo update-ca-certificates"
|
||||
;;
|
||||
*)
|
||||
echo " See docsv2/custom-ca-setup.md for your platform"
|
||||
;;
|
||||
esac
|
||||
echo ""
|
||||
167
scripts/run-integration-tests.sh
Executable file
167
scripts/run-integration-tests.sh
Executable file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Run integration tests locally.
|
||||
#
|
||||
# Spins up the full stack via Docker Compose, runs the three integration tests,
|
||||
# and tears everything down afterward.
|
||||
#
|
||||
# Required environment variables:
|
||||
# LLM_URL — OpenAI-compatible LLM endpoint (e.g. https://api.openai.com/v1)
|
||||
# LLM_API_KEY — API key for the LLM endpoint
|
||||
# HF_TOKEN — HuggingFace token for pyannote gated models
|
||||
#
|
||||
# Optional:
|
||||
# LLM_MODEL — Model name (default: qwen2.5:14b)
|
||||
#
|
||||
# Flags:
|
||||
# --build — Rebuild backend Docker images (server, workers, test-runner)
|
||||
#
|
||||
# Usage:
|
||||
# export LLM_URL="https://api.openai.com/v1"
|
||||
# export LLM_API_KEY="sk-..."
|
||||
# export HF_TOKEN="hf_..."
|
||||
# ./scripts/run-integration-tests.sh
|
||||
# ./scripts/run-integration-tests.sh --build # rebuild backend images
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
BUILD_FLAG=""
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--build) BUILD_FLAG="--build" ;;
|
||||
esac
|
||||
done
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
COMPOSE_DIR="$REPO_ROOT/server/tests"
|
||||
COMPOSE_FILE="$COMPOSE_DIR/docker-compose.integration.yml"
|
||||
COMPOSE="docker compose -f $COMPOSE_FILE"
|
||||
|
||||
# ── Validate required env vars ──────────────────────────────────────────────
|
||||
for var in LLM_URL LLM_API_KEY HF_TOKEN; do
|
||||
if [[ -z "${!var:-}" ]]; then
|
||||
echo "ERROR: $var is not set. See script header for required env vars."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
export LLM_MODEL="${LLM_MODEL:-qwen2.5:14b}"
|
||||
|
||||
# ── Helpers ─────────────────────────────────────────────────────────────────
|
||||
info() { echo -e "\n\033[1;34m▸ $*\033[0m"; }
|
||||
ok() { echo -e "\033[1;32m ✓ $*\033[0m"; }
|
||||
fail() { echo -e "\033[1;31m ✗ $*\033[0m"; }
|
||||
|
||||
wait_for() {
|
||||
local desc="$1" cmd="$2" max="${3:-60}"
|
||||
info "Waiting for $desc (up to ${max}s)..."
|
||||
for i in $(seq 1 "$max"); do
|
||||
if eval "$cmd" &>/dev/null; then
|
||||
ok "$desc is ready"
|
||||
return 0
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
fail "$desc did not become ready within ${max}s"
|
||||
return 1
|
||||
}
|
||||
|
||||
cleanup() {
|
||||
info "Tearing down..."
|
||||
$COMPOSE down -v --remove-orphans 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Always tear down on exit
|
||||
trap cleanup EXIT
|
||||
|
||||
# ── Step 1: Build and start infrastructure ──────────────────────────────────
|
||||
info "Building and starting infrastructure services..."
|
||||
$COMPOSE up -d --build postgres redis garage hatchet mock-daily mailpit
|
||||
|
||||
# ── Step 2: Set up Garage (S3 bucket + keys) ───────────────────────────────
|
||||
wait_for "Garage" "$COMPOSE exec -T garage /garage stats" 60
|
||||
|
||||
info "Setting up Garage bucket and keys..."
|
||||
GARAGE="$COMPOSE exec -T garage /garage"
|
||||
|
||||
# Hardcoded test credentials — ephemeral containers, destroyed after tests
|
||||
export GARAGE_KEY_ID="GK0123456789abcdef01234567" # gitleaks:allow
|
||||
export GARAGE_KEY_SECRET="0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
|
||||
|
||||
# Layout
|
||||
NODE_ID=$($GARAGE node id -q 2>&1 | tr -d '[:space:]')
|
||||
LAYOUT_STATUS=$($GARAGE layout show 2>&1 || true)
|
||||
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
|
||||
$GARAGE layout assign "$NODE_ID" -c 1G -z dc1
|
||||
$GARAGE layout apply --version 1
|
||||
fi
|
||||
|
||||
# Bucket
|
||||
$GARAGE bucket info reflector-media >/dev/null 2>&1 || $GARAGE bucket create reflector-media
|
||||
|
||||
# Import key with known credentials
|
||||
if ! $GARAGE key info reflector-test >/dev/null 2>&1; then
|
||||
$GARAGE key import --yes "$GARAGE_KEY_ID" "$GARAGE_KEY_SECRET"
|
||||
$GARAGE key rename "$GARAGE_KEY_ID" reflector-test
|
||||
fi
|
||||
|
||||
# Permissions
|
||||
$GARAGE bucket allow reflector-media --read --write --key reflector-test
|
||||
|
||||
ok "Garage ready with hardcoded test credentials"
|
||||
|
||||
# ── Step 3: Generate Hatchet API token ──────────────────────────────────────
|
||||
wait_for "Hatchet" "$COMPOSE exec -T hatchet curl -sf http://localhost:8888/api/live" 90
|
||||
|
||||
info "Generating Hatchet API token..."
|
||||
HATCHET_TOKEN_OUTPUT=$($COMPOSE exec -T hatchet /hatchet-admin token create --config /config --name local-test 2>&1)
|
||||
export HATCHET_CLIENT_TOKEN=$(echo "$HATCHET_TOKEN_OUTPUT" | grep -o 'eyJ[A-Za-z0-9_.\-]*')
|
||||
|
||||
if [[ -z "$HATCHET_CLIENT_TOKEN" ]]; then
|
||||
fail "Failed to extract Hatchet token (JWT not found in output)"
|
||||
echo " Output was: $HATCHET_TOKEN_OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
ok "Hatchet token generated"
|
||||
|
||||
# ── Step 4: Start backend services ──────────────────────────────────────────
|
||||
info "Starting backend services..."
|
||||
$COMPOSE up -d $BUILD_FLAG server worker hatchet-worker-cpu hatchet-worker-llm test-runner
|
||||
|
||||
# ── Step 5: Wait for server + run migrations ────────────────────────────────
|
||||
wait_for "Server" "$COMPOSE exec -T test-runner curl -sf http://server:1250/health" 60
|
||||
|
||||
info "Running database migrations..."
|
||||
$COMPOSE exec -T server uv run alembic upgrade head
|
||||
ok "Migrations applied"
|
||||
|
||||
# ── Step 6: Run integration tests ───────────────────────────────────────────
|
||||
info "Running integration tests..."
|
||||
echo ""
|
||||
|
||||
LOGS_DIR="$COMPOSE_DIR/integration/logs"
|
||||
mkdir -p "$LOGS_DIR"
|
||||
RUN_TIMESTAMP=$(date +%Y%m%d-%H%M%S)
|
||||
TEST_LOG="$LOGS_DIR/$RUN_TIMESTAMP.txt"
|
||||
|
||||
if $COMPOSE exec -T test-runner uv run pytest tests/integration/ -v -x 2>&1 | tee "$TEST_LOG.pytest"; then
|
||||
echo ""
|
||||
ok "All integration tests passed!"
|
||||
EXIT_CODE=0
|
||||
else
|
||||
echo ""
|
||||
fail "Integration tests failed!"
|
||||
EXIT_CODE=1
|
||||
fi
|
||||
|
||||
# Always collect service logs + test output into a single file
|
||||
info "Collecting logs..."
|
||||
$COMPOSE logs --tail=500 > "$TEST_LOG" 2>&1
|
||||
echo -e "\n\n=== PYTEST OUTPUT ===\n" >> "$TEST_LOG"
|
||||
cat "$TEST_LOG.pytest" >> "$TEST_LOG" 2>/dev/null
|
||||
rm -f "$TEST_LOG.pytest"
|
||||
echo " Logs saved to: server/tests/integration/logs/$RUN_TIMESTAMP.txt"
|
||||
|
||||
# cleanup runs via trap
|
||||
exit $EXIT_CODE
|
||||
496
scripts/setup-gpu-host.sh
Executable file
496
scripts/setup-gpu-host.sh
Executable file
@@ -0,0 +1,496 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Standalone GPU service setup for Reflector.
|
||||
# Deploys ONLY the GPU transcription/diarization/translation service on a dedicated machine.
|
||||
# The main Reflector instance connects to this machine over HTTPS.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/setup-gpu-host.sh [--domain DOMAIN] [--custom-ca PATH] [--extra-ca FILE] [--api-key KEY] [--cpu] [--build]
|
||||
#
|
||||
# Options:
|
||||
# --domain DOMAIN Domain name for this GPU host (e.g., gpu.example.com)
|
||||
# With --custom-ca: uses custom TLS cert. Without: uses Let's Encrypt.
|
||||
# --custom-ca PATH Custom CA certificate (dir with ca.crt + server.pem + server-key.pem, or single PEM file)
|
||||
# --extra-ca FILE Additional CA cert to trust (repeatable)
|
||||
# --api-key KEY API key to protect the GPU service (recommended for internet-facing deployments)
|
||||
# --cpu Use CPU-only Dockerfile (no NVIDIA GPU required)
|
||||
# --build Build image from source (default: build, since no pre-built GPU image is published)
|
||||
# --port PORT Host port to expose (default: 443 with Caddy, 8000 without)
|
||||
#
|
||||
# Examples:
|
||||
# # GPU on LAN with custom CA
|
||||
# ./scripts/generate-certs.sh gpu.local
|
||||
# ./scripts/setup-gpu-host.sh --domain gpu.local --custom-ca certs/ --api-key my-secret-key
|
||||
#
|
||||
# # GPU on public internet with Let's Encrypt
|
||||
# ./scripts/setup-gpu-host.sh --domain gpu.example.com --api-key my-secret-key
|
||||
#
|
||||
# # GPU on LAN, IP access only (self-signed cert)
|
||||
# ./scripts/setup-gpu-host.sh --api-key my-secret-key
|
||||
#
|
||||
# # CPU-only mode (no NVIDIA GPU)
|
||||
# ./scripts/setup-gpu-host.sh --cpu --api-key my-secret-key
|
||||
#
|
||||
# After setup, configure the main Reflector instance to use this GPU:
|
||||
# In server/.env on the Reflector machine:
|
||||
# TRANSCRIPT_BACKEND=modal
|
||||
# TRANSCRIPT_URL=https://gpu.example.com
|
||||
# TRANSCRIPT_MODAL_API_KEY=my-secret-key
|
||||
# DIARIZATION_BACKEND=modal
|
||||
# DIARIZATION_URL=https://gpu.example.com
|
||||
# DIARIZATION_MODAL_API_KEY=my-secret-key
|
||||
# TRANSLATION_BACKEND=modal
|
||||
# TRANSLATE_URL=https://gpu.example.com
|
||||
# TRANSLATION_MODAL_API_KEY=my-secret-key
|
||||
#
|
||||
# DNS Resolution:
|
||||
# - Public domain: Create a DNS A record pointing to this machine's public IP.
|
||||
# - Internal domain (e.g., gpu.local): Add to /etc/hosts on both machines:
|
||||
# <GPU_MACHINE_IP> gpu.local
|
||||
# - IP-only: Use the machine's IP directly in TRANSCRIPT_URL/DIARIZATION_URL.
|
||||
# The Reflector backend must trust the CA or accept self-signed certs.
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
|
||||
GPU_DIR="$ROOT_DIR/gpu/self_hosted"
|
||||
OS="$(uname -s)"
|
||||
|
||||
# --- Colors ---
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
info() { echo -e "${CYAN}==>${NC} $*"; }
|
||||
ok() { echo -e "${GREEN} ✓${NC} $*"; }
|
||||
warn() { echo -e "${YELLOW} !${NC} $*"; }
|
||||
err() { echo -e "${RED} ✗${NC} $*" >&2; }
|
||||
|
||||
# --- Parse arguments ---
|
||||
CUSTOM_DOMAIN=""
|
||||
CUSTOM_CA=""
|
||||
EXTRA_CA_FILES=()
|
||||
API_KEY=""
|
||||
USE_CPU=false
|
||||
HOST_PORT=""
|
||||
|
||||
SKIP_NEXT=false
|
||||
ARGS=("$@")
|
||||
for i in "${!ARGS[@]}"; do
|
||||
if [[ "$SKIP_NEXT" == "true" ]]; then
|
||||
SKIP_NEXT=false
|
||||
continue
|
||||
fi
|
||||
arg="${ARGS[$i]}"
|
||||
case "$arg" in
|
||||
--domain)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--domain requires a domain name"
|
||||
exit 1
|
||||
fi
|
||||
CUSTOM_DOMAIN="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--custom-ca)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--custom-ca requires a path to a directory or PEM certificate file"
|
||||
exit 1
|
||||
fi
|
||||
CUSTOM_CA="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--extra-ca)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--extra-ca requires a path to a PEM certificate file"
|
||||
exit 1
|
||||
fi
|
||||
if [[ ! -f "${ARGS[$next_i]}" ]]; then
|
||||
err "--extra-ca file not found: ${ARGS[$next_i]}"
|
||||
exit 1
|
||||
fi
|
||||
EXTRA_CA_FILES+=("${ARGS[$next_i]}")
|
||||
SKIP_NEXT=true ;;
|
||||
--api-key)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--api-key requires a key value"
|
||||
exit 1
|
||||
fi
|
||||
API_KEY="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--cpu)
|
||||
USE_CPU=true ;;
|
||||
--port)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--port requires a port number"
|
||||
exit 1
|
||||
fi
|
||||
HOST_PORT="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--build)
|
||||
;; # Always build from source for GPU, flag accepted for compatibility
|
||||
*)
|
||||
err "Unknown argument: $arg"
|
||||
err "Usage: $0 [--domain DOMAIN] [--custom-ca PATH] [--extra-ca FILE] [--api-key KEY] [--cpu] [--port PORT]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# --- Resolve CA paths ---
|
||||
CA_CERT_PATH=""
|
||||
TLS_CERT_PATH=""
|
||||
TLS_KEY_PATH=""
|
||||
USE_CUSTOM_CA=false
|
||||
USE_CADDY=false
|
||||
|
||||
if [[ -n "$CUSTOM_CA" ]] || [[ -n "${EXTRA_CA_FILES[0]+x}" ]]; then
|
||||
USE_CUSTOM_CA=true
|
||||
fi
|
||||
|
||||
if [[ -n "$CUSTOM_CA" ]]; then
|
||||
CUSTOM_CA="${CUSTOM_CA%/}"
|
||||
if [[ -d "$CUSTOM_CA" ]]; then
|
||||
[[ -f "$CUSTOM_CA/ca.crt" ]] || { err "$CUSTOM_CA/ca.crt not found"; exit 1; }
|
||||
CA_CERT_PATH="$CUSTOM_CA/ca.crt"
|
||||
if [[ -f "$CUSTOM_CA/server.pem" ]] && [[ -f "$CUSTOM_CA/server-key.pem" ]]; then
|
||||
TLS_CERT_PATH="$CUSTOM_CA/server.pem"
|
||||
TLS_KEY_PATH="$CUSTOM_CA/server-key.pem"
|
||||
elif [[ -f "$CUSTOM_CA/server.pem" ]] || [[ -f "$CUSTOM_CA/server-key.pem" ]]; then
|
||||
warn "Found only one of server.pem/server-key.pem — both needed for TLS. Skipping."
|
||||
fi
|
||||
elif [[ -f "$CUSTOM_CA" ]]; then
|
||||
CA_CERT_PATH="$CUSTOM_CA"
|
||||
else
|
||||
err "--custom-ca path not found: $CUSTOM_CA"
|
||||
exit 1
|
||||
fi
|
||||
elif [[ -n "${EXTRA_CA_FILES[0]+x}" ]]; then
|
||||
CA_CERT_PATH="${EXTRA_CA_FILES[0]}"
|
||||
unset 'EXTRA_CA_FILES[0]'
|
||||
EXTRA_CA_FILES=("${EXTRA_CA_FILES[@]+"${EXTRA_CA_FILES[@]}"}")
|
||||
fi
|
||||
|
||||
# Caddy if we have a domain or TLS certs
|
||||
if [[ -n "$CUSTOM_DOMAIN" ]] || [[ -n "$TLS_CERT_PATH" ]]; then
|
||||
USE_CADDY=true
|
||||
fi
|
||||
|
||||
# Default port
|
||||
if [[ -z "$HOST_PORT" ]]; then
|
||||
if [[ "$USE_CADDY" == "true" ]]; then
|
||||
HOST_PORT="443"
|
||||
else
|
||||
HOST_PORT="8000"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Detect primary IP
|
||||
PRIMARY_IP=""
|
||||
if [[ "$OS" == "Linux" ]]; then
|
||||
PRIMARY_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
|
||||
if [[ "$PRIMARY_IP" == "127."* ]] || [[ -z "$PRIMARY_IP" ]]; then
|
||||
PRIMARY_IP=$(ip -4 route get 1 2>/dev/null | sed -n 's/.*src \([0-9.]*\).*/\1/p' || true)
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Display config ---
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo " Reflector — Standalone GPU Host Setup"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo " Mode: $(if [[ "$USE_CPU" == "true" ]]; then echo "CPU-only"; else echo "NVIDIA GPU"; fi)"
|
||||
echo " Caddy: $USE_CADDY"
|
||||
[[ -n "$CUSTOM_DOMAIN" ]] && echo " Domain: $CUSTOM_DOMAIN"
|
||||
[[ "$USE_CUSTOM_CA" == "true" ]] && echo " CA: Custom"
|
||||
[[ -n "$TLS_CERT_PATH" ]] && echo " TLS: Custom cert"
|
||||
[[ -n "$API_KEY" ]] && echo " Auth: API key protected"
|
||||
[[ -z "$API_KEY" ]] && echo " Auth: NONE (open access — use --api-key for production!)"
|
||||
echo " Port: $HOST_PORT"
|
||||
echo ""
|
||||
|
||||
# --- Prerequisites ---
|
||||
info "Checking prerequisites"
|
||||
|
||||
if ! command -v docker &>/dev/null; then
|
||||
err "Docker not found. Install Docker first."
|
||||
exit 1
|
||||
fi
|
||||
ok "Docker available"
|
||||
|
||||
if ! docker compose version &>/dev/null; then
|
||||
err "Docker Compose V2 not found."
|
||||
exit 1
|
||||
fi
|
||||
ok "Docker Compose V2 available"
|
||||
|
||||
if [[ "$USE_CPU" != "true" ]]; then
|
||||
if ! docker info 2>/dev/null | grep -qi nvidia; then
|
||||
warn "NVIDIA runtime not detected in Docker. GPU mode may fail."
|
||||
warn "Install nvidia-container-toolkit if you have an NVIDIA GPU."
|
||||
else
|
||||
ok "NVIDIA Docker runtime available"
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Stage certificates ---
|
||||
CERTS_DIR="$ROOT_DIR/certs"
|
||||
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
|
||||
info "Staging certificates"
|
||||
mkdir -p "$CERTS_DIR"
|
||||
|
||||
if [[ -n "$CA_CERT_PATH" ]]; then
|
||||
local_ca_dest="$CERTS_DIR/ca.crt"
|
||||
src_id=$(ls -i "$CA_CERT_PATH" 2>/dev/null | awk '{print $1}')
|
||||
dst_id=$(ls -i "$local_ca_dest" 2>/dev/null | awk '{print $1}')
|
||||
if [[ "$src_id" != "$dst_id" ]] || [[ -z "$dst_id" ]]; then
|
||||
cp "$CA_CERT_PATH" "$local_ca_dest"
|
||||
fi
|
||||
chmod 644 "$local_ca_dest"
|
||||
ok "CA certificate staged"
|
||||
|
||||
# Append extra CAs
|
||||
for extra_ca in "${EXTRA_CA_FILES[@]+"${EXTRA_CA_FILES[@]}"}"; do
|
||||
echo "" >> "$local_ca_dest"
|
||||
cat "$extra_ca" >> "$local_ca_dest"
|
||||
ok "Appended extra CA: $extra_ca"
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ -n "$TLS_CERT_PATH" ]]; then
|
||||
cert_dest="$CERTS_DIR/server.pem"
|
||||
key_dest="$CERTS_DIR/server-key.pem"
|
||||
src_id=$(ls -i "$TLS_CERT_PATH" 2>/dev/null | awk '{print $1}')
|
||||
dst_id=$(ls -i "$cert_dest" 2>/dev/null | awk '{print $1}')
|
||||
if [[ "$src_id" != "$dst_id" ]] || [[ -z "$dst_id" ]]; then
|
||||
cp "$TLS_CERT_PATH" "$cert_dest"
|
||||
cp "$TLS_KEY_PATH" "$key_dest"
|
||||
fi
|
||||
chmod 644 "$cert_dest"
|
||||
chmod 600 "$key_dest"
|
||||
ok "TLS cert/key staged"
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Build profiles and compose command ---
|
||||
COMPOSE_FILE="$ROOT_DIR/docker-compose.gpu-host.yml"
|
||||
COMPOSE_PROFILES=()
|
||||
GPU_SERVICE="gpu"
|
||||
|
||||
if [[ "$USE_CPU" == "true" ]]; then
|
||||
COMPOSE_PROFILES+=("cpu")
|
||||
GPU_SERVICE="cpu"
|
||||
else
|
||||
COMPOSE_PROFILES+=("gpu")
|
||||
fi
|
||||
if [[ "$USE_CADDY" == "true" ]]; then
|
||||
COMPOSE_PROFILES+=("caddy")
|
||||
fi
|
||||
|
||||
# Compose command helper
|
||||
compose_cmd() {
|
||||
local profiles="" files="-f $COMPOSE_FILE"
|
||||
if [[ "$USE_CUSTOM_CA" == "true" ]] && [[ -f "$ROOT_DIR/docker-compose.gpu-ca.yml" ]]; then
|
||||
files="$files -f $ROOT_DIR/docker-compose.gpu-ca.yml"
|
||||
fi
|
||||
for p in "${COMPOSE_PROFILES[@]}"; do
|
||||
profiles="$profiles --profile $p"
|
||||
done
|
||||
docker compose $files $profiles "$@"
|
||||
}
|
||||
|
||||
# Generate CA compose override if needed (mounts certs into containers)
|
||||
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
|
||||
info "Generating docker-compose.gpu-ca.yml override"
|
||||
ca_override="$ROOT_DIR/docker-compose.gpu-ca.yml"
|
||||
cat > "$ca_override" << 'CAEOF'
|
||||
# Generated by setup-gpu-host.sh — custom CA trust.
|
||||
# Do not edit manually; re-run setup-gpu-host.sh with --custom-ca to regenerate.
|
||||
services:
|
||||
gpu:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
cpu:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
CAEOF
|
||||
|
||||
if [[ -n "$TLS_CERT_PATH" ]]; then
|
||||
cat >> "$ca_override" << 'CADDYCAEOF'
|
||||
caddy:
|
||||
volumes:
|
||||
- ./certs:/etc/caddy/certs:ro
|
||||
CADDYCAEOF
|
||||
fi
|
||||
ok "Generated docker-compose.gpu-ca.yml"
|
||||
else
|
||||
rm -f "$ROOT_DIR/docker-compose.gpu-ca.yml"
|
||||
fi
|
||||
|
||||
# --- Generate Caddyfile ---
|
||||
if [[ "$USE_CADDY" == "true" ]]; then
|
||||
info "Generating Caddyfile.gpu-host"
|
||||
|
||||
CADDYFILE="$ROOT_DIR/Caddyfile.gpu-host"
|
||||
|
||||
if [[ -n "$TLS_CERT_PATH" ]] && [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
cat > "$CADDYFILE" << CADDYEOF
|
||||
# Generated by setup-gpu-host.sh — Custom TLS cert for $CUSTOM_DOMAIN
|
||||
$CUSTOM_DOMAIN {
|
||||
tls /etc/caddy/certs/server.pem /etc/caddy/certs/server-key.pem
|
||||
reverse_proxy transcription:8000
|
||||
}
|
||||
CADDYEOF
|
||||
ok "Caddyfile: custom TLS for $CUSTOM_DOMAIN"
|
||||
elif [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
cat > "$CADDYFILE" << CADDYEOF
|
||||
# Generated by setup-gpu-host.sh — Let's Encrypt for $CUSTOM_DOMAIN
|
||||
$CUSTOM_DOMAIN {
|
||||
reverse_proxy transcription:8000
|
||||
}
|
||||
CADDYEOF
|
||||
ok "Caddyfile: Let's Encrypt for $CUSTOM_DOMAIN"
|
||||
else
|
||||
cat > "$CADDYFILE" << 'CADDYEOF'
|
||||
# Generated by setup-gpu-host.sh — self-signed cert for IP access
|
||||
:443 {
|
||||
tls internal
|
||||
reverse_proxy transcription:8000
|
||||
}
|
||||
CADDYEOF
|
||||
ok "Caddyfile: self-signed cert for IP access"
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- Generate .env ---
|
||||
info "Generating GPU service .env"
|
||||
|
||||
GPU_ENV="$ROOT_DIR/.env.gpu-host"
|
||||
cat > "$GPU_ENV" << EOF
|
||||
# Generated by setup-gpu-host.sh
|
||||
# HuggingFace token for pyannote diarization models
|
||||
HF_TOKEN=${HF_TOKEN:-}
|
||||
# API key to protect the GPU service (set via --api-key)
|
||||
REFLECTOR_GPU_APIKEY=${API_KEY:-}
|
||||
# Port configuration
|
||||
GPU_HOST_PORT=${HOST_PORT}
|
||||
CADDY_HTTPS_PORT=${HOST_PORT}
|
||||
EOF
|
||||
|
||||
if [[ -z "${HF_TOKEN:-}" ]]; then
|
||||
warn "HF_TOKEN not set. Diarization requires a HuggingFace token."
|
||||
warn "Set it: export HF_TOKEN=your-token-here and re-run, or edit .env.gpu-host"
|
||||
fi
|
||||
|
||||
ok "Generated .env.gpu-host"
|
||||
|
||||
# --- Build and start ---
|
||||
info "Building $GPU_SERVICE image (first build downloads ML models — may take a while)..."
|
||||
compose_cmd --env-file "$GPU_ENV" build "$GPU_SERVICE"
|
||||
ok "$GPU_SERVICE image built"
|
||||
|
||||
info "Starting services..."
|
||||
compose_cmd --env-file "$GPU_ENV" up -d
|
||||
ok "Services started"
|
||||
|
||||
# --- Wait for health ---
|
||||
info "Waiting for GPU service to be healthy (model loading takes 1-2 minutes)..."
|
||||
local_url="http://localhost:8000"
|
||||
for i in $(seq 1 40); do
|
||||
if curl -sf "$local_url/docs" >/dev/null 2>&1; then
|
||||
ok "GPU service is healthy!"
|
||||
break
|
||||
fi
|
||||
if [[ $i -eq 40 ]]; then
|
||||
err "GPU service did not become healthy after 5 minutes."
|
||||
err "Check logs: docker compose -f docker-compose.gpu-host.yml logs gpu"
|
||||
exit 1
|
||||
fi
|
||||
sleep 8
|
||||
done
|
||||
|
||||
# --- Summary ---
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo -e " ${GREEN}GPU service is running!${NC}"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
if [[ "$USE_CADDY" == "true" ]]; then
|
||||
if [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
echo " URL: https://$CUSTOM_DOMAIN"
|
||||
elif [[ -n "$PRIMARY_IP" ]]; then
|
||||
echo " URL: https://$PRIMARY_IP"
|
||||
else
|
||||
echo " URL: https://localhost"
|
||||
fi
|
||||
else
|
||||
if [[ -n "$PRIMARY_IP" ]]; then
|
||||
echo " URL: http://$PRIMARY_IP:$HOST_PORT"
|
||||
else
|
||||
echo " URL: http://localhost:$HOST_PORT"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo " Health: curl \$(URL)/docs"
|
||||
[[ -n "$API_KEY" ]] && echo " API key: $API_KEY"
|
||||
echo ""
|
||||
echo " Configure the main Reflector instance (in server/.env):"
|
||||
echo ""
|
||||
|
||||
local_gpu_url=""
|
||||
if [[ "$USE_CADDY" == "true" ]]; then
|
||||
if [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
local_gpu_url="https://$CUSTOM_DOMAIN"
|
||||
elif [[ -n "$PRIMARY_IP" ]]; then
|
||||
local_gpu_url="https://$PRIMARY_IP"
|
||||
else
|
||||
local_gpu_url="https://localhost"
|
||||
fi
|
||||
else
|
||||
if [[ -n "$PRIMARY_IP" ]]; then
|
||||
local_gpu_url="http://$PRIMARY_IP:$HOST_PORT"
|
||||
else
|
||||
local_gpu_url="http://localhost:$HOST_PORT"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo " TRANSCRIPT_BACKEND=modal"
|
||||
echo " TRANSCRIPT_URL=$local_gpu_url"
|
||||
[[ -n "$API_KEY" ]] && echo " TRANSCRIPT_MODAL_API_KEY=$API_KEY"
|
||||
echo " DIARIZATION_BACKEND=modal"
|
||||
echo " DIARIZATION_URL=$local_gpu_url"
|
||||
[[ -n "$API_KEY" ]] && echo " DIARIZATION_MODAL_API_KEY=$API_KEY"
|
||||
echo " TRANSLATION_BACKEND=modal"
|
||||
echo " TRANSLATE_URL=$local_gpu_url"
|
||||
[[ -n "$API_KEY" ]] && echo " TRANSLATION_MODAL_API_KEY=$API_KEY"
|
||||
echo ""
|
||||
|
||||
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
|
||||
echo " The Reflector instance must also trust this CA."
|
||||
echo " On the Reflector machine, run setup-selfhosted.sh with:"
|
||||
echo " --extra-ca /path/to/this-machines-ca.crt"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
echo " DNS Resolution:"
|
||||
if [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
echo " Ensure '$CUSTOM_DOMAIN' resolves to this machine's IP."
|
||||
echo " Public: Create a DNS A record."
|
||||
echo " Internal: Add to /etc/hosts on the Reflector machine:"
|
||||
echo " ${PRIMARY_IP:-<GPU_IP>} $CUSTOM_DOMAIN"
|
||||
else
|
||||
echo " Use this machine's IP directly in TRANSCRIPT_URL/DIARIZATION_URL."
|
||||
fi
|
||||
echo ""
|
||||
echo " To stop: docker compose -f docker-compose.gpu-host.yml down"
|
||||
echo " To re-run: ./scripts/setup-gpu-host.sh $*"
|
||||
echo " Logs: docker compose -f docker-compose.gpu-host.yml logs -f gpu"
|
||||
echo ""
|
||||
@@ -4,7 +4,7 @@
|
||||
# Single script to configure and launch everything on one server.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/setup-selfhosted.sh <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASSWORD] [--build]
|
||||
# ./scripts/setup-selfhosted.sh <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--custom-ca PATH] [--password PASSWORD] [--build]
|
||||
#
|
||||
# ML processing modes (pick ONE — required):
|
||||
# --gpu NVIDIA GPU container for transcription/diarization/translation
|
||||
@@ -23,6 +23,13 @@
|
||||
# --domain DOMAIN Use a real domain for Caddy (enables Let's Encrypt auto-HTTPS)
|
||||
# Requires: DNS pointing to this server + ports 80/443 open
|
||||
# Without --domain: Caddy uses self-signed cert for IP access
|
||||
# --custom-ca PATH Custom CA certificate for private HTTPS services
|
||||
# PATH can be a directory (containing ca.crt, optionally server.pem + server-key.pem)
|
||||
# or a single PEM file (CA trust only, no Caddy TLS)
|
||||
# With server.pem+server-key.pem: Caddy serves HTTPS using those certs (requires --domain)
|
||||
# Without: only injects CA trust into backend containers for outbound calls
|
||||
# --extra-ca FILE Additional CA cert to trust (can be repeated for multiple CAs)
|
||||
# Appended to the CA bundle so backends trust multiple authorities
|
||||
# --password PASS Enable password auth with admin@localhost user
|
||||
# --build Build backend and frontend images from source instead of pulling
|
||||
#
|
||||
@@ -35,6 +42,8 @@
|
||||
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy --password mysecretpass
|
||||
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy
|
||||
# ./scripts/setup-selfhosted.sh --cpu
|
||||
# ./scripts/setup-selfhosted.sh --gpu --caddy --domain reflector.local --custom-ca certs/
|
||||
# ./scripts/setup-selfhosted.sh --hosted --custom-ca /path/to/corporate-ca.crt
|
||||
#
|
||||
# The script auto-detects Daily.co (DAILY_API_KEY) and Whereby (WHEREBY_API_KEY)
|
||||
# from server/.env. If Daily.co is configured, Hatchet workflow services are
|
||||
@@ -154,16 +163,19 @@ env_set() {
|
||||
}
|
||||
|
||||
compose_cmd() {
|
||||
local profiles=""
|
||||
local profiles="" files="-f $COMPOSE_FILE"
|
||||
[[ "$USE_CUSTOM_CA" == "true" ]] && files="$files -f $ROOT_DIR/docker-compose.ca.yml"
|
||||
for p in "${COMPOSE_PROFILES[@]}"; do
|
||||
profiles="$profiles --profile $p"
|
||||
done
|
||||
docker compose -f "$COMPOSE_FILE" $profiles "$@"
|
||||
docker compose $files $profiles "$@"
|
||||
}
|
||||
|
||||
# Compose command with only garage profile (for garage-only operations before full stack start)
|
||||
compose_garage_cmd() {
|
||||
docker compose -f "$COMPOSE_FILE" --profile garage "$@"
|
||||
local files="-f $COMPOSE_FILE"
|
||||
[[ "$USE_CUSTOM_CA" == "true" ]] && files="$files -f $ROOT_DIR/docker-compose.ca.yml"
|
||||
docker compose $files --profile garage "$@"
|
||||
}
|
||||
|
||||
# --- Parse arguments ---
|
||||
@@ -174,6 +186,9 @@ USE_CADDY=false
|
||||
CUSTOM_DOMAIN="" # optional domain for Let's Encrypt HTTPS
|
||||
BUILD_IMAGES=false # build backend/frontend from source
|
||||
ADMIN_PASSWORD="" # optional admin password for password auth
|
||||
CUSTOM_CA="" # --custom-ca: path to dir or CA cert file
|
||||
USE_CUSTOM_CA=false # derived flag: true when --custom-ca is provided
|
||||
EXTRA_CA_FILES=() # --extra-ca: additional CA certs to trust (can be repeated)
|
||||
|
||||
SKIP_NEXT=false
|
||||
ARGS=("$@")
|
||||
@@ -227,18 +242,95 @@ for i in "${!ARGS[@]}"; do
|
||||
CUSTOM_DOMAIN="${ARGS[$next_i]}"
|
||||
USE_CADDY=true # --domain implies --caddy
|
||||
SKIP_NEXT=true ;;
|
||||
--custom-ca)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--custom-ca requires a path to a directory or PEM certificate file"
|
||||
exit 1
|
||||
fi
|
||||
CUSTOM_CA="${ARGS[$next_i]}"
|
||||
USE_CUSTOM_CA=true
|
||||
SKIP_NEXT=true ;;
|
||||
--extra-ca)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--extra-ca requires a path to a PEM certificate file"
|
||||
exit 1
|
||||
fi
|
||||
extra_ca_file="${ARGS[$next_i]}"
|
||||
if [[ ! -f "$extra_ca_file" ]]; then
|
||||
err "--extra-ca file not found: $extra_ca_file"
|
||||
exit 1
|
||||
fi
|
||||
EXTRA_CA_FILES+=("$extra_ca_file")
|
||||
USE_CUSTOM_CA=true
|
||||
SKIP_NEXT=true ;;
|
||||
*)
|
||||
err "Unknown argument: $arg"
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASS] [--build]"
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--custom-ca PATH] [--password PASS] [--build]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# --- Resolve --custom-ca flag ---
|
||||
CA_CERT_PATH="" # resolved path to CA certificate
|
||||
TLS_CERT_PATH="" # resolved path to server cert (optional, for Caddy TLS)
|
||||
TLS_KEY_PATH="" # resolved path to server key (optional, for Caddy TLS)
|
||||
|
||||
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
|
||||
# Strip trailing slashes to avoid double-slash paths
|
||||
CUSTOM_CA="${CUSTOM_CA%/}"
|
||||
|
||||
if [[ -z "$CUSTOM_CA" ]] && [[ -n "${EXTRA_CA_FILES[0]+x}" ]]; then
|
||||
# --extra-ca only (no --custom-ca): use first extra CA as the base
|
||||
CA_CERT_PATH="${EXTRA_CA_FILES[0]}"
|
||||
unset 'EXTRA_CA_FILES[0]'
|
||||
EXTRA_CA_FILES=("${EXTRA_CA_FILES[@]+"${EXTRA_CA_FILES[@]}"}")
|
||||
elif [[ -d "$CUSTOM_CA" ]]; then
|
||||
# Directory mode: look for convention files
|
||||
if [[ ! -f "$CUSTOM_CA/ca.crt" ]]; then
|
||||
err "CA certificate not found: $CUSTOM_CA/ca.crt"
|
||||
err "Directory must contain ca.crt (and optionally server.pem + server-key.pem)"
|
||||
exit 1
|
||||
fi
|
||||
CA_CERT_PATH="$CUSTOM_CA/ca.crt"
|
||||
# Server cert/key are optional — if both present, use for Caddy TLS
|
||||
if [[ -f "$CUSTOM_CA/server.pem" ]] && [[ -f "$CUSTOM_CA/server-key.pem" ]]; then
|
||||
TLS_CERT_PATH="$CUSTOM_CA/server.pem"
|
||||
TLS_KEY_PATH="$CUSTOM_CA/server-key.pem"
|
||||
elif [[ -f "$CUSTOM_CA/server.pem" ]] || [[ -f "$CUSTOM_CA/server-key.pem" ]]; then
|
||||
warn "Found only one of server.pem/server-key.pem in $CUSTOM_CA — both are needed for Caddy TLS. Skipping."
|
||||
fi
|
||||
elif [[ -f "$CUSTOM_CA" ]]; then
|
||||
# Single file mode: CA trust only (no Caddy TLS certs)
|
||||
CA_CERT_PATH="$CUSTOM_CA"
|
||||
else
|
||||
err "--custom-ca path not found: $CUSTOM_CA"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Validate PEM format
|
||||
if ! head -1 "$CA_CERT_PATH" | grep -q "BEGIN"; then
|
||||
err "CA certificate does not appear to be PEM format: $CA_CERT_PATH"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# If server cert/key found, require --domain and imply --caddy
|
||||
if [[ -n "$TLS_CERT_PATH" ]]; then
|
||||
if [[ -z "$CUSTOM_DOMAIN" ]]; then
|
||||
err "Server cert/key found in $CUSTOM_CA but --domain not set."
|
||||
err "Provide --domain to specify the domain name matching the certificate."
|
||||
exit 1
|
||||
fi
|
||||
USE_CADDY=true # custom TLS certs imply --caddy
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "$MODEL_MODE" ]]; then
|
||||
err "No model mode specified. You must choose --gpu, --cpu, or --hosted."
|
||||
err ""
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASS] [--build]"
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--custom-ca PATH] [--password PASS] [--build]"
|
||||
err ""
|
||||
err "ML processing modes (required):"
|
||||
err " --gpu NVIDIA GPU container for transcription/diarization/translation"
|
||||
@@ -255,15 +347,19 @@ if [[ -z "$MODEL_MODE" ]]; then
|
||||
err " --garage Local S3-compatible storage (Garage)"
|
||||
err " --caddy Caddy reverse proxy with self-signed cert"
|
||||
err " --domain DOMAIN Use a real domain with Let's Encrypt HTTPS (implies --caddy)"
|
||||
err " --custom-ca PATH Custom CA cert (dir with ca.crt[+server.pem+server-key.pem] or single PEM file)"
|
||||
err " --extra-ca FILE Additional CA cert to trust (repeatable for multiple CAs)"
|
||||
err " --password PASS Enable password auth (admin@localhost) instead of public mode"
|
||||
err " --build Build backend/frontend images from source instead of pulling"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Build profiles list — one profile per feature
|
||||
# Only --gpu needs a compose profile; --cpu and --hosted use in-process/remote backends
|
||||
# Hatchet + hatchet-worker-llm are always-on (no profile needed).
|
||||
# gpu/cpu profiles only control the ML container (transcription service).
|
||||
COMPOSE_PROFILES=()
|
||||
[[ "$MODEL_MODE" == "gpu" ]] && COMPOSE_PROFILES+=("gpu")
|
||||
[[ "$MODEL_MODE" == "cpu" ]] && COMPOSE_PROFILES+=("cpu")
|
||||
[[ -n "$OLLAMA_MODE" ]] && COMPOSE_PROFILES+=("$OLLAMA_MODE")
|
||||
[[ "$USE_GARAGE" == "true" ]] && COMPOSE_PROFILES+=("garage")
|
||||
[[ "$USE_CADDY" == "true" ]] && COMPOSE_PROFILES+=("caddy")
|
||||
@@ -364,6 +460,103 @@ print(f'pbkdf2:sha256:100000\$\$' + salt + '\$\$' + dk.hex())
|
||||
ok "Secrets ready"
|
||||
}
|
||||
|
||||
# =========================================================
|
||||
# Step 1b: Custom CA certificate setup
|
||||
# =========================================================
|
||||
step_custom_ca() {
|
||||
if [[ "$USE_CUSTOM_CA" != "true" ]]; then
|
||||
# Clean up stale override from previous runs
|
||||
rm -f "$ROOT_DIR/docker-compose.ca.yml"
|
||||
return
|
||||
fi
|
||||
|
||||
info "Configuring custom CA certificate"
|
||||
local certs_dir="$ROOT_DIR/certs"
|
||||
mkdir -p "$certs_dir"
|
||||
|
||||
# Stage CA certificate (skip copy if source and dest are the same file)
|
||||
local ca_dest="$certs_dir/ca.crt"
|
||||
local src_id dst_id
|
||||
src_id=$(ls -i "$CA_CERT_PATH" 2>/dev/null | awk '{print $1}')
|
||||
dst_id=$(ls -i "$ca_dest" 2>/dev/null | awk '{print $1}')
|
||||
if [[ "$src_id" != "$dst_id" ]] || [[ -z "$dst_id" ]]; then
|
||||
cp "$CA_CERT_PATH" "$ca_dest"
|
||||
fi
|
||||
chmod 644 "$ca_dest"
|
||||
ok "CA certificate staged at certs/ca.crt"
|
||||
|
||||
# Append extra CA certs (--extra-ca flags)
|
||||
for extra_ca in "${EXTRA_CA_FILES[@]+"${EXTRA_CA_FILES[@]}"}"; do
|
||||
if ! head -1 "$extra_ca" | grep -q "BEGIN"; then
|
||||
warn "Skipping $extra_ca — does not appear to be PEM format"
|
||||
continue
|
||||
fi
|
||||
echo "" >> "$ca_dest"
|
||||
cat "$extra_ca" >> "$ca_dest"
|
||||
ok "Appended extra CA: $extra_ca"
|
||||
done
|
||||
|
||||
# Stage TLS cert/key if present (for Caddy)
|
||||
if [[ -n "$TLS_CERT_PATH" ]]; then
|
||||
local cert_dest="$certs_dir/server.pem"
|
||||
local key_dest="$certs_dir/server-key.pem"
|
||||
src_id=$(ls -i "$TLS_CERT_PATH" 2>/dev/null | awk '{print $1}')
|
||||
dst_id=$(ls -i "$cert_dest" 2>/dev/null | awk '{print $1}')
|
||||
if [[ "$src_id" != "$dst_id" ]] || [[ -z "$dst_id" ]]; then
|
||||
cp "$TLS_CERT_PATH" "$cert_dest"
|
||||
cp "$TLS_KEY_PATH" "$key_dest"
|
||||
fi
|
||||
chmod 644 "$cert_dest"
|
||||
chmod 600 "$key_dest"
|
||||
ok "TLS cert/key staged at certs/server.pem, certs/server-key.pem"
|
||||
fi
|
||||
|
||||
# Generate docker-compose.ca.yml override
|
||||
local ca_override="$ROOT_DIR/docker-compose.ca.yml"
|
||||
cat > "$ca_override" << 'CAEOF'
|
||||
# Generated by setup-selfhosted.sh — custom CA trust for backend services.
|
||||
# Do not edit manually; re-run setup-selfhosted.sh with --custom-ca to regenerate.
|
||||
services:
|
||||
server:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
worker:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
beat:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
hatchet-worker-llm:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
hatchet-worker-cpu:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
gpu:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
cpu:
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
web:
|
||||
environment:
|
||||
NODE_EXTRA_CA_CERTS: /usr/local/share/ca-certificates/custom-ca.crt
|
||||
volumes:
|
||||
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
|
||||
CAEOF
|
||||
|
||||
# If TLS cert/key present, also mount certs dir into Caddy
|
||||
if [[ -n "$TLS_CERT_PATH" ]]; then
|
||||
cat >> "$ca_override" << 'CADDYCAEOF'
|
||||
caddy:
|
||||
volumes:
|
||||
- ./certs:/etc/caddy/certs:ro
|
||||
CADDYCAEOF
|
||||
fi
|
||||
|
||||
ok "Generated docker-compose.ca.yml override"
|
||||
}
|
||||
|
||||
# =========================================================
|
||||
# Step 2: Generate server/.env
|
||||
# =========================================================
|
||||
@@ -557,12 +750,10 @@ step_server_env() {
|
||||
ok "CPU mode — file processing timeouts set to 3600s (1 hour)"
|
||||
fi
|
||||
|
||||
# If Daily.co is manually configured, ensure Hatchet connectivity vars are set
|
||||
if env_has_key "$SERVER_ENV" "DAILY_API_KEY" && [[ -n "$(env_get "$SERVER_ENV" "DAILY_API_KEY")" ]]; then
|
||||
env_set "$SERVER_ENV" "HATCHET_CLIENT_SERVER_URL" "http://hatchet:8888"
|
||||
env_set "$SERVER_ENV" "HATCHET_CLIENT_HOST_PORT" "hatchet:7077"
|
||||
ok "Daily.co detected — Hatchet connectivity configured"
|
||||
fi
|
||||
# Hatchet is always required (file, live, and multitrack pipelines all use it)
|
||||
env_set "$SERVER_ENV" "HATCHET_CLIENT_SERVER_URL" "http://hatchet:8888"
|
||||
env_set "$SERVER_ENV" "HATCHET_CLIENT_HOST_PORT" "hatchet:7077"
|
||||
ok "Hatchet connectivity configured (workflow engine for processing pipelines)"
|
||||
|
||||
ok "server/.env ready"
|
||||
}
|
||||
@@ -799,7 +990,25 @@ step_caddyfile() {
|
||||
rm -rf "$caddyfile"
|
||||
fi
|
||||
|
||||
if [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
if [[ -n "$TLS_CERT_PATH" ]] && [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
# Custom domain with user-provided TLS certificate (from --custom-ca directory)
|
||||
cat > "$caddyfile" << CADDYEOF
|
||||
# Generated by setup-selfhosted.sh — Custom TLS cert for $CUSTOM_DOMAIN
|
||||
$CUSTOM_DOMAIN {
|
||||
tls /etc/caddy/certs/server.pem /etc/caddy/certs/server-key.pem
|
||||
handle /v1/* {
|
||||
reverse_proxy server:1250
|
||||
}
|
||||
handle /health {
|
||||
reverse_proxy server:1250
|
||||
}
|
||||
handle {
|
||||
reverse_proxy web:3000
|
||||
}
|
||||
}
|
||||
CADDYEOF
|
||||
ok "Created Caddyfile for $CUSTOM_DOMAIN (custom TLS certificate)"
|
||||
elif [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
# Real domain: Caddy auto-provisions Let's Encrypt certificate
|
||||
cat > "$caddyfile" << CADDYEOF
|
||||
# Generated by setup-selfhosted.sh — Let's Encrypt HTTPS for $CUSTOM_DOMAIN
|
||||
@@ -886,15 +1095,22 @@ step_services() {
|
||||
compose_cmd pull server web || warn "Pull failed — using cached images"
|
||||
fi
|
||||
|
||||
# Build hatchet workers if Daily.co is configured (same backend image)
|
||||
if [[ "$DAILY_DETECTED" == "true" ]] && [[ "$BUILD_IMAGES" == "true" ]]; then
|
||||
# Hatchet is always needed (all processing pipelines use it)
|
||||
local NEEDS_HATCHET=true
|
||||
|
||||
# Build hatchet workers if Hatchet is needed (same backend image)
|
||||
if [[ "$NEEDS_HATCHET" == "true" ]] && [[ "$BUILD_IMAGES" == "true" ]]; then
|
||||
info "Building Hatchet worker images..."
|
||||
compose_cmd build hatchet-worker-cpu hatchet-worker-llm
|
||||
if [[ "$DAILY_DETECTED" == "true" ]]; then
|
||||
compose_cmd build hatchet-worker-cpu hatchet-worker-llm
|
||||
else
|
||||
compose_cmd build hatchet-worker-llm
|
||||
fi
|
||||
ok "Hatchet worker images built"
|
||||
fi
|
||||
|
||||
# Ensure hatchet database exists before starting hatchet (init-hatchet-db.sql only runs on fresh postgres volumes)
|
||||
if [[ "$DAILY_DETECTED" == "true" ]]; then
|
||||
if [[ "$NEEDS_HATCHET" == "true" ]]; then
|
||||
info "Ensuring postgres is running for Hatchet database setup..."
|
||||
compose_cmd up -d postgres
|
||||
local pg_ready=false
|
||||
@@ -1049,24 +1265,22 @@ step_health() {
|
||||
fi
|
||||
fi
|
||||
|
||||
# Hatchet (if Daily.co detected)
|
||||
if [[ "$DAILY_DETECTED" == "true" ]]; then
|
||||
info "Waiting for Hatchet workflow engine..."
|
||||
local hatchet_ok=false
|
||||
for i in $(seq 1 60); do
|
||||
if curl -sf http://localhost:8888/api/live > /dev/null 2>&1; then
|
||||
hatchet_ok=true
|
||||
break
|
||||
fi
|
||||
echo -ne "\r Waiting for Hatchet... ($i/60)"
|
||||
sleep 3
|
||||
done
|
||||
echo ""
|
||||
if [[ "$hatchet_ok" == "true" ]]; then
|
||||
ok "Hatchet workflow engine healthy"
|
||||
else
|
||||
warn "Hatchet not ready yet. Check: docker compose logs hatchet"
|
||||
# Hatchet (always-on)
|
||||
info "Waiting for Hatchet workflow engine..."
|
||||
local hatchet_ok=false
|
||||
for i in $(seq 1 60); do
|
||||
if curl -sf http://localhost:8888/api/live > /dev/null 2>&1; then
|
||||
hatchet_ok=true
|
||||
break
|
||||
fi
|
||||
echo -ne "\r Waiting for Hatchet... ($i/60)"
|
||||
sleep 3
|
||||
done
|
||||
echo ""
|
||||
if [[ "$hatchet_ok" == "true" ]]; then
|
||||
ok "Hatchet workflow engine healthy"
|
||||
else
|
||||
warn "Hatchet not ready yet. Check: docker compose logs hatchet"
|
||||
fi
|
||||
|
||||
# LLM warning for non-Ollama modes
|
||||
@@ -1087,12 +1301,10 @@ step_health() {
|
||||
}
|
||||
|
||||
# =========================================================
|
||||
# Step 8: Hatchet token generation (Daily.co only)
|
||||
# Step 8: Hatchet token generation (gpu/cpu/Daily.co)
|
||||
# =========================================================
|
||||
step_hatchet_token() {
|
||||
if [[ "$DAILY_DETECTED" != "true" ]]; then
|
||||
return
|
||||
fi
|
||||
# Hatchet is always required — no gating needed
|
||||
|
||||
# Skip if token already set
|
||||
if env_has_key "$SERVER_ENV" "HATCHET_CLIENT_TOKEN" && [[ -n "$(env_get "$SERVER_ENV" "HATCHET_CLIENT_TOKEN")" ]]; then
|
||||
@@ -1147,7 +1359,9 @@ step_hatchet_token() {
|
||||
|
||||
# Restart services that need the token
|
||||
info "Restarting services with new Hatchet token..."
|
||||
compose_cmd restart server worker hatchet-worker-cpu hatchet-worker-llm
|
||||
local restart_services="server worker hatchet-worker-llm"
|
||||
[[ "$DAILY_DETECTED" == "true" ]] && restart_services="$restart_services hatchet-worker-cpu"
|
||||
compose_cmd restart $restart_services
|
||||
ok "Services restarted with Hatchet token"
|
||||
}
|
||||
|
||||
@@ -1165,6 +1379,8 @@ main() {
|
||||
echo " Garage: $USE_GARAGE"
|
||||
echo " Caddy: $USE_CADDY"
|
||||
[[ -n "$CUSTOM_DOMAIN" ]] && echo " Domain: $CUSTOM_DOMAIN"
|
||||
[[ "$USE_CUSTOM_CA" == "true" ]] && echo " CA: Custom ($CUSTOM_CA)"
|
||||
[[ -n "$TLS_CERT_PATH" ]] && echo " TLS: Custom cert (from $CUSTOM_CA)"
|
||||
[[ "$BUILD_IMAGES" == "true" ]] && echo " Build: from source"
|
||||
echo ""
|
||||
|
||||
@@ -1195,6 +1411,8 @@ main() {
|
||||
echo ""
|
||||
step_secrets
|
||||
echo ""
|
||||
step_custom_ca
|
||||
echo ""
|
||||
step_server_env
|
||||
echo ""
|
||||
|
||||
@@ -1216,28 +1434,23 @@ main() {
|
||||
ok "Daily.co detected — enabling Hatchet workflow services"
|
||||
fi
|
||||
|
||||
# Generate .env.hatchet for hatchet dashboard config
|
||||
if [[ "$DAILY_DETECTED" == "true" ]]; then
|
||||
local hatchet_server_url hatchet_cookie_domain
|
||||
if [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
hatchet_server_url="https://${CUSTOM_DOMAIN}:8888"
|
||||
hatchet_cookie_domain="$CUSTOM_DOMAIN"
|
||||
elif [[ -n "$PRIMARY_IP" ]]; then
|
||||
hatchet_server_url="http://${PRIMARY_IP}:8888"
|
||||
hatchet_cookie_domain="$PRIMARY_IP"
|
||||
else
|
||||
hatchet_server_url="http://localhost:8888"
|
||||
hatchet_cookie_domain="localhost"
|
||||
fi
|
||||
cat > "$ROOT_DIR/.env.hatchet" << EOF
|
||||
# Generate .env.hatchet for hatchet dashboard config (always needed)
|
||||
local hatchet_server_url hatchet_cookie_domain
|
||||
if [[ -n "$CUSTOM_DOMAIN" ]]; then
|
||||
hatchet_server_url="https://${CUSTOM_DOMAIN}:8888"
|
||||
hatchet_cookie_domain="$CUSTOM_DOMAIN"
|
||||
elif [[ -n "$PRIMARY_IP" ]]; then
|
||||
hatchet_server_url="http://${PRIMARY_IP}:8888"
|
||||
hatchet_cookie_domain="$PRIMARY_IP"
|
||||
else
|
||||
hatchet_server_url="http://localhost:8888"
|
||||
hatchet_cookie_domain="localhost"
|
||||
fi
|
||||
cat > "$ROOT_DIR/.env.hatchet" << EOF
|
||||
SERVER_URL=$hatchet_server_url
|
||||
SERVER_AUTH_COOKIE_DOMAIN=$hatchet_cookie_domain
|
||||
EOF
|
||||
ok "Generated .env.hatchet (dashboard URL=$hatchet_server_url)"
|
||||
else
|
||||
# Create empty .env.hatchet so compose doesn't fail if dailyco profile is ever activated manually
|
||||
touch "$ROOT_DIR/.env.hatchet"
|
||||
fi
|
||||
ok "Generated .env.hatchet (dashboard URL=$hatchet_server_url)"
|
||||
|
||||
step_www_env
|
||||
echo ""
|
||||
@@ -1282,7 +1495,17 @@ EOF
|
||||
[[ "$DAILY_DETECTED" == "true" ]] && echo " Video: Daily.co (live rooms + multitrack processing via Hatchet)"
|
||||
[[ "$WHEREBY_DETECTED" == "true" ]] && echo " Video: Whereby (live rooms)"
|
||||
[[ "$ANY_PLATFORM_DETECTED" != "true" ]] && echo " Video: None (rooms disabled)"
|
||||
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
|
||||
echo " CA: Custom (certs/ca.crt)"
|
||||
[[ -n "$TLS_CERT_PATH" ]] && echo " TLS: Custom cert (certs/server.pem)"
|
||||
fi
|
||||
echo ""
|
||||
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
|
||||
echo " NOTE: Clients must trust the CA certificate to avoid browser warnings."
|
||||
echo " CA cert location: certs/ca.crt"
|
||||
echo " See docsv2/custom-ca-setup.md for instructions."
|
||||
echo ""
|
||||
fi
|
||||
echo " To stop: docker compose -f docker-compose.selfhosted.yml down"
|
||||
echo " To re-run: ./scripts/setup-selfhosted.sh $*"
|
||||
echo ""
|
||||
|
||||
@@ -6,7 +6,7 @@ ENV PYTHONUNBUFFERED=1 \
|
||||
|
||||
# builder install base dependencies
|
||||
WORKDIR /tmp
|
||||
RUN apt-get update && apt-get install -y curl ffmpeg && apt-get clean
|
||||
RUN apt-get update && apt-get install -y curl ffmpeg ca-certificates && apt-get clean
|
||||
ADD https://astral.sh/uv/install.sh /uv-installer.sh
|
||||
RUN sh /uv-installer.sh && rm /uv-installer.sh
|
||||
ENV PATH="/root/.local/bin/:$PATH"
|
||||
@@ -18,7 +18,7 @@ COPY pyproject.toml uv.lock README.md /app/
|
||||
RUN uv sync --compile-bytecode --locked
|
||||
|
||||
# bootstrap
|
||||
COPY alembic.ini runserver.sh /app/
|
||||
COPY alembic.ini docker-entrypoint.sh runserver.sh /app/
|
||||
COPY images /app/images
|
||||
COPY migrations /app/migrations
|
||||
COPY reflector /app/reflector
|
||||
@@ -35,4 +35,6 @@ RUN if [ "$(uname -m)" = "aarch64" ] && [ ! -f /usr/lib/libgomp.so.1 ]; then \
|
||||
# Pre-check just to make sure the image will not fail
|
||||
RUN uv run python -c "import silero_vad.model"
|
||||
|
||||
CMD ["./runserver.sh"]
|
||||
RUN chmod +x /app/docker-entrypoint.sh
|
||||
|
||||
CMD ["./docker-entrypoint.sh"]
|
||||
|
||||
22
server/docker-entrypoint.sh
Normal file
22
server/docker-entrypoint.sh
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Custom CA certificate injection
|
||||
# If a CA cert is mounted at this path (via docker-compose.ca.yml),
|
||||
# add it to the system trust store and configure all Python SSL libraries.
|
||||
CUSTOM_CA_PATH="/usr/local/share/ca-certificates/custom-ca.crt"
|
||||
|
||||
if [ -s "$CUSTOM_CA_PATH" ]; then
|
||||
echo "[entrypoint] Custom CA certificate detected, updating trust store..."
|
||||
update-ca-certificates 2>/dev/null
|
||||
|
||||
# update-ca-certificates creates a combined bundle (system + custom CAs)
|
||||
COMBINED_BUNDLE="/etc/ssl/certs/ca-certificates.crt"
|
||||
export SSL_CERT_FILE="$COMBINED_BUNDLE"
|
||||
export REQUESTS_CA_BUNDLE="$COMBINED_BUNDLE"
|
||||
export CURL_CA_BUNDLE="$COMBINED_BUNDLE"
|
||||
export GRPC_DEFAULT_SSL_ROOTS_FILE_PATH="$COMBINED_BUNDLE"
|
||||
echo "[entrypoint] CA trust store updated (SSL_CERT_FILE=$COMBINED_BUNDLE)"
|
||||
fi
|
||||
|
||||
exec ./runserver.sh
|
||||
@@ -419,3 +419,18 @@ User-room broadcasts to `user:{user_id}`:
|
||||
- `TRANSCRIPT_STATUS`
|
||||
- `TRANSCRIPT_FINAL_TITLE`
|
||||
- `TRANSCRIPT_DURATION`
|
||||
|
||||
## Failed Runs Monitor (Hatchet Cron)
|
||||
|
||||
A `FailedRunsMonitor` Hatchet cron workflow runs hourly (`0 * * * *`) and checks for failed pipeline runs
|
||||
(DiarizationPipeline, FilePipeline, LivePostProcessingPipeline) in the last hour. For each failed run,
|
||||
it renders a DAG status overview and posts it to Zulip.
|
||||
|
||||
**Required env vars** (all must be set to enable):
|
||||
- `ZULIP_REALM` — Zulip server hostname
|
||||
- `ZULIP_API_KEY` — Zulip bot API key
|
||||
- `ZULIP_BOT_EMAIL` — Zulip bot email
|
||||
- `ZULIP_DAG_STREAM` — Zulip stream for alerts
|
||||
- `ZULIP_DAG_TOPIC` — Zulip topic for alerts
|
||||
|
||||
If any of these are unset, the monitor workflow is not registered with the Hatchet worker.
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
"""add soft delete fields to transcript and recording
|
||||
|
||||
Revision ID: 501c73a6b0d5
|
||||
Revises: e1f093f7f124
|
||||
Create Date: 2026-03-19 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "501c73a6b0d5"
|
||||
down_revision: Union[str, None] = "e1f093f7f124"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"transcript",
|
||||
sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
|
||||
)
|
||||
op.add_column(
|
||||
"recording",
|
||||
sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
|
||||
)
|
||||
op.create_index(
|
||||
"idx_transcript_not_deleted",
|
||||
"transcript",
|
||||
["id"],
|
||||
postgresql_where=sa.text("deleted_at IS NULL"),
|
||||
)
|
||||
op.create_index(
|
||||
"idx_recording_not_deleted",
|
||||
"recording",
|
||||
["id"],
|
||||
postgresql_where=sa.text("deleted_at IS NULL"),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("idx_recording_not_deleted", table_name="recording")
|
||||
op.drop_index("idx_transcript_not_deleted", table_name="transcript")
|
||||
op.drop_column("recording", "deleted_at")
|
||||
op.drop_column("transcript", "deleted_at")
|
||||
@@ -0,0 +1,29 @@
|
||||
"""add email_recipients to meeting
|
||||
|
||||
Revision ID: a2b3c4d5e6f7
|
||||
Revises: 501c73a6b0d5
|
||||
Create Date: 2026-03-20 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
|
||||
revision: str = "a2b3c4d5e6f7"
|
||||
down_revision: Union[str, None] = "501c73a6b0d5"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"meeting",
|
||||
sa.Column("email_recipients", JSONB, nullable=True),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("meeting", "email_recipients")
|
||||
@@ -0,0 +1,28 @@
|
||||
"""add email_transcript_to to room
|
||||
|
||||
Revision ID: b4c7e8f9a012
|
||||
Revises: a2b3c4d5e6f7
|
||||
Create Date: 2026-03-24 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision: str = "b4c7e8f9a012"
|
||||
down_revision: Union[str, None] = "a2b3c4d5e6f7"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"room",
|
||||
sa.Column("email_transcript_to", sa.String(), nullable=True),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("room", "email_transcript_to")
|
||||
@@ -40,6 +40,8 @@ dependencies = [
|
||||
"icalendar>=6.0.0",
|
||||
"hatchet-sdk==1.22.16",
|
||||
"pydantic>=2.12.5",
|
||||
"aiosmtplib>=3.0.0",
|
||||
"email-validator>=2.0.0",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
@@ -116,9 +118,10 @@ source = ["reflector"]
|
||||
ENVIRONMENT = "pytest"
|
||||
DATABASE_URL = "postgresql://test_user:test_password@localhost:15432/reflector_test"
|
||||
AUTH_BACKEND = "jwt"
|
||||
HATCHET_CLIENT_TOKEN = "test-dummy-token"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
|
||||
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v --ignore=tests/integration"
|
||||
testpaths = ["tests"]
|
||||
asyncio_mode = "auto"
|
||||
markers = [
|
||||
|
||||
@@ -13,18 +13,21 @@ from reflector.events import subscribers_shutdown, subscribers_startup
|
||||
from reflector.logger import logger
|
||||
from reflector.metrics import metrics_init
|
||||
from reflector.settings import settings
|
||||
from reflector.views.config import router as config_router
|
||||
from reflector.views.daily import router as daily_router
|
||||
from reflector.views.meetings import router as meetings_router
|
||||
from reflector.views.rooms import router as rooms_router
|
||||
from reflector.views.rtc_offer import router as rtc_offer_router
|
||||
from reflector.views.transcripts import router as transcripts_router
|
||||
from reflector.views.transcripts_audio import router as transcripts_audio_router
|
||||
from reflector.views.transcripts_download import router as transcripts_download_router
|
||||
from reflector.views.transcripts_participants import (
|
||||
router as transcripts_participants_router,
|
||||
)
|
||||
from reflector.views.transcripts_process import router as transcripts_process_router
|
||||
from reflector.views.transcripts_speaker import router as transcripts_speaker_router
|
||||
from reflector.views.transcripts_upload import router as transcripts_upload_router
|
||||
from reflector.views.transcripts_video import router as transcripts_video_router
|
||||
from reflector.views.transcripts_webrtc import router as transcripts_webrtc_router
|
||||
from reflector.views.transcripts_websocket import router as transcripts_websocket_router
|
||||
from reflector.views.user import router as user_router
|
||||
@@ -97,12 +100,15 @@ app.include_router(transcripts_audio_router, prefix="/v1")
|
||||
app.include_router(transcripts_participants_router, prefix="/v1")
|
||||
app.include_router(transcripts_speaker_router, prefix="/v1")
|
||||
app.include_router(transcripts_upload_router, prefix="/v1")
|
||||
app.include_router(transcripts_download_router, prefix="/v1")
|
||||
app.include_router(transcripts_video_router, prefix="/v1")
|
||||
app.include_router(transcripts_websocket_router, prefix="/v1")
|
||||
app.include_router(transcripts_webrtc_router, prefix="/v1")
|
||||
app.include_router(transcripts_process_router, prefix="/v1")
|
||||
app.include_router(user_router, prefix="/v1")
|
||||
app.include_router(user_api_keys_router, prefix="/v1")
|
||||
app.include_router(user_ws_router, prefix="/v1")
|
||||
app.include_router(config_router, prefix="/v1")
|
||||
app.include_router(zulip_router, prefix="/v1")
|
||||
app.include_router(whereby_router, prefix="/v1")
|
||||
app.include_router(daily_router, prefix="/v1/daily")
|
||||
|
||||
@@ -12,6 +12,7 @@ AccessTokenInfo = auth_module.AccessTokenInfo
|
||||
authenticated = auth_module.authenticated
|
||||
current_user = auth_module.current_user
|
||||
current_user_optional = auth_module.current_user_optional
|
||||
current_user_optional_if_public_mode = auth_module.current_user_optional_if_public_mode
|
||||
parse_ws_bearer_token = auth_module.parse_ws_bearer_token
|
||||
current_user_ws_optional = auth_module.current_user_ws_optional
|
||||
verify_raw_token = auth_module.verify_raw_token
|
||||
|
||||
@@ -129,6 +129,17 @@ async def current_user_optional(
|
||||
return await _authenticate_user(jwt_token, api_key, jwtauth)
|
||||
|
||||
|
||||
async def current_user_optional_if_public_mode(
|
||||
jwt_token: Annotated[Optional[str], Depends(oauth2_scheme)],
|
||||
api_key: Annotated[Optional[str], Depends(api_key_header)],
|
||||
jwtauth: JWTAuth = Depends(),
|
||||
) -> Optional[UserInfo]:
|
||||
user = await _authenticate_user(jwt_token, api_key, jwtauth)
|
||||
if user is None and not settings.PUBLIC_MODE:
|
||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||
return user
|
||||
|
||||
|
||||
def parse_ws_bearer_token(
|
||||
websocket: "WebSocket",
|
||||
) -> tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -21,6 +21,11 @@ def current_user_optional():
|
||||
return None
|
||||
|
||||
|
||||
def current_user_optional_if_public_mode():
|
||||
# auth_none means no authentication at all — always public
|
||||
return None
|
||||
|
||||
|
||||
def parse_ws_bearer_token(websocket):
|
||||
return None, None
|
||||
|
||||
|
||||
@@ -150,6 +150,16 @@ async def current_user_optional(
|
||||
return await _authenticate_user(jwt_token, api_key)
|
||||
|
||||
|
||||
async def current_user_optional_if_public_mode(
|
||||
jwt_token: Annotated[Optional[str], Depends(oauth2_scheme)],
|
||||
api_key: Annotated[Optional[str], Depends(api_key_header)],
|
||||
) -> Optional[UserInfo]:
|
||||
user = await _authenticate_user(jwt_token, api_key)
|
||||
if user is None and not settings.PUBLIC_MODE:
|
||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||
return user
|
||||
|
||||
|
||||
# --- WebSocket auth (same pattern as auth_jwt.py) ---
|
||||
def parse_ws_bearer_token(
|
||||
websocket: "WebSocket",
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Literal
|
||||
|
||||
@@ -66,6 +67,8 @@ meetings = sa.Table(
|
||||
# Daily.co composed video (Brady Bunch grid layout) - Daily.co only, not Whereby
|
||||
sa.Column("daily_composed_video_s3_key", sa.String, nullable=True),
|
||||
sa.Column("daily_composed_video_duration", sa.Integer, nullable=True),
|
||||
# Email recipients for transcript notification
|
||||
sa.Column("email_recipients", JSONB, nullable=True),
|
||||
sa.Index("idx_meeting_room_id", "room_id"),
|
||||
sa.Index("idx_meeting_calendar_event", "calendar_event_id"),
|
||||
)
|
||||
@@ -116,6 +119,8 @@ class Meeting(BaseModel):
|
||||
# Daily.co composed video (Brady Bunch grid) - Daily.co only
|
||||
daily_composed_video_s3_key: str | None = None
|
||||
daily_composed_video_duration: int | None = None
|
||||
# Email recipients for transcript notification
|
||||
email_recipients: list[str] | None = None
|
||||
|
||||
|
||||
class MeetingController:
|
||||
@@ -388,6 +393,24 @@ class MeetingController:
|
||||
# If was_null=False, the WHERE clause prevented the update
|
||||
return was_null
|
||||
|
||||
@asynccontextmanager
|
||||
async def transaction(self):
|
||||
"""A context manager for database transaction."""
|
||||
async with get_database().transaction(isolation="serializable"):
|
||||
yield
|
||||
|
||||
async def add_email_recipient(self, meeting_id: str, email: str) -> list[str]:
|
||||
"""Add an email to the meeting's email_recipients list (no duplicates)."""
|
||||
async with self.transaction():
|
||||
meeting = await self.get_by_id(meeting_id)
|
||||
if not meeting:
|
||||
raise ValueError(f"Meeting {meeting_id} not found")
|
||||
current = meeting.email_recipients or []
|
||||
if email not in current:
|
||||
current.append(email)
|
||||
await self.update_meeting(meeting_id, email_recipients=current)
|
||||
return current
|
||||
|
||||
async def increment_num_clients(self, meeting_id: str) -> None:
|
||||
"""Atomically increment participant count."""
|
||||
query = (
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from typing import Literal
|
||||
|
||||
import sqlalchemy as sa
|
||||
@@ -24,6 +24,7 @@ recordings = sa.Table(
|
||||
),
|
||||
sa.Column("meeting_id", sa.String),
|
||||
sa.Column("track_keys", sa.JSON, nullable=True),
|
||||
sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Index("idx_recording_meeting_id", "meeting_id"),
|
||||
)
|
||||
|
||||
@@ -40,6 +41,7 @@ class Recording(BaseModel):
|
||||
# track_keys can be empty list [] if recording finished but no audio was captured (silence/muted)
|
||||
# None means not a multitrack recording, [] means multitrack with no tracks
|
||||
track_keys: list[str] | None = None
|
||||
deleted_at: datetime | None = None
|
||||
|
||||
@property
|
||||
def is_multitrack(self) -> bool:
|
||||
@@ -69,7 +71,11 @@ class RecordingController:
|
||||
return Recording(**result) if result else None
|
||||
|
||||
async def remove_by_id(self, id: str) -> None:
|
||||
query = recordings.delete().where(recordings.c.id == id)
|
||||
query = (
|
||||
recordings.update()
|
||||
.where(recordings.c.id == id)
|
||||
.values(deleted_at=datetime.now(timezone.utc))
|
||||
)
|
||||
await get_database().execute(query)
|
||||
|
||||
async def set_meeting_id(
|
||||
@@ -114,6 +120,7 @@ class RecordingController:
|
||||
.where(
|
||||
recordings.c.bucket_name == bucket_name,
|
||||
recordings.c.track_keys.isnot(None),
|
||||
recordings.c.deleted_at.is_(None),
|
||||
or_(
|
||||
transcripts.c.id.is_(None),
|
||||
transcripts.c.status == "error",
|
||||
|
||||
@@ -63,6 +63,7 @@ rooms = sqlalchemy.Table(
|
||||
nullable=False,
|
||||
server_default=sqlalchemy.sql.false(),
|
||||
),
|
||||
sqlalchemy.Column("email_transcript_to", sqlalchemy.String, nullable=True),
|
||||
sqlalchemy.Index("idx_room_is_shared", "is_shared"),
|
||||
sqlalchemy.Index("idx_room_ics_enabled", "ics_enabled"),
|
||||
)
|
||||
@@ -92,6 +93,7 @@ class Room(BaseModel):
|
||||
ics_last_etag: str | None = None
|
||||
platform: Platform = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)
|
||||
skip_consent: bool = False
|
||||
email_transcript_to: str | None = None
|
||||
|
||||
|
||||
class RoomController:
|
||||
@@ -147,6 +149,7 @@ class RoomController:
|
||||
ics_enabled: bool = False,
|
||||
platform: Platform = settings.DEFAULT_VIDEO_PLATFORM,
|
||||
skip_consent: bool = False,
|
||||
email_transcript_to: str | None = None,
|
||||
):
|
||||
"""
|
||||
Add a new room
|
||||
@@ -172,6 +175,7 @@ class RoomController:
|
||||
"ics_enabled": ics_enabled,
|
||||
"platform": platform,
|
||||
"skip_consent": skip_consent,
|
||||
"email_transcript_to": email_transcript_to,
|
||||
}
|
||||
|
||||
room = Room(**room_data)
|
||||
|
||||
@@ -387,6 +387,8 @@ class SearchController:
|
||||
transcripts.join(rooms, transcripts.c.room_id == rooms.c.id, isouter=True)
|
||||
)
|
||||
|
||||
base_query = base_query.where(transcripts.c.deleted_at.is_(None))
|
||||
|
||||
if params.query_text is not None:
|
||||
# because already initialized based on params.query_text presence above
|
||||
assert search_query is not None
|
||||
|
||||
@@ -91,6 +91,7 @@ transcripts = sqlalchemy.Table(
|
||||
sqlalchemy.Column("webvtt", sqlalchemy.Text),
|
||||
# Hatchet workflow run ID for resumption of failed workflows
|
||||
sqlalchemy.Column("workflow_run_id", sqlalchemy.String),
|
||||
sqlalchemy.Column("deleted_at", sqlalchemy.DateTime(timezone=True), nullable=True),
|
||||
sqlalchemy.Column(
|
||||
"change_seq",
|
||||
sqlalchemy.BigInteger,
|
||||
@@ -238,6 +239,7 @@ class Transcript(BaseModel):
|
||||
webvtt: str | None = None
|
||||
workflow_run_id: str | None = None # Hatchet workflow run ID for resumption
|
||||
change_seq: int | None = None
|
||||
deleted_at: datetime | None = None
|
||||
|
||||
@field_serializer("created_at", when_used="json")
|
||||
def serialize_datetime(self, dt: datetime) -> str:
|
||||
@@ -418,6 +420,8 @@ class TranscriptController:
|
||||
rooms, transcripts.c.room_id == rooms.c.id, isouter=True
|
||||
)
|
||||
|
||||
query = query.where(transcripts.c.deleted_at.is_(None))
|
||||
|
||||
if user_id:
|
||||
query = query.where(
|
||||
or_(transcripts.c.user_id == user_id, rooms.c.is_shared)
|
||||
@@ -500,7 +504,10 @@ class TranscriptController:
|
||||
"""
|
||||
Get transcripts by room_id (direct access without joins)
|
||||
"""
|
||||
query = transcripts.select().where(transcripts.c.room_id == room_id)
|
||||
query = transcripts.select().where(
|
||||
transcripts.c.room_id == room_id,
|
||||
transcripts.c.deleted_at.is_(None),
|
||||
)
|
||||
if "user_id" in kwargs:
|
||||
query = query.where(transcripts.c.user_id == kwargs["user_id"])
|
||||
if "order_by" in kwargs:
|
||||
@@ -531,8 +538,11 @@ class TranscriptController:
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
|
||||
# if the transcript is anonymous, share mode is not checked
|
||||
transcript = Transcript(**result)
|
||||
if transcript.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
|
||||
# if the transcript is anonymous, share mode is not checked
|
||||
if transcript.user_id is None:
|
||||
return transcript
|
||||
|
||||
@@ -632,56 +642,49 @@ class TranscriptController:
|
||||
user_id: str | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Remove a transcript by id
|
||||
Soft-delete a transcript by id.
|
||||
|
||||
Sets deleted_at on the transcript and its associated recording.
|
||||
All files (S3 and local) are preserved for later retrieval.
|
||||
"""
|
||||
transcript = await self.get_by_id(transcript_id)
|
||||
if not transcript:
|
||||
return
|
||||
if user_id is not None and transcript.user_id != user_id:
|
||||
return
|
||||
if transcript.audio_location == "storage" and not transcript.audio_deleted:
|
||||
try:
|
||||
await get_transcripts_storage().delete_file(
|
||||
transcript.storage_audio_path
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to delete transcript audio from storage",
|
||||
exc_info=e,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
transcript.unlink()
|
||||
if transcript.deleted_at is not None:
|
||||
return
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Soft-delete the associated recording (keeps S3 files intact)
|
||||
if transcript.recording_id:
|
||||
try:
|
||||
recording = await recordings_controller.get_by_id(
|
||||
transcript.recording_id
|
||||
)
|
||||
if recording:
|
||||
try:
|
||||
await get_transcripts_storage().delete_file(
|
||||
recording.object_key, bucket=recording.bucket_name
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to delete recording object from S3",
|
||||
exc_info=e,
|
||||
recording_id=transcript.recording_id,
|
||||
)
|
||||
await recordings_controller.remove_by_id(transcript.recording_id)
|
||||
await recordings_controller.remove_by_id(transcript.recording_id)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to delete recording row",
|
||||
"Failed to soft-delete recording",
|
||||
exc_info=e,
|
||||
recording_id=transcript.recording_id,
|
||||
)
|
||||
query = transcripts.delete().where(transcripts.c.id == transcript_id)
|
||||
|
||||
# Soft-delete the transcript (keeps all files intact)
|
||||
query = (
|
||||
transcripts.update()
|
||||
.where(transcripts.c.id == transcript_id)
|
||||
.values(deleted_at=now)
|
||||
)
|
||||
await get_database().execute(query)
|
||||
|
||||
async def remove_by_recording_id(self, recording_id: str):
|
||||
"""
|
||||
Remove a transcript by recording_id
|
||||
Soft-delete a transcript by recording_id
|
||||
"""
|
||||
query = transcripts.delete().where(transcripts.c.recording_id == recording_id)
|
||||
query = (
|
||||
transcripts.update()
|
||||
.where(transcripts.c.recording_id == recording_id)
|
||||
.values(deleted_at=datetime.now(timezone.utc))
|
||||
)
|
||||
await get_database().execute(query)
|
||||
|
||||
@staticmethod
|
||||
@@ -697,6 +700,18 @@ class TranscriptController:
|
||||
return False
|
||||
return user_id and transcript.user_id == user_id
|
||||
|
||||
@staticmethod
|
||||
def check_can_mutate(transcript: Transcript, user_id: str | None) -> None:
|
||||
"""
|
||||
Raises HTTP 403 if the user cannot mutate the transcript.
|
||||
|
||||
Policy:
|
||||
- Anonymous transcripts (user_id is None) are editable by anyone
|
||||
- Owned transcripts can only be mutated by their owner
|
||||
"""
|
||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
|
||||
@asynccontextmanager
|
||||
async def transaction(self):
|
||||
"""
|
||||
|
||||
84
server/reflector/email.py
Normal file
84
server/reflector/email.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
|
||||
import aiosmtplib
|
||||
import structlog
|
||||
|
||||
from reflector.db.transcripts import Transcript
|
||||
from reflector.settings import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
def is_email_configured() -> bool:
|
||||
return bool(settings.SMTP_HOST and settings.SMTP_FROM_EMAIL)
|
||||
|
||||
|
||||
def get_transcript_url(transcript: Transcript) -> str:
|
||||
return f"{settings.UI_BASE_URL}/transcripts/{transcript.id}"
|
||||
|
||||
|
||||
def _build_plain_text(transcript: Transcript, url: str) -> str:
|
||||
title = transcript.title or "Unnamed recording"
|
||||
lines = [
|
||||
f"Your transcript is ready: {title}",
|
||||
"",
|
||||
f"View it here: {url}",
|
||||
]
|
||||
if transcript.short_summary:
|
||||
lines.extend(["", "Summary:", transcript.short_summary])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_html(transcript: Transcript, url: str) -> str:
|
||||
title = transcript.title or "Unnamed recording"
|
||||
summary_html = ""
|
||||
if transcript.short_summary:
|
||||
summary_html = f"<p style='color:#555;'>{transcript.short_summary}</p>"
|
||||
|
||||
return f"""\
|
||||
<div style="font-family:sans-serif;max-width:600px;margin:0 auto;">
|
||||
<h2>Your transcript is ready</h2>
|
||||
<p><strong>{title}</strong></p>
|
||||
{summary_html}
|
||||
<p><a href="{url}" style="display:inline-block;padding:10px 20px;background:#4A90D9;color:#fff;text-decoration:none;border-radius:4px;">View Transcript</a></p>
|
||||
<p style="color:#999;font-size:12px;">This email was sent because you requested to receive the transcript from a meeting.</p>
|
||||
</div>"""
|
||||
|
||||
|
||||
async def send_transcript_email(to_emails: list[str], transcript: Transcript) -> int:
|
||||
"""Send transcript notification to all emails. Returns count sent."""
|
||||
if not is_email_configured() or not to_emails:
|
||||
return 0
|
||||
|
||||
url = get_transcript_url(transcript)
|
||||
title = transcript.title or "Unnamed recording"
|
||||
sent = 0
|
||||
|
||||
for email_addr in to_emails:
|
||||
msg = MIMEMultipart("alternative")
|
||||
msg["Subject"] = f"Transcript Ready: {title}"
|
||||
msg["From"] = settings.SMTP_FROM_EMAIL
|
||||
msg["To"] = email_addr
|
||||
|
||||
msg.attach(MIMEText(_build_plain_text(transcript, url), "plain"))
|
||||
msg.attach(MIMEText(_build_html(transcript, url), "html"))
|
||||
|
||||
try:
|
||||
await aiosmtplib.send(
|
||||
msg,
|
||||
hostname=settings.SMTP_HOST,
|
||||
port=settings.SMTP_PORT,
|
||||
username=settings.SMTP_USERNAME,
|
||||
password=settings.SMTP_PASSWORD,
|
||||
start_tls=settings.SMTP_USE_TLS,
|
||||
)
|
||||
sent += 1
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Failed to send transcript email",
|
||||
to=email_addr,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
|
||||
return sent
|
||||
@@ -21,11 +21,27 @@ class TaskName(StrEnum):
|
||||
CLEANUP_CONSENT = "cleanup_consent"
|
||||
POST_ZULIP = "post_zulip"
|
||||
SEND_WEBHOOK = "send_webhook"
|
||||
SEND_EMAIL = "send_email"
|
||||
PAD_TRACK = "pad_track"
|
||||
TRANSCRIBE_TRACK = "transcribe_track"
|
||||
DETECT_CHUNK_TOPIC = "detect_chunk_topic"
|
||||
GENERATE_DETAILED_SUMMARY = "generate_detailed_summary"
|
||||
|
||||
# File pipeline tasks
|
||||
EXTRACT_AUDIO = "extract_audio"
|
||||
UPLOAD_AUDIO = "upload_audio"
|
||||
TRANSCRIBE = "transcribe"
|
||||
DIARIZE = "diarize"
|
||||
ASSEMBLE_TRANSCRIPT = "assemble_transcript"
|
||||
GENERATE_SUMMARIES = "generate_summaries"
|
||||
|
||||
# Live post-processing pipeline tasks
|
||||
WAVEFORM = "waveform"
|
||||
CONVERT_MP3 = "convert_mp3"
|
||||
UPLOAD_MP3 = "upload_mp3"
|
||||
REMOVE_UPLOAD = "remove_upload"
|
||||
FINAL_SUMMARIES = "final_summaries"
|
||||
|
||||
|
||||
# Rate limit key for LLM API calls (shared across all LLM-calling tasks)
|
||||
LLM_RATE_LIMIT_KEY = "llm"
|
||||
@@ -44,7 +60,7 @@ TIMEOUT_AUDIO = 720 # Audio processing: padding, mixdown (Hatchet execution_tim
|
||||
TIMEOUT_AUDIO_HTTP = (
|
||||
660 # httpx timeout for pad_track — below 720 so Hatchet doesn't race
|
||||
)
|
||||
TIMEOUT_HEAVY = 600 # Transcription, fan-out LLM tasks (Hatchet execution_timeout)
|
||||
TIMEOUT_HEAVY = 1200 # Transcription, fan-out LLM tasks (Hatchet execution_timeout)
|
||||
TIMEOUT_HEAVY_HTTP = (
|
||||
540 # httpx timeout for transcribe_track — below 600 so Hatchet doesn't race
|
||||
1150 # httpx timeout for transcribe_track — below 1200 so Hatchet doesn't race
|
||||
)
|
||||
|
||||
@@ -10,10 +10,13 @@ from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
daily_multitrack_pipeline,
|
||||
)
|
||||
from reflector.hatchet.workflows.file_pipeline import file_pipeline
|
||||
from reflector.hatchet.workflows.live_post_pipeline import live_post_pipeline
|
||||
from reflector.hatchet.workflows.subject_processing import subject_workflow
|
||||
from reflector.hatchet.workflows.topic_chunk_processing import topic_chunk_workflow
|
||||
from reflector.hatchet.workflows.track_processing import track_workflow
|
||||
from reflector.logger import logger
|
||||
from reflector.settings import settings
|
||||
|
||||
SLOTS = 10
|
||||
WORKER_NAME = "llm-worker-pool"
|
||||
@@ -32,6 +35,38 @@ def main():
|
||||
error=str(e),
|
||||
)
|
||||
|
||||
workflows = [
|
||||
daily_multitrack_pipeline,
|
||||
file_pipeline,
|
||||
live_post_pipeline,
|
||||
topic_chunk_workflow,
|
||||
subject_workflow,
|
||||
track_workflow,
|
||||
]
|
||||
|
||||
_zulip_dag_enabled = all(
|
||||
[
|
||||
settings.ZULIP_REALM,
|
||||
settings.ZULIP_API_KEY,
|
||||
settings.ZULIP_BOT_EMAIL,
|
||||
settings.ZULIP_DAG_STREAM,
|
||||
settings.ZULIP_DAG_TOPIC,
|
||||
]
|
||||
)
|
||||
if _zulip_dag_enabled:
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import ( # noqa: PLC0415
|
||||
failed_runs_monitor,
|
||||
)
|
||||
|
||||
workflows.append(failed_runs_monitor)
|
||||
logger.info(
|
||||
"FailedRunsMonitor cron enabled",
|
||||
stream=settings.ZULIP_DAG_STREAM,
|
||||
topic=settings.ZULIP_DAG_TOPIC,
|
||||
)
|
||||
else:
|
||||
logger.info("FailedRunsMonitor cron disabled (Zulip DAG not configured)")
|
||||
|
||||
logger.info(
|
||||
"Starting Hatchet LLM worker pool (all tasks except mixdown)",
|
||||
worker_name=WORKER_NAME,
|
||||
@@ -45,12 +80,7 @@ def main():
|
||||
labels={
|
||||
"pool": POOL,
|
||||
},
|
||||
workflows=[
|
||||
daily_multitrack_pipeline,
|
||||
topic_chunk_workflow,
|
||||
subject_workflow,
|
||||
track_workflow,
|
||||
],
|
||||
workflows=workflows,
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
@@ -33,6 +33,7 @@ from hatchet_sdk.labels import DesiredWorkerLabel
|
||||
from pydantic import BaseModel
|
||||
|
||||
from reflector.dailyco_api.client import DailyApiClient
|
||||
from reflector.email import is_email_configured, send_transcript_email
|
||||
from reflector.hatchet.broadcast import (
|
||||
append_event_and_broadcast,
|
||||
set_status_and_broadcast,
|
||||
@@ -51,6 +52,7 @@ from reflector.hatchet.error_classification import is_non_retryable
|
||||
from reflector.hatchet.workflows.models import (
|
||||
ActionItemsResult,
|
||||
ConsentResult,
|
||||
EmailResult,
|
||||
FinalizeResult,
|
||||
MixdownResult,
|
||||
PaddedTrackInfo,
|
||||
@@ -307,7 +309,9 @@ async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
|
||||
ctx.log(
|
||||
f"get_recording: calling Daily.co API for recording_id={input.recording_id}..."
|
||||
)
|
||||
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
|
||||
async with DailyApiClient(
|
||||
api_key=settings.DAILY_API_KEY, base_url=settings.DAILY_API_URL
|
||||
) as client:
|
||||
recording = await client.get_recording(input.recording_id)
|
||||
ctx.log(f"get_recording: Daily.co API returned successfully")
|
||||
|
||||
@@ -374,7 +378,9 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
||||
settings.DAILY_API_KEY, "DAILY_API_KEY is required"
|
||||
)
|
||||
|
||||
async with DailyApiClient(api_key=daily_api_key) as client:
|
||||
async with DailyApiClient(
|
||||
api_key=daily_api_key, base_url=settings.DAILY_API_URL
|
||||
) as client:
|
||||
participants = await client.get_meeting_participants(mtg_session_id)
|
||||
|
||||
id_to_name = {}
|
||||
@@ -1461,6 +1467,69 @@ async def send_webhook(input: PipelineInput, ctx: Context) -> WebhookResult:
|
||||
return WebhookResult(webhook_sent=False)
|
||||
|
||||
|
||||
@daily_multitrack_pipeline.task(
|
||||
parents=[cleanup_consent],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.SEND_EMAIL, set_error_status=False)
|
||||
async def send_email(input: PipelineInput, ctx: Context) -> EmailResult:
|
||||
"""Send transcript email to collected recipients."""
|
||||
ctx.log(f"send_email: transcript_id={input.transcript_id}")
|
||||
|
||||
if not is_email_configured():
|
||||
ctx.log("send_email skipped (SMTP not configured)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.meetings import meetings_controller # noqa: PLC0415
|
||||
from reflector.db.recordings import recordings_controller # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
ctx.log("send_email skipped (transcript not found)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
meeting = None
|
||||
if transcript.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
if not meeting and transcript.recording_id:
|
||||
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||
if recording and recording.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||
|
||||
recipients = (
|
||||
list(meeting.email_recipients)
|
||||
if meeting and meeting.email_recipients
|
||||
else []
|
||||
)
|
||||
|
||||
# Also check room-level email
|
||||
from reflector.db.rooms import rooms_controller # noqa: PLC0415
|
||||
|
||||
if transcript.room_id:
|
||||
room = await rooms_controller.get_by_id(transcript.room_id)
|
||||
if room and room.email_transcript_to:
|
||||
if room.email_transcript_to not in recipients:
|
||||
recipients.append(room.email_transcript_to)
|
||||
|
||||
if not recipients:
|
||||
ctx.log("send_email skipped (no email recipients)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
# For room-level emails, do NOT change share_mode (only set public if meeting had recipients)
|
||||
if meeting and meeting.email_recipients:
|
||||
await transcripts_controller.update(transcript, {"share_mode": "public"})
|
||||
|
||||
count = await send_transcript_email(recipients, transcript)
|
||||
ctx.log(f"send_email complete: sent {count} emails")
|
||||
|
||||
return EmailResult(emails_sent=count)
|
||||
|
||||
|
||||
async def on_workflow_failure(input: PipelineInput, ctx: Context) -> None:
|
||||
"""Run when the workflow is truly dead (all retries exhausted).
|
||||
|
||||
|
||||
109
server/reflector/hatchet/workflows/failed_runs_monitor.py
Normal file
109
server/reflector/hatchet/workflows/failed_runs_monitor.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Hatchet cron workflow: FailedRunsMonitor
|
||||
|
||||
Runs hourly, queries Hatchet for failed pipeline runs in the last hour,
|
||||
and posts details to Zulip for visibility.
|
||||
|
||||
Only registered with the worker when Zulip DAG settings are configured.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from hatchet_sdk import Context
|
||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.logger import logger
|
||||
from reflector.settings import settings
|
||||
from reflector.tools.render_hatchet_run import render_run_detail
|
||||
from reflector.zulip import send_message_to_zulip
|
||||
|
||||
MONITORED_PIPELINES = {
|
||||
"DiarizationPipeline",
|
||||
"FilePipeline",
|
||||
"LivePostProcessingPipeline",
|
||||
}
|
||||
|
||||
LOOKBACK_HOURS = 1
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
failed_runs_monitor = hatchet.workflow(
|
||||
name="FailedRunsMonitor",
|
||||
on_crons=["0 * * * *"],
|
||||
)
|
||||
|
||||
|
||||
async def _check_failed_runs() -> dict:
|
||||
"""Core logic: query for failed pipeline runs and post each to Zulip.
|
||||
|
||||
Extracted from the Hatchet task for testability.
|
||||
"""
|
||||
now = datetime.now(tz=timezone.utc)
|
||||
since = now - timedelta(hours=LOOKBACK_HOURS)
|
||||
|
||||
client = HatchetClientManager.get_client()
|
||||
|
||||
try:
|
||||
result = await client.runs.aio_list(
|
||||
statuses=[V1TaskStatus.FAILED],
|
||||
since=since,
|
||||
until=now,
|
||||
limit=200,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("[FailedRunsMonitor] Failed to list runs from Hatchet")
|
||||
return {"checked": 0, "reported": 0, "error": "failed to list runs"}
|
||||
|
||||
rows = result.rows or []
|
||||
|
||||
# Filter to main pipelines only (skip child workflows like TrackProcessing, etc.)
|
||||
failed_main_runs = [run for run in rows if run.workflow_name in MONITORED_PIPELINES]
|
||||
|
||||
if not failed_main_runs:
|
||||
logger.info(
|
||||
"[FailedRunsMonitor] No failed pipeline runs in the last hour",
|
||||
total_failed=len(rows),
|
||||
since=since.isoformat(),
|
||||
)
|
||||
return {"checked": len(rows), "reported": 0}
|
||||
|
||||
logger.info(
|
||||
"[FailedRunsMonitor] Found failed pipeline runs",
|
||||
count=len(failed_main_runs),
|
||||
since=since.isoformat(),
|
||||
)
|
||||
|
||||
reported = 0
|
||||
for run in failed_main_runs:
|
||||
try:
|
||||
details = await client.runs.aio_get(run.workflow_run_external_id)
|
||||
content = render_run_detail(details)
|
||||
await send_message_to_zulip(
|
||||
settings.ZULIP_DAG_STREAM,
|
||||
settings.ZULIP_DAG_TOPIC,
|
||||
content,
|
||||
)
|
||||
reported += 1
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"[FailedRunsMonitor] Failed to report run",
|
||||
workflow_run_id=run.workflow_run_external_id,
|
||||
workflow_name=run.workflow_name,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"[FailedRunsMonitor] Finished reporting",
|
||||
reported=reported,
|
||||
total_failed_main=len(failed_main_runs),
|
||||
)
|
||||
return {"checked": len(rows), "reported": reported}
|
||||
|
||||
|
||||
@failed_runs_monitor.task(
|
||||
execution_timeout=timedelta(seconds=120),
|
||||
retries=1,
|
||||
)
|
||||
async def check_failed_runs(input, ctx: Context) -> dict:
|
||||
"""Hatchet task entry point — delegates to _check_failed_runs."""
|
||||
return await _check_failed_runs()
|
||||
951
server/reflector/hatchet/workflows/file_pipeline.py
Normal file
951
server/reflector/hatchet/workflows/file_pipeline.py
Normal file
@@ -0,0 +1,951 @@
|
||||
"""
|
||||
Hatchet workflow: FilePipeline
|
||||
|
||||
Processing pipeline for file uploads and Whereby recordings.
|
||||
Orchestrates: extract audio → upload → transcribe/diarize/waveform (parallel)
|
||||
→ assemble → detect topics → title/summaries (parallel) → finalize
|
||||
→ cleanup consent → post zulip / send webhook.
|
||||
|
||||
Note: This file uses deferred imports (inside functions/tasks) intentionally.
|
||||
Hatchet workers run in forked processes; fresh imports per task ensure DB connections
|
||||
are not shared across forks, avoiding connection pooling issues.
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from hatchet_sdk import Context
|
||||
from pydantic import BaseModel
|
||||
|
||||
from reflector.email import is_email_configured, send_transcript_email
|
||||
from reflector.hatchet.broadcast import (
|
||||
append_event_and_broadcast,
|
||||
set_status_and_broadcast,
|
||||
)
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.constants import (
|
||||
TIMEOUT_HEAVY,
|
||||
TIMEOUT_MEDIUM,
|
||||
TIMEOUT_SHORT,
|
||||
TIMEOUT_TITLE,
|
||||
TaskName,
|
||||
)
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
fresh_db_connection,
|
||||
set_workflow_error_status,
|
||||
with_error_handling,
|
||||
)
|
||||
from reflector.hatchet.workflows.models import (
|
||||
ConsentResult,
|
||||
EmailResult,
|
||||
TitleResult,
|
||||
TopicsResult,
|
||||
WaveformResult,
|
||||
WebhookResult,
|
||||
ZulipResult,
|
||||
)
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines import topic_processing
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.audio_constants import WAVEFORM_SEGMENTS
|
||||
from reflector.utils.audio_waveform import get_audio_waveform
|
||||
|
||||
|
||||
class FilePipelineInput(BaseModel):
|
||||
transcript_id: str
|
||||
room_id: str | None = None
|
||||
|
||||
|
||||
# --- Result models specific to file pipeline ---
|
||||
|
||||
|
||||
class ExtractAudioResult(BaseModel):
|
||||
audio_path: str
|
||||
duration_ms: float = 0.0
|
||||
|
||||
|
||||
class UploadAudioResult(BaseModel):
|
||||
audio_url: str
|
||||
audio_path: str
|
||||
|
||||
|
||||
class TranscribeResult(BaseModel):
|
||||
words: list[dict]
|
||||
translation: str | None = None
|
||||
|
||||
|
||||
class DiarizeResult(BaseModel):
|
||||
diarization: list[dict] | None = None
|
||||
|
||||
|
||||
class AssembleTranscriptResult(BaseModel):
|
||||
assembled: bool
|
||||
|
||||
|
||||
class SummariesResult(BaseModel):
|
||||
generated: bool
|
||||
|
||||
|
||||
class FinalizeResult(BaseModel):
|
||||
status: str
|
||||
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
file_pipeline = hatchet.workflow(name="FilePipeline", input_validator=FilePipelineInput)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.EXTRACT_AUDIO)
|
||||
async def extract_audio(input: FilePipelineInput, ctx: Context) -> ExtractAudioResult:
|
||||
"""Extract audio from upload file, convert to MP3."""
|
||||
ctx.log(f"extract_audio: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
await set_status_and_broadcast(input.transcript_id, "processing", logger=logger)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
# Clear transcript as we're going to regenerate everything
|
||||
await transcripts_controller.update(
|
||||
transcript,
|
||||
{
|
||||
"events": [],
|
||||
"topics": [],
|
||||
},
|
||||
)
|
||||
|
||||
# Find upload file
|
||||
audio_file = next(transcript.data_path.glob("upload.*"), None)
|
||||
if not audio_file:
|
||||
audio_file = next(transcript.data_path.glob("audio.*"), None)
|
||||
if not audio_file:
|
||||
raise ValueError("No audio file found to process")
|
||||
|
||||
ctx.log(f"extract_audio: processing {audio_file}")
|
||||
|
||||
# Extract audio and write as MP3
|
||||
import av # noqa: PLC0415
|
||||
|
||||
from reflector.processors import AudioFileWriterProcessor # noqa: PLC0415
|
||||
|
||||
duration_ms_container = [0.0]
|
||||
|
||||
async def capture_duration(d):
|
||||
duration_ms_container[0] = d
|
||||
|
||||
mp3_writer = AudioFileWriterProcessor(
|
||||
path=transcript.audio_mp3_filename,
|
||||
on_duration=capture_duration,
|
||||
)
|
||||
input_container = av.open(str(audio_file))
|
||||
for frame in input_container.decode(audio=0):
|
||||
await mp3_writer.push(frame)
|
||||
await mp3_writer.flush()
|
||||
input_container.close()
|
||||
|
||||
duration_ms = duration_ms_container[0]
|
||||
audio_path = str(transcript.audio_mp3_filename)
|
||||
|
||||
# Persist duration to database and broadcast to websocket clients
|
||||
from reflector.db.transcripts import TranscriptDuration # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller as tc
|
||||
|
||||
await tc.update(transcript, {"duration": duration_ms})
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"DURATION",
|
||||
TranscriptDuration(duration=duration_ms),
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log(f"extract_audio complete: {audio_path}, duration={duration_ms}ms")
|
||||
return ExtractAudioResult(audio_path=audio_path, duration_ms=duration_ms)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[extract_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.UPLOAD_AUDIO)
|
||||
async def upload_audio(input: FilePipelineInput, ctx: Context) -> UploadAudioResult:
|
||||
"""Upload audio to S3/storage, return audio_url."""
|
||||
ctx.log(f"upload_audio: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
extract_result = ctx.task_output(extract_audio)
|
||||
audio_path = extract_result.audio_path
|
||||
|
||||
from reflector.storage import get_transcripts_storage # noqa: PLC0415
|
||||
|
||||
storage = get_transcripts_storage()
|
||||
if not storage:
|
||||
raise ValueError(
|
||||
"Storage backend required for file processing. "
|
||||
"Configure TRANSCRIPT_STORAGE_* settings."
|
||||
)
|
||||
|
||||
with open(audio_path, "rb") as f:
|
||||
audio_data = f.read()
|
||||
|
||||
storage_path = f"file_pipeline/{input.transcript_id}/audio.mp3"
|
||||
await storage.put_file(storage_path, audio_data)
|
||||
audio_url = await storage.get_file_url(storage_path)
|
||||
|
||||
ctx.log(f"upload_audio complete: {audio_url}")
|
||||
return UploadAudioResult(audio_url=audio_url, audio_path=audio_path)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[upload_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.TRANSCRIBE)
|
||||
async def transcribe(input: FilePipelineInput, ctx: Context) -> TranscribeResult:
|
||||
"""Transcribe the audio file using the configured backend."""
|
||||
ctx.log(f"transcribe: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
upload_result = ctx.task_output(upload_audio)
|
||||
audio_url = upload_result.audio_url
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
source_language = transcript.source_language
|
||||
|
||||
from reflector.pipelines.transcription_helpers import ( # noqa: PLC0415
|
||||
transcribe_file_with_processor,
|
||||
)
|
||||
|
||||
result = await transcribe_file_with_processor(audio_url, source_language)
|
||||
|
||||
ctx.log(f"transcribe complete: {len(result.words)} words")
|
||||
return TranscribeResult(
|
||||
words=[w.model_dump() for w in result.words],
|
||||
translation=result.translation,
|
||||
)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[upload_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.DIARIZE)
|
||||
async def diarize(input: FilePipelineInput, ctx: Context) -> DiarizeResult:
|
||||
"""Diarize the audio file (speaker identification)."""
|
||||
ctx.log(f"diarize: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
if not settings.DIARIZATION_BACKEND:
|
||||
ctx.log("diarize: diarization disabled, skipping")
|
||||
return DiarizeResult(diarization=None)
|
||||
|
||||
upload_result = ctx.task_output(upload_audio)
|
||||
audio_url = upload_result.audio_url
|
||||
|
||||
from reflector.processors.file_diarization import ( # noqa: PLC0415
|
||||
FileDiarizationInput,
|
||||
)
|
||||
from reflector.processors.file_diarization_auto import ( # noqa: PLC0415
|
||||
FileDiarizationAutoProcessor,
|
||||
)
|
||||
|
||||
processor = FileDiarizationAutoProcessor()
|
||||
input_data = FileDiarizationInput(audio_url=audio_url)
|
||||
|
||||
result = None
|
||||
|
||||
async def capture_result(diarization_output):
|
||||
nonlocal result
|
||||
result = diarization_output.diarization
|
||||
|
||||
try:
|
||||
processor.on(capture_result)
|
||||
await processor.push(input_data)
|
||||
await processor.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"Diarization failed: {e}")
|
||||
return DiarizeResult(diarization=None)
|
||||
|
||||
ctx.log(f"diarize complete: {len(result) if result else 0} segments")
|
||||
return DiarizeResult(diarization=list(result) if result else None)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[upload_audio],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_WAVEFORM)
|
||||
async def generate_waveform(input: FilePipelineInput, ctx: Context) -> WaveformResult:
|
||||
"""Generate audio waveform visualization."""
|
||||
ctx.log(f"generate_waveform: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
upload_result = ctx.task_output(upload_audio)
|
||||
audio_path = upload_result.audio_path
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptWaveform,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
waveform = get_audio_waveform(
|
||||
path=Path(audio_path), segments_count=WAVEFORM_SEGMENTS
|
||||
)
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
transcript.data_path.mkdir(parents=True, exist_ok=True)
|
||||
with open(transcript.audio_waveform_filename, "w") as f:
|
||||
json.dump(waveform, f)
|
||||
|
||||
waveform_data = TranscriptWaveform(waveform=waveform)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"WAVEFORM",
|
||||
waveform_data,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log("generate_waveform complete")
|
||||
return WaveformResult(waveform_generated=True)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[transcribe, diarize, generate_waveform],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.ASSEMBLE_TRANSCRIPT)
|
||||
async def assemble_transcript(
|
||||
input: FilePipelineInput, ctx: Context
|
||||
) -> AssembleTranscriptResult:
|
||||
"""Merge transcription + diarization results."""
|
||||
ctx.log(f"assemble_transcript: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
transcribe_result = ctx.task_output(transcribe)
|
||||
diarize_result = ctx.task_output(diarize)
|
||||
|
||||
from reflector.processors.transcript_diarization_assembler import ( # noqa: PLC0415
|
||||
TranscriptDiarizationAssemblerInput,
|
||||
TranscriptDiarizationAssemblerProcessor,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
DiarizationSegment,
|
||||
Word,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
Transcript as TranscriptType,
|
||||
)
|
||||
|
||||
words = [Word(**w) for w in transcribe_result.words]
|
||||
transcript_data = TranscriptType(
|
||||
words=words, translation=transcribe_result.translation
|
||||
)
|
||||
|
||||
diarization = None
|
||||
if diarize_result.diarization:
|
||||
diarization = [DiarizationSegment(**s) for s in diarize_result.diarization]
|
||||
|
||||
processor = TranscriptDiarizationAssemblerProcessor()
|
||||
assembler_input = TranscriptDiarizationAssemblerInput(
|
||||
transcript=transcript_data, diarization=diarization or []
|
||||
)
|
||||
|
||||
diarized_transcript = None
|
||||
|
||||
async def capture_result(transcript):
|
||||
nonlocal diarized_transcript
|
||||
diarized_transcript = transcript
|
||||
|
||||
processor.on(capture_result)
|
||||
await processor.push(assembler_input)
|
||||
await processor.flush()
|
||||
|
||||
if not diarized_transcript:
|
||||
raise ValueError("No diarized transcript captured")
|
||||
|
||||
# Save the assembled transcript events to the database
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptText,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
assembled_text = diarized_transcript.text if diarized_transcript else ""
|
||||
assembled_translation = (
|
||||
diarized_transcript.translation if diarized_transcript else None
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"TRANSCRIPT",
|
||||
TranscriptText(text=assembled_text, translation=assembled_translation),
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log("assemble_transcript complete")
|
||||
return AssembleTranscriptResult(assembled=True)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[assemble_transcript],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.DETECT_TOPICS)
|
||||
async def detect_topics(input: FilePipelineInput, ctx: Context) -> TopicsResult:
|
||||
"""Detect topics from the assembled transcript."""
|
||||
ctx.log(f"detect_topics: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
# Re-read the transcript to get the diarized words
|
||||
transcribe_result = ctx.task_output(transcribe)
|
||||
diarize_result = ctx.task_output(diarize)
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptTopic,
|
||||
transcripts_controller,
|
||||
)
|
||||
from reflector.processors.transcript_diarization_assembler import ( # noqa: PLC0415
|
||||
TranscriptDiarizationAssemblerInput,
|
||||
TranscriptDiarizationAssemblerProcessor,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
DiarizationSegment,
|
||||
Word,
|
||||
)
|
||||
from reflector.processors.types import ( # noqa: PLC0415
|
||||
Transcript as TranscriptType,
|
||||
)
|
||||
|
||||
words = [Word(**w) for w in transcribe_result.words]
|
||||
transcript_data = TranscriptType(
|
||||
words=words, translation=transcribe_result.translation
|
||||
)
|
||||
|
||||
diarization = None
|
||||
if diarize_result.diarization:
|
||||
diarization = [DiarizationSegment(**s) for s in diarize_result.diarization]
|
||||
|
||||
# Re-assemble to get the diarized transcript for topic detection
|
||||
processor = TranscriptDiarizationAssemblerProcessor()
|
||||
assembler_input = TranscriptDiarizationAssemblerInput(
|
||||
transcript=transcript_data, diarization=diarization or []
|
||||
)
|
||||
|
||||
diarized_transcript = None
|
||||
|
||||
async def capture_result(transcript):
|
||||
nonlocal diarized_transcript
|
||||
diarized_transcript = transcript
|
||||
|
||||
processor.on(capture_result)
|
||||
await processor.push(assembler_input)
|
||||
await processor.flush()
|
||||
|
||||
if not diarized_transcript:
|
||||
raise ValueError("No diarized transcript for topic detection")
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
target_language = transcript.target_language
|
||||
|
||||
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||
|
||||
async def on_topic_callback(data):
|
||||
topic = TranscriptTopic(
|
||||
title=data.title,
|
||||
summary=data.summary,
|
||||
timestamp=data.timestamp,
|
||||
transcript=data.transcript.text
|
||||
if hasattr(data.transcript, "text")
|
||||
else "",
|
||||
words=data.transcript.words
|
||||
if hasattr(data.transcript, "words")
|
||||
else [],
|
||||
)
|
||||
await transcripts_controller.upsert_topic(transcript, topic)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id, transcript, "TOPIC", topic, logger=logger
|
||||
)
|
||||
|
||||
topics = await topic_processing.detect_topics(
|
||||
diarized_transcript,
|
||||
target_language,
|
||||
on_topic_callback=on_topic_callback,
|
||||
empty_pipeline=empty_pipeline,
|
||||
)
|
||||
|
||||
ctx.log(f"detect_topics complete: {len(topics)} topics")
|
||||
return TopicsResult(topics=topics)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[detect_topics],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_TITLE),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_TITLE)
|
||||
async def generate_title(input: FilePipelineInput, ctx: Context) -> TitleResult:
|
||||
"""Generate meeting title using LLM."""
|
||||
ctx.log(f"generate_title: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
topics_result = ctx.task_output(detect_topics)
|
||||
topics = topics_result.topics
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptFinalTitle,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||
title_result = None
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
async def on_title_callback(data):
|
||||
nonlocal title_result
|
||||
title_result = data.title
|
||||
final_title = TranscriptFinalTitle(title=data.title)
|
||||
if not transcript.title:
|
||||
await transcripts_controller.update(
|
||||
transcript, {"title": final_title.title}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"FINAL_TITLE",
|
||||
final_title,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
await topic_processing.generate_title(
|
||||
topics,
|
||||
on_title_callback=on_title_callback,
|
||||
empty_pipeline=empty_pipeline,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log(f"generate_title complete: '{title_result}'")
|
||||
return TitleResult(title=title_result)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[detect_topics],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_SUMMARIES)
|
||||
async def generate_summaries(input: FilePipelineInput, ctx: Context) -> SummariesResult:
|
||||
"""Generate long/short summaries and action items."""
|
||||
ctx.log(f"generate_summaries: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
topics_result = ctx.task_output(detect_topics)
|
||||
topics = topics_result.topics
|
||||
|
||||
from reflector.db.transcripts import ( # noqa: PLC0415
|
||||
TranscriptActionItems,
|
||||
TranscriptFinalLongSummary,
|
||||
TranscriptFinalShortSummary,
|
||||
transcripts_controller,
|
||||
)
|
||||
|
||||
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||
|
||||
async with fresh_db_connection():
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
async def on_long_summary_callback(data):
|
||||
final_long = TranscriptFinalLongSummary(long_summary=data.long_summary)
|
||||
await transcripts_controller.update(
|
||||
transcript, {"long_summary": final_long.long_summary}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"FINAL_LONG_SUMMARY",
|
||||
final_long,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
async def on_short_summary_callback(data):
|
||||
final_short = TranscriptFinalShortSummary(short_summary=data.short_summary)
|
||||
await transcripts_controller.update(
|
||||
transcript, {"short_summary": final_short.short_summary}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"FINAL_SHORT_SUMMARY",
|
||||
final_short,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
async def on_action_items_callback(data):
|
||||
action_items = TranscriptActionItems(action_items=data.action_items)
|
||||
await transcripts_controller.update(
|
||||
transcript, {"action_items": action_items.action_items}
|
||||
)
|
||||
await append_event_and_broadcast(
|
||||
input.transcript_id,
|
||||
transcript,
|
||||
"ACTION_ITEMS",
|
||||
action_items,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
await topic_processing.generate_summaries(
|
||||
topics,
|
||||
transcript,
|
||||
on_long_summary_callback=on_long_summary_callback,
|
||||
on_short_summary_callback=on_short_summary_callback,
|
||||
on_action_items_callback=on_action_items_callback,
|
||||
empty_pipeline=empty_pipeline,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
ctx.log("generate_summaries complete")
|
||||
return SummariesResult(generated=True)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[generate_title, generate_summaries],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=5,
|
||||
)
|
||||
@with_error_handling(TaskName.FINALIZE)
|
||||
async def finalize(input: FilePipelineInput, ctx: Context) -> FinalizeResult:
|
||||
"""Set transcript status to 'ended' and broadcast."""
|
||||
ctx.log("finalize: setting status to 'ended'")
|
||||
|
||||
async with fresh_db_connection():
|
||||
await set_status_and_broadcast(input.transcript_id, "ended", logger=logger)
|
||||
|
||||
ctx.log("finalize complete")
|
||||
return FinalizeResult(status="COMPLETED")
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[finalize],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.CLEANUP_CONSENT, set_error_status=False)
|
||||
async def cleanup_consent(input: FilePipelineInput, ctx: Context) -> ConsentResult:
|
||||
"""Check consent and delete audio files if any participant denied."""
|
||||
ctx.log(f"cleanup_consent: transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.meetings import ( # noqa: PLC0415
|
||||
meeting_consent_controller,
|
||||
meetings_controller,
|
||||
)
|
||||
from reflector.db.recordings import recordings_controller # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.storage import get_transcripts_storage # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
ctx.log("cleanup_consent: transcript not found")
|
||||
return ConsentResult()
|
||||
|
||||
consent_denied = False
|
||||
recording = None
|
||||
if transcript.recording_id:
|
||||
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||
if recording and recording.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||
if meeting:
|
||||
consent_denied = await meeting_consent_controller.has_any_denial(
|
||||
meeting.id
|
||||
)
|
||||
|
||||
if not consent_denied:
|
||||
ctx.log("cleanup_consent: consent approved, keeping all files")
|
||||
return ConsentResult()
|
||||
|
||||
ctx.log("cleanup_consent: consent denied, deleting audio files")
|
||||
|
||||
deletion_errors = []
|
||||
if recording and recording.bucket_name:
|
||||
keys_to_delete = []
|
||||
if recording.track_keys:
|
||||
keys_to_delete = recording.track_keys
|
||||
elif recording.object_key:
|
||||
keys_to_delete = [recording.object_key]
|
||||
|
||||
master_storage = get_transcripts_storage()
|
||||
for key in keys_to_delete:
|
||||
try:
|
||||
await master_storage.delete_file(key, bucket=recording.bucket_name)
|
||||
ctx.log(f"Deleted recording file: {recording.bucket_name}/{key}")
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to delete {key}: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
deletion_errors.append(error_msg)
|
||||
|
||||
if transcript.audio_location == "storage":
|
||||
storage = get_transcripts_storage()
|
||||
try:
|
||||
await storage.delete_file(transcript.storage_audio_path)
|
||||
ctx.log(f"Deleted processed audio: {transcript.storage_audio_path}")
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to delete processed audio: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
deletion_errors.append(error_msg)
|
||||
|
||||
try:
|
||||
if (
|
||||
hasattr(transcript, "audio_mp3_filename")
|
||||
and transcript.audio_mp3_filename
|
||||
):
|
||||
transcript.audio_mp3_filename.unlink(missing_ok=True)
|
||||
if (
|
||||
hasattr(transcript, "audio_wav_filename")
|
||||
and transcript.audio_wav_filename
|
||||
):
|
||||
transcript.audio_wav_filename.unlink(missing_ok=True)
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to delete local audio files: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
deletion_errors.append(error_msg)
|
||||
|
||||
if deletion_errors:
|
||||
logger.warning(
|
||||
"[Hatchet] cleanup_consent completed with errors",
|
||||
transcript_id=input.transcript_id,
|
||||
error_count=len(deletion_errors),
|
||||
)
|
||||
else:
|
||||
await transcripts_controller.update(transcript, {"audio_deleted": True})
|
||||
ctx.log("cleanup_consent: all audio deleted successfully")
|
||||
|
||||
return ConsentResult()
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[cleanup_consent],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.POST_ZULIP, set_error_status=False)
|
||||
async def post_zulip(input: FilePipelineInput, ctx: Context) -> ZulipResult:
|
||||
"""Post notification to Zulip."""
|
||||
ctx.log(f"post_zulip: transcript_id={input.transcript_id}")
|
||||
|
||||
if not settings.ZULIP_REALM:
|
||||
ctx.log("post_zulip skipped (Zulip not configured)")
|
||||
return ZulipResult(zulip_message_id=None, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.zulip import post_transcript_notification # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
message_id = await post_transcript_notification(transcript)
|
||||
ctx.log(f"post_zulip complete: zulip_message_id={message_id}")
|
||||
else:
|
||||
message_id = None
|
||||
|
||||
return ZulipResult(zulip_message_id=message_id)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[cleanup_consent],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.SEND_WEBHOOK, set_error_status=False)
|
||||
async def send_webhook(input: FilePipelineInput, ctx: Context) -> WebhookResult:
|
||||
"""Send completion webhook to external service."""
|
||||
ctx.log(f"send_webhook: transcript_id={input.transcript_id}")
|
||||
|
||||
if not input.room_id:
|
||||
ctx.log("send_webhook skipped (no room_id)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.rooms import rooms_controller # noqa: PLC0415
|
||||
from reflector.utils.webhook import ( # noqa: PLC0415
|
||||
fetch_transcript_webhook_payload,
|
||||
send_webhook_request,
|
||||
)
|
||||
|
||||
room = await rooms_controller.get_by_id(input.room_id)
|
||||
if not room or not room.webhook_url:
|
||||
ctx.log("send_webhook skipped (no webhook_url configured)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
payload = await fetch_transcript_webhook_payload(
|
||||
transcript_id=input.transcript_id,
|
||||
room_id=input.room_id,
|
||||
)
|
||||
|
||||
if isinstance(payload, str):
|
||||
ctx.log(f"send_webhook skipped (could not build payload): {payload}")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
import httpx # noqa: PLC0415
|
||||
|
||||
try:
|
||||
response = await send_webhook_request(
|
||||
url=room.webhook_url,
|
||||
payload=payload,
|
||||
event_type="transcript.completed",
|
||||
webhook_secret=room.webhook_secret,
|
||||
timeout=30.0,
|
||||
)
|
||||
ctx.log(f"send_webhook complete: status_code={response.status_code}")
|
||||
return WebhookResult(webhook_sent=True, response_code=response.status_code)
|
||||
except httpx.HTTPStatusError as e:
|
||||
ctx.log(f"send_webhook failed (HTTP {e.response.status_code}), continuing")
|
||||
return WebhookResult(
|
||||
webhook_sent=False, response_code=e.response.status_code
|
||||
)
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
||||
ctx.log(f"send_webhook failed ({e}), continuing")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
except Exception as e:
|
||||
ctx.log(f"send_webhook unexpected error: {e}")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
|
||||
|
||||
@file_pipeline.task(
|
||||
parents=[cleanup_consent],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.SEND_EMAIL, set_error_status=False)
|
||||
async def send_email(input: FilePipelineInput, ctx: Context) -> EmailResult:
|
||||
"""Send transcript email to collected recipients."""
|
||||
ctx.log(f"send_email: transcript_id={input.transcript_id}")
|
||||
|
||||
if not is_email_configured():
|
||||
ctx.log("send_email skipped (SMTP not configured)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.meetings import meetings_controller # noqa: PLC0415
|
||||
from reflector.db.recordings import recordings_controller # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
ctx.log("send_email skipped (transcript not found)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
# Try transcript.meeting_id first, then fall back to recording.meeting_id
|
||||
meeting = None
|
||||
if transcript.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
if not meeting and transcript.recording_id:
|
||||
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||
if recording and recording.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||
|
||||
recipients = (
|
||||
list(meeting.email_recipients)
|
||||
if meeting and meeting.email_recipients
|
||||
else []
|
||||
)
|
||||
|
||||
# Also check room-level email
|
||||
from reflector.db.rooms import rooms_controller # noqa: PLC0415
|
||||
|
||||
if transcript.room_id:
|
||||
room = await rooms_controller.get_by_id(transcript.room_id)
|
||||
if room and room.email_transcript_to:
|
||||
if room.email_transcript_to not in recipients:
|
||||
recipients.append(room.email_transcript_to)
|
||||
|
||||
if not recipients:
|
||||
ctx.log("send_email skipped (no email recipients)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
# For room-level emails, do NOT change share_mode (only set public if meeting had recipients)
|
||||
if meeting and meeting.email_recipients:
|
||||
await transcripts_controller.update(transcript, {"share_mode": "public"})
|
||||
|
||||
count = await send_transcript_email(recipients, transcript)
|
||||
ctx.log(f"send_email complete: sent {count} emails")
|
||||
|
||||
return EmailResult(emails_sent=count)
|
||||
|
||||
|
||||
# --- On failure handler ---
|
||||
|
||||
|
||||
async def on_workflow_failure(input: FilePipelineInput, ctx: Context) -> None:
|
||||
"""Set transcript status to 'error' only if not already 'ended'."""
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript and transcript.status == "ended":
|
||||
logger.info(
|
||||
"[Hatchet] FilePipeline on_workflow_failure: transcript already ended, skipping error status",
|
||||
transcript_id=input.transcript_id,
|
||||
)
|
||||
ctx.log(
|
||||
"on_workflow_failure: transcript already ended, skipping error status"
|
||||
)
|
||||
return
|
||||
await set_workflow_error_status(input.transcript_id)
|
||||
|
||||
|
||||
@file_pipeline.on_failure_task()
|
||||
async def _register_on_workflow_failure(input: FilePipelineInput, ctx: Context) -> None:
|
||||
await on_workflow_failure(input, ctx)
|
||||
454
server/reflector/hatchet/workflows/live_post_pipeline.py
Normal file
454
server/reflector/hatchet/workflows/live_post_pipeline.py
Normal file
@@ -0,0 +1,454 @@
|
||||
"""
|
||||
Hatchet workflow: LivePostProcessingPipeline
|
||||
|
||||
Post-processing pipeline for live WebRTC meetings.
|
||||
Triggered after a live meeting ends. Orchestrates:
|
||||
Left branch: waveform → convert_mp3 → upload_mp3 → remove_upload → diarize → cleanup_consent
|
||||
Right branch: generate_title (parallel with left branch)
|
||||
Fan-in: final_summaries → post_zulip → send_webhook
|
||||
|
||||
Note: This file uses deferred imports (inside functions/tasks) intentionally.
|
||||
Hatchet workers run in forked processes; fresh imports per task ensure DB connections
|
||||
are not shared across forks, avoiding connection pooling issues.
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from hatchet_sdk import Context
|
||||
from pydantic import BaseModel
|
||||
|
||||
from reflector.email import is_email_configured, send_transcript_email
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.constants import (
|
||||
TIMEOUT_HEAVY,
|
||||
TIMEOUT_MEDIUM,
|
||||
TIMEOUT_SHORT,
|
||||
TIMEOUT_TITLE,
|
||||
TaskName,
|
||||
)
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
fresh_db_connection,
|
||||
set_workflow_error_status,
|
||||
with_error_handling,
|
||||
)
|
||||
from reflector.hatchet.workflows.models import (
|
||||
ConsentResult,
|
||||
EmailResult,
|
||||
TitleResult,
|
||||
WaveformResult,
|
||||
WebhookResult,
|
||||
ZulipResult,
|
||||
)
|
||||
from reflector.logger import logger
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
class LivePostPipelineInput(BaseModel):
|
||||
transcript_id: str
|
||||
room_id: str | None = None
|
||||
|
||||
|
||||
# --- Result models specific to live post pipeline ---
|
||||
|
||||
|
||||
class ConvertMp3Result(BaseModel):
|
||||
converted: bool
|
||||
|
||||
|
||||
class UploadMp3Result(BaseModel):
|
||||
uploaded: bool
|
||||
|
||||
|
||||
class RemoveUploadResult(BaseModel):
|
||||
removed: bool
|
||||
|
||||
|
||||
class DiarizeResult(BaseModel):
|
||||
diarized: bool
|
||||
|
||||
|
||||
class FinalSummariesResult(BaseModel):
|
||||
generated: bool
|
||||
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
live_post_pipeline = hatchet.workflow(
|
||||
name="LivePostProcessingPipeline", input_validator=LivePostPipelineInput
|
||||
)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.WAVEFORM)
|
||||
async def waveform(input: LivePostPipelineInput, ctx: Context) -> WaveformResult:
|
||||
"""Generate waveform visualization from recorded audio."""
|
||||
ctx.log(f"waveform: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
PipelineMainWaveform,
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
raise ValueError(f"Transcript {input.transcript_id} not found")
|
||||
|
||||
runner = PipelineMainWaveform(transcript_id=transcript.id)
|
||||
await runner.run()
|
||||
|
||||
ctx.log("waveform complete")
|
||||
return WaveformResult(waveform_generated=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_TITLE),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.GENERATE_TITLE)
|
||||
async def generate_title(input: LivePostPipelineInput, ctx: Context) -> TitleResult:
|
||||
"""Generate meeting title from topics (runs in parallel with audio chain)."""
|
||||
ctx.log(f"generate_title: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
PipelineMainTitle,
|
||||
)
|
||||
|
||||
runner = PipelineMainTitle(transcript_id=input.transcript_id)
|
||||
await runner.run()
|
||||
|
||||
ctx.log("generate_title complete")
|
||||
return TitleResult(title=None)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[waveform],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.CONVERT_MP3)
|
||||
async def convert_mp3(input: LivePostPipelineInput, ctx: Context) -> ConvertMp3Result:
|
||||
"""Convert WAV recording to MP3."""
|
||||
ctx.log(f"convert_mp3: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_convert_to_mp3,
|
||||
)
|
||||
|
||||
await pipeline_convert_to_mp3(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("convert_mp3 complete")
|
||||
return ConvertMp3Result(converted=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[convert_mp3],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.UPLOAD_MP3)
|
||||
async def upload_mp3(input: LivePostPipelineInput, ctx: Context) -> UploadMp3Result:
|
||||
"""Upload MP3 to external storage."""
|
||||
ctx.log(f"upload_mp3: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_upload_mp3,
|
||||
)
|
||||
|
||||
await pipeline_upload_mp3(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("upload_mp3 complete")
|
||||
return UploadMp3Result(uploaded=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[upload_mp3],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=5,
|
||||
)
|
||||
@with_error_handling(TaskName.REMOVE_UPLOAD)
|
||||
async def remove_upload(
|
||||
input: LivePostPipelineInput, ctx: Context
|
||||
) -> RemoveUploadResult:
|
||||
"""Remove the original upload file."""
|
||||
ctx.log(f"remove_upload: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_remove_upload,
|
||||
)
|
||||
|
||||
await pipeline_remove_upload(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("remove_upload complete")
|
||||
return RemoveUploadResult(removed=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[remove_upload],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.DIARIZE)
|
||||
async def diarize(input: LivePostPipelineInput, ctx: Context) -> DiarizeResult:
|
||||
"""Run diarization on the recorded audio."""
|
||||
ctx.log(f"diarize: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_diarization,
|
||||
)
|
||||
|
||||
await pipeline_diarization(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("diarize complete")
|
||||
return DiarizeResult(diarized=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[diarize],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=10,
|
||||
)
|
||||
@with_error_handling(TaskName.CLEANUP_CONSENT, set_error_status=False)
|
||||
async def cleanup_consent(input: LivePostPipelineInput, ctx: Context) -> ConsentResult:
|
||||
"""Check consent and delete audio files if any participant denied."""
|
||||
ctx.log(f"cleanup_consent: transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
cleanup_consent as _cleanup_consent,
|
||||
)
|
||||
|
||||
await _cleanup_consent(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("cleanup_consent complete")
|
||||
return ConsentResult()
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[cleanup_consent, generate_title],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||
retries=3,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=30,
|
||||
)
|
||||
@with_error_handling(TaskName.FINAL_SUMMARIES)
|
||||
async def final_summaries(
|
||||
input: LivePostPipelineInput, ctx: Context
|
||||
) -> FinalSummariesResult:
|
||||
"""Generate final summaries (fan-in after audio chain + title)."""
|
||||
ctx.log(f"final_summaries: starting for transcript_id={input.transcript_id}")
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.pipelines.main_live_pipeline import ( # noqa: PLC0415
|
||||
pipeline_summaries,
|
||||
)
|
||||
|
||||
await pipeline_summaries(transcript_id=input.transcript_id)
|
||||
|
||||
ctx.log("final_summaries complete")
|
||||
return FinalSummariesResult(generated=True)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[final_summaries],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.POST_ZULIP, set_error_status=False)
|
||||
async def post_zulip(input: LivePostPipelineInput, ctx: Context) -> ZulipResult:
|
||||
"""Post notification to Zulip."""
|
||||
ctx.log(f"post_zulip: transcript_id={input.transcript_id}")
|
||||
|
||||
if not settings.ZULIP_REALM:
|
||||
ctx.log("post_zulip skipped (Zulip not configured)")
|
||||
return ZulipResult(zulip_message_id=None, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.zulip import post_transcript_notification # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript:
|
||||
message_id = await post_transcript_notification(transcript)
|
||||
ctx.log(f"post_zulip complete: zulip_message_id={message_id}")
|
||||
else:
|
||||
message_id = None
|
||||
|
||||
return ZulipResult(zulip_message_id=message_id)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[final_summaries],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.SEND_WEBHOOK, set_error_status=False)
|
||||
async def send_webhook(input: LivePostPipelineInput, ctx: Context) -> WebhookResult:
|
||||
"""Send completion webhook to external service."""
|
||||
ctx.log(f"send_webhook: transcript_id={input.transcript_id}")
|
||||
|
||||
if not input.room_id:
|
||||
ctx.log("send_webhook skipped (no room_id)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.rooms import rooms_controller # noqa: PLC0415
|
||||
from reflector.utils.webhook import ( # noqa: PLC0415
|
||||
fetch_transcript_webhook_payload,
|
||||
send_webhook_request,
|
||||
)
|
||||
|
||||
room = await rooms_controller.get_by_id(input.room_id)
|
||||
if not room or not room.webhook_url:
|
||||
ctx.log("send_webhook skipped (no webhook_url configured)")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
payload = await fetch_transcript_webhook_payload(
|
||||
transcript_id=input.transcript_id,
|
||||
room_id=input.room_id,
|
||||
)
|
||||
|
||||
if isinstance(payload, str):
|
||||
ctx.log(f"send_webhook skipped (could not build payload): {payload}")
|
||||
return WebhookResult(webhook_sent=False, skipped=True)
|
||||
|
||||
import httpx # noqa: PLC0415
|
||||
|
||||
try:
|
||||
response = await send_webhook_request(
|
||||
url=room.webhook_url,
|
||||
payload=payload,
|
||||
event_type="transcript.completed",
|
||||
webhook_secret=room.webhook_secret,
|
||||
timeout=30.0,
|
||||
)
|
||||
ctx.log(f"send_webhook complete: status_code={response.status_code}")
|
||||
return WebhookResult(webhook_sent=True, response_code=response.status_code)
|
||||
except httpx.HTTPStatusError as e:
|
||||
ctx.log(f"send_webhook failed (HTTP {e.response.status_code}), continuing")
|
||||
return WebhookResult(
|
||||
webhook_sent=False, response_code=e.response.status_code
|
||||
)
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
||||
ctx.log(f"send_webhook failed ({e}), continuing")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
except Exception as e:
|
||||
ctx.log(f"send_webhook unexpected error: {e}")
|
||||
return WebhookResult(webhook_sent=False)
|
||||
|
||||
|
||||
@live_post_pipeline.task(
|
||||
parents=[final_summaries],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||
retries=5,
|
||||
backoff_factor=2.0,
|
||||
backoff_max_seconds=15,
|
||||
)
|
||||
@with_error_handling(TaskName.SEND_EMAIL, set_error_status=False)
|
||||
async def send_email(input: LivePostPipelineInput, ctx: Context) -> EmailResult:
|
||||
"""Send transcript email to collected recipients."""
|
||||
ctx.log(f"send_email: transcript_id={input.transcript_id}")
|
||||
|
||||
if not is_email_configured():
|
||||
ctx.log("send_email skipped (SMTP not configured)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.meetings import meetings_controller # noqa: PLC0415
|
||||
from reflector.db.recordings import recordings_controller # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
ctx.log("send_email skipped (transcript not found)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
meeting = None
|
||||
if transcript.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
if not meeting and transcript.recording_id:
|
||||
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||
if recording and recording.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||
|
||||
recipients = (
|
||||
list(meeting.email_recipients)
|
||||
if meeting and meeting.email_recipients
|
||||
else []
|
||||
)
|
||||
|
||||
# Also check room-level email
|
||||
from reflector.db.rooms import rooms_controller # noqa: PLC0415
|
||||
|
||||
if transcript.room_id:
|
||||
room = await rooms_controller.get_by_id(transcript.room_id)
|
||||
if room and room.email_transcript_to:
|
||||
if room.email_transcript_to not in recipients:
|
||||
recipients.append(room.email_transcript_to)
|
||||
|
||||
if not recipients:
|
||||
ctx.log("send_email skipped (no email recipients)")
|
||||
return EmailResult(skipped=True)
|
||||
|
||||
# For room-level emails, do NOT change share_mode (only set public if meeting had recipients)
|
||||
if meeting and meeting.email_recipients:
|
||||
await transcripts_controller.update(transcript, {"share_mode": "public"})
|
||||
|
||||
count = await send_transcript_email(recipients, transcript)
|
||||
ctx.log(f"send_email complete: sent {count} emails")
|
||||
|
||||
return EmailResult(emails_sent=count)
|
||||
|
||||
|
||||
# --- On failure handler ---
|
||||
|
||||
|
||||
async def on_workflow_failure(input: LivePostPipelineInput, ctx: Context) -> None:
|
||||
"""Set transcript status to 'error' only if not already 'ended'."""
|
||||
async with fresh_db_connection():
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if transcript and transcript.status == "ended":
|
||||
logger.info(
|
||||
"[Hatchet] LivePostProcessingPipeline on_workflow_failure: transcript already ended",
|
||||
transcript_id=input.transcript_id,
|
||||
)
|
||||
ctx.log(
|
||||
"on_workflow_failure: transcript already ended, skipping error status"
|
||||
)
|
||||
return
|
||||
await set_workflow_error_status(input.transcript_id)
|
||||
|
||||
|
||||
@live_post_pipeline.on_failure_task()
|
||||
async def _register_on_workflow_failure(
|
||||
input: LivePostPipelineInput, ctx: Context
|
||||
) -> None:
|
||||
await on_workflow_failure(input, ctx)
|
||||
@@ -170,3 +170,10 @@ class WebhookResult(BaseModel):
|
||||
webhook_sent: bool
|
||||
skipped: bool = False
|
||||
response_code: int | None = None
|
||||
|
||||
|
||||
class EmailResult(BaseModel):
|
||||
"""Result from send_email task."""
|
||||
|
||||
emails_sent: int = 0
|
||||
skipped: bool = False
|
||||
|
||||
@@ -17,7 +17,7 @@ from contextlib import asynccontextmanager
|
||||
from typing import Generic
|
||||
|
||||
import av
|
||||
from celery import chord, current_task, group, shared_task
|
||||
from celery import current_task, shared_task
|
||||
from pydantic import BaseModel
|
||||
from structlog import BoundLogger as Logger
|
||||
|
||||
@@ -397,7 +397,9 @@ class PipelineMainLive(PipelineMainBase):
|
||||
# when the pipeline ends, connect to the post pipeline
|
||||
logger.info("Pipeline main live ended", transcript_id=self.transcript_id)
|
||||
logger.info("Scheduling pipeline main post", transcript_id=self.transcript_id)
|
||||
pipeline_post(transcript_id=self.transcript_id)
|
||||
transcript = await transcripts_controller.get_by_id(self.transcript_id)
|
||||
room_id = transcript.room_id if transcript else None
|
||||
await pipeline_post(transcript_id=self.transcript_id, room_id=room_id)
|
||||
|
||||
|
||||
class PipelineMainDiarization(PipelineMainBase[AudioDiarizationInput]):
|
||||
@@ -792,29 +794,20 @@ async def task_pipeline_post_to_zulip(*, transcript_id: str):
|
||||
await pipeline_post_to_zulip(transcript_id=transcript_id)
|
||||
|
||||
|
||||
def pipeline_post(*, transcript_id: str):
|
||||
async def pipeline_post(*, transcript_id: str, room_id: str | None = None):
|
||||
"""
|
||||
Run the post pipeline
|
||||
Run the post pipeline via Hatchet.
|
||||
"""
|
||||
chain_mp3_and_diarize = (
|
||||
task_pipeline_waveform.si(transcript_id=transcript_id)
|
||||
| task_pipeline_convert_to_mp3.si(transcript_id=transcript_id)
|
||||
| task_pipeline_upload_mp3.si(transcript_id=transcript_id)
|
||||
| task_pipeline_remove_upload.si(transcript_id=transcript_id)
|
||||
| task_pipeline_diarization.si(transcript_id=transcript_id)
|
||||
| task_cleanup_consent.si(transcript_id=transcript_id)
|
||||
)
|
||||
chain_title_preview = task_pipeline_title.si(transcript_id=transcript_id)
|
||||
chain_final_summaries = task_pipeline_final_summaries.si(
|
||||
transcript_id=transcript_id
|
||||
)
|
||||
from reflector.hatchet.client import HatchetClientManager # noqa: PLC0415
|
||||
|
||||
chain = chord(
|
||||
group(chain_mp3_and_diarize, chain_title_preview),
|
||||
chain_final_summaries,
|
||||
) | task_pipeline_post_to_zulip.si(transcript_id=transcript_id)
|
||||
|
||||
return chain.delay()
|
||||
await HatchetClientManager.start_workflow(
|
||||
"LivePostProcessingPipeline",
|
||||
{
|
||||
"transcript_id": str(transcript_id),
|
||||
"room_id": str(room_id) if room_id else None,
|
||||
},
|
||||
additional_metadata={"transcript_id": str(transcript_id)},
|
||||
)
|
||||
|
||||
|
||||
@get_transcript
|
||||
|
||||
@@ -10,7 +10,6 @@ from dataclasses import dataclass
|
||||
from typing import Literal, Union, assert_never
|
||||
|
||||
import celery
|
||||
from celery.result import AsyncResult
|
||||
from hatchet_sdk.clients.rest.exceptions import ApiException, NotFoundException
|
||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||
|
||||
@@ -18,7 +17,6 @@ from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.transcripts import Transcript, transcripts_controller
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||
from reflector.utils.string import NonEmptyString
|
||||
|
||||
|
||||
@@ -105,11 +103,8 @@ async def validate_transcript_for_processing(
|
||||
):
|
||||
return ValidationNotReady(detail="Recording is not ready for processing")
|
||||
|
||||
# Check Celery tasks
|
||||
# Check Celery tasks (multitrack still uses Celery for some paths)
|
||||
if task_is_scheduled_or_active(
|
||||
"reflector.pipelines.main_file_pipeline.task_pipeline_file_process",
|
||||
transcript_id=transcript.id,
|
||||
) or task_is_scheduled_or_active(
|
||||
"reflector.pipelines.main_multitrack_pipeline.task_pipeline_multitrack_process",
|
||||
transcript_id=transcript.id,
|
||||
):
|
||||
@@ -175,11 +170,8 @@ async def prepare_transcript_processing(validation: ValidationOk) -> PrepareResu
|
||||
|
||||
async def dispatch_transcript_processing(
|
||||
config: ProcessingConfig, force: bool = False
|
||||
) -> AsyncResult | None:
|
||||
"""Dispatch transcript processing to appropriate backend (Hatchet or Celery).
|
||||
|
||||
Returns AsyncResult for Celery tasks, None for Hatchet workflows.
|
||||
"""
|
||||
) -> None:
|
||||
"""Dispatch transcript processing to Hatchet workflow engine."""
|
||||
if isinstance(config, MultitrackProcessingConfig):
|
||||
# Multitrack processing always uses Hatchet (no Celery fallback)
|
||||
# First check if we can replay (outside transaction since it's read-only)
|
||||
@@ -275,7 +267,21 @@ async def dispatch_transcript_processing(
|
||||
return None
|
||||
|
||||
elif isinstance(config, FileProcessingConfig):
|
||||
return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
|
||||
# File processing uses Hatchet workflow
|
||||
workflow_id = await HatchetClientManager.start_workflow(
|
||||
workflow_name="FilePipeline",
|
||||
input_data={"transcript_id": config.transcript_id},
|
||||
additional_metadata={"transcript_id": config.transcript_id},
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(config.transcript_id)
|
||||
if transcript:
|
||||
await transcripts_controller.update(
|
||||
transcript, {"workflow_run_id": workflow_id}
|
||||
)
|
||||
|
||||
logger.info("File pipeline dispatched via Hatchet", workflow_id=workflow_id)
|
||||
return None
|
||||
else:
|
||||
assert_never(config)
|
||||
|
||||
|
||||
@@ -180,6 +180,7 @@ class Settings(BaseSettings):
|
||||
)
|
||||
|
||||
# Daily.co integration
|
||||
DAILY_API_URL: str = "https://api.daily.co/v1"
|
||||
DAILY_API_KEY: str | None = None
|
||||
DAILY_WEBHOOK_SECRET: str | None = None
|
||||
DAILY_SUBDOMAIN: str | None = None
|
||||
@@ -193,6 +194,16 @@ class Settings(BaseSettings):
|
||||
ZULIP_REALM: str | None = None
|
||||
ZULIP_API_KEY: str | None = None
|
||||
ZULIP_BOT_EMAIL: str | None = None
|
||||
ZULIP_DAG_STREAM: str | None = None
|
||||
ZULIP_DAG_TOPIC: str | None = None
|
||||
|
||||
# Email / SMTP integration (for transcript email notifications)
|
||||
SMTP_HOST: str | None = None
|
||||
SMTP_PORT: int = 587
|
||||
SMTP_USERNAME: str | None = None
|
||||
SMTP_PASSWORD: str | None = None
|
||||
SMTP_FROM_EMAIL: str | None = None
|
||||
SMTP_USE_TLS: bool = True
|
||||
|
||||
# Hatchet workflow orchestration (always enabled for multitrack processing)
|
||||
HATCHET_CLIENT_TOKEN: str | None = None
|
||||
|
||||
@@ -116,9 +116,12 @@ class Storage:
|
||||
expires_in: int = 3600,
|
||||
*,
|
||||
bucket: str | None = None,
|
||||
extra_params: dict | None = None,
|
||||
) -> str:
|
||||
"""Generate presigned URL. bucket: override instance default if provided."""
|
||||
return await self._get_file_url(filename, operation, expires_in, bucket=bucket)
|
||||
return await self._get_file_url(
|
||||
filename, operation, expires_in, bucket=bucket, extra_params=extra_params
|
||||
)
|
||||
|
||||
async def _get_file_url(
|
||||
self,
|
||||
@@ -127,6 +130,7 @@ class Storage:
|
||||
expires_in: int = 3600,
|
||||
*,
|
||||
bucket: str | None = None,
|
||||
extra_params: dict | None = None,
|
||||
) -> str:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@@ -170,16 +170,23 @@ class AwsStorage(Storage):
|
||||
expires_in: int = 3600,
|
||||
*,
|
||||
bucket: str | None = None,
|
||||
extra_params: dict | None = None,
|
||||
) -> str:
|
||||
actual_bucket = bucket or self._bucket_name
|
||||
folder = self.aws_folder
|
||||
s3filename = f"{folder}/{filename}" if folder else filename
|
||||
params = {}
|
||||
if extra_params:
|
||||
params.update(extra_params)
|
||||
# Always set Bucket/Key after extra_params to prevent overrides
|
||||
params["Bucket"] = actual_bucket
|
||||
params["Key"] = s3filename
|
||||
async with self.session.client(
|
||||
"s3", config=self.boto_config, endpoint_url=self._endpoint_url
|
||||
) as client:
|
||||
presigned_url = await client.generate_presigned_url(
|
||||
operation,
|
||||
Params={"Bucket": actual_bucket, "Key": s3filename},
|
||||
Params=params,
|
||||
ExpiresIn=expires_in,
|
||||
)
|
||||
|
||||
|
||||
257
server/reflector/tools/deleted_transcripts.py
Normal file
257
server/reflector/tools/deleted_transcripts.py
Normal file
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
CLI tool for managing soft-deleted transcripts.
|
||||
|
||||
Usage:
|
||||
uv run python -m reflector.tools.deleted_transcripts list
|
||||
uv run python -m reflector.tools.deleted_transcripts files <transcript_id>
|
||||
uv run python -m reflector.tools.deleted_transcripts download <transcript_id> [--output-dir ./]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import structlog
|
||||
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.transcripts import Transcript, transcripts
|
||||
from reflector.storage import get_source_storage, get_transcripts_storage
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
async def list_deleted():
|
||||
"""List all soft-deleted transcripts."""
|
||||
database = get_database()
|
||||
await database.connect()
|
||||
try:
|
||||
query = (
|
||||
transcripts.select()
|
||||
.where(transcripts.c.deleted_at.isnot(None))
|
||||
.order_by(transcripts.c.deleted_at.desc())
|
||||
)
|
||||
results = await database.fetch_all(query)
|
||||
|
||||
if not results:
|
||||
print("No deleted transcripts found.")
|
||||
return
|
||||
|
||||
print(
|
||||
f"{'ID':<40} {'Title':<40} {'Deleted At':<28} {'Recording ID':<40} {'Meeting ID'}"
|
||||
)
|
||||
print("-" * 180)
|
||||
for row in results:
|
||||
t = Transcript(**row)
|
||||
title = (t.title or "")[:38]
|
||||
deleted = t.deleted_at.isoformat() if t.deleted_at else ""
|
||||
print(
|
||||
f"{t.id:<40} {title:<40} {deleted:<28} {t.recording_id or '':<40} {t.meeting_id or ''}"
|
||||
)
|
||||
|
||||
print(f"\nTotal: {len(results)} deleted transcript(s)")
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
async def list_files(transcript_id: str):
|
||||
"""List all S3 keys associated with a deleted transcript."""
|
||||
database = get_database()
|
||||
await database.connect()
|
||||
try:
|
||||
query = transcripts.select().where(transcripts.c.id == transcript_id)
|
||||
result = await database.fetch_one(query)
|
||||
if not result:
|
||||
print(f"Transcript {transcript_id} not found.")
|
||||
return
|
||||
|
||||
t = Transcript(**result)
|
||||
if t.deleted_at is None:
|
||||
print(f"Transcript {transcript_id} is not deleted.")
|
||||
return
|
||||
|
||||
print(f"Transcript: {t.id}")
|
||||
print(f"Title: {t.title}")
|
||||
print(f"Deleted at: {t.deleted_at}")
|
||||
print()
|
||||
|
||||
files = []
|
||||
|
||||
# Transcript audio
|
||||
if t.audio_location == "storage" and not t.audio_deleted:
|
||||
files.append(("Transcript audio", t.storage_audio_path, None))
|
||||
|
||||
# Recording files
|
||||
if t.recording_id:
|
||||
recording = await recordings_controller.get_by_id(t.recording_id)
|
||||
if recording:
|
||||
if recording.object_key:
|
||||
files.append(
|
||||
(
|
||||
"Recording object_key",
|
||||
recording.object_key,
|
||||
recording.bucket_name,
|
||||
)
|
||||
)
|
||||
if recording.track_keys:
|
||||
for i, key in enumerate(recording.track_keys):
|
||||
files.append((f"Track {i}", key, recording.bucket_name))
|
||||
|
||||
# Cloud video
|
||||
if t.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(t.meeting_id)
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
files.append(("Cloud video", meeting.daily_composed_video_s3_key, None))
|
||||
|
||||
if not files:
|
||||
print("No associated files found.")
|
||||
return
|
||||
|
||||
print(f"{'Type':<25} {'Bucket':<30} {'S3 Key'}")
|
||||
print("-" * 120)
|
||||
for label, key, bucket in files:
|
||||
print(f"{label:<25} {bucket or '(default)':<30} {key}")
|
||||
|
||||
# Generate presigned URLs
|
||||
print("\nPresigned URLs (valid for 1 hour):")
|
||||
print("-" * 120)
|
||||
storage = get_transcripts_storage()
|
||||
for label, key, bucket in files:
|
||||
try:
|
||||
url = await storage.get_file_url(key, bucket=bucket, expires_in=3600)
|
||||
print(f"{label}: {url}")
|
||||
except Exception as e:
|
||||
print(f"{label}: ERROR - {e}")
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
async def download_files(transcript_id: str, output_dir: str):
|
||||
"""Download all files associated with a deleted transcript."""
|
||||
database = get_database()
|
||||
await database.connect()
|
||||
try:
|
||||
query = transcripts.select().where(transcripts.c.id == transcript_id)
|
||||
result = await database.fetch_one(query)
|
||||
if not result:
|
||||
print(f"Transcript {transcript_id} not found.")
|
||||
return
|
||||
|
||||
t = Transcript(**result)
|
||||
if t.deleted_at is None:
|
||||
print(f"Transcript {transcript_id} is not deleted.")
|
||||
return
|
||||
|
||||
dest = os.path.join(output_dir, t.id)
|
||||
os.makedirs(dest, exist_ok=True)
|
||||
|
||||
storage = get_transcripts_storage()
|
||||
|
||||
# Download transcript audio
|
||||
if t.audio_location == "storage" and not t.audio_deleted:
|
||||
try:
|
||||
data = await storage.get_file(t.storage_audio_path)
|
||||
path = os.path.join(dest, "audio.mp3")
|
||||
with open(path, "wb") as f:
|
||||
f.write(data)
|
||||
print(f"Downloaded: {path}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download audio: {e}")
|
||||
|
||||
# Download recording files
|
||||
if t.recording_id:
|
||||
recording = await recordings_controller.get_by_id(t.recording_id)
|
||||
if recording and recording.track_keys:
|
||||
tracks_dir = os.path.join(dest, "tracks")
|
||||
os.makedirs(tracks_dir, exist_ok=True)
|
||||
for i, key in enumerate(recording.track_keys):
|
||||
try:
|
||||
data = await storage.get_file(key, bucket=recording.bucket_name)
|
||||
filename = os.path.basename(key) or f"track_{i}"
|
||||
path = os.path.join(tracks_dir, filename)
|
||||
with open(path, "wb") as f:
|
||||
f.write(data)
|
||||
print(f"Downloaded: {path}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download track {i}: {e}")
|
||||
|
||||
# Download cloud video
|
||||
if t.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(t.meeting_id)
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
try:
|
||||
source_storage = get_source_storage("daily")
|
||||
data = await source_storage.get_file(
|
||||
meeting.daily_composed_video_s3_key
|
||||
)
|
||||
path = os.path.join(dest, "cloud_video.mp4")
|
||||
with open(path, "wb") as f:
|
||||
f.write(data)
|
||||
print(f"Downloaded: {path}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download cloud video: {e}")
|
||||
|
||||
# Write metadata
|
||||
metadata = {
|
||||
"id": t.id,
|
||||
"title": t.title,
|
||||
"created_at": t.created_at.isoformat() if t.created_at else None,
|
||||
"deleted_at": t.deleted_at.isoformat() if t.deleted_at else None,
|
||||
"duration": t.duration,
|
||||
"source_language": t.source_language,
|
||||
"target_language": t.target_language,
|
||||
"short_summary": t.short_summary,
|
||||
"long_summary": t.long_summary,
|
||||
"topics": [topic.model_dump() for topic in t.topics] if t.topics else [],
|
||||
"participants": [p.model_dump() for p in t.participants]
|
||||
if t.participants
|
||||
else [],
|
||||
"action_items": t.action_items,
|
||||
"webvtt": t.webvtt,
|
||||
"recording_id": t.recording_id,
|
||||
"meeting_id": t.meeting_id,
|
||||
}
|
||||
path = os.path.join(dest, "metadata.json")
|
||||
with open(path, "w") as f:
|
||||
json.dump(metadata, f, indent=2, default=str)
|
||||
print(f"Downloaded: {path}")
|
||||
|
||||
print(f"\nAll files saved to: {dest}")
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Manage soft-deleted transcripts")
|
||||
subparsers = parser.add_subparsers(dest="command", required=True)
|
||||
|
||||
subparsers.add_parser("list", help="List all deleted transcripts")
|
||||
|
||||
files_parser = subparsers.add_parser(
|
||||
"files", help="List S3 keys for a deleted transcript"
|
||||
)
|
||||
files_parser.add_argument("transcript_id", help="Transcript ID")
|
||||
|
||||
download_parser = subparsers.add_parser(
|
||||
"download", help="Download files for a deleted transcript"
|
||||
)
|
||||
download_parser.add_argument("transcript_id", help="Transcript ID")
|
||||
download_parser.add_argument(
|
||||
"--output-dir", default=".", help="Output directory (default: .)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "list":
|
||||
asyncio.run(list_deleted())
|
||||
elif args.command == "files":
|
||||
asyncio.run(list_files(args.transcript_id))
|
||||
elif args.command == "download":
|
||||
asyncio.run(download_files(args.transcript_id, args.output_dir))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -7,7 +7,6 @@ import asyncio
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
from urllib.parse import unquote, urlparse
|
||||
@@ -15,10 +14,8 @@ from urllib.parse import unquote, urlparse
|
||||
from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
|
||||
|
||||
from reflector.db.transcripts import SourceKind, TranscriptTopic, transcripts_controller
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.logger import logger
|
||||
from reflector.pipelines.main_file_pipeline import (
|
||||
task_pipeline_file_process as task_pipeline_file_process,
|
||||
)
|
||||
from reflector.pipelines.main_live_pipeline import pipeline_post as live_pipeline_post
|
||||
from reflector.pipelines.main_live_pipeline import (
|
||||
pipeline_process as live_pipeline_process,
|
||||
@@ -237,29 +234,22 @@ async def process_live_pipeline(
|
||||
# assert documented behaviour: after process, the pipeline isn't ended. this is the reason of calling pipeline_post
|
||||
assert pre_final_transcript.status != "ended"
|
||||
|
||||
# at this point, diarization is running but we have no access to it. run diarization in parallel - one will hopefully win after polling
|
||||
result = live_pipeline_post(transcript_id=transcript_id)
|
||||
|
||||
# result.ready() blocks even without await; it mutates result also
|
||||
while not result.ready():
|
||||
print(f"Status: {result.state}")
|
||||
time.sleep(2)
|
||||
# Trigger post-processing via Hatchet (fire-and-forget)
|
||||
await live_pipeline_post(transcript_id=transcript_id)
|
||||
print("Live post-processing pipeline triggered via Hatchet", file=sys.stderr)
|
||||
|
||||
|
||||
async def process_file_pipeline(
|
||||
transcript_id: TranscriptId,
|
||||
):
|
||||
"""Process audio/video file using the optimized file pipeline"""
|
||||
"""Process audio/video file using the optimized file pipeline via Hatchet"""
|
||||
|
||||
# task_pipeline_file_process is a Celery task, need to use .delay() for async execution
|
||||
result = task_pipeline_file_process.delay(transcript_id=transcript_id)
|
||||
|
||||
# Wait for the Celery task to complete
|
||||
while not result.ready():
|
||||
print(f"File pipeline status: {result.state}", file=sys.stderr)
|
||||
time.sleep(2)
|
||||
|
||||
logger.info("File pipeline processing complete")
|
||||
await HatchetClientManager.start_workflow(
|
||||
"FilePipeline",
|
||||
{"transcript_id": str(transcript_id)},
|
||||
additional_metadata={"transcript_id": str(transcript_id)},
|
||||
)
|
||||
print("File pipeline triggered via Hatchet", file=sys.stderr)
|
||||
|
||||
|
||||
async def process(
|
||||
@@ -293,7 +283,16 @@ async def process(
|
||||
|
||||
await handler(transcript_id)
|
||||
|
||||
await extract_result_from_entry(transcript_id, output_path)
|
||||
if pipeline == "file":
|
||||
# File pipeline is async via Hatchet — results not available immediately.
|
||||
# Use reflector.tools.process_transcript with --sync for polling.
|
||||
print(
|
||||
f"File pipeline dispatched for transcript {transcript_id}. "
|
||||
f"Results will be available once the Hatchet workflow completes.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
else:
|
||||
await extract_result_from_entry(transcript_id, output_path)
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
@@ -11,10 +11,8 @@ Usage:
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
from typing import Callable
|
||||
|
||||
from celery.result import AsyncResult
|
||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||
|
||||
import reflector._warnings_filter # noqa: F401 -- side effect: suppress pydantic validate_default warning
|
||||
@@ -39,7 +37,7 @@ async def process_transcript_inner(
|
||||
on_validation: Callable[[ValidationResult], None],
|
||||
on_preprocess: Callable[[PrepareResult], None],
|
||||
force: bool = False,
|
||||
) -> AsyncResult | None:
|
||||
) -> None:
|
||||
validation = await validate_transcript_for_processing(transcript)
|
||||
on_validation(validation)
|
||||
config = await prepare_transcript_processing(validation)
|
||||
@@ -87,56 +85,39 @@ async def process_transcript(
|
||||
elif isinstance(config, FileProcessingConfig):
|
||||
print(f"Dispatching file pipeline", file=sys.stderr)
|
||||
|
||||
result = await process_transcript_inner(
|
||||
await process_transcript_inner(
|
||||
transcript,
|
||||
on_validation=on_validation,
|
||||
on_preprocess=on_preprocess,
|
||||
force=force,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
# Hatchet workflow dispatched
|
||||
if sync:
|
||||
# Re-fetch transcript to get workflow_run_id
|
||||
transcript = await transcripts_controller.get_by_id(transcript_id)
|
||||
if not transcript or not transcript.workflow_run_id:
|
||||
print("Error: workflow_run_id not found", file=sys.stderr)
|
||||
if sync:
|
||||
# Re-fetch transcript to get workflow_run_id
|
||||
transcript = await transcripts_controller.get_by_id(transcript_id)
|
||||
if not transcript or not transcript.workflow_run_id:
|
||||
print("Error: workflow_run_id not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print("Waiting for Hatchet workflow...", file=sys.stderr)
|
||||
while True:
|
||||
status = await HatchetClientManager.get_workflow_run_status(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
print(f" Status: {status.value}", file=sys.stderr)
|
||||
|
||||
if status == V1TaskStatus.COMPLETED:
|
||||
print("Workflow completed successfully", file=sys.stderr)
|
||||
break
|
||||
elif status in (V1TaskStatus.FAILED, V1TaskStatus.CANCELLED):
|
||||
print(f"Workflow failed: {status}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print("Waiting for Hatchet workflow...", file=sys.stderr)
|
||||
while True:
|
||||
status = await HatchetClientManager.get_workflow_run_status(
|
||||
transcript.workflow_run_id
|
||||
)
|
||||
print(f" Status: {status.value}", file=sys.stderr)
|
||||
|
||||
if status == V1TaskStatus.COMPLETED:
|
||||
print("Workflow completed successfully", file=sys.stderr)
|
||||
break
|
||||
elif status in (V1TaskStatus.FAILED, V1TaskStatus.CANCELLED):
|
||||
print(f"Workflow failed: {status}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
await asyncio.sleep(5)
|
||||
else:
|
||||
print(
|
||||
"Task dispatched (use --sync to wait for completion)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
elif sync:
|
||||
print("Waiting for task completion...", file=sys.stderr)
|
||||
while not result.ready():
|
||||
print(f" Status: {result.state}", file=sys.stderr)
|
||||
time.sleep(5)
|
||||
|
||||
if result.successful():
|
||||
print("Task completed successfully", file=sys.stderr)
|
||||
else:
|
||||
print(f"Task failed: {result.result}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
await asyncio.sleep(5)
|
||||
else:
|
||||
print(
|
||||
"Task dispatched (use --sync to wait for completion)", file=sys.stderr
|
||||
"Task dispatched (use --sync to wait for completion)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
finally:
|
||||
|
||||
412
server/reflector/tools/render_hatchet_run.py
Normal file
412
server/reflector/tools/render_hatchet_run.py
Normal file
@@ -0,0 +1,412 @@
|
||||
"""
|
||||
Render Hatchet workflow runs as text DAG.
|
||||
|
||||
Usage:
|
||||
# Show latest 5 runs (summary table)
|
||||
uv run -m reflector.tools.render_hatchet_run
|
||||
|
||||
# Show specific run with full DAG + task details
|
||||
uv run -m reflector.tools.render_hatchet_run <workflow_run_id>
|
||||
|
||||
# Drill into Nth run from the list (1-indexed)
|
||||
uv run -m reflector.tools.render_hatchet_run --show 1
|
||||
|
||||
# Show latest N runs
|
||||
uv run -m reflector.tools.render_hatchet_run --last 10
|
||||
|
||||
# Filter by status
|
||||
uv run -m reflector.tools.render_hatchet_run --status FAILED
|
||||
uv run -m reflector.tools.render_hatchet_run --status RUNNING
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from hatchet_sdk.clients.rest.models import (
|
||||
V1TaskEvent,
|
||||
V1TaskStatus,
|
||||
V1TaskSummary,
|
||||
V1WorkflowRunDetails,
|
||||
WorkflowRunShapeItemForWorkflowRunDetails,
|
||||
)
|
||||
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
|
||||
STATUS_ICON = {
|
||||
V1TaskStatus.COMPLETED: "\u2705",
|
||||
V1TaskStatus.RUNNING: "\u23f3",
|
||||
V1TaskStatus.FAILED: "\u274c",
|
||||
V1TaskStatus.QUEUED: "\u23f8\ufe0f",
|
||||
V1TaskStatus.CANCELLED: "\u26a0\ufe0f",
|
||||
}
|
||||
|
||||
STATUS_LABEL = {
|
||||
V1TaskStatus.COMPLETED: "Complete",
|
||||
V1TaskStatus.RUNNING: "Running",
|
||||
V1TaskStatus.FAILED: "FAILED",
|
||||
V1TaskStatus.QUEUED: "Queued",
|
||||
V1TaskStatus.CANCELLED: "Cancelled",
|
||||
}
|
||||
|
||||
|
||||
def _fmt_time(dt: datetime | None) -> str:
|
||||
if dt is None:
|
||||
return "-"
|
||||
return dt.strftime("%H:%M:%S")
|
||||
|
||||
|
||||
def _fmt_duration(ms: int | None) -> str:
|
||||
if ms is None:
|
||||
return "-"
|
||||
secs = ms / 1000
|
||||
if secs < 60:
|
||||
return f"{secs:.1f}s"
|
||||
mins = secs / 60
|
||||
return f"{mins:.1f}m"
|
||||
|
||||
|
||||
def _fmt_status_line(task: V1TaskSummary) -> str:
|
||||
"""Format a status line like: Complete (finished 20:31:44)"""
|
||||
label = STATUS_LABEL.get(task.status, task.status.value)
|
||||
icon = STATUS_ICON.get(task.status, "?")
|
||||
|
||||
if task.status == V1TaskStatus.COMPLETED and task.finished_at:
|
||||
return f"{icon} {label} (finished {_fmt_time(task.finished_at)})"
|
||||
elif task.status == V1TaskStatus.RUNNING and task.started_at:
|
||||
parts = [f"started {_fmt_time(task.started_at)}"]
|
||||
if task.duration:
|
||||
parts.append(f"{_fmt_duration(task.duration)} elapsed")
|
||||
return f"{icon} {label} ({', '.join(parts)})"
|
||||
elif task.status == V1TaskStatus.FAILED and task.finished_at:
|
||||
return f"{icon} {label} (failed {_fmt_time(task.finished_at)})"
|
||||
elif task.status == V1TaskStatus.CANCELLED:
|
||||
return f"{icon} {label}"
|
||||
elif task.status == V1TaskStatus.QUEUED:
|
||||
return f"{icon} {label}"
|
||||
return f"{icon} {label}"
|
||||
|
||||
|
||||
def _topo_sort(
|
||||
shape: list[WorkflowRunShapeItemForWorkflowRunDetails],
|
||||
) -> list[str]:
|
||||
"""Topological sort of step_ids from shape DAG."""
|
||||
step_ids = {s.step_id for s in shape}
|
||||
children_map: dict[str, list[str]] = {}
|
||||
in_degree: dict[str, int] = {sid: 0 for sid in step_ids}
|
||||
|
||||
for s in shape:
|
||||
children = [c for c in (s.children_step_ids or []) if c in step_ids]
|
||||
children_map[s.step_id] = children
|
||||
for c in children:
|
||||
in_degree[c] += 1
|
||||
|
||||
queue = sorted(sid for sid, deg in in_degree.items() if deg == 0)
|
||||
result: list[str] = []
|
||||
while queue:
|
||||
node = queue.pop(0)
|
||||
result.append(node)
|
||||
for c in children_map.get(node, []):
|
||||
in_degree[c] -= 1
|
||||
if in_degree[c] == 0:
|
||||
queue.append(c)
|
||||
queue.sort()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def render_run_detail(details: V1WorkflowRunDetails) -> str:
|
||||
"""Render a single workflow run as markdown DAG with task details."""
|
||||
shape = details.shape or []
|
||||
tasks = details.tasks or []
|
||||
events = details.task_events or []
|
||||
run = details.run
|
||||
|
||||
if not shape:
|
||||
return f"Run {run.metadata.id}: {run.status.value} (no shape data)"
|
||||
|
||||
# Build lookups
|
||||
step_to_shape: dict[str, WorkflowRunShapeItemForWorkflowRunDetails] = {
|
||||
s.step_id: s for s in shape
|
||||
}
|
||||
step_to_name: dict[str, str] = {s.step_id: s.task_name for s in shape}
|
||||
|
||||
# Reverse edges (parents)
|
||||
parents: dict[str, list[str]] = {s.step_id: [] for s in shape}
|
||||
for s in shape:
|
||||
for child_id in s.children_step_ids or []:
|
||||
if child_id in parents:
|
||||
parents[child_id].append(s.step_id)
|
||||
|
||||
# Join tasks by step_id
|
||||
task_by_step: dict[str, V1TaskSummary] = {}
|
||||
for t in tasks:
|
||||
if t.step_id and t.step_id in step_to_name:
|
||||
task_by_step[t.step_id] = t
|
||||
|
||||
# Events indexed by task_external_id
|
||||
events_by_task: dict[str, list[V1TaskEvent]] = defaultdict(list)
|
||||
for ev in events:
|
||||
events_by_task[ev.task_id].append(ev)
|
||||
|
||||
ordered = _topo_sort(shape)
|
||||
|
||||
lines: list[str] = []
|
||||
|
||||
# Run header
|
||||
run_icon = STATUS_ICON.get(run.status, "?")
|
||||
run_name = run.display_name or run.workflow_id
|
||||
dur = _fmt_duration(run.duration)
|
||||
lines.append(f"**{run_name}** {run_icon} {dur}")
|
||||
lines.append(f"ID: `{run.metadata.id}`")
|
||||
if run.additional_metadata:
|
||||
meta_parts = [f"{k}=`{v}`" for k, v in run.additional_metadata.items()]
|
||||
lines.append(f"Meta: {', '.join(meta_parts)}")
|
||||
if run.error_message:
|
||||
# Take first line of error only for header
|
||||
first_line = run.error_message.split("\n")[0]
|
||||
lines.append(f"Error: {first_line}")
|
||||
lines.append("")
|
||||
|
||||
# DAG Status Overview table (collapsible)
|
||||
lines.append("```spoiler DAG Status Overview")
|
||||
lines.append("| Node | Status | Duration | Dependencies |")
|
||||
lines.append("|------|--------|----------|--------------|")
|
||||
|
||||
for step_id in ordered:
|
||||
s = step_to_shape[step_id]
|
||||
t = task_by_step.get(step_id)
|
||||
name = step_to_name[step_id]
|
||||
icon = STATUS_ICON.get(t.status, "?") if t else "?"
|
||||
dur = _fmt_duration(t.duration) if t else "-"
|
||||
|
||||
parent_names = [step_to_name[p] for p in parents[step_id]]
|
||||
child_names = [
|
||||
step_to_name[c] for c in (s.children_step_ids or []) if c in step_to_name
|
||||
]
|
||||
deps_left = ", ".join(parent_names) if parent_names else ""
|
||||
deps_right = ", ".join(child_names) if child_names else ""
|
||||
if deps_left and deps_right:
|
||||
deps = f"{deps_left} \u2192 {deps_right}"
|
||||
elif deps_right:
|
||||
deps = f"\u2192 {deps_right}"
|
||||
elif deps_left:
|
||||
deps = f"{deps_left} \u2192"
|
||||
else:
|
||||
deps = "-"
|
||||
|
||||
lines.append(f"| {name} | {icon} | {dur} | {deps} |")
|
||||
|
||||
lines.append("```")
|
||||
lines.append("")
|
||||
|
||||
# Node details (collapsible)
|
||||
lines.append("```spoiler Node Details")
|
||||
for step_id in ordered:
|
||||
t = task_by_step.get(step_id)
|
||||
name = step_to_name[step_id]
|
||||
|
||||
if not t:
|
||||
lines.append(f"**\U0001f4e6 {name}**")
|
||||
lines.append("Status: no task data")
|
||||
lines.append("")
|
||||
continue
|
||||
|
||||
lines.append(f"**\U0001f4e6 {name}**")
|
||||
lines.append(f"Status: {_fmt_status_line(t)}")
|
||||
|
||||
if t.duration:
|
||||
lines.append(f"Duration: {_fmt_duration(t.duration)}")
|
||||
if t.retry_count and t.retry_count > 0:
|
||||
lines.append(f"Retries: {t.retry_count}")
|
||||
|
||||
# Fan-out children
|
||||
if t.num_spawned_children and t.num_spawned_children > 0:
|
||||
children = t.children or []
|
||||
completed = sum(1 for c in children if c.status == V1TaskStatus.COMPLETED)
|
||||
failed = sum(1 for c in children if c.status == V1TaskStatus.FAILED)
|
||||
running = sum(1 for c in children if c.status == V1TaskStatus.RUNNING)
|
||||
lines.append(
|
||||
f"Spawned children: {completed}/{t.num_spawned_children} done"
|
||||
f"{f', {running} running' if running else ''}"
|
||||
f"{f', {failed} failed' if failed else ''}"
|
||||
)
|
||||
|
||||
# Error message (first meaningful line only, full trace in events)
|
||||
if t.error_message:
|
||||
err_lines = t.error_message.strip().split("\n")
|
||||
# Find first non-empty, non-traceback line
|
||||
err_summary = err_lines[0]
|
||||
for line in err_lines:
|
||||
stripped = line.strip()
|
||||
if stripped and not stripped.startswith(
|
||||
("Traceback", "File ", "{", ")")
|
||||
):
|
||||
err_summary = stripped
|
||||
break
|
||||
lines.append(f"Error: `{err_summary}`")
|
||||
|
||||
# Events log
|
||||
task_events = sorted(
|
||||
events_by_task.get(t.task_external_id, []),
|
||||
key=lambda e: e.timestamp,
|
||||
)
|
||||
if task_events:
|
||||
lines.append("Events:")
|
||||
for ev in task_events:
|
||||
ts = ev.timestamp.strftime("%H:%M:%S")
|
||||
ev_icon = ""
|
||||
if ev.event_type.value == "FINISHED":
|
||||
ev_icon = "\u2705 "
|
||||
elif ev.event_type.value in ("FAILED", "TIMED_OUT"):
|
||||
ev_icon = "\u274c "
|
||||
elif ev.event_type.value == "STARTED":
|
||||
ev_icon = "\u25b6\ufe0f "
|
||||
elif ev.event_type.value == "RETRYING":
|
||||
ev_icon = "\U0001f504 "
|
||||
elif ev.event_type.value == "CANCELLED":
|
||||
ev_icon = "\u26a0\ufe0f "
|
||||
|
||||
msg = ev.message.strip()
|
||||
if ev.error_message:
|
||||
# Just first line of error in event log
|
||||
err_first = ev.error_message.strip().split("\n")[0]
|
||||
if msg:
|
||||
msg += f" | {err_first}"
|
||||
else:
|
||||
msg = err_first
|
||||
|
||||
if msg:
|
||||
lines.append(f" `{ts}` {ev_icon}{ev.event_type.value}: {msg}")
|
||||
else:
|
||||
lines.append(f" `{ts}` {ev_icon}{ev.event_type.value}")
|
||||
|
||||
lines.append("")
|
||||
|
||||
lines.append("```")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def render_run_summary(idx: int, run: V1TaskSummary) -> str:
|
||||
"""One-line summary for a run in the list view."""
|
||||
icon = STATUS_ICON.get(run.status, "?")
|
||||
name = run.display_name or run.workflow_name or "?"
|
||||
run_id = run.workflow_run_external_id or "?"
|
||||
dur = _fmt_duration(run.duration)
|
||||
started = _fmt_time(run.started_at)
|
||||
meta = ""
|
||||
if run.additional_metadata:
|
||||
meta_parts = [f"{k}=`{v}`" for k, v in run.additional_metadata.items()]
|
||||
meta = f" ({', '.join(meta_parts)})"
|
||||
return (
|
||||
f" {idx}. {icon} **{name}** started={started} dur={dur}{meta}\n"
|
||||
f" `{run_id}`"
|
||||
)
|
||||
|
||||
|
||||
async def _fetch_run_list(
|
||||
count: int = 5,
|
||||
statuses: list[V1TaskStatus] | None = None,
|
||||
) -> list[V1TaskSummary]:
|
||||
client = HatchetClientManager.get_client()
|
||||
since = datetime.now(timezone.utc) - timedelta(days=7)
|
||||
runs = await client.runs.aio_list(
|
||||
since=since,
|
||||
statuses=statuses,
|
||||
limit=count,
|
||||
)
|
||||
return runs.rows or []
|
||||
|
||||
|
||||
async def list_recent_runs(
|
||||
count: int = 5,
|
||||
statuses: list[V1TaskStatus] | None = None,
|
||||
) -> str:
|
||||
"""List recent workflow runs as text."""
|
||||
rows = await _fetch_run_list(count, statuses)
|
||||
|
||||
if not rows:
|
||||
return "No runs found in the last 7 days."
|
||||
|
||||
lines = [f"Recent runs ({len(rows)}):", ""]
|
||||
for i, run in enumerate(rows, 1):
|
||||
lines.append(render_run_summary(i, run))
|
||||
|
||||
lines.append("")
|
||||
lines.append("Use `--show N` to see full DAG for run N")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def show_run(workflow_run_id: str) -> str:
|
||||
"""Fetch and render a single run."""
|
||||
client = HatchetClientManager.get_client()
|
||||
details = await client.runs.aio_get(workflow_run_id)
|
||||
return render_run_detail(details)
|
||||
|
||||
|
||||
async def show_nth_run(
|
||||
n: int,
|
||||
count: int = 5,
|
||||
statuses: list[V1TaskStatus] | None = None,
|
||||
) -> str:
|
||||
"""Fetch list, then drill into Nth run."""
|
||||
rows = await _fetch_run_list(count, statuses)
|
||||
|
||||
if not rows:
|
||||
return "No runs found in the last 7 days."
|
||||
if n < 1 or n > len(rows):
|
||||
return f"Invalid index {n}. Have {len(rows)} runs (1-{len(rows)})."
|
||||
|
||||
run = rows[n - 1]
|
||||
return await show_run(run.workflow_run_external_id)
|
||||
|
||||
|
||||
async def main_async(args: argparse.Namespace) -> None:
|
||||
statuses = [V1TaskStatus(args.status)] if args.status else None
|
||||
|
||||
if args.run_id:
|
||||
output = await show_run(args.run_id)
|
||||
elif args.show is not None:
|
||||
output = await show_nth_run(args.show, count=args.last, statuses=statuses)
|
||||
else:
|
||||
output = await list_recent_runs(count=args.last, statuses=statuses)
|
||||
|
||||
print(output)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Render Hatchet workflow runs as text DAG"
|
||||
)
|
||||
parser.add_argument(
|
||||
"run_id",
|
||||
nargs="?",
|
||||
default=None,
|
||||
help="Workflow run ID to show in detail. If omitted, lists recent runs.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--show",
|
||||
type=int,
|
||||
default=None,
|
||||
metavar="N",
|
||||
help="Show full DAG for the Nth run in the list (1-indexed)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--last",
|
||||
type=int,
|
||||
default=5,
|
||||
help="Number of recent runs to list (default: 5)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--status",
|
||||
choices=["QUEUED", "RUNNING", "COMPLETED", "FAILED", "CANCELLED"],
|
||||
help="Filter by status",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
asyncio.run(main_async(args))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
20
server/reflector/views/config.py
Normal file
20
server/reflector/views/config.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from reflector.email import is_email_configured
|
||||
from reflector.settings import settings
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ConfigResponse(BaseModel):
|
||||
zulip_enabled: bool
|
||||
email_enabled: bool
|
||||
|
||||
|
||||
@router.get("/config", response_model=ConfigResponse)
|
||||
async def get_config():
|
||||
return ConfigResponse(
|
||||
zulip_enabled=bool(settings.ZULIP_REALM),
|
||||
email_enabled=is_email_configured(),
|
||||
)
|
||||
@@ -4,7 +4,7 @@ from typing import Annotated, Any, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, EmailStr
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.dailyco_api import RecordingType
|
||||
@@ -89,14 +89,16 @@ class StartRecordingRequest(BaseModel):
|
||||
|
||||
@router.post("/meetings/{meeting_id}/recordings/start")
|
||||
async def start_recording(
|
||||
meeting_id: NonEmptyString, body: StartRecordingRequest
|
||||
meeting_id: NonEmptyString,
|
||||
body: StartRecordingRequest,
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
],
|
||||
) -> dict[str, Any]:
|
||||
"""Start cloud or raw-tracks recording via Daily.co REST API.
|
||||
|
||||
Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation of allowing only 1 recording at a time.
|
||||
Uses different instanceIds for cloud vs raw-tracks (same won't work)
|
||||
|
||||
Note: No authentication required - anonymous users supported. TODO this is a DOS vector
|
||||
"""
|
||||
meeting = await meetings_controller.get_by_id(meeting_id)
|
||||
if not meeting:
|
||||
@@ -149,3 +151,25 @@ async def start_recording(
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to start recording: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
class AddEmailRecipientRequest(BaseModel):
|
||||
email: EmailStr
|
||||
|
||||
|
||||
@router.post("/meetings/{meeting_id}/email-recipient")
|
||||
async def add_email_recipient(
|
||||
meeting_id: str,
|
||||
request: AddEmailRecipientRequest,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
):
|
||||
"""Add an email address to receive the transcript link when processing completes."""
|
||||
meeting = await meetings_controller.get_by_id(meeting_id)
|
||||
if not meeting:
|
||||
raise HTTPException(status_code=404, detail="Meeting not found")
|
||||
|
||||
recipients = await meetings_controller.add_email_recipient(
|
||||
meeting_id, request.email
|
||||
)
|
||||
|
||||
return {"status": "success", "email_recipients": recipients}
|
||||
|
||||
@@ -17,7 +17,6 @@ from reflector.db.rooms import rooms_controller
|
||||
from reflector.redis_cache import RedisAsyncLock
|
||||
from reflector.schemas.platform import Platform
|
||||
from reflector.services.ics_sync import ics_sync_service
|
||||
from reflector.settings import settings
|
||||
from reflector.utils.url import add_query_param
|
||||
from reflector.video_platforms.factory import create_platform_client
|
||||
from reflector.worker.webhook import test_webhook
|
||||
@@ -45,6 +44,7 @@ class Room(BaseModel):
|
||||
ics_last_etag: Optional[str] = None
|
||||
platform: Platform
|
||||
skip_consent: bool = False
|
||||
email_transcript_to: str | None = None
|
||||
|
||||
|
||||
class RoomDetails(Room):
|
||||
@@ -94,6 +94,7 @@ class CreateRoom(BaseModel):
|
||||
ics_enabled: bool = False
|
||||
platform: Platform
|
||||
skip_consent: bool = False
|
||||
email_transcript_to: str | None = None
|
||||
|
||||
|
||||
class UpdateRoom(BaseModel):
|
||||
@@ -113,6 +114,7 @@ class UpdateRoom(BaseModel):
|
||||
ics_enabled: Optional[bool] = None
|
||||
platform: Optional[Platform] = None
|
||||
skip_consent: Optional[bool] = None
|
||||
email_transcript_to: Optional[str] = None
|
||||
|
||||
|
||||
class CreateRoomMeeting(BaseModel):
|
||||
@@ -178,11 +180,10 @@ router = APIRouter()
|
||||
|
||||
@router.get("/rooms", response_model=Page[RoomDetails])
|
||||
async def rooms_list(
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
],
|
||||
) -> list[RoomDetails]:
|
||||
if not user and not settings.PUBLIC_MODE:
|
||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||
|
||||
user_id = user["sub"] if user else None
|
||||
|
||||
paginated = await apaginate(
|
||||
@@ -255,6 +256,7 @@ async def rooms_create(
|
||||
ics_enabled=room.ics_enabled,
|
||||
platform=room.platform,
|
||||
skip_consent=room.skip_consent,
|
||||
email_transcript_to=room.email_transcript_to,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ from pydantic import (
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.rooms import rooms_controller
|
||||
from reflector.db.search import (
|
||||
@@ -39,6 +40,7 @@ from reflector.db.transcripts import (
|
||||
transcripts_controller,
|
||||
)
|
||||
from reflector.db.users import user_controller
|
||||
from reflector.email import is_email_configured, send_transcript_email
|
||||
from reflector.processors.types import Transcript as ProcessorTranscript
|
||||
from reflector.processors.types import Word
|
||||
from reflector.schemas.transcript_formats import TranscriptFormat, TranscriptSegment
|
||||
@@ -112,6 +114,8 @@ class GetTranscriptMinimal(BaseModel):
|
||||
room_name: str | None = None
|
||||
audio_deleted: bool | None = None
|
||||
change_seq: int | None = None
|
||||
has_cloud_video: bool = False
|
||||
cloud_video_duration: int | None = None
|
||||
|
||||
|
||||
class TranscriptParticipantWithEmail(TranscriptParticipant):
|
||||
@@ -263,16 +267,15 @@ class SearchResponse(BaseModel):
|
||||
|
||||
@router.get("/transcripts", response_model=Page[GetTranscriptMinimal])
|
||||
async def transcripts_list(
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
],
|
||||
source_kind: SourceKind | None = None,
|
||||
room_id: str | None = None,
|
||||
search_term: str | None = None,
|
||||
change_seq_from: int | None = None,
|
||||
sort_by: Literal["created_at", "change_seq"] | None = None,
|
||||
):
|
||||
if not user and not settings.PUBLIC_MODE:
|
||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||
|
||||
user_id = user["sub"] if user else None
|
||||
|
||||
# Default behavior preserved: sort_by=None → "-created_at"
|
||||
@@ -307,13 +310,10 @@ async def transcripts_search(
|
||||
from_datetime: SearchFromDatetimeParam = None,
|
||||
to_datetime: SearchToDatetimeParam = None,
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional)
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
] = None,
|
||||
):
|
||||
"""Full-text search across transcript titles and content."""
|
||||
if not user and not settings.PUBLIC_MODE:
|
||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||
|
||||
user_id = user["sub"] if user else None
|
||||
|
||||
if from_datetime and to_datetime and from_datetime > to_datetime:
|
||||
@@ -346,7 +346,9 @@ async def transcripts_search(
|
||||
@router.post("/transcripts", response_model=GetTranscriptWithParticipants)
|
||||
async def transcripts_create(
|
||||
info: CreateTranscript,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
],
|
||||
):
|
||||
user_id = user["sub"] if user else None
|
||||
transcript = await transcripts_controller.add(
|
||||
@@ -503,6 +505,14 @@ async def transcript_get(
|
||||
)
|
||||
)
|
||||
|
||||
has_cloud_video = False
|
||||
cloud_video_duration = None
|
||||
if transcript.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
has_cloud_video = True
|
||||
cloud_video_duration = meeting.daily_composed_video_duration
|
||||
|
||||
base_data = {
|
||||
"id": transcript.id,
|
||||
"user_id": transcript.user_id,
|
||||
@@ -526,6 +536,8 @@ async def transcript_get(
|
||||
"audio_deleted": transcript.audio_deleted,
|
||||
"change_seq": transcript.change_seq,
|
||||
"participants": participants,
|
||||
"has_cloud_video": has_cloud_video,
|
||||
"cloud_video_duration": cloud_video_duration,
|
||||
}
|
||||
|
||||
if transcript_format == "text":
|
||||
@@ -688,8 +700,6 @@ async def transcript_post_to_zulip(
|
||||
)
|
||||
if not transcript:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
if not transcripts_controller.user_can_mutate(transcript, user_id):
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
content = get_zulip_message(transcript, include_topics)
|
||||
|
||||
message_updated = False
|
||||
@@ -707,3 +717,29 @@ async def transcript_post_to_zulip(
|
||||
await transcripts_controller.update(
|
||||
transcript, {"zulip_message_id": response["id"]}
|
||||
)
|
||||
|
||||
|
||||
class SendEmailRequest(BaseModel):
|
||||
email: str
|
||||
|
||||
|
||||
class SendEmailResponse(BaseModel):
|
||||
sent: int
|
||||
|
||||
|
||||
@router.post("/transcripts/{transcript_id}/email", response_model=SendEmailResponse)
|
||||
async def transcript_send_email(
|
||||
transcript_id: str,
|
||||
request: SendEmailRequest,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
):
|
||||
if not is_email_configured():
|
||||
raise HTTPException(status_code=400, detail="Email not configured")
|
||||
user_id = user["sub"] if user else None
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if not transcript:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
sent = await send_transcript_email([request.email], transcript)
|
||||
return SendEmailResponse(sent=sent)
|
||||
|
||||
@@ -53,9 +53,22 @@ async def transcript_get_audio_mp3(
|
||||
else:
|
||||
user_id = token_user["sub"]
|
||||
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if not user_id and not token:
|
||||
# No authentication provided at all. Only anonymous transcripts
|
||||
# (user_id=None) are accessible without auth, to preserve
|
||||
# pipeline access via _generate_local_audio_link().
|
||||
transcript = await transcripts_controller.get_by_id(transcript_id)
|
||||
if not transcript or transcript.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
if transcript.user_id is not None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Authentication required",
|
||||
)
|
||||
else:
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
|
||||
if transcript.audio_location == "storage":
|
||||
# proxy S3 file, to prevent issue with CORS
|
||||
@@ -94,16 +107,16 @@ async def transcript_get_audio_mp3(
|
||||
request,
|
||||
transcript.audio_mp3_filename,
|
||||
content_type="audio/mpeg",
|
||||
content_disposition=f"attachment; filename={filename}",
|
||||
content_disposition=f"inline; filename={filename}",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/transcripts/{transcript_id}/audio/waveform")
|
||||
async def transcript_get_audio_waveform(
|
||||
transcript_id: str,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
user: Annotated[auth.UserInfo, Depends(auth.current_user)],
|
||||
) -> AudioWaveform:
|
||||
user_id = user["sub"] if user else None
|
||||
user_id = user["sub"]
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
|
||||
169
server/reflector/views/transcripts_download.py
Normal file
169
server/reflector/views/transcripts_download.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""
|
||||
Transcript download endpoint — generates a zip archive with all transcript files.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.recordings import recordings_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.logger import logger
|
||||
from reflector.storage import get_source_storage, get_transcripts_storage
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/transcripts/{transcript_id}/download/zip",
|
||||
operation_id="transcript_download_zip",
|
||||
)
|
||||
async def transcript_download_zip(
|
||||
transcript_id: str,
|
||||
user: Annotated[auth.UserInfo, Depends(auth.current_user)],
|
||||
):
|
||||
user_id = user["sub"]
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if not transcripts_controller.user_can_mutate(transcript, user_id):
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
|
||||
recording = None
|
||||
if transcript.recording_id:
|
||||
recording = await recordings_controller.get_by_id(transcript.recording_id)
|
||||
|
||||
meeting = None
|
||||
if transcript.meeting_id:
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
|
||||
truncated_id = str(transcript.id).split("-")[0]
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
zip_path = os.path.join(tmpdir, f"transcript_{truncated_id}.zip")
|
||||
|
||||
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
# Transcript audio
|
||||
if transcript.audio_location == "storage" and not transcript.audio_deleted:
|
||||
try:
|
||||
storage = get_transcripts_storage()
|
||||
data = await storage.get_file(transcript.storage_audio_path)
|
||||
audio_path = os.path.join(tmpdir, "audio.mp3")
|
||||
with open(audio_path, "wb") as f:
|
||||
f.write(data)
|
||||
zf.write(audio_path, "audio.mp3")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to download transcript audio for zip",
|
||||
exc_info=e,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
elif (
|
||||
not transcript.audio_deleted
|
||||
and hasattr(transcript, "audio_mp3_filename")
|
||||
and transcript.audio_mp3_filename
|
||||
and transcript.audio_mp3_filename.exists()
|
||||
):
|
||||
zf.write(str(transcript.audio_mp3_filename), "audio.mp3")
|
||||
|
||||
# Recording tracks (multitrack)
|
||||
if recording and recording.track_keys:
|
||||
try:
|
||||
source_storage = get_source_storage(
|
||||
"daily" if recording.track_keys else None
|
||||
)
|
||||
except Exception:
|
||||
source_storage = get_transcripts_storage()
|
||||
|
||||
for i, key in enumerate(recording.track_keys):
|
||||
try:
|
||||
data = await source_storage.get_file(
|
||||
key, bucket=recording.bucket_name
|
||||
)
|
||||
filename = os.path.basename(key) or f"track_{i}"
|
||||
track_path = os.path.join(tmpdir, f"track_{i}")
|
||||
with open(track_path, "wb") as f:
|
||||
f.write(data)
|
||||
zf.write(track_path, f"tracks/{filename}")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to download track for zip",
|
||||
exc_info=e,
|
||||
track_key=key,
|
||||
)
|
||||
|
||||
# Cloud video
|
||||
if meeting and meeting.daily_composed_video_s3_key:
|
||||
try:
|
||||
source_storage = get_source_storage("daily")
|
||||
data = await source_storage.get_file(
|
||||
meeting.daily_composed_video_s3_key
|
||||
)
|
||||
video_path = os.path.join(tmpdir, "cloud_video.mp4")
|
||||
with open(video_path, "wb") as f:
|
||||
f.write(data)
|
||||
zf.write(video_path, "cloud_video.mp4")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to download cloud video for zip",
|
||||
exc_info=e,
|
||||
s3_key=meeting.daily_composed_video_s3_key,
|
||||
)
|
||||
|
||||
# Metadata JSON
|
||||
metadata = {
|
||||
"id": transcript.id,
|
||||
"title": transcript.title,
|
||||
"created_at": (
|
||||
transcript.created_at.isoformat() if transcript.created_at else None
|
||||
),
|
||||
"duration": transcript.duration,
|
||||
"source_language": transcript.source_language,
|
||||
"target_language": transcript.target_language,
|
||||
"short_summary": transcript.short_summary,
|
||||
"long_summary": transcript.long_summary,
|
||||
"topics": (
|
||||
[t.model_dump() for t in transcript.topics]
|
||||
if transcript.topics
|
||||
else []
|
||||
),
|
||||
"participants": (
|
||||
[p.model_dump() for p in transcript.participants]
|
||||
if transcript.participants
|
||||
else []
|
||||
),
|
||||
"action_items": transcript.action_items,
|
||||
"webvtt": transcript.webvtt,
|
||||
"recording_id": transcript.recording_id,
|
||||
"meeting_id": transcript.meeting_id,
|
||||
}
|
||||
meta_path = os.path.join(tmpdir, "metadata.json")
|
||||
with open(meta_path, "w") as f:
|
||||
json.dump(metadata, f, indent=2, default=str)
|
||||
zf.write(meta_path, "metadata.json")
|
||||
|
||||
# Read zip into memory before tmpdir is cleaned up
|
||||
with open(zip_path, "rb") as f:
|
||||
zip_bytes = f.read()
|
||||
|
||||
def iter_zip():
|
||||
offset = 0
|
||||
chunk_size = 64 * 1024
|
||||
while offset < len(zip_bytes):
|
||||
yield zip_bytes[offset : offset + chunk_size]
|
||||
offset += chunk_size
|
||||
|
||||
return StreamingResponse(
|
||||
iter_zip(),
|
||||
media_type="application/zip",
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename=transcript_{truncated_id}.zip"
|
||||
},
|
||||
)
|
||||
@@ -62,8 +62,7 @@ async def transcript_add_participant(
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||
|
||||
# ensure the speaker is unique
|
||||
if participant.speaker is not None and transcript.participants is not None:
|
||||
@@ -109,8 +108,7 @@ async def transcript_update_participant(
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||
|
||||
# ensure the speaker is unique
|
||||
for p in transcript.participants:
|
||||
@@ -148,7 +146,6 @@ async def transcript_delete_participant(
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||
await transcripts_controller.delete_participant(transcript, participant_id)
|
||||
return DeletionStatus(status="ok")
|
||||
|
||||
@@ -26,7 +26,9 @@ class ProcessStatus(BaseModel):
|
||||
@router.post("/transcripts/{transcript_id}/process")
|
||||
async def transcript_process(
|
||||
transcript_id: str,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
],
|
||||
) -> ProcessStatus:
|
||||
user_id = user["sub"] if user else None
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
@@ -50,8 +52,5 @@ async def transcript_process(
|
||||
if isinstance(config, ProcessError):
|
||||
raise HTTPException(status_code=500, detail=config.detail)
|
||||
else:
|
||||
# When transcript is in error state, force a new workflow instead of replaying
|
||||
# (replay would re-run from failure point with same conditions and likely fail again)
|
||||
force = transcript.status == "error"
|
||||
await dispatch_transcript_processing(config, force=force)
|
||||
await dispatch_transcript_processing(config, force=True)
|
||||
return ProcessStatus(status="ok")
|
||||
|
||||
@@ -41,8 +41,7 @@ async def transcript_assign_speaker(
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||
|
||||
if not transcript:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
@@ -121,8 +120,7 @@ async def transcript_merge_speaker(
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
||||
raise HTTPException(status_code=403, detail="Not authorized")
|
||||
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||
|
||||
if not transcript:
|
||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||
|
||||
@@ -6,7 +6,7 @@ from pydantic import BaseModel
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -21,7 +21,9 @@ async def transcript_record_upload(
|
||||
chunk_number: int,
|
||||
total_chunks: int,
|
||||
chunk: UploadFile,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
],
|
||||
):
|
||||
user_id = user["sub"] if user else None
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
@@ -93,7 +95,14 @@ async def transcript_record_upload(
|
||||
transcript, {"status": "uploaded", "source_kind": SourceKind.FILE}
|
||||
)
|
||||
|
||||
# launch a background task to process the file
|
||||
task_pipeline_file_process.delay(transcript_id=transcript_id)
|
||||
# launch Hatchet workflow to process the file
|
||||
workflow_id = await HatchetClientManager.start_workflow(
|
||||
"FilePipeline",
|
||||
{"transcript_id": str(transcript_id)},
|
||||
additional_metadata={"transcript_id": str(transcript_id)},
|
||||
)
|
||||
|
||||
# Save workflow_run_id for duplicate detection and status polling
|
||||
await transcripts_controller.update(transcript, {"workflow_run_id": workflow_id})
|
||||
|
||||
return UploadStatus(status="ok")
|
||||
|
||||
60
server/reflector/views/transcripts_video.py
Normal file
60
server/reflector/views/transcripts_video.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
Transcript cloud video endpoint — returns a presigned URL for streaming playback.
|
||||
"""
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
import reflector.auth as auth
|
||||
from reflector.db.meetings import meetings_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.storage import get_source_storage
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class VideoUrlResponse(BaseModel):
|
||||
url: str
|
||||
duration: int | None = None
|
||||
content_type: str = "video/mp4"
|
||||
|
||||
|
||||
@router.get(
|
||||
"/transcripts/{transcript_id}/video/url",
|
||||
operation_id="transcript_get_video_url",
|
||||
response_model=VideoUrlResponse,
|
||||
)
|
||||
async def transcript_get_video_url(
|
||||
transcript_id: str,
|
||||
user: Annotated[auth.UserInfo, Depends(auth.current_user)],
|
||||
):
|
||||
user_id = user["sub"]
|
||||
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
transcript_id, user_id=user_id
|
||||
)
|
||||
|
||||
if not transcript.meeting_id:
|
||||
raise HTTPException(status_code=404, detail="No video available")
|
||||
|
||||
meeting = await meetings_controller.get_by_id(transcript.meeting_id)
|
||||
if not meeting or not meeting.daily_composed_video_s3_key:
|
||||
raise HTTPException(status_code=404, detail="No video available")
|
||||
|
||||
source_storage = get_source_storage("daily")
|
||||
url = await source_storage.get_file_url(
|
||||
meeting.daily_composed_video_s3_key,
|
||||
operation="get_object",
|
||||
expires_in=900,
|
||||
extra_params={
|
||||
"ResponseContentDisposition": "inline",
|
||||
"ResponseContentType": "video/mp4",
|
||||
},
|
||||
)
|
||||
|
||||
return VideoUrlResponse(
|
||||
url=url,
|
||||
duration=meeting.daily_composed_video_duration,
|
||||
)
|
||||
@@ -15,7 +15,9 @@ async def transcript_record_webrtc(
|
||||
transcript_id: str,
|
||||
params: RtcOffer,
|
||||
request: Request,
|
||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||
user: Annotated[
|
||||
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||
],
|
||||
):
|
||||
user_id = user["sub"] if user else None
|
||||
transcript = await transcripts_controller.get_by_id_for_http(
|
||||
|
||||
@@ -90,7 +90,9 @@ async def cleanup_old_transcripts(
|
||||
):
|
||||
"""Delete old anonymous transcripts and their associated recordings/meetings."""
|
||||
query = transcripts.select().where(
|
||||
(transcripts.c.created_at < cutoff_date) & (transcripts.c.user_id.is_(None))
|
||||
(transcripts.c.created_at < cutoff_date)
|
||||
& (transcripts.c.user_id.is_(None))
|
||||
& (transcripts.c.deleted_at.is_(None))
|
||||
)
|
||||
old_transcripts = await db.fetch_all(query)
|
||||
|
||||
|
||||
@@ -25,7 +25,6 @@ from reflector.db.transcripts import (
|
||||
transcripts_controller,
|
||||
)
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
|
||||
from reflector.pipelines.main_live_pipeline import asynctask
|
||||
from reflector.pipelines.topic_processing import EmptyPipeline
|
||||
from reflector.processors import AudioFileWriterProcessor
|
||||
@@ -105,6 +104,12 @@ async def process_recording(bucket_name: str, object_key: str):
|
||||
room = await rooms_controller.get_by_id(meeting.room_id)
|
||||
|
||||
recording = await recordings_controller.get_by_object_key(bucket_name, object_key)
|
||||
if recording and recording.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted recording",
|
||||
recording_id=recording.id,
|
||||
)
|
||||
return
|
||||
if not recording:
|
||||
recording = await recordings_controller.create(
|
||||
Recording(
|
||||
@@ -116,6 +121,13 @@ async def process_recording(bucket_name: str, object_key: str):
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
||||
if transcript and transcript.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted transcript for recording",
|
||||
recording_id=recording.id,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
return
|
||||
if transcript:
|
||||
await transcripts_controller.update(
|
||||
transcript,
|
||||
@@ -132,7 +144,7 @@ async def process_recording(bucket_name: str, object_key: str):
|
||||
target_language="en",
|
||||
user_id=room.user_id,
|
||||
recording_id=recording.id,
|
||||
share_mode="public",
|
||||
share_mode="semi-private",
|
||||
meeting_id=meeting.id,
|
||||
room_id=room.id,
|
||||
)
|
||||
@@ -163,7 +175,14 @@ async def process_recording(bucket_name: str, object_key: str):
|
||||
|
||||
await transcripts_controller.update(transcript, {"status": "uploaded"})
|
||||
|
||||
task_pipeline_file_process.delay(transcript_id=transcript.id)
|
||||
await HatchetClientManager.start_workflow(
|
||||
"FilePipeline",
|
||||
{
|
||||
"transcript_id": str(transcript.id),
|
||||
"room_id": str(room.id) if room else None,
|
||||
},
|
||||
additional_metadata={"transcript_id": str(transcript.id)},
|
||||
)
|
||||
|
||||
|
||||
@shared_task
|
||||
@@ -256,6 +275,13 @@ async def _process_multitrack_recording_inner(
|
||||
# Check if recording already exists (reprocessing path)
|
||||
recording = await recordings_controller.get_by_id(recording_id)
|
||||
|
||||
if recording and recording.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted recording",
|
||||
recording_id=recording_id,
|
||||
)
|
||||
return
|
||||
|
||||
if recording and recording.meeting_id:
|
||||
# Reprocessing: recording exists with meeting already linked
|
||||
meeting = await meetings_controller.get_by_id(recording.meeting_id)
|
||||
@@ -335,6 +361,13 @@ async def _process_multitrack_recording_inner(
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_recording_id(recording.id)
|
||||
if transcript and transcript.deleted_at is not None:
|
||||
logger.info(
|
||||
"Skipping soft-deleted transcript for recording",
|
||||
recording_id=recording.id,
|
||||
transcript_id=transcript.id,
|
||||
)
|
||||
return
|
||||
if not transcript:
|
||||
transcript = await transcripts_controller.add(
|
||||
"",
|
||||
@@ -343,7 +376,7 @@ async def _process_multitrack_recording_inner(
|
||||
target_language="en",
|
||||
user_id=room.user_id,
|
||||
recording_id=recording.id,
|
||||
share_mode="public",
|
||||
share_mode="semi-private",
|
||||
meeting_id=meeting.id,
|
||||
room_id=room.id,
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -437,6 +437,8 @@ async def ws_manager_in_memory(monkeypatch):
|
||||
|
||||
try:
|
||||
fastapi_app.dependency_overrides[auth.current_user_optional] = lambda: None
|
||||
# current_user_optional_if_public_mode is NOT overridden here so the real
|
||||
# implementation runs and enforces the PUBLIC_MODE check during tests.
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -491,37 +493,39 @@ async def authenticated_client2():
|
||||
@asynccontextmanager
|
||||
async def authenticated_client_ctx():
|
||||
from reflector.app import app
|
||||
from reflector.auth import current_user, current_user_optional
|
||||
from reflector.auth import (
|
||||
current_user,
|
||||
current_user_optional,
|
||||
current_user_optional_if_public_mode,
|
||||
)
|
||||
|
||||
app.dependency_overrides[current_user] = lambda: {
|
||||
"sub": "randomuserid",
|
||||
"email": "test@mail.com",
|
||||
}
|
||||
app.dependency_overrides[current_user_optional] = lambda: {
|
||||
"sub": "randomuserid",
|
||||
"email": "test@mail.com",
|
||||
}
|
||||
_user = lambda: {"sub": "randomuserid", "email": "test@mail.com"}
|
||||
app.dependency_overrides[current_user] = _user
|
||||
app.dependency_overrides[current_user_optional] = _user
|
||||
app.dependency_overrides[current_user_optional_if_public_mode] = _user
|
||||
yield
|
||||
del app.dependency_overrides[current_user]
|
||||
del app.dependency_overrides[current_user_optional]
|
||||
del app.dependency_overrides[current_user_optional_if_public_mode]
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def authenticated_client2_ctx():
|
||||
from reflector.app import app
|
||||
from reflector.auth import current_user, current_user_optional
|
||||
from reflector.auth import (
|
||||
current_user,
|
||||
current_user_optional,
|
||||
current_user_optional_if_public_mode,
|
||||
)
|
||||
|
||||
app.dependency_overrides[current_user] = lambda: {
|
||||
"sub": "randomuserid2",
|
||||
"email": "test@mail.com",
|
||||
}
|
||||
app.dependency_overrides[current_user_optional] = lambda: {
|
||||
"sub": "randomuserid2",
|
||||
"email": "test@mail.com",
|
||||
}
|
||||
_user = lambda: {"sub": "randomuserid2", "email": "test@mail.com"}
|
||||
app.dependency_overrides[current_user] = _user
|
||||
app.dependency_overrides[current_user_optional] = _user
|
||||
app.dependency_overrides[current_user_optional_if_public_mode] = _user
|
||||
yield
|
||||
del app.dependency_overrides[current_user]
|
||||
del app.dependency_overrides[current_user_optional]
|
||||
del app.dependency_overrides[current_user_optional_if_public_mode]
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -534,23 +538,64 @@ def fake_mp3_upload():
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_hatchet_client():
|
||||
"""Reset HatchetClientManager singleton before and after each test.
|
||||
def mock_hatchet_client():
|
||||
"""Mock HatchetClientManager for all tests.
|
||||
|
||||
This ensures test isolation - each test starts with a fresh client state.
|
||||
The fixture is autouse=True so it applies to all tests automatically.
|
||||
Prevents tests from connecting to a real Hatchet server. The dummy token
|
||||
in [tool.pytest_env] prevents the import-time ValueError, but the SDK
|
||||
would still try to connect when get_client() is called. This fixture
|
||||
mocks get_client to return a MagicMock and start_workflow to return a
|
||||
dummy workflow ID.
|
||||
"""
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
|
||||
# Reset before test
|
||||
HatchetClientManager.reset()
|
||||
yield
|
||||
# Reset after test to clean up
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.workflow.return_value = MagicMock()
|
||||
|
||||
with (
|
||||
patch.object(
|
||||
HatchetClientManager,
|
||||
"get_client",
|
||||
return_value=mock_client,
|
||||
),
|
||||
patch.object(
|
||||
HatchetClientManager,
|
||||
"start_workflow",
|
||||
new_callable=AsyncMock,
|
||||
return_value="mock-workflow-id",
|
||||
),
|
||||
patch.object(
|
||||
HatchetClientManager,
|
||||
"get_workflow_run_status",
|
||||
new_callable=AsyncMock,
|
||||
return_value=None,
|
||||
),
|
||||
patch.object(
|
||||
HatchetClientManager,
|
||||
"can_replay",
|
||||
new_callable=AsyncMock,
|
||||
return_value=False,
|
||||
),
|
||||
patch.object(
|
||||
HatchetClientManager,
|
||||
"cancel_workflow",
|
||||
new_callable=AsyncMock,
|
||||
),
|
||||
patch.object(
|
||||
HatchetClientManager,
|
||||
"replay_workflow",
|
||||
new_callable=AsyncMock,
|
||||
),
|
||||
):
|
||||
yield mock_client
|
||||
|
||||
HatchetClientManager.reset()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def fake_transcript_with_topics(tmpdir, client):
|
||||
async def fake_transcript_with_topics(tmpdir, client, monkeypatch):
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
@@ -559,6 +604,9 @@ async def fake_transcript_with_topics(tmpdir, client):
|
||||
from reflector.settings import settings
|
||||
from reflector.views.transcripts import transcripts_controller
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
settings.DATA_DIR = Path(tmpdir)
|
||||
|
||||
# create a transcript
|
||||
|
||||
234
server/tests/docker-compose.integration.yml
Normal file
234
server/tests/docker-compose.integration.yml
Normal file
@@ -0,0 +1,234 @@
|
||||
# Integration test stack — full pipeline end-to-end.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f server/tests/docker-compose.integration.yml up -d --build
|
||||
#
|
||||
# Requires .env.integration in the repo root (generated by CI workflow).
|
||||
|
||||
x-backend-env: &backend-env
|
||||
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||
REDIS_HOST: redis
|
||||
CELERY_BROKER_URL: redis://redis:6379/1
|
||||
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
||||
HATCHET_CLIENT_TOKEN: ${HATCHET_CLIENT_TOKEN:-}
|
||||
HATCHET_CLIENT_SERVER_URL: http://hatchet:8888
|
||||
HATCHET_CLIENT_HOST_PORT: hatchet:7077
|
||||
HATCHET_CLIENT_TLS_STRATEGY: none
|
||||
# ML backends — CPU-only, no external services
|
||||
TRANSCRIPT_BACKEND: whisper
|
||||
WHISPER_CHUNK_MODEL: tiny
|
||||
WHISPER_FILE_MODEL: tiny
|
||||
DIARIZATION_BACKEND: pyannote
|
||||
TRANSLATION_BACKEND: passthrough
|
||||
# Storage — local Garage S3
|
||||
TRANSCRIPT_STORAGE_BACKEND: aws
|
||||
TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL: http://garage:3900
|
||||
TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: reflector-media
|
||||
TRANSCRIPT_STORAGE_AWS_REGION: garage
|
||||
# Daily mock
|
||||
DAILY_API_URL: http://mock-daily:8080/v1
|
||||
DAILY_API_KEY: fake-daily-key
|
||||
# Auth
|
||||
PUBLIC_MODE: "true"
|
||||
AUTH_BACKEND: none
|
||||
# LLM (injected from CI)
|
||||
LLM_URL: ${LLM_URL:-}
|
||||
LLM_API_KEY: ${LLM_API_KEY:-}
|
||||
LLM_MODEL: ${LLM_MODEL:-gpt-4o-mini}
|
||||
# HuggingFace (for pyannote gated models)
|
||||
HF_TOKEN: ${HF_TOKEN:-}
|
||||
# Garage S3 credentials — hardcoded test keys, containers are ephemeral
|
||||
TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: GK0123456789abcdef01234567 # gitleaks:allow
|
||||
TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" # gitleaks:allow
|
||||
# Email / SMTP — Mailpit captures emails without sending
|
||||
SMTP_HOST: mailpit
|
||||
SMTP_PORT: "1025"
|
||||
SMTP_FROM_EMAIL: test@reflector.local
|
||||
SMTP_USE_TLS: "false"
|
||||
# NOTE: DAILYCO_STORAGE_AWS_* intentionally NOT set — forces fallback to
|
||||
# get_transcripts_storage() which has ENDPOINT_URL pointing at Garage.
|
||||
# Setting them would bypass the endpoint and generate presigned URLs for AWS.
|
||||
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:17-alpine
|
||||
command: ["postgres", "-c", "max_connections=200"]
|
||||
environment:
|
||||
POSTGRES_USER: reflector
|
||||
POSTGRES_PASSWORD: reflector
|
||||
POSTGRES_DB: reflector
|
||||
volumes:
|
||||
- ../../server/docker/init-hatchet-db.sql:/docker-entrypoint-initdb.d/init-hatchet-db.sql:ro
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U reflector"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 10
|
||||
|
||||
redis:
|
||||
image: redis:7.2-alpine
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
hatchet:
|
||||
image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: "postgresql://reflector:reflector@postgres:5432/hatchet?sslmode=disable&connect_timeout=30"
|
||||
SERVER_AUTH_COOKIE_INSECURE: "t"
|
||||
SERVER_AUTH_COOKIE_DOMAIN: "localhost"
|
||||
SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
|
||||
SERVER_GRPC_INSECURE: "t"
|
||||
SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
|
||||
SERVER_GRPC_PORT: "7077"
|
||||
SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
|
||||
SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 15
|
||||
start_period: 30s
|
||||
|
||||
garage:
|
||||
image: dxflrs/garage:v1.1.0
|
||||
volumes:
|
||||
- ./integration/garage.toml:/etc/garage.toml:ro
|
||||
healthcheck:
|
||||
test: ["CMD", "/garage", "stats"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 10
|
||||
start_period: 5s
|
||||
|
||||
mailpit:
|
||||
image: axllent/mailpit:latest
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8025/api/v1/messages"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
mock-daily:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: integration/Dockerfile.mock-daily
|
||||
healthcheck:
|
||||
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/v1/recordings/test')"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
server:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: server
|
||||
WEBRTC_HOST: server
|
||||
WEBRTC_PORT_RANGE: "52000-52100"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
garage:
|
||||
condition: service_healthy
|
||||
mock-daily:
|
||||
condition: service_healthy
|
||||
mailpit:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
worker:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: worker
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
hatchet-worker-cpu:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: hatchet-worker-cpu
|
||||
depends_on:
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
hatchet-worker-llm:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
ENTRYPOINT: hatchet-worker-llm
|
||||
depends_on:
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
|
||||
test-runner:
|
||||
build:
|
||||
context: ../../server
|
||||
dockerfile: Dockerfile
|
||||
environment:
|
||||
<<: *backend-env
|
||||
# Override DATABASE_URL for sync driver (used by direct DB access in tests)
|
||||
DATABASE_URL_ASYNC: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||
DATABASE_URL: postgresql+asyncpg://reflector:reflector@postgres:5432/reflector
|
||||
SERVER_URL: http://server:1250
|
||||
GARAGE_ENDPOINT: http://garage:3900
|
||||
MAILPIT_URL: http://mailpit:8025
|
||||
depends_on:
|
||||
server:
|
||||
condition: service_started
|
||||
worker:
|
||||
condition: service_started
|
||||
hatchet-worker-cpu:
|
||||
condition: service_started
|
||||
hatchet-worker-llm:
|
||||
condition: service_started
|
||||
volumes:
|
||||
- server_data:/app/data
|
||||
# Mount test files into the container
|
||||
- ./records:/app/tests/records:ro
|
||||
- ./integration:/app/tests/integration:ro
|
||||
entrypoint: ["sleep", "infinity"]
|
||||
|
||||
volumes:
|
||||
server_data:
|
||||
|
||||
networks:
|
||||
default:
|
||||
attachable: true
|
||||
9
server/tests/integration/Dockerfile.mock-daily
Normal file
9
server/tests/integration/Dockerfile.mock-daily
Normal file
@@ -0,0 +1,9 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
RUN pip install --no-cache-dir fastapi uvicorn[standard]
|
||||
|
||||
WORKDIR /app
|
||||
COPY integration/mock_daily_server.py /app/mock_daily_server.py
|
||||
|
||||
EXPOSE 8080
|
||||
CMD ["uvicorn", "mock_daily_server:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
0
server/tests/integration/__init__.py
Normal file
0
server/tests/integration/__init__.py
Normal file
158
server/tests/integration/conftest.py
Normal file
158
server/tests/integration/conftest.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
Integration test fixtures — no mocks, real services.
|
||||
|
||||
All services (PostgreSQL, Redis, Hatchet, Garage, server, workers) are
|
||||
expected to be running via docker-compose.integration.yml.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
import httpx
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
|
||||
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
|
||||
GARAGE_ENDPOINT = os.environ.get("GARAGE_ENDPOINT", "http://garage:3900")
|
||||
MAILPIT_URL = os.environ.get("MAILPIT_URL", "http://mailpit:8025")
|
||||
DATABASE_URL = os.environ.get(
|
||||
"DATABASE_URL_ASYNC",
|
||||
os.environ.get(
|
||||
"DATABASE_URL",
|
||||
"postgresql+asyncpg://reflector:reflector@postgres:5432/reflector",
|
||||
),
|
||||
)
|
||||
GARAGE_KEY_ID = os.environ.get("TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID", "")
|
||||
GARAGE_KEY_SECRET = os.environ.get("TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY", "")
|
||||
BUCKET_NAME = "reflector-media"
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def api_client():
|
||||
"""HTTP client pointed at the running server."""
|
||||
async with httpx.AsyncClient(
|
||||
base_url=f"{SERVER_URL}/v1",
|
||||
timeout=httpx.Timeout(30.0),
|
||||
) as client:
|
||||
yield client
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def s3_client():
|
||||
"""Boto3 S3 client pointed at Garage."""
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=GARAGE_ENDPOINT,
|
||||
aws_access_key_id=GARAGE_KEY_ID,
|
||||
aws_secret_access_key=GARAGE_KEY_SECRET,
|
||||
region_name="garage",
|
||||
)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def db_engine():
|
||||
"""SQLAlchemy async engine for direct DB operations."""
|
||||
engine = create_async_engine(DATABASE_URL)
|
||||
yield engine
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_records_dir():
|
||||
"""Path to the test audio files directory."""
|
||||
return Path(__file__).parent.parent / "records"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def bucket_name():
|
||||
"""S3 bucket name used for integration tests."""
|
||||
return BUCKET_NAME
|
||||
|
||||
|
||||
async def _poll_transcript_status(
|
||||
client: httpx.AsyncClient,
|
||||
transcript_id: str,
|
||||
target: str | tuple[str, ...],
|
||||
error: str = "error",
|
||||
max_wait: int = 300,
|
||||
interval: int = 3,
|
||||
) -> dict:
|
||||
"""
|
||||
Poll GET /transcripts/{id} until status matches target or error.
|
||||
|
||||
target can be a single status string or a tuple of acceptable statuses.
|
||||
Returns the transcript dict on success, raises on timeout or error status.
|
||||
"""
|
||||
targets = (target,) if isinstance(target, str) else target
|
||||
elapsed = 0
|
||||
status = None
|
||||
while elapsed < max_wait:
|
||||
resp = await client.get(f"/transcripts/{transcript_id}")
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
status = data.get("status")
|
||||
|
||||
if status in targets:
|
||||
return data
|
||||
if status == error:
|
||||
raise AssertionError(
|
||||
f"Transcript {transcript_id} reached error status: {data}"
|
||||
)
|
||||
|
||||
await asyncio.sleep(interval)
|
||||
elapsed += interval
|
||||
|
||||
raise TimeoutError(
|
||||
f"Transcript {transcript_id} did not reach status '{target}' "
|
||||
f"within {max_wait}s (last status: {status})"
|
||||
)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
def poll_transcript_status():
|
||||
"""Returns the poll_transcript_status async helper function."""
|
||||
return _poll_transcript_status
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def mailpit_client():
|
||||
"""HTTP client for Mailpit API — query captured emails."""
|
||||
async with httpx.AsyncClient(
|
||||
base_url=MAILPIT_URL,
|
||||
timeout=httpx.Timeout(10.0),
|
||||
) as client:
|
||||
# Clear inbox before each test
|
||||
await client.delete("/api/v1/messages")
|
||||
yield client
|
||||
|
||||
|
||||
async def _poll_mailpit_messages(
|
||||
mailpit: httpx.AsyncClient,
|
||||
to_email: str,
|
||||
max_wait: int = 30,
|
||||
interval: int = 2,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Poll Mailpit API until at least one message is delivered to the given address.
|
||||
Returns the list of matching messages.
|
||||
"""
|
||||
elapsed = 0
|
||||
while elapsed < max_wait:
|
||||
resp = await mailpit.get("/api/v1/messages", params={"query": f"to:{to_email}"})
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
messages = data.get("messages", [])
|
||||
if messages:
|
||||
return messages
|
||||
await asyncio.sleep(interval)
|
||||
elapsed += interval
|
||||
raise TimeoutError(f"No email delivered to {to_email} within {max_wait}s")
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
def poll_mailpit_messages():
|
||||
"""Returns the poll_mailpit_messages async helper function."""
|
||||
return _poll_mailpit_messages
|
||||
14
server/tests/integration/garage.toml
Normal file
14
server/tests/integration/garage.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
metadata_dir = "/var/lib/garage/meta"
|
||||
data_dir = "/var/lib/garage/data"
|
||||
replication_factor = 1
|
||||
|
||||
rpc_secret = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789" # gitleaks:allow
|
||||
rpc_bind_addr = "[::]:3901"
|
||||
|
||||
[s3_api]
|
||||
api_bind_addr = "[::]:3900"
|
||||
s3_region = "garage"
|
||||
root_domain = ".s3.garage.localhost"
|
||||
|
||||
[admin]
|
||||
api_bind_addr = "[::]:3903"
|
||||
62
server/tests/integration/garage_setup.sh
Executable file
62
server/tests/integration/garage_setup.sh
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Initialize Garage bucket and keys for integration tests.
|
||||
# Run inside the Garage container after it's healthy.
|
||||
#
|
||||
# Outputs KEY_ID and KEY_SECRET to stdout (last two lines).
|
||||
#
|
||||
# Note: uses /bin/sh (not bash) since the Garage container is minimal.
|
||||
#
|
||||
set -eu
|
||||
|
||||
echo "Waiting for Garage to be ready..."
|
||||
i=0
|
||||
while [ "$i" -lt 30 ]; do
|
||||
if /garage stats >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
i=$((i + 1))
|
||||
done
|
||||
|
||||
# Layout setup
|
||||
NODE_ID=$(/garage node id -q | tr -d '[:space:]')
|
||||
LAYOUT_STATUS=$(/garage layout show 2>&1 || true)
|
||||
if echo "$LAYOUT_STATUS" | grep -q "No nodes"; then
|
||||
/garage layout assign "$NODE_ID" -c 1G -z dc1
|
||||
/garage layout apply --version 1
|
||||
echo "Layout applied."
|
||||
else
|
||||
echo "Layout already configured."
|
||||
fi
|
||||
|
||||
# Bucket
|
||||
if ! /garage bucket info reflector-media >/dev/null 2>&1; then
|
||||
/garage bucket create reflector-media
|
||||
echo "Bucket 'reflector-media' created."
|
||||
else
|
||||
echo "Bucket 'reflector-media' already exists."
|
||||
fi
|
||||
|
||||
# Key
|
||||
if /garage key info reflector-test >/dev/null 2>&1; then
|
||||
echo "Key 'reflector-test' already exists."
|
||||
KEY_OUTPUT=$(/garage key info reflector-test 2>&1)
|
||||
else
|
||||
KEY_OUTPUT=$(/garage key create reflector-test 2>&1)
|
||||
echo "Key 'reflector-test' created."
|
||||
fi
|
||||
|
||||
# Permissions
|
||||
/garage bucket allow reflector-media --read --write --key reflector-test
|
||||
|
||||
# Extract key ID and secret from output using POSIX-compatible parsing
|
||||
# garage key output format:
|
||||
# Key name: reflector-test
|
||||
# Key ID: GK...
|
||||
# Secret key: ...
|
||||
KEY_ID=$(echo "$KEY_OUTPUT" | grep "Key ID" | sed 's/.*Key ID: *//')
|
||||
KEY_SECRET=$(echo "$KEY_OUTPUT" | grep "Secret key" | sed 's/.*Secret key: *//')
|
||||
|
||||
echo "GARAGE_KEY_ID=${KEY_ID}"
|
||||
echo "GARAGE_KEY_SECRET=${KEY_SECRET}"
|
||||
75
server/tests/integration/mock_daily_server.py
Normal file
75
server/tests/integration/mock_daily_server.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""
|
||||
Minimal FastAPI mock for Daily.co API.
|
||||
|
||||
Serves canned responses for:
|
||||
- GET /v1/recordings/{recording_id}
|
||||
- GET /v1/meetings/{meeting_id}/participants
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
app = FastAPI(title="Mock Daily API")
|
||||
|
||||
|
||||
# Participant UUIDs must be 36-char hex UUIDs to match Daily's filename format
|
||||
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
|
||||
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
|
||||
|
||||
# Daily-format track keys: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
|
||||
TRACK_KEYS = [
|
||||
f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
|
||||
f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
|
||||
]
|
||||
|
||||
|
||||
@app.get("/v1/recordings/{recording_id}")
|
||||
async def get_recording(recording_id: str):
|
||||
return {
|
||||
"id": recording_id,
|
||||
"room_name": "integration-test-room",
|
||||
"start_ts": 1700000000,
|
||||
"type": "raw-tracks",
|
||||
"status": "finished",
|
||||
"max_participants": 2,
|
||||
"duration": 5,
|
||||
"share_token": None,
|
||||
"s3": {
|
||||
"bucket_name": "reflector-media",
|
||||
"bucket_region": "garage",
|
||||
"key": None,
|
||||
"endpoint": None,
|
||||
},
|
||||
"s3key": None,
|
||||
"tracks": [
|
||||
{"type": "audio", "s3Key": key, "size": 100000} for key in TRACK_KEYS
|
||||
],
|
||||
"mtgSessionId": "mock-mtg-session-id",
|
||||
}
|
||||
|
||||
|
||||
@app.get("/v1/meetings/{meeting_id}/participants")
|
||||
async def get_meeting_participants(meeting_id: str):
|
||||
return {
|
||||
"data": [
|
||||
{
|
||||
"user_id": "user-a",
|
||||
"participant_id": PARTICIPANT_A_ID,
|
||||
"user_name": "Speaker A",
|
||||
"join_time": 1700000000,
|
||||
"duration": 300,
|
||||
},
|
||||
{
|
||||
"user_id": "user-b",
|
||||
"participant_id": PARTICIPANT_B_ID,
|
||||
"user_name": "Speaker B",
|
||||
"join_time": 1700000010,
|
||||
"duration": 290,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=8080)
|
||||
61
server/tests/integration/test_file_pipeline.py
Normal file
61
server/tests/integration/test_file_pipeline.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
Integration test: File upload → FilePipeline → full processing.
|
||||
|
||||
Exercises: upload endpoint → Hatchet FilePipeline → whisper transcription →
|
||||
pyannote diarization → LLM summarization/topics → status "ended".
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_pipeline_end_to_end(
|
||||
api_client, test_records_dir, poll_transcript_status
|
||||
):
|
||||
"""Upload a WAV file and verify the full pipeline completes."""
|
||||
# 1. Create transcript
|
||||
resp = await api_client.post(
|
||||
"/transcripts",
|
||||
json={"name": "integration-file-test", "source_kind": "file"},
|
||||
)
|
||||
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||
transcript = resp.json()
|
||||
transcript_id = transcript["id"]
|
||||
|
||||
# 2. Upload audio file (single chunk)
|
||||
audio_path = test_records_dir / "test_short.wav"
|
||||
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
||||
|
||||
with open(audio_path, "rb") as f:
|
||||
resp = await api_client.post(
|
||||
f"/transcripts/{transcript_id}/record/upload",
|
||||
params={"chunk_number": 0, "total_chunks": 1},
|
||||
files={"chunk": ("test_short.wav", f, "audio/wav")},
|
||||
)
|
||||
assert resp.status_code == 200, f"Upload failed: {resp.text}"
|
||||
|
||||
# 3. Poll until pipeline completes
|
||||
data = await poll_transcript_status(
|
||||
api_client, transcript_id, target="ended", max_wait=300
|
||||
)
|
||||
|
||||
# 4. Assertions
|
||||
assert data["status"] == "ended"
|
||||
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
||||
assert (
|
||||
data.get("long_summary") and len(data["long_summary"]) > 0
|
||||
), "Long summary should be non-empty"
|
||||
assert (
|
||||
data.get("short_summary") and len(data["short_summary"]) > 0
|
||||
), "Short summary should be non-empty"
|
||||
|
||||
# Topics are served from a separate endpoint
|
||||
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
||||
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
||||
topics = topics_resp.json()
|
||||
assert len(topics) >= 1, "Should have at least 1 topic"
|
||||
for topic in topics:
|
||||
assert topic.get("title"), "Each topic should have a title"
|
||||
assert topic.get("summary"), "Each topic should have a summary"
|
||||
|
||||
assert data.get("duration", 0) > 0, "Duration should be positive"
|
||||
109
server/tests/integration/test_live_pipeline.py
Normal file
109
server/tests/integration/test_live_pipeline.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Integration test: WebRTC stream → LivePostProcessingPipeline → full processing.
|
||||
|
||||
Exercises: WebRTC SDP exchange → live audio streaming → connection close →
|
||||
Hatchet LivePostPipeline → whisper transcription → LLM summarization/topics → status "ended".
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from aiortc import RTCPeerConnection, RTCSessionDescription
|
||||
from aiortc.contrib.media import MediaPlayer
|
||||
|
||||
SERVER_URL = os.environ.get("SERVER_URL", "http://server:1250")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_live_pipeline_end_to_end(
|
||||
api_client, test_records_dir, poll_transcript_status
|
||||
):
|
||||
"""Stream audio via WebRTC and verify the full post-processing pipeline completes."""
|
||||
# 1. Create transcript
|
||||
resp = await api_client.post(
|
||||
"/transcripts",
|
||||
json={"name": "integration-live-test"},
|
||||
)
|
||||
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||
transcript = resp.json()
|
||||
transcript_id = transcript["id"]
|
||||
|
||||
# 2. Set up WebRTC peer connection with audio from test file
|
||||
audio_path = test_records_dir / "test_short.wav"
|
||||
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
||||
|
||||
pc = RTCPeerConnection()
|
||||
player = MediaPlayer(audio_path.as_posix())
|
||||
|
||||
# Add audio track
|
||||
audio_track = player.audio
|
||||
pc.addTrack(audio_track)
|
||||
|
||||
# Create data channel (server expects this for STOP command)
|
||||
channel = pc.createDataChannel("data-channel")
|
||||
|
||||
# 3. Generate SDP offer
|
||||
offer = await pc.createOffer()
|
||||
await pc.setLocalDescription(offer)
|
||||
|
||||
sdp_payload = {
|
||||
"sdp": pc.localDescription.sdp,
|
||||
"type": pc.localDescription.type,
|
||||
}
|
||||
|
||||
# 4. Send offer to server and get answer
|
||||
webrtc_url = f"{SERVER_URL}/v1/transcripts/{transcript_id}/record/webrtc"
|
||||
async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
|
||||
resp = await client.post(webrtc_url, json=sdp_payload)
|
||||
assert resp.status_code == 200, f"WebRTC offer failed: {resp.text}"
|
||||
|
||||
answer_data = resp.json()
|
||||
answer = RTCSessionDescription(sdp=answer_data["sdp"], type=answer_data["type"])
|
||||
await pc.setRemoteDescription(answer)
|
||||
|
||||
# 5. Wait for audio playback to finish
|
||||
max_stream_wait = 60
|
||||
elapsed = 0
|
||||
while elapsed < max_stream_wait:
|
||||
if audio_track.readyState == "ended":
|
||||
break
|
||||
await asyncio.sleep(0.5)
|
||||
elapsed += 0.5
|
||||
|
||||
# 6. Send STOP command and close connection
|
||||
try:
|
||||
channel.send(json.dumps({"cmd": "STOP"}))
|
||||
await asyncio.sleep(1)
|
||||
except Exception:
|
||||
pass # Channel may not be open if track ended quickly
|
||||
|
||||
await pc.close()
|
||||
|
||||
# 7. Poll until post-processing pipeline completes
|
||||
data = await poll_transcript_status(
|
||||
api_client, transcript_id, target="ended", max_wait=300
|
||||
)
|
||||
|
||||
# 8. Assertions
|
||||
assert data["status"] == "ended"
|
||||
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
||||
assert (
|
||||
data.get("long_summary") and len(data["long_summary"]) > 0
|
||||
), "Long summary should be non-empty"
|
||||
assert (
|
||||
data.get("short_summary") and len(data["short_summary"]) > 0
|
||||
), "Short summary should be non-empty"
|
||||
|
||||
# Topics are served from a separate endpoint
|
||||
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
||||
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
||||
topics = topics_resp.json()
|
||||
assert len(topics) >= 1, "Should have at least 1 topic"
|
||||
for topic in topics:
|
||||
assert topic.get("title"), "Each topic should have a title"
|
||||
assert topic.get("summary"), "Each topic should have a summary"
|
||||
|
||||
assert data.get("duration", 0) > 0, "Duration should be positive"
|
||||
181
server/tests/integration/test_multitrack_pipeline.py
Normal file
181
server/tests/integration/test_multitrack_pipeline.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""
|
||||
Integration test: Multitrack → DailyMultitrackPipeline → full processing.
|
||||
|
||||
Exercises: S3 upload → DB recording setup → process endpoint →
|
||||
Hatchet DiarizationPipeline → mock Daily API → whisper per-track transcription →
|
||||
diarization → mixdown → LLM summarization/topics → status "ended".
|
||||
Also tests email transcript notification via Mailpit SMTP sink.
|
||||
"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import text
|
||||
|
||||
# Must match Daily's filename format: {recording_start_ts}-{participant_uuid}-cam-audio-{track_start_ts}
|
||||
# These UUIDs must match mock_daily_server.py participant IDs
|
||||
PARTICIPANT_A_ID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
|
||||
PARTICIPANT_B_ID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
|
||||
TRACK_KEYS = [
|
||||
f"1700000000000-{PARTICIPANT_A_ID}-cam-audio-1700000001000",
|
||||
f"1700000000000-{PARTICIPANT_B_ID}-cam-audio-1700000001000",
|
||||
]
|
||||
|
||||
|
||||
TEST_EMAIL = "integration-test@reflector.local"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_multitrack_pipeline_end_to_end(
|
||||
api_client,
|
||||
s3_client,
|
||||
db_engine,
|
||||
test_records_dir,
|
||||
bucket_name,
|
||||
poll_transcript_status,
|
||||
mailpit_client,
|
||||
poll_mailpit_messages,
|
||||
):
|
||||
"""Set up multitrack recording in S3/DB and verify the full pipeline completes."""
|
||||
# 1. Upload test audio as two separate tracks to Garage S3
|
||||
audio_path = test_records_dir / "test_short.wav"
|
||||
assert audio_path.exists(), f"Test audio file not found: {audio_path}"
|
||||
|
||||
for track_key in TRACK_KEYS:
|
||||
s3_client.upload_file(
|
||||
str(audio_path),
|
||||
bucket_name,
|
||||
track_key,
|
||||
)
|
||||
|
||||
# 2. Create transcript via API
|
||||
resp = await api_client.post(
|
||||
"/transcripts",
|
||||
json={"name": "integration-multitrack-test"},
|
||||
)
|
||||
assert resp.status_code == 200, f"Failed to create transcript: {resp.text}"
|
||||
transcript = resp.json()
|
||||
transcript_id = transcript["id"]
|
||||
|
||||
# 3. Insert Meeting, Recording, and link to transcript via direct DB access
|
||||
recording_id = f"rec-integration-{transcript_id[:8]}"
|
||||
meeting_id = str(uuid.uuid4())
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
async with db_engine.begin() as conn:
|
||||
# Insert meeting with email_recipients for email notification test
|
||||
await conn.execute(
|
||||
text("""
|
||||
INSERT INTO meeting (
|
||||
id, room_name, room_url, host_room_url,
|
||||
start_date, end_date, platform, email_recipients
|
||||
)
|
||||
VALUES (
|
||||
:id, :room_name, :room_url, :host_room_url,
|
||||
:start_date, :end_date, :platform, CAST(:email_recipients AS json)
|
||||
)
|
||||
"""),
|
||||
{
|
||||
"id": meeting_id,
|
||||
"room_name": "integration-test-room",
|
||||
"room_url": "https://test.daily.co/integration-test-room",
|
||||
"host_room_url": "https://test.daily.co/integration-test-room",
|
||||
"start_date": now,
|
||||
"end_date": now + timedelta(hours=1),
|
||||
"platform": "daily",
|
||||
"email_recipients": json.dumps([TEST_EMAIL]),
|
||||
},
|
||||
)
|
||||
|
||||
# Insert recording with track_keys, linked to meeting
|
||||
await conn.execute(
|
||||
text("""
|
||||
INSERT INTO recording (id, bucket_name, object_key, recorded_at, status, track_keys, meeting_id)
|
||||
VALUES (:id, :bucket_name, :object_key, :recorded_at, :status, CAST(:track_keys AS json), :meeting_id)
|
||||
"""),
|
||||
{
|
||||
"id": recording_id,
|
||||
"bucket_name": bucket_name,
|
||||
"object_key": TRACK_KEYS[0],
|
||||
"recorded_at": now,
|
||||
"status": "completed",
|
||||
"track_keys": json.dumps(TRACK_KEYS),
|
||||
"meeting_id": meeting_id,
|
||||
},
|
||||
)
|
||||
|
||||
# Link recording to transcript and set status to uploaded
|
||||
await conn.execute(
|
||||
text("""
|
||||
UPDATE transcript
|
||||
SET recording_id = :recording_id, status = 'uploaded'
|
||||
WHERE id = :transcript_id
|
||||
"""),
|
||||
{
|
||||
"recording_id": recording_id,
|
||||
"transcript_id": transcript_id,
|
||||
},
|
||||
)
|
||||
|
||||
# 4. Trigger processing via process endpoint
|
||||
resp = await api_client.post(f"/transcripts/{transcript_id}/process")
|
||||
assert resp.status_code == 200, f"Process trigger failed: {resp.text}"
|
||||
|
||||
# 5. Poll until pipeline completes
|
||||
# The pipeline will call mock-daily for get_recording and get_participants
|
||||
# Accept "error" too — non-critical steps like action_items may fail due to
|
||||
# LLM parsing flakiness while core results (transcript, summaries) still exist.
|
||||
data = await poll_transcript_status(
|
||||
api_client, transcript_id, target=("ended", "error"), max_wait=300
|
||||
)
|
||||
|
||||
# 6. Assertions — verify core pipeline results regardless of final status
|
||||
assert data.get("title") and len(data["title"]) > 0, "Title should be non-empty"
|
||||
assert (
|
||||
data.get("long_summary") and len(data["long_summary"]) > 0
|
||||
), "Long summary should be non-empty"
|
||||
assert (
|
||||
data.get("short_summary") and len(data["short_summary"]) > 0
|
||||
), "Short summary should be non-empty"
|
||||
|
||||
# Topics are served from a separate endpoint
|
||||
topics_resp = await api_client.get(f"/transcripts/{transcript_id}/topics")
|
||||
assert topics_resp.status_code == 200, f"Failed to get topics: {topics_resp.text}"
|
||||
topics = topics_resp.json()
|
||||
assert len(topics) >= 1, "Should have at least 1 topic"
|
||||
for topic in topics:
|
||||
assert topic.get("title"), "Each topic should have a title"
|
||||
assert topic.get("summary"), "Each topic should have a summary"
|
||||
|
||||
# Participants are served from a separate endpoint
|
||||
participants_resp = await api_client.get(
|
||||
f"/transcripts/{transcript_id}/participants"
|
||||
)
|
||||
assert (
|
||||
participants_resp.status_code == 200
|
||||
), f"Failed to get participants: {participants_resp.text}"
|
||||
participants = participants_resp.json()
|
||||
assert (
|
||||
len(participants) >= 2
|
||||
), f"Expected at least 2 speakers for multitrack, got {len(participants)}"
|
||||
|
||||
# 7. Verify email transcript notification
|
||||
# The send_email pipeline task should have:
|
||||
# a) Set the transcript to public share_mode
|
||||
# b) Sent an email to TEST_EMAIL via Mailpit
|
||||
transcript_resp = await api_client.get(f"/transcripts/{transcript_id}")
|
||||
transcript_resp.raise_for_status()
|
||||
transcript_data = transcript_resp.json()
|
||||
assert (
|
||||
transcript_data.get("share_mode") == "public"
|
||||
), "Transcript should be set to public when email recipients exist"
|
||||
|
||||
# Poll Mailpit for the delivered email (send_email task runs async after finalize)
|
||||
messages = await poll_mailpit_messages(mailpit_client, TEST_EMAIL, max_wait=30)
|
||||
assert len(messages) >= 1, "Should have received at least 1 email"
|
||||
email_msg = messages[0]
|
||||
assert (
|
||||
"Transcript Ready" in email_msg.get("Subject", "")
|
||||
), f"Email subject should contain 'Transcript Ready', got: {email_msg.get('Subject')}"
|
||||
17
server/tests/test_app.py
Normal file
17
server/tests/test_app.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Tests for app-level endpoints (root, not under /v1)."""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_health_endpoint_returns_healthy():
|
||||
"""GET /health returns 200 and {"status": "healthy"} for probes and CI."""
|
||||
from httpx import AsyncClient
|
||||
|
||||
from reflector.app import app
|
||||
|
||||
# Health is at app root, not under /v1
|
||||
async with AsyncClient(app=app, base_url="http://test") as root_client:
|
||||
response = await root_client.get("/health")
|
||||
assert response.status_code == 200
|
||||
assert response.json() == {"status": "healthy"}
|
||||
@@ -76,8 +76,10 @@ async def test_cleanup_old_public_data_deletes_old_anonymous_transcripts():
|
||||
assert result["transcripts_deleted"] == 1
|
||||
assert result["errors"] == []
|
||||
|
||||
# Verify old anonymous transcript was deleted
|
||||
assert await transcripts_controller.get_by_id(old_transcript.id) is None
|
||||
# Verify old anonymous transcript was soft-deleted
|
||||
old = await transcripts_controller.get_by_id(old_transcript.id)
|
||||
assert old is not None
|
||||
assert old.deleted_at is not None
|
||||
|
||||
# Verify new anonymous transcript still exists
|
||||
assert await transcripts_controller.get_by_id(new_transcript.id) is not None
|
||||
@@ -150,15 +152,17 @@ async def test_cleanup_deletes_associated_meeting_and_recording():
|
||||
assert result["recordings_deleted"] == 1
|
||||
assert result["errors"] == []
|
||||
|
||||
# Verify transcript was deleted
|
||||
assert await transcripts_controller.get_by_id(old_transcript.id) is None
|
||||
# Verify transcript was soft-deleted
|
||||
old = await transcripts_controller.get_by_id(old_transcript.id)
|
||||
assert old is not None
|
||||
assert old.deleted_at is not None
|
||||
|
||||
# Verify meeting was deleted
|
||||
# Verify meeting was hard-deleted (cleanup deletes meetings directly)
|
||||
query = meetings.select().where(meetings.c.id == meeting_id)
|
||||
meeting_result = await get_database().fetch_one(query)
|
||||
assert meeting_result is None
|
||||
|
||||
# Verify recording was deleted
|
||||
# Verify recording was hard-deleted (cleanup deletes recordings directly)
|
||||
assert await recordings_controller.get_by_id(recording.id) is None
|
||||
|
||||
|
||||
|
||||
290
server/tests/test_failed_runs_monitor.py
Normal file
290
server/tests/test_failed_runs_monitor.py
Normal file
@@ -0,0 +1,290 @@
|
||||
"""
|
||||
Tests for FailedRunsMonitor Hatchet cron workflow.
|
||||
|
||||
Tests cover:
|
||||
- No Zulip message sent when no failures found
|
||||
- Messages sent for failed main pipeline runs
|
||||
- Child workflow failures filtered out
|
||||
- Errors in the monitor itself are caught and logged
|
||||
"""
|
||||
|
||||
from datetime import timezone
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||
|
||||
|
||||
def _make_task_summary(
|
||||
workflow_name: str,
|
||||
workflow_run_external_id: str = "run-123",
|
||||
status: V1TaskStatus = V1TaskStatus.FAILED,
|
||||
):
|
||||
"""Create a mock V1TaskSummary."""
|
||||
mock = MagicMock()
|
||||
mock.workflow_name = workflow_name
|
||||
mock.workflow_run_external_id = workflow_run_external_id
|
||||
mock.status = status
|
||||
return mock
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestCheckFailedRuns:
|
||||
async def test_no_failures_sends_no_message(self):
|
||||
mock_result = MagicMock()
|
||||
mock_result.rows = []
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.runs.aio_list = AsyncMock(return_value=mock_result)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.send_message_to_zulip",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_send,
|
||||
):
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import (
|
||||
_check_failed_runs,
|
||||
)
|
||||
|
||||
result = await _check_failed_runs()
|
||||
|
||||
assert result["checked"] == 0
|
||||
assert result["reported"] == 0
|
||||
mock_send.assert_not_called()
|
||||
|
||||
async def test_reports_failed_main_pipeline_runs(self):
|
||||
failed_runs = [
|
||||
_make_task_summary("DiarizationPipeline", "run-1"),
|
||||
_make_task_summary("FilePipeline", "run-2"),
|
||||
]
|
||||
mock_result = MagicMock()
|
||||
mock_result.rows = failed_runs
|
||||
|
||||
mock_details = MagicMock()
|
||||
mock_client = MagicMock()
|
||||
mock_client.runs.aio_list = AsyncMock(return_value=mock_result)
|
||||
mock_client.runs.aio_get = AsyncMock(return_value=mock_details)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.render_run_detail",
|
||||
return_value="**rendered DAG**",
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.send_message_to_zulip",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"id": 1},
|
||||
) as mock_send,
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.settings"
|
||||
) as mock_settings,
|
||||
):
|
||||
mock_settings.ZULIP_DAG_STREAM = "dag-stream"
|
||||
mock_settings.ZULIP_DAG_TOPIC = "dag-topic"
|
||||
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import (
|
||||
_check_failed_runs,
|
||||
)
|
||||
|
||||
result = await _check_failed_runs()
|
||||
|
||||
assert result["checked"] == 2
|
||||
assert result["reported"] == 2
|
||||
assert mock_send.call_count == 2
|
||||
mock_send.assert_any_call("dag-stream", "dag-topic", "**rendered DAG**")
|
||||
|
||||
async def test_filters_out_child_workflows(self):
|
||||
runs = [
|
||||
_make_task_summary("DiarizationPipeline", "run-1"),
|
||||
_make_task_summary("TrackProcessing", "run-2"),
|
||||
_make_task_summary("TopicChunkProcessing", "run-3"),
|
||||
_make_task_summary("SubjectProcessing", "run-4"),
|
||||
]
|
||||
mock_result = MagicMock()
|
||||
mock_result.rows = runs
|
||||
|
||||
mock_details = MagicMock()
|
||||
mock_client = MagicMock()
|
||||
mock_client.runs.aio_list = AsyncMock(return_value=mock_result)
|
||||
mock_client.runs.aio_get = AsyncMock(return_value=mock_details)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.render_run_detail",
|
||||
return_value="**rendered**",
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.send_message_to_zulip",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"id": 1},
|
||||
) as mock_send,
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.settings"
|
||||
) as mock_settings,
|
||||
):
|
||||
mock_settings.ZULIP_DAG_STREAM = "dag-stream"
|
||||
mock_settings.ZULIP_DAG_TOPIC = "dag-topic"
|
||||
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import (
|
||||
_check_failed_runs,
|
||||
)
|
||||
|
||||
result = await _check_failed_runs()
|
||||
|
||||
# Only DiarizationPipeline should be reported
|
||||
assert result["checked"] == 4
|
||||
assert result["reported"] == 1
|
||||
assert mock_send.call_count == 1
|
||||
|
||||
async def test_all_three_pipelines_reported(self):
|
||||
runs = [
|
||||
_make_task_summary("DiarizationPipeline", "run-1"),
|
||||
_make_task_summary("FilePipeline", "run-2"),
|
||||
_make_task_summary("LivePostProcessingPipeline", "run-3"),
|
||||
]
|
||||
mock_result = MagicMock()
|
||||
mock_result.rows = runs
|
||||
|
||||
mock_details = MagicMock()
|
||||
mock_client = MagicMock()
|
||||
mock_client.runs.aio_list = AsyncMock(return_value=mock_result)
|
||||
mock_client.runs.aio_get = AsyncMock(return_value=mock_details)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.render_run_detail",
|
||||
return_value="**rendered**",
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.send_message_to_zulip",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"id": 1},
|
||||
) as mock_send,
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.settings"
|
||||
) as mock_settings,
|
||||
):
|
||||
mock_settings.ZULIP_DAG_STREAM = "dag-stream"
|
||||
mock_settings.ZULIP_DAG_TOPIC = "dag-topic"
|
||||
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import (
|
||||
_check_failed_runs,
|
||||
)
|
||||
|
||||
result = await _check_failed_runs()
|
||||
|
||||
assert result["reported"] == 3
|
||||
assert mock_send.call_count == 3
|
||||
|
||||
async def test_continues_on_individual_run_failure(self):
|
||||
"""If one run fails to report, the others should still be reported."""
|
||||
runs = [
|
||||
_make_task_summary("DiarizationPipeline", "run-1"),
|
||||
_make_task_summary("FilePipeline", "run-2"),
|
||||
]
|
||||
mock_result = MagicMock()
|
||||
mock_result.rows = runs
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.runs.aio_list = AsyncMock(return_value=mock_result)
|
||||
# First call raises, second succeeds
|
||||
mock_client.runs.aio_get = AsyncMock(
|
||||
side_effect=[Exception("Hatchet API error"), MagicMock()]
|
||||
)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.render_run_detail",
|
||||
return_value="**rendered**",
|
||||
),
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.send_message_to_zulip",
|
||||
new_callable=AsyncMock,
|
||||
return_value={"id": 1},
|
||||
) as mock_send,
|
||||
patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.settings"
|
||||
) as mock_settings,
|
||||
):
|
||||
mock_settings.ZULIP_DAG_STREAM = "dag-stream"
|
||||
mock_settings.ZULIP_DAG_TOPIC = "dag-topic"
|
||||
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import (
|
||||
_check_failed_runs,
|
||||
)
|
||||
|
||||
result = await _check_failed_runs()
|
||||
|
||||
# First run failed to report, second succeeded
|
||||
assert result["reported"] == 1
|
||||
assert mock_send.call_count == 1
|
||||
|
||||
async def test_handles_list_api_failure(self):
|
||||
"""If aio_list fails, should return error and not crash."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.runs.aio_list = AsyncMock(
|
||||
side_effect=Exception("Connection refused")
|
||||
)
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
):
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import (
|
||||
_check_failed_runs,
|
||||
)
|
||||
|
||||
result = await _check_failed_runs()
|
||||
|
||||
assert result["checked"] == 0
|
||||
assert result["reported"] == 0
|
||||
assert "error" in result
|
||||
|
||||
async def test_uses_correct_time_window(self):
|
||||
"""Verify the correct since/until parameters are passed to aio_list."""
|
||||
mock_result = MagicMock()
|
||||
mock_result.rows = []
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.runs.aio_list = AsyncMock(return_value=mock_result)
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.failed_runs_monitor.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
):
|
||||
from reflector.hatchet.workflows.failed_runs_monitor import (
|
||||
_check_failed_runs,
|
||||
)
|
||||
|
||||
await _check_failed_runs()
|
||||
|
||||
call_kwargs = mock_client.runs.aio_list.call_args
|
||||
assert call_kwargs.kwargs["statuses"] == [V1TaskStatus.FAILED]
|
||||
since = call_kwargs.kwargs["since"]
|
||||
until = call_kwargs.kwargs["until"]
|
||||
assert since.tzinfo == timezone.utc
|
||||
assert until.tzinfo == timezone.utc
|
||||
# Window should be ~1 hour
|
||||
delta = until - since
|
||||
assert 3590 < delta.total_seconds() < 3610
|
||||
@@ -37,18 +37,3 @@ async def test_hatchet_client_can_replay_handles_exception():
|
||||
|
||||
# Should return False on error (workflow might be gone)
|
||||
assert can_replay is False
|
||||
|
||||
|
||||
def test_hatchet_client_raises_without_token():
|
||||
"""Test that get_client raises ValueError without token.
|
||||
|
||||
Useful: Catches if someone removes the token validation,
|
||||
which would cause cryptic errors later.
|
||||
"""
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
|
||||
with patch("reflector.hatchet.client.settings") as mock_settings:
|
||||
mock_settings.HATCHET_CLIENT_TOKEN = None
|
||||
|
||||
with pytest.raises(ValueError, match="HATCHET_CLIENT_TOKEN must be set"):
|
||||
HatchetClientManager.get_client()
|
||||
|
||||
233
server/tests/test_hatchet_file_pipeline.py
Normal file
233
server/tests/test_hatchet_file_pipeline.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""
|
||||
Tests for the FilePipeline Hatchet workflow.
|
||||
|
||||
Tests verify:
|
||||
1. with_error_handling behavior for file pipeline input model
|
||||
2. on_workflow_failure logic (don't overwrite 'ended' status)
|
||||
3. Input model validation
|
||||
"""
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from hatchet_sdk import NonRetryableException
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def _noop_db_context():
|
||||
"""Async context manager that yields without touching the DB."""
|
||||
yield None
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def file_pipeline_module():
|
||||
"""Import file_pipeline with Hatchet client mocked."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.workflow.return_value = MagicMock()
|
||||
with patch(
|
||||
"reflector.hatchet.client.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
):
|
||||
from reflector.hatchet.workflows import file_pipeline
|
||||
|
||||
return file_pipeline
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_file_input():
|
||||
"""Minimal FilePipelineInput for tests."""
|
||||
from reflector.hatchet.workflows.file_pipeline import FilePipelineInput
|
||||
|
||||
return FilePipelineInput(
|
||||
transcript_id="ts-file-123",
|
||||
room_id="room-456",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_ctx():
|
||||
"""Minimal Context-like object."""
|
||||
ctx = MagicMock()
|
||||
ctx.log = MagicMock()
|
||||
return ctx
|
||||
|
||||
|
||||
def test_file_pipeline_input_model():
|
||||
"""Test FilePipelineInput validation."""
|
||||
from reflector.hatchet.workflows.file_pipeline import FilePipelineInput
|
||||
|
||||
# Valid input with room_id
|
||||
input_with_room = FilePipelineInput(transcript_id="ts-123", room_id="room-456")
|
||||
assert input_with_room.transcript_id == "ts-123"
|
||||
assert input_with_room.room_id == "room-456"
|
||||
|
||||
# Valid input without room_id
|
||||
input_no_room = FilePipelineInput(transcript_id="ts-123")
|
||||
assert input_no_room.room_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_pipeline_error_handling_transient(
|
||||
file_pipeline_module, mock_file_input, mock_ctx
|
||||
):
|
||||
"""Transient exception must NOT set error status."""
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
TaskName,
|
||||
with_error_handling,
|
||||
)
|
||||
|
||||
async def failing_task(input, ctx):
|
||||
raise httpx.TimeoutException("timed out")
|
||||
|
||||
wrapped = with_error_handling(TaskName.EXTRACT_AUDIO)(failing_task)
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
with pytest.raises(httpx.TimeoutException):
|
||||
await wrapped(mock_file_input, mock_ctx)
|
||||
|
||||
mock_set_error.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_pipeline_error_handling_hard_fail(
|
||||
file_pipeline_module, mock_file_input, mock_ctx
|
||||
):
|
||||
"""Hard-fail (ValueError) must set error status and raise NonRetryableException."""
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
TaskName,
|
||||
with_error_handling,
|
||||
)
|
||||
|
||||
async def failing_task(input, ctx):
|
||||
raise ValueError("No audio file found")
|
||||
|
||||
wrapped = with_error_handling(TaskName.EXTRACT_AUDIO)(failing_task)
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
with pytest.raises(NonRetryableException) as exc_info:
|
||||
await wrapped(mock_file_input, mock_ctx)
|
||||
|
||||
assert "No audio file found" in str(exc_info.value)
|
||||
mock_set_error.assert_called_once_with("ts-file-123")
|
||||
|
||||
|
||||
def test_diarize_result_uses_plain_dicts():
|
||||
"""DiarizationSegment is a TypedDict (plain dict), not a Pydantic model.
|
||||
|
||||
The diarize task must serialize segments as plain dicts (not call .model_dump()),
|
||||
and assemble_transcript must be able to reconstruct them with DiarizationSegment(**s).
|
||||
This was a real bug: 'dict' object has no attribute 'model_dump'.
|
||||
"""
|
||||
from reflector.hatchet.workflows.file_pipeline import DiarizeResult
|
||||
from reflector.processors.types import DiarizationSegment
|
||||
|
||||
# DiarizationSegment is a TypedDict — instances are plain dicts
|
||||
segments = [
|
||||
DiarizationSegment(start=0.0, end=1.5, speaker=0),
|
||||
DiarizationSegment(start=1.5, end=3.0, speaker=1),
|
||||
]
|
||||
assert isinstance(segments[0], dict), "DiarizationSegment should be a plain dict"
|
||||
|
||||
# DiarizeResult should accept list[dict] directly (no model_dump needed)
|
||||
result = DiarizeResult(diarization=segments)
|
||||
assert result.diarization is not None
|
||||
assert len(result.diarization) == 2
|
||||
|
||||
# Consumer (assemble_transcript) reconstructs via DiarizationSegment(**s)
|
||||
reconstructed = [DiarizationSegment(**s) for s in result.diarization]
|
||||
assert reconstructed[0]["start"] == 0.0
|
||||
assert reconstructed[0]["speaker"] == 0
|
||||
assert reconstructed[1]["end"] == 3.0
|
||||
assert reconstructed[1]["speaker"] == 1
|
||||
|
||||
|
||||
def test_diarize_result_handles_none():
|
||||
"""DiarizeResult with no diarization data (diarization disabled)."""
|
||||
from reflector.hatchet.workflows.file_pipeline import DiarizeResult
|
||||
|
||||
result = DiarizeResult(diarization=None)
|
||||
assert result.diarization is None
|
||||
|
||||
result_default = DiarizeResult()
|
||||
assert result_default.diarization is None
|
||||
|
||||
|
||||
def test_transcribe_result_words_are_pydantic():
|
||||
"""TranscribeResult words come from Pydantic Word.model_dump() — verify roundtrip."""
|
||||
from reflector.hatchet.workflows.file_pipeline import TranscribeResult
|
||||
from reflector.processors.types import Word
|
||||
|
||||
words = [
|
||||
Word(text="hello", start=0.0, end=0.5),
|
||||
Word(text="world", start=0.5, end=1.0),
|
||||
]
|
||||
# Words are Pydantic models, so model_dump() works
|
||||
word_dicts = [w.model_dump() for w in words]
|
||||
result = TranscribeResult(words=word_dicts)
|
||||
|
||||
# Consumer reconstructs via Word(**w)
|
||||
reconstructed = [Word(**w) for w in result.words]
|
||||
assert reconstructed[0].text == "hello"
|
||||
assert reconstructed[1].start == 0.5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_pipeline_on_failure_sets_error_status(
|
||||
file_pipeline_module, mock_file_input, mock_ctx
|
||||
):
|
||||
"""on_workflow_failure sets error status when transcript is processing."""
|
||||
from reflector.hatchet.workflows.file_pipeline import on_workflow_failure
|
||||
|
||||
transcript_processing = MagicMock()
|
||||
transcript_processing.status = "processing"
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.file_pipeline.fresh_db_connection",
|
||||
_noop_db_context,
|
||||
):
|
||||
with patch(
|
||||
"reflector.db.transcripts.transcripts_controller.get_by_id",
|
||||
new_callable=AsyncMock,
|
||||
return_value=transcript_processing,
|
||||
):
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.file_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
await on_workflow_failure(mock_file_input, mock_ctx)
|
||||
mock_set_error.assert_called_once_with(mock_file_input.transcript_id)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_pipeline_on_failure_does_not_overwrite_ended(
|
||||
file_pipeline_module, mock_file_input, mock_ctx
|
||||
):
|
||||
"""on_workflow_failure must NOT overwrite 'ended' status."""
|
||||
from reflector.hatchet.workflows.file_pipeline import on_workflow_failure
|
||||
|
||||
transcript_ended = MagicMock()
|
||||
transcript_ended.status = "ended"
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.file_pipeline.fresh_db_connection",
|
||||
_noop_db_context,
|
||||
):
|
||||
with patch(
|
||||
"reflector.db.transcripts.transcripts_controller.get_by_id",
|
||||
new_callable=AsyncMock,
|
||||
return_value=transcript_ended,
|
||||
):
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.file_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
await on_workflow_failure(mock_file_input, mock_ctx)
|
||||
mock_set_error.assert_not_called()
|
||||
218
server/tests/test_hatchet_live_post_pipeline.py
Normal file
218
server/tests/test_hatchet_live_post_pipeline.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
Tests for the LivePostProcessingPipeline Hatchet workflow.
|
||||
|
||||
Tests verify:
|
||||
1. with_error_handling behavior for live post pipeline input model
|
||||
2. on_workflow_failure logic (don't overwrite 'ended' status)
|
||||
3. Input model validation
|
||||
4. pipeline_post() now triggers Hatchet instead of Celery chord
|
||||
"""
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from hatchet_sdk import NonRetryableException
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def _noop_db_context():
|
||||
"""Async context manager that yields without touching the DB."""
|
||||
yield None
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def live_pipeline_module():
|
||||
"""Import live_post_pipeline with Hatchet client mocked."""
|
||||
mock_client = MagicMock()
|
||||
mock_client.workflow.return_value = MagicMock()
|
||||
with patch(
|
||||
"reflector.hatchet.client.HatchetClientManager.get_client",
|
||||
return_value=mock_client,
|
||||
):
|
||||
from reflector.hatchet.workflows import live_post_pipeline
|
||||
|
||||
return live_post_pipeline
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_live_input():
|
||||
"""Minimal LivePostPipelineInput for tests."""
|
||||
from reflector.hatchet.workflows.live_post_pipeline import LivePostPipelineInput
|
||||
|
||||
return LivePostPipelineInput(
|
||||
transcript_id="ts-live-789",
|
||||
room_id="room-abc",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_ctx():
|
||||
"""Minimal Context-like object."""
|
||||
ctx = MagicMock()
|
||||
ctx.log = MagicMock()
|
||||
return ctx
|
||||
|
||||
|
||||
def test_live_post_pipeline_input_model():
|
||||
"""Test LivePostPipelineInput validation."""
|
||||
from reflector.hatchet.workflows.live_post_pipeline import LivePostPipelineInput
|
||||
|
||||
# Valid input with room_id
|
||||
input_with_room = LivePostPipelineInput(transcript_id="ts-123", room_id="room-456")
|
||||
assert input_with_room.transcript_id == "ts-123"
|
||||
assert input_with_room.room_id == "room-456"
|
||||
|
||||
# Valid input without room_id
|
||||
input_no_room = LivePostPipelineInput(transcript_id="ts-123")
|
||||
assert input_no_room.room_id is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_live_pipeline_error_handling_transient(
|
||||
live_pipeline_module, mock_live_input, mock_ctx
|
||||
):
|
||||
"""Transient exception must NOT set error status."""
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
TaskName,
|
||||
with_error_handling,
|
||||
)
|
||||
|
||||
async def failing_task(input, ctx):
|
||||
raise httpx.TimeoutException("timed out")
|
||||
|
||||
wrapped = with_error_handling(TaskName.WAVEFORM)(failing_task)
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
with pytest.raises(httpx.TimeoutException):
|
||||
await wrapped(mock_live_input, mock_ctx)
|
||||
|
||||
mock_set_error.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_live_pipeline_error_handling_hard_fail(
|
||||
live_pipeline_module, mock_live_input, mock_ctx
|
||||
):
|
||||
"""Hard-fail must set error status and raise NonRetryableException."""
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
TaskName,
|
||||
with_error_handling,
|
||||
)
|
||||
|
||||
async def failing_task(input, ctx):
|
||||
raise ValueError("Transcript not found")
|
||||
|
||||
wrapped = with_error_handling(TaskName.WAVEFORM)(failing_task)
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
with pytest.raises(NonRetryableException) as exc_info:
|
||||
await wrapped(mock_live_input, mock_ctx)
|
||||
|
||||
assert "Transcript not found" in str(exc_info.value)
|
||||
mock_set_error.assert_called_once_with("ts-live-789")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_live_pipeline_on_failure_sets_error_status(
|
||||
live_pipeline_module, mock_live_input, mock_ctx
|
||||
):
|
||||
"""on_workflow_failure sets error status when transcript is processing."""
|
||||
from reflector.hatchet.workflows.live_post_pipeline import on_workflow_failure
|
||||
|
||||
transcript_processing = MagicMock()
|
||||
transcript_processing.status = "processing"
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.live_post_pipeline.fresh_db_connection",
|
||||
_noop_db_context,
|
||||
):
|
||||
with patch(
|
||||
"reflector.db.transcripts.transcripts_controller.get_by_id",
|
||||
new_callable=AsyncMock,
|
||||
return_value=transcript_processing,
|
||||
):
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.live_post_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
await on_workflow_failure(mock_live_input, mock_ctx)
|
||||
mock_set_error.assert_called_once_with(mock_live_input.transcript_id)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_live_pipeline_on_failure_does_not_overwrite_ended(
|
||||
live_pipeline_module, mock_live_input, mock_ctx
|
||||
):
|
||||
"""on_workflow_failure must NOT overwrite 'ended' status."""
|
||||
from reflector.hatchet.workflows.live_post_pipeline import on_workflow_failure
|
||||
|
||||
transcript_ended = MagicMock()
|
||||
transcript_ended.status = "ended"
|
||||
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.live_post_pipeline.fresh_db_connection",
|
||||
_noop_db_context,
|
||||
):
|
||||
with patch(
|
||||
"reflector.db.transcripts.transcripts_controller.get_by_id",
|
||||
new_callable=AsyncMock,
|
||||
return_value=transcript_ended,
|
||||
):
|
||||
with patch(
|
||||
"reflector.hatchet.workflows.live_post_pipeline.set_workflow_error_status",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_set_error:
|
||||
await on_workflow_failure(mock_live_input, mock_ctx)
|
||||
mock_set_error.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pipeline_post_triggers_hatchet():
|
||||
"""pipeline_post() should trigger Hatchet LivePostProcessingPipeline workflow."""
|
||||
with patch(
|
||||
"reflector.hatchet.client.HatchetClientManager.start_workflow",
|
||||
new_callable=AsyncMock,
|
||||
return_value="workflow-run-id",
|
||||
) as mock_start:
|
||||
from reflector.pipelines.main_live_pipeline import pipeline_post
|
||||
|
||||
await pipeline_post(transcript_id="ts-test-123", room_id="room-test")
|
||||
|
||||
mock_start.assert_called_once_with(
|
||||
"LivePostProcessingPipeline",
|
||||
{
|
||||
"transcript_id": "ts-test-123",
|
||||
"room_id": "room-test",
|
||||
},
|
||||
additional_metadata={"transcript_id": "ts-test-123"},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pipeline_post_triggers_hatchet_without_room_id():
|
||||
"""pipeline_post() should handle None room_id."""
|
||||
with patch(
|
||||
"reflector.hatchet.client.HatchetClientManager.start_workflow",
|
||||
new_callable=AsyncMock,
|
||||
return_value="workflow-run-id",
|
||||
) as mock_start:
|
||||
from reflector.pipelines.main_live_pipeline import pipeline_post
|
||||
|
||||
await pipeline_post(transcript_id="ts-test-456")
|
||||
|
||||
mock_start.assert_called_once_with(
|
||||
"LivePostProcessingPipeline",
|
||||
{
|
||||
"transcript_id": "ts-test-456",
|
||||
"room_id": None,
|
||||
},
|
||||
additional_metadata={"transcript_id": "ts-test-456"},
|
||||
)
|
||||
90
server/tests/test_hatchet_trigger_migration.py
Normal file
90
server/tests/test_hatchet_trigger_migration.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Tests verifying Celery-to-Hatchet trigger migration.
|
||||
|
||||
Ensures that:
|
||||
1. process_recording triggers FilePipeline via Hatchet (not Celery)
|
||||
2. transcript_record_upload triggers FilePipeline via Hatchet (not Celery)
|
||||
3. Old Celery task references are no longer in active call sites
|
||||
"""
|
||||
|
||||
|
||||
def test_process_recording_does_not_import_celery_file_task():
|
||||
"""Verify process.py no longer imports task_pipeline_file_process."""
|
||||
import inspect
|
||||
|
||||
from reflector.worker import process
|
||||
|
||||
source = inspect.getsource(process)
|
||||
# Should not contain the old Celery task import
|
||||
assert "task_pipeline_file_process" not in source
|
||||
|
||||
|
||||
def test_transcripts_upload_does_not_import_celery_file_task():
|
||||
"""Verify transcripts_upload.py no longer imports task_pipeline_file_process."""
|
||||
import inspect
|
||||
|
||||
from reflector.views import transcripts_upload
|
||||
|
||||
source = inspect.getsource(transcripts_upload)
|
||||
# Should not contain the old Celery task import
|
||||
assert "task_pipeline_file_process" not in source
|
||||
|
||||
|
||||
def test_transcripts_upload_imports_hatchet():
|
||||
"""Verify transcripts_upload.py imports HatchetClientManager."""
|
||||
import inspect
|
||||
|
||||
from reflector.views import transcripts_upload
|
||||
|
||||
source = inspect.getsource(transcripts_upload)
|
||||
assert "HatchetClientManager" in source
|
||||
|
||||
|
||||
def test_pipeline_post_is_async():
|
||||
"""Verify pipeline_post is now async (Hatchet trigger)."""
|
||||
import asyncio
|
||||
|
||||
from reflector.pipelines.main_live_pipeline import pipeline_post
|
||||
|
||||
assert asyncio.iscoroutinefunction(pipeline_post)
|
||||
|
||||
|
||||
def test_transcript_process_service_does_not_import_celery_file_task():
|
||||
"""Verify transcript_process.py service no longer imports task_pipeline_file_process."""
|
||||
import inspect
|
||||
|
||||
from reflector.services import transcript_process
|
||||
|
||||
source = inspect.getsource(transcript_process)
|
||||
assert "task_pipeline_file_process" not in source
|
||||
|
||||
|
||||
def test_transcript_process_service_dispatch_uses_hatchet():
|
||||
"""Verify dispatch_transcript_processing uses HatchetClientManager for file processing."""
|
||||
import inspect
|
||||
|
||||
from reflector.services import transcript_process
|
||||
|
||||
source = inspect.getsource(transcript_process.dispatch_transcript_processing)
|
||||
assert "HatchetClientManager" in source
|
||||
assert "FilePipeline" in source
|
||||
|
||||
|
||||
def test_new_task_names_exist():
|
||||
"""Verify new TaskName constants were added for file and live pipelines."""
|
||||
from reflector.hatchet.constants import TaskName
|
||||
|
||||
# File pipeline tasks
|
||||
assert TaskName.EXTRACT_AUDIO == "extract_audio"
|
||||
assert TaskName.UPLOAD_AUDIO == "upload_audio"
|
||||
assert TaskName.TRANSCRIBE == "transcribe"
|
||||
assert TaskName.DIARIZE == "diarize"
|
||||
assert TaskName.ASSEMBLE_TRANSCRIPT == "assemble_transcript"
|
||||
assert TaskName.GENERATE_SUMMARIES == "generate_summaries"
|
||||
|
||||
# Live post-processing pipeline tasks
|
||||
assert TaskName.WAVEFORM == "waveform"
|
||||
assert TaskName.CONVERT_MP3 == "convert_mp3"
|
||||
assert TaskName.UPLOAD_MP3 == "upload_mp3"
|
||||
assert TaskName.REMOVE_UPLOAD == "remove_upload"
|
||||
assert TaskName.FINAL_SUMMARIES == "final_summaries"
|
||||
@@ -5,6 +5,7 @@ This test verifies the complete file processing pipeline without mocking much,
|
||||
ensuring all processors are correctly invoked and the happy path works correctly.
|
||||
"""
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from uuid import uuid4
|
||||
@@ -136,6 +137,7 @@ async def mock_storage():
|
||||
operation: str = "get_object",
|
||||
expires_in: int = 3600,
|
||||
bucket=None,
|
||||
extra_params=None,
|
||||
):
|
||||
return f"http://test-storage/{path}"
|
||||
|
||||
@@ -651,3 +653,43 @@ async def test_pipeline_file_process_no_audio_file(
|
||||
# This should fail when trying to open the file with av
|
||||
with pytest.raises(Exception):
|
||||
await pipeline.process(non_existent_path)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_on_title_does_not_overwrite_user_set_title():
|
||||
"""When transcript already has a title, on_title does not call update."""
|
||||
from reflector.db.transcripts import Transcript, TranscriptFinalTitle
|
||||
from reflector.pipelines.main_file_pipeline import PipelineMainFile
|
||||
|
||||
transcript_id = str(uuid4())
|
||||
transcript_with_title = Transcript(
|
||||
id=transcript_id,
|
||||
name="test",
|
||||
source_kind="file",
|
||||
title="User set title",
|
||||
)
|
||||
|
||||
controller = "reflector.pipelines.main_live_pipeline.transcripts_controller"
|
||||
with patch(f"{controller}.get_by_id", new_callable=AsyncMock) as mock_get:
|
||||
with patch(f"{controller}.update", new_callable=AsyncMock) as mock_update:
|
||||
with patch(
|
||||
f"{controller}.append_event", new_callable=AsyncMock
|
||||
) as mock_append:
|
||||
with patch(f"{controller}.transaction") as mock_txn:
|
||||
mock_get.return_value = transcript_with_title
|
||||
mock_append.return_value = None
|
||||
|
||||
@asynccontextmanager
|
||||
async def noop_txn():
|
||||
yield
|
||||
|
||||
mock_txn.return_value = noop_txn()
|
||||
|
||||
pipeline = PipelineMainFile(transcript_id=transcript_id)
|
||||
await pipeline.on_title(
|
||||
TranscriptFinalTitle(title="Generated title")
|
||||
)
|
||||
|
||||
mock_get.assert_called_once()
|
||||
mock_update.assert_not_called()
|
||||
mock_append.assert_called_once()
|
||||
|
||||
@@ -5,6 +5,7 @@ import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from conftest import authenticated_client_ctx
|
||||
from httpx_ws import aconnect_ws
|
||||
from uvicorn import Config, Server
|
||||
|
||||
@@ -372,9 +373,9 @@ async def test_audio_mp3_requires_token_for_owned_transcript(
|
||||
tr.audio_mp3_filename.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(audio_path, tr.audio_mp3_filename)
|
||||
|
||||
# Anonymous GET without token should be 403 or 404 depending on access; we call mp3
|
||||
# Anonymous GET without token should be 401 (auth required)
|
||||
resp = await client.get(f"/transcripts/{t.id}/audio/mp3")
|
||||
assert resp.status_code == 403
|
||||
assert resp.status_code == 401
|
||||
|
||||
# With token should succeed
|
||||
token = create_access_token(
|
||||
@@ -382,3 +383,607 @@ async def test_audio_mp3_requires_token_for_owned_transcript(
|
||||
)
|
||||
resp2 = await client.get(f"/transcripts/{t.id}/audio/mp3", params={"token": token})
|
||||
assert resp2.status_code == 200
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Auth guards: anonymous blocked when PUBLIC_MODE=False
|
||||
# ======================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_cannot_create_transcript_when_not_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
resp = await client.post("/transcripts", json={"name": "anon-test"})
|
||||
assert resp.status_code == 401, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_cannot_process_transcript_when_not_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="process-test",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="public",
|
||||
)
|
||||
|
||||
resp = await client.post(f"/transcripts/{t.id}/process")
|
||||
assert resp.status_code == 401, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_cannot_upload_when_not_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="upload-test",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="public",
|
||||
)
|
||||
|
||||
# Minimal multipart upload
|
||||
resp = await client.post(
|
||||
f"/transcripts/{t.id}/record/upload",
|
||||
params={"chunk_number": 0, "total_chunks": 1},
|
||||
files={"chunk": ("test.mp3", b"fake-audio", "audio/mpeg")},
|
||||
)
|
||||
assert resp.status_code == 401, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_cannot_webrtc_record_when_not_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="webrtc-test",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="public",
|
||||
)
|
||||
|
||||
resp = await client.post(
|
||||
f"/transcripts/{t.id}/record/webrtc",
|
||||
json={"sdp": "v=0\r\n", "type": "offer"},
|
||||
)
|
||||
assert resp.status_code == 401, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_cannot_start_meeting_recording_when_not_public(
|
||||
client, monkeypatch
|
||||
):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
room = await rooms_controller.add(
|
||||
name="recording-auth-test",
|
||||
user_id="owner-rec",
|
||||
zulip_auto_post=False,
|
||||
zulip_stream="",
|
||||
zulip_topic="",
|
||||
is_locked=False,
|
||||
room_mode="normal",
|
||||
recording_type="cloud",
|
||||
recording_trigger="automatic-2nd-participant",
|
||||
is_shared=True,
|
||||
webhook_url="",
|
||||
webhook_secret="",
|
||||
)
|
||||
|
||||
meeting = await meetings_controller.create(
|
||||
id="meeting-rec-test",
|
||||
room_name="recording-auth-test",
|
||||
room_url="room-url",
|
||||
host_room_url="host-url",
|
||||
start_date=Room.model_fields["created_at"].default_factory(),
|
||||
end_date=Room.model_fields["created_at"].default_factory(),
|
||||
room=room,
|
||||
)
|
||||
|
||||
resp = await client.post(
|
||||
f"/meetings/{meeting.id}/recordings/start",
|
||||
json={"type": "cloud", "instanceId": "00000000-0000-0000-0000-000000000001"},
|
||||
)
|
||||
assert resp.status_code == 401, resp.text
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Public mode: anonymous IS allowed when PUBLIC_MODE=True
|
||||
# ======================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_can_create_transcript_when_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
resp = await client.post("/transcripts", json={"name": "anon-public-test"})
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_can_list_transcripts_when_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
resp = await client.get("/transcripts")
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_can_read_public_transcript(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="readable-test",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="public",
|
||||
)
|
||||
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_can_upload_when_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="upload-public-test",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="public",
|
||||
)
|
||||
|
||||
resp = await client.post(
|
||||
f"/transcripts/{t.id}/record/upload",
|
||||
params={"chunk_number": 0, "total_chunks": 2},
|
||||
files={"chunk": ("test.mp3", b"fake-audio", "audio/mpeg")},
|
||||
)
|
||||
# Chunk 0 of 2 won't trigger av.open validation, so should succeed with "ok"
|
||||
# The key assertion: auth did NOT block us (no 401)
|
||||
assert resp.status_code != 401, f"Should not get 401 in public mode: {resp.text}"
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_can_start_meeting_recording_when_public(client, monkeypatch):
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
room = await rooms_controller.add(
|
||||
name="recording-public-test",
|
||||
user_id="owner-pub",
|
||||
zulip_auto_post=False,
|
||||
zulip_stream="",
|
||||
zulip_topic="",
|
||||
is_locked=False,
|
||||
room_mode="normal",
|
||||
recording_type="cloud",
|
||||
recording_trigger="automatic-2nd-participant",
|
||||
is_shared=True,
|
||||
webhook_url="",
|
||||
webhook_secret="",
|
||||
)
|
||||
|
||||
meeting = await meetings_controller.create(
|
||||
id="meeting-pub-test",
|
||||
room_name="recording-public-test",
|
||||
room_url="room-url",
|
||||
host_room_url="host-url",
|
||||
start_date=Room.model_fields["created_at"].default_factory(),
|
||||
end_date=Room.model_fields["created_at"].default_factory(),
|
||||
room=room,
|
||||
)
|
||||
|
||||
resp = await client.post(
|
||||
f"/meetings/{meeting.id}/recordings/start",
|
||||
json={"type": "cloud", "instanceId": "00000000-0000-0000-0000-000000000002"},
|
||||
)
|
||||
# Should not be 401 (may fail for other reasons like no Daily API, but auth passes)
|
||||
assert resp.status_code != 401, f"Should not get 401 in public mode: {resp.text}"
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Authenticated user vs private data (own transcripts)
|
||||
# Authenticated owner should be able to create, read, and process
|
||||
# their own private transcripts even when PUBLIC_MODE=False
|
||||
# ======================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_create_transcript_private_mode(client, monkeypatch):
|
||||
"""Authenticated user can create transcripts even when PUBLIC_MODE=False."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.post("/transcripts", json={"name": "auth-private-create"})
|
||||
assert resp.status_code == 200, resp.text
|
||||
assert resp.json()["user_id"] == "randomuserid"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_read_own_private_transcript(client, monkeypatch):
|
||||
"""Authenticated owner can read their own private transcript."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
# Create transcript owned by "randomuserid"
|
||||
t = await transcripts_controller.add(
|
||||
name="auth-private-read",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="randomuserid",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_cannot_read_others_private_transcript(client, monkeypatch):
|
||||
"""Authenticated user cannot read another user's private transcript."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
# Create transcript owned by someone else
|
||||
t = await transcripts_controller.add(
|
||||
name="other-private",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="other-owner",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 403, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_process_own_private_transcript(client, monkeypatch):
|
||||
"""Authenticated owner can trigger processing on their own private transcript."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="auth-private-process",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="randomuserid",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.post(f"/transcripts/{t.id}/process")
|
||||
# Should pass auth (may fail for other reasons like validation, but not 401/403)
|
||||
assert resp.status_code not in (401, 403), resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_upload_to_own_private_transcript(client, monkeypatch):
|
||||
"""Authenticated owner can upload audio to their own private transcript."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="auth-private-upload",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="randomuserid",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.post(
|
||||
f"/transcripts/{t.id}/record/upload",
|
||||
params={"chunk_number": 0, "total_chunks": 2},
|
||||
files={"chunk": ("test.mp3", b"fake-audio", "audio/mpeg")},
|
||||
)
|
||||
# Auth passes, chunk accepted (not final chunk so no av validation)
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_webrtc_own_private_transcript(client, monkeypatch):
|
||||
"""Authenticated owner can start WebRTC recording on their own private transcript."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="auth-private-webrtc",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="randomuserid",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.post(
|
||||
f"/transcripts/{t.id}/record/webrtc",
|
||||
json={"sdp": "v=0\r\n", "type": "offer"},
|
||||
)
|
||||
# Auth passes (may fail for other reasons like RTC setup, but not 401/403)
|
||||
assert resp.status_code not in (401, 403), resp.text
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Authenticated user vs semi-private data (other user's transcripts)
|
||||
# Any authenticated user should be able to READ semi-private transcripts
|
||||
# but NOT write to them (upload, process) since they don't own them
|
||||
# ======================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_read_others_semi_private_transcript(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""Any authenticated user can read a semi-private transcript (link sharing)."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
# Create transcript owned by someone else with semi-private share mode
|
||||
t = await transcripts_controller.add(
|
||||
name="semi-private-readable",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="other-owner",
|
||||
share_mode="semi-private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_cannot_read_semi_private_transcript(client, monkeypatch):
|
||||
"""Anonymous user cannot read a semi-private transcript."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="semi-private-blocked",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="some-owner",
|
||||
share_mode="semi-private",
|
||||
)
|
||||
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 403, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_list_own_transcripts_private_mode(client, monkeypatch):
|
||||
"""Authenticated user can list their own transcripts when PUBLIC_MODE=False."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
await transcripts_controller.add(
|
||||
name="my-transcript",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="randomuserid",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.get("/transcripts")
|
||||
assert resp.status_code == 200, resp.text
|
||||
items = resp.json()["items"]
|
||||
assert len(items) >= 1
|
||||
# All returned transcripts should belong to the user or be in shared rooms
|
||||
for item in items:
|
||||
assert item["user_id"] == "randomuserid" or item.get("room_id") is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_cannot_list_others_private_transcripts(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""Authenticated user should NOT see another user's private transcripts in the list."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
await transcripts_controller.add(
|
||||
name="hidden-from-others",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="secret-owner",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.get("/transcripts")
|
||||
assert resp.status_code == 200, resp.text
|
||||
items = resp.json()["items"]
|
||||
# Should not contain transcripts owned by "secret-owner"
|
||||
for item in items:
|
||||
assert (
|
||||
item.get("user_id") != "secret-owner"
|
||||
), f"Leaked private transcript: {item['id']}"
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Anonymous-created transcripts (user_id=None)
|
||||
# These transcripts bypass share_mode checks entirely in get_by_id_for_http.
|
||||
# They should always be accessible to everyone regardless of PUBLIC_MODE
|
||||
# or share_mode setting, because there is no owner to restrict access.
|
||||
# ======================================================================
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_transcript_accessible_when_public_mode_true(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""Anonymous transcript (user_id=None) is accessible even with default private share_mode
|
||||
when PUBLIC_MODE=True."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="anon-transcript-public-mode",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="private", # share_mode is irrelevant for user_id=None
|
||||
)
|
||||
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_transcript_accessible_when_public_mode_false(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""Anonymous transcript (user_id=None) is accessible by authenticated users
|
||||
even when PUBLIC_MODE=False. The transcript has no owner, so share_mode is bypassed."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="anon-transcript-private-mode",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_transcript_accessible_regardless_of_share_mode(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""Anonymous transcripts (user_id=None) are accessible regardless of share_mode value.
|
||||
Tests all three share modes to confirm the user_id=None bypass works consistently."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
for mode in ("private", "semi-private", "public"):
|
||||
t = await transcripts_controller.add(
|
||||
name=f"anon-share-{mode}",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode=mode,
|
||||
)
|
||||
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 200, f"Failed for share_mode={mode}: {resp.text}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_transcript_readable_by_different_authenticated_user(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""An authenticated user can read anonymous transcripts (user_id=None) even with
|
||||
private share_mode, because the no-owner bypass applies."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="anon-read-by-auth-user",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert resp.status_code == 200, resp.text
|
||||
assert resp.json()["user_id"] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_transcript_in_list_when_public_mode(client, monkeypatch):
|
||||
"""Anonymous transcripts appear in the transcript list when PUBLIC_MODE=True."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="anon-in-list",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
resp = await client.get("/transcripts")
|
||||
assert resp.status_code == 200, resp.text
|
||||
ids = [item["id"] for item in resp.json()["items"]]
|
||||
assert t.id in ids, "Anonymous transcript should appear in the public list"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_anonymous_transcript_audio_accessible(client, monkeypatch, tmpdir):
|
||||
"""Anonymous transcript audio (mp3) is accessible without authentication
|
||||
because user_id=None bypasses the auth requirement (pipeline access)."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
monkeypatch.setattr(settings, "DATA_DIR", Path(tmpdir).as_posix())
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="anon-audio-access",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id=None,
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
tr = await transcripts_controller.get_by_id(t.id)
|
||||
await transcripts_controller.update(tr, {"status": "ended"})
|
||||
|
||||
# Copy fixture audio to transcript path
|
||||
audio_path = Path(__file__).parent / "records" / "test_mathieu_hello.mp3"
|
||||
tr.audio_mp3_filename.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(audio_path, tr.audio_mp3_filename)
|
||||
|
||||
resp = await client.get(f"/transcripts/{t.id}/audio/mp3")
|
||||
assert (
|
||||
resp.status_code == 200
|
||||
), f"Anonymous transcript audio should be accessible for pipeline: {resp.text}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_owned_transcript_not_accessible_by_anon_when_not_public(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""Contrast test: owned transcript with private share_mode is NOT accessible
|
||||
to anonymous users when PUBLIC_MODE=False. This confirms that the user_id=None
|
||||
bypass only applies to anonymous transcripts, not to all transcripts."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
t = await transcripts_controller.add(
|
||||
name="owned-private-contrast",
|
||||
source_kind=SourceKind.LIVE,
|
||||
user_id="some-owner",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
resp = await client.get(f"/transcripts/{t.id}")
|
||||
assert (
|
||||
resp.status_code == 403
|
||||
), f"Owned private transcript should be denied to anonymous: {resp.text}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticated_can_start_meeting_recording_private_mode(
|
||||
client, monkeypatch
|
||||
):
|
||||
"""Authenticated user can start recording in non-public mode."""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", False)
|
||||
|
||||
room = await rooms_controller.add(
|
||||
name="auth-recording-test",
|
||||
user_id="randomuserid",
|
||||
zulip_auto_post=False,
|
||||
zulip_stream="",
|
||||
zulip_topic="",
|
||||
is_locked=False,
|
||||
room_mode="normal",
|
||||
recording_type="cloud",
|
||||
recording_trigger="automatic-2nd-participant",
|
||||
is_shared=True,
|
||||
webhook_url="",
|
||||
webhook_secret="",
|
||||
)
|
||||
|
||||
meeting = await meetings_controller.create(
|
||||
id="meeting-auth-rec",
|
||||
room_name="auth-recording-test",
|
||||
room_url="room-url",
|
||||
host_room_url="host-url",
|
||||
start_date=Room.model_fields["created_at"].default_factory(),
|
||||
end_date=Room.model_fields["created_at"].default_factory(),
|
||||
room=room,
|
||||
)
|
||||
|
||||
async with authenticated_client_ctx():
|
||||
resp = await client.post(
|
||||
f"/meetings/{meeting.id}/recordings/start",
|
||||
json={
|
||||
"type": "cloud",
|
||||
"instanceId": "00000000-0000-0000-0000-000000000003",
|
||||
},
|
||||
)
|
||||
# Auth passes (may fail for Daily API reasons, but not 401)
|
||||
assert resp.status_code != 401, resp.text
|
||||
|
||||
@@ -340,8 +340,13 @@ async def test_transcript_formats_with_overlapping_speakers_multitrack():
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_transcript_format_text(client):
|
||||
async def test_api_transcript_format_text(monkeypatch, client):
|
||||
"""Test GET /transcripts/{id} with transcript_format=text."""
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
@@ -390,8 +395,13 @@ async def test_api_transcript_format_text(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_transcript_format_text_timestamped(client):
|
||||
async def test_api_transcript_format_text_timestamped(monkeypatch, client):
|
||||
"""Test GET /transcripts/{id} with transcript_format=text-timestamped."""
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
@@ -441,8 +451,13 @@ async def test_api_transcript_format_text_timestamped(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_transcript_format_webvtt_named(client):
|
||||
async def test_api_transcript_format_webvtt_named(monkeypatch, client):
|
||||
"""Test GET /transcripts/{id} with transcript_format=webvtt-named."""
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
@@ -491,8 +506,13 @@ async def test_api_transcript_format_webvtt_named(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_transcript_format_json(client):
|
||||
async def test_api_transcript_format_json(monkeypatch, client):
|
||||
"""Test GET /transcripts/{id} with transcript_format=json."""
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
@@ -544,8 +564,13 @@ async def test_api_transcript_format_json(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_transcript_format_default_is_text(client):
|
||||
async def test_api_transcript_format_default_is_text(monkeypatch, client):
|
||||
"""Test GET /transcripts/{id} defaults to text format."""
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
@@ -654,12 +679,18 @@ async def test_api_topics_endpoint_multitrack_segmentation(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_api_topics_endpoint_non_multitrack_segmentation(client):
|
||||
async def test_api_topics_endpoint_non_multitrack_segmentation(monkeypatch, client):
|
||||
"""Test GET /transcripts/{id}/topics uses default segmentation for non-multitrack.
|
||||
|
||||
Ensures backward compatibility - transcripts without multitrack recordings
|
||||
should continue using the default speaker-change-based segmentation.
|
||||
"""
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
|
||||
from reflector.db.transcripts import (
|
||||
TranscriptParticipant,
|
||||
TranscriptTopic,
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
import pytest
|
||||
|
||||
from reflector.db.recordings import Recording, recordings_controller
|
||||
from reflector.db.rooms import rooms_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_create(client):
|
||||
async def test_transcript_create(monkeypatch, client):
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "test"})
|
||||
assert response.status_code == 200
|
||||
assert response.json()["name"] == "test"
|
||||
@@ -110,6 +116,33 @@ async def test_transcript_get_update_title(authenticated_client, client):
|
||||
assert response.json()["title"] == "test_title"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_set_status_emits_status_event_and_updates_transcript(
|
||||
monkeypatch, client
|
||||
):
|
||||
"""set_status adds a STATUS event and updates the transcript status (broadcast for WebSocket)."""
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "Status test"})
|
||||
assert response.status_code == 200
|
||||
transcript_id = response.json()["id"]
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(transcript_id)
|
||||
assert transcript is not None
|
||||
assert transcript.status == "idle"
|
||||
|
||||
event = await transcripts_controller.set_status(transcript_id, "processing")
|
||||
assert event is not None
|
||||
assert event.event == "STATUS"
|
||||
assert event.data.get("value") == "processing"
|
||||
|
||||
updated = await transcripts_controller.get_by_id(transcript_id)
|
||||
assert updated.status == "processing"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcripts_list_anonymous(client):
|
||||
# XXX this test is a bit fragile, as it depends on the storage which
|
||||
@@ -160,9 +193,93 @@ async def test_transcript_delete(authenticated_client, client):
|
||||
assert response.status_code == 200
|
||||
assert response.json()["status"] == "ok"
|
||||
|
||||
# API returns 404 for soft-deleted transcripts
|
||||
response = await client.get(f"/transcripts/{tid}")
|
||||
assert response.status_code == 404
|
||||
|
||||
# But the transcript still exists in DB with deleted_at set
|
||||
transcript = await transcripts_controller.get_by_id(tid)
|
||||
assert transcript is not None
|
||||
assert transcript.deleted_at is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_deleted_transcript_not_in_list(authenticated_client, client):
|
||||
"""Soft-deleted transcripts should not appear in the list endpoint."""
|
||||
response = await client.post("/transcripts", json={"name": "testdel_list"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
|
||||
# Verify it appears in the list
|
||||
response = await client.get("/transcripts")
|
||||
assert response.status_code == 200
|
||||
ids = [t["id"] for t in response.json()["items"]]
|
||||
assert tid in ids
|
||||
|
||||
# Delete it
|
||||
response = await client.delete(f"/transcripts/{tid}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# Verify it no longer appears in the list
|
||||
response = await client.get("/transcripts")
|
||||
assert response.status_code == 200
|
||||
ids = [t["id"] for t in response.json()["items"]]
|
||||
assert tid not in ids
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_already_deleted_is_idempotent(authenticated_client, client):
|
||||
"""Deleting an already-deleted transcript is idempotent (returns 200)."""
|
||||
response = await client.post("/transcripts", json={"name": "testdel_idem"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
|
||||
# First delete
|
||||
response = await client.delete(f"/transcripts/{tid}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# Second delete — idempotent, still returns ok
|
||||
response = await client.delete(f"/transcripts/{tid}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# But deleted_at was only set once (not updated)
|
||||
transcript = await transcripts_controller.get_by_id(tid)
|
||||
assert transcript is not None
|
||||
assert transcript.deleted_at is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_deleted_transcript_recording_soft_deleted(authenticated_client, client):
|
||||
"""Soft-deleting a transcript also soft-deletes its recording."""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
recording = await recordings_controller.create(
|
||||
Recording(
|
||||
bucket_name="test-bucket",
|
||||
object_key="test.mp4",
|
||||
recorded_at=datetime.now(timezone.utc),
|
||||
)
|
||||
)
|
||||
transcript = await transcripts_controller.add(
|
||||
name="with-recording",
|
||||
source_kind=SourceKind.ROOM,
|
||||
recording_id=recording.id,
|
||||
user_id="randomuserid",
|
||||
)
|
||||
|
||||
response = await client.delete(f"/transcripts/{transcript.id}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# Recording still in DB with deleted_at set
|
||||
rec = await recordings_controller.get_by_id(recording.id)
|
||||
assert rec is not None
|
||||
assert rec.deleted_at is not None
|
||||
|
||||
# Transcript still in DB with deleted_at set
|
||||
tr = await transcripts_controller.get_by_id(transcript.id)
|
||||
assert tr is not None
|
||||
assert tr.deleted_at is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_mark_reviewed(authenticated_client, client):
|
||||
@@ -233,3 +350,43 @@ async def test_transcript_get_returns_null_room_name_when_no_room(
|
||||
assert response.status_code == 200
|
||||
assert response.json()["room_id"] is None
|
||||
assert response.json()["room_name"] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcripts_list_filtered_by_room_id(authenticated_client, client):
|
||||
"""GET /transcripts?room_id=X returns only transcripts for that room."""
|
||||
# Use same user as authenticated_client (conftest uses "randomuserid")
|
||||
user_id = "randomuserid"
|
||||
room = await rooms_controller.add(
|
||||
name="room-for-list-filter",
|
||||
user_id=user_id,
|
||||
zulip_auto_post=False,
|
||||
zulip_stream="",
|
||||
zulip_topic="",
|
||||
is_locked=False,
|
||||
room_mode="normal",
|
||||
recording_type="cloud",
|
||||
recording_trigger="automatic-2nd-participant",
|
||||
is_shared=False,
|
||||
webhook_url="",
|
||||
webhook_secret="",
|
||||
)
|
||||
in_room = await transcripts_controller.add(
|
||||
name="in-room",
|
||||
source_kind="file",
|
||||
room_id=room.id,
|
||||
user_id=user_id,
|
||||
)
|
||||
other = await transcripts_controller.add(
|
||||
name="no-room",
|
||||
source_kind="file",
|
||||
room_id=None,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
response = await client.get("/transcripts", params={"room_id": room.id})
|
||||
assert response.status_code == 200
|
||||
items = response.json()["items"]
|
||||
ids = [t["id"] for t in items]
|
||||
assert in_room.id in ids
|
||||
assert other.id not in ids
|
||||
|
||||
@@ -5,10 +5,13 @@ import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def fake_transcript(tmpdir, client):
|
||||
async def fake_transcript(tmpdir, client, monkeypatch):
|
||||
from reflector.settings import settings
|
||||
from reflector.views.transcripts import transcripts_controller
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
settings.DATA_DIR = Path(tmpdir)
|
||||
|
||||
# create a transcript
|
||||
@@ -37,7 +40,7 @@ async def fake_transcript(tmpdir, client):
|
||||
],
|
||||
)
|
||||
async def test_transcript_audio_download(
|
||||
fake_transcript, url_suffix, content_type, client
|
||||
authenticated_client, fake_transcript, url_suffix, content_type, client
|
||||
):
|
||||
response = await client.get(f"/transcripts/{fake_transcript.id}/audio{url_suffix}")
|
||||
assert response.status_code == 200
|
||||
@@ -58,7 +61,7 @@ async def test_transcript_audio_download(
|
||||
],
|
||||
)
|
||||
async def test_transcript_audio_download_head(
|
||||
fake_transcript, url_suffix, content_type, client
|
||||
authenticated_client, fake_transcript, url_suffix, content_type, client
|
||||
):
|
||||
response = await client.head(f"/transcripts/{fake_transcript.id}/audio{url_suffix}")
|
||||
assert response.status_code == 200
|
||||
@@ -79,7 +82,7 @@ async def test_transcript_audio_download_head(
|
||||
],
|
||||
)
|
||||
async def test_transcript_audio_download_range(
|
||||
fake_transcript, url_suffix, content_type, client
|
||||
authenticated_client, fake_transcript, url_suffix, content_type, client
|
||||
):
|
||||
response = await client.get(
|
||||
f"/transcripts/{fake_transcript.id}/audio{url_suffix}",
|
||||
@@ -99,7 +102,7 @@ async def test_transcript_audio_download_range(
|
||||
],
|
||||
)
|
||||
async def test_transcript_audio_download_range_with_seek(
|
||||
fake_transcript, url_suffix, content_type, client
|
||||
authenticated_client, fake_transcript, url_suffix, content_type, client
|
||||
):
|
||||
response = await client.get(
|
||||
f"/transcripts/{fake_transcript.id}/audio{url_suffix}",
|
||||
|
||||
@@ -98,10 +98,10 @@ async def private_transcript(tmpdir):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_mp3_private_no_auth_returns_403(private_transcript, client):
|
||||
"""Without auth, accessing a private transcript's audio returns 403."""
|
||||
async def test_audio_mp3_private_no_auth_returns_401(private_transcript, client):
|
||||
"""Without auth, accessing a private transcript's audio returns 401."""
|
||||
response = await client.get(f"/transcripts/{private_transcript.id}/audio/mp3")
|
||||
assert response.status_code == 403
|
||||
assert response.status_code == 401
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -125,8 +125,8 @@ async def test_audio_mp3_with_bearer_header(private_transcript, client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_mp3_public_transcript_no_auth_ok(tmpdir, client):
|
||||
"""Public transcripts are accessible without any auth."""
|
||||
async def test_audio_mp3_public_transcript_no_auth_returns_401(tmpdir, client):
|
||||
"""Public transcripts require authentication for audio access."""
|
||||
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||
from reflector.settings import settings
|
||||
|
||||
@@ -146,8 +146,7 @@ async def test_audio_mp3_public_transcript_no_auth_ok(tmpdir, client):
|
||||
shutil.copy(mp3_source, audio_filename)
|
||||
|
||||
response = await client.get(f"/transcripts/{transcript.id}/audio/mp3")
|
||||
assert response.status_code == 200
|
||||
assert response.headers["content-type"] == "audio/mpeg"
|
||||
assert response.status_code == 401
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -299,11 +298,9 @@ async def test_local_audio_link_token_works_with_authentik_backend(
|
||||
"""_generate_local_audio_link creates an HS256 token via create_access_token.
|
||||
|
||||
When the Authentik (RS256) auth backend is active, verify_raw_token uses
|
||||
JWTAuth which expects RS256 + public key. The HS256 token created by
|
||||
_generate_local_audio_link will fail verification, returning 401.
|
||||
|
||||
This test documents the bug: the internal audio URL generated for the
|
||||
diarization pipeline is unusable under the JWT auth backend.
|
||||
JWTAuth which expects RS256 + public key. The HS256 token fails RS256
|
||||
verification, but the audio endpoint's HS256 fallback (jwt.decode with
|
||||
SECRET_KEY) correctly handles it, so the request succeeds with 200.
|
||||
"""
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
@@ -322,6 +319,55 @@ async def test_local_audio_link_token_works_with_authentik_backend(
|
||||
f"/transcripts/{private_transcript.id}/audio/mp3?token={token}"
|
||||
)
|
||||
|
||||
# BUG: this should be 200 (the token was created by our own server),
|
||||
# but the Authentik backend rejects it because it's HS256, not RS256.
|
||||
# The HS256 fallback in the audio endpoint handles this correctly.
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Waveform endpoint auth tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_waveform_requires_authentication(client):
|
||||
"""Waveform endpoint returns 401 for unauthenticated requests."""
|
||||
response = await client.get("/transcripts/any-id/audio/waveform")
|
||||
assert response.status_code == 401
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_audio_mp3_authenticated_user_accesses_anonymous_transcript(
|
||||
tmpdir, client
|
||||
):
|
||||
"""Authenticated user can access audio for an anonymous (user_id=None) transcript."""
|
||||
from reflector.app import app
|
||||
from reflector.auth import current_user, current_user_optional
|
||||
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||
from reflector.settings import settings
|
||||
|
||||
settings.DATA_DIR = Path(tmpdir)
|
||||
|
||||
transcript = await transcripts_controller.add(
|
||||
"Anonymous audio test",
|
||||
source_kind=SourceKind.FILE,
|
||||
user_id=None,
|
||||
share_mode="private",
|
||||
)
|
||||
await transcripts_controller.update(transcript, {"status": "ended"})
|
||||
|
||||
audio_filename = transcript.audio_mp3_filename
|
||||
mp3_source = Path(__file__).parent / "records" / "test_mathieu_hello.mp3"
|
||||
audio_filename.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(mp3_source, audio_filename)
|
||||
|
||||
_user = lambda: {"sub": "some-authenticated-user", "email": "user@example.com"}
|
||||
app.dependency_overrides[current_user] = _user
|
||||
app.dependency_overrides[current_user_optional] = _user
|
||||
try:
|
||||
response = await client.get(f"/transcripts/{transcript.id}/audio/mp3")
|
||||
finally:
|
||||
del app.dependency_overrides[current_user]
|
||||
del app.dependency_overrides[current_user_optional]
|
||||
|
||||
assert response.status_code == 200
|
||||
assert response.headers["content-type"] == "audio/mpeg"
|
||||
|
||||
36
server/tests/test_transcripts_download.py
Normal file
36
server/tests/test_transcripts_download.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_zip_returns_valid_zip(
|
||||
authenticated_client, client, fake_transcript_with_topics
|
||||
):
|
||||
"""Test that the zip download endpoint returns a valid zip file."""
|
||||
transcript = fake_transcript_with_topics
|
||||
response = await client.get(f"/transcripts/{transcript.id}/download/zip")
|
||||
assert response.status_code == 200
|
||||
assert response.headers["content-type"] == "application/zip"
|
||||
|
||||
# Verify it's a valid zip
|
||||
zip_buffer = io.BytesIO(response.content)
|
||||
with zipfile.ZipFile(zip_buffer) as zf:
|
||||
names = zf.namelist()
|
||||
assert "metadata.json" in names
|
||||
assert "audio.mp3" in names
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_zip_requires_auth(client):
|
||||
"""Test that zip download requires authentication."""
|
||||
response = await client.get("/transcripts/nonexistent/download/zip")
|
||||
assert response.status_code in (401, 403, 422)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_zip_not_found(authenticated_client, client):
|
||||
"""Test 404 for non-existent transcript."""
|
||||
response = await client.get("/transcripts/nonexistent-id/download/zip")
|
||||
assert response.status_code == 404
|
||||
@@ -1,10 +1,10 @@
|
||||
import asyncio
|
||||
import time
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def app_lifespan():
|
||||
@@ -25,8 +25,6 @@ async def client(app_lifespan):
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_database")
|
||||
@pytest.mark.usefixtures("celery_session_app")
|
||||
@pytest.mark.usefixtures("celery_session_worker")
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_process(
|
||||
tmpdir,
|
||||
@@ -36,7 +34,16 @@ async def test_transcript_process(
|
||||
dummy_file_diarization,
|
||||
dummy_storage,
|
||||
client,
|
||||
monkeypatch,
|
||||
mock_hatchet_client,
|
||||
):
|
||||
"""Test upload + process dispatch via Hatchet.
|
||||
|
||||
The file pipeline is now dispatched to Hatchet (fire-and-forget),
|
||||
so we verify the workflow was triggered rather than polling for completion.
|
||||
"""
|
||||
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||
|
||||
# create a transcript
|
||||
response = await client.post("/transcripts", json={"name": "test"})
|
||||
assert response.status_code == 200
|
||||
@@ -57,61 +64,58 @@ async def test_transcript_process(
|
||||
assert response.status_code == 200
|
||||
assert response.json()["status"] == "ok"
|
||||
|
||||
# wait for processing to finish (max 1 minute)
|
||||
timeout_seconds = 60
|
||||
start_time = time.monotonic()
|
||||
while (time.monotonic() - start_time) < timeout_seconds:
|
||||
# fetch the transcript and check if it is ended
|
||||
resp = await client.get(f"/transcripts/{tid}")
|
||||
assert resp.status_code == 200
|
||||
if resp.json()["status"] in ("ended", "error"):
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
else:
|
||||
pytest.fail(f"Initial processing timed out after {timeout_seconds} seconds")
|
||||
# Verify Hatchet workflow was dispatched (from upload endpoint)
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
|
||||
# restart the processing
|
||||
response = await client.post(
|
||||
f"/transcripts/{tid}/process",
|
||||
HatchetClientManager.start_workflow.assert_called_once_with(
|
||||
"FilePipeline",
|
||||
{"transcript_id": tid},
|
||||
additional_metadata={"transcript_id": tid},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.json()["status"] == "ok"
|
||||
await asyncio.sleep(2)
|
||||
|
||||
# wait for processing to finish (max 1 minute)
|
||||
timeout_seconds = 60
|
||||
start_time = time.monotonic()
|
||||
while (time.monotonic() - start_time) < timeout_seconds:
|
||||
# fetch the transcript and check if it is ended
|
||||
resp = await client.get(f"/transcripts/{tid}")
|
||||
assert resp.status_code == 200
|
||||
if resp.json()["status"] in ("ended", "error"):
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
else:
|
||||
pytest.fail(f"Restart processing timed out after {timeout_seconds} seconds")
|
||||
# Verify transcript status was set to "uploaded"
|
||||
resp = await client.get(f"/transcripts/{tid}")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["status"] == "uploaded"
|
||||
|
||||
# check the transcript is ended
|
||||
transcript = resp.json()
|
||||
assert transcript["status"] == "ended"
|
||||
assert transcript["short_summary"] == "LLM SHORT SUMMARY"
|
||||
assert transcript["title"] == "Llm Title"
|
||||
# Reset mock for reprocess test
|
||||
HatchetClientManager.start_workflow.reset_mock()
|
||||
|
||||
# check topics and transcript
|
||||
response = await client.get(f"/transcripts/{tid}/topics")
|
||||
assert response.status_code == 200
|
||||
assert len(response.json()) == 1
|
||||
assert "Hello world. How are you today?" in response.json()[0]["transcript"]
|
||||
# Clear workflow_run_id so /process endpoint can dispatch again
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(tid)
|
||||
await transcripts_controller.update(transcript, {"workflow_run_id": None})
|
||||
|
||||
# Reprocess via /process endpoint
|
||||
with patch(
|
||||
"reflector.services.transcript_process.task_is_scheduled_or_active",
|
||||
return_value=False,
|
||||
):
|
||||
response = await client.post(f"/transcripts/{tid}/process")
|
||||
assert response.status_code == 200
|
||||
assert response.json()["status"] == "ok"
|
||||
|
||||
# Verify second Hatchet dispatch (from /process endpoint)
|
||||
HatchetClientManager.start_workflow.assert_called_once()
|
||||
call_kwargs = HatchetClientManager.start_workflow.call_args.kwargs
|
||||
assert call_kwargs["workflow_name"] == "FilePipeline"
|
||||
assert call_kwargs["input_data"]["transcript_id"] == tid
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_database")
|
||||
@pytest.mark.asyncio
|
||||
async def test_whereby_recording_uses_file_pipeline(client):
|
||||
async def test_whereby_recording_uses_file_pipeline(monkeypatch, client):
|
||||
"""Test that Whereby recordings (bucket_name but no track_keys) use file pipeline"""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from reflector.db.recordings import Recording, recordings_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
|
||||
# Create transcript with Whereby recording (has bucket_name, no track_keys)
|
||||
transcript = await transcripts_controller.add(
|
||||
@@ -139,31 +143,41 @@ async def test_whereby_recording_uses_file_pipeline(client):
|
||||
|
||||
with (
|
||||
patch(
|
||||
"reflector.services.transcript_process.task_pipeline_file_process"
|
||||
) as mock_file_pipeline,
|
||||
"reflector.services.transcript_process.task_is_scheduled_or_active",
|
||||
return_value=False,
|
||||
),
|
||||
patch(
|
||||
"reflector.services.transcript_process.HatchetClientManager"
|
||||
) as mock_hatchet,
|
||||
):
|
||||
mock_hatchet.start_workflow = AsyncMock(return_value="test-workflow-id")
|
||||
|
||||
response = await client.post(f"/transcripts/{transcript.id}/process")
|
||||
|
||||
assert response.status_code == 200
|
||||
assert response.json()["status"] == "ok"
|
||||
|
||||
# Whereby recordings should use file pipeline, not Hatchet
|
||||
mock_file_pipeline.delay.assert_called_once_with(transcript_id=transcript.id)
|
||||
mock_hatchet.start_workflow.assert_not_called()
|
||||
# Whereby recordings should use Hatchet FilePipeline
|
||||
mock_hatchet.start_workflow.assert_called_once()
|
||||
call_kwargs = mock_hatchet.start_workflow.call_args.kwargs
|
||||
assert call_kwargs["workflow_name"] == "FilePipeline"
|
||||
assert call_kwargs["input_data"]["transcript_id"] == transcript.id
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_database")
|
||||
@pytest.mark.asyncio
|
||||
async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
||||
async def test_dailyco_recording_uses_multitrack_pipeline(monkeypatch, client):
|
||||
"""Test that Daily.co recordings (bucket_name + track_keys) use multitrack pipeline"""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from reflector.db.recordings import Recording, recordings_controller
|
||||
from reflector.db.rooms import rooms_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
|
||||
room = await rooms_controller.add(
|
||||
name="test-room",
|
||||
@@ -208,8 +222,9 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
||||
|
||||
with (
|
||||
patch(
|
||||
"reflector.services.transcript_process.task_pipeline_file_process"
|
||||
) as mock_file_pipeline,
|
||||
"reflector.services.transcript_process.task_is_scheduled_or_active",
|
||||
return_value=False,
|
||||
),
|
||||
patch(
|
||||
"reflector.services.transcript_process.HatchetClientManager"
|
||||
) as mock_hatchet,
|
||||
@@ -221,7 +236,7 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
||||
assert response.status_code == 200
|
||||
assert response.json()["status"] == "ok"
|
||||
|
||||
# Daily.co multitrack recordings should use Hatchet workflow
|
||||
# Daily.co multitrack recordings should use Hatchet DiarizationPipeline
|
||||
mock_hatchet.start_workflow.assert_called_once()
|
||||
call_kwargs = mock_hatchet.start_workflow.call_args.kwargs
|
||||
assert call_kwargs["workflow_name"] == "DiarizationPipeline"
|
||||
@@ -230,18 +245,22 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
||||
assert call_kwargs["input_data"]["tracks"] == [
|
||||
{"s3_key": k} for k in track_keys
|
||||
]
|
||||
mock_file_pipeline.delay.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_database")
|
||||
@pytest.mark.asyncio
|
||||
async def test_reprocess_error_transcript_passes_force(client):
|
||||
async def test_reprocess_error_transcript_passes_force(monkeypatch, client):
|
||||
"""When transcript status is 'error', reprocess passes force=True to start fresh workflow."""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from reflector.db.recordings import Recording, recordings_controller
|
||||
from reflector.db.rooms import rooms_controller
|
||||
from reflector.db.transcripts import transcripts_controller
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
|
||||
room = await rooms_controller.add(
|
||||
name="test-room",
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -9,6 +8,7 @@ from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_recording_deleted_with_transcript():
|
||||
"""Soft-delete: recording and transcript remain in DB with deleted_at set, no files deleted."""
|
||||
recording = await recordings_controller.create(
|
||||
Recording(
|
||||
bucket_name="test-bucket",
|
||||
@@ -22,16 +22,13 @@ async def test_recording_deleted_with_transcript():
|
||||
recording_id=recording.id,
|
||||
)
|
||||
|
||||
with patch("reflector.db.transcripts.get_transcripts_storage") as mock_get_storage:
|
||||
storage_instance = mock_get_storage.return_value
|
||||
storage_instance.delete_file = AsyncMock()
|
||||
await transcripts_controller.remove_by_id(transcript.id)
|
||||
|
||||
await transcripts_controller.remove_by_id(transcript.id)
|
||||
# Both should still exist in DB but with deleted_at set
|
||||
rec = await recordings_controller.get_by_id(recording.id)
|
||||
assert rec is not None
|
||||
assert rec.deleted_at is not None
|
||||
|
||||
# Should be called with bucket override
|
||||
storage_instance.delete_file.assert_awaited_once_with(
|
||||
recording.object_key, bucket=recording.bucket_name
|
||||
)
|
||||
|
||||
assert await recordings_controller.get_by_id(recording.id) is None
|
||||
assert await transcripts_controller.get_by_id(transcript.id) is None
|
||||
tr = await transcripts_controller.get_by_id(transcript.id)
|
||||
assert tr is not None
|
||||
assert tr.deleted_at is not None
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
# FIXME test status of transcript
|
||||
# FIXME test websocket connection after RTC is finished still send the full events
|
||||
# FIXME try with locked session, RTC should not work
|
||||
# TODO: add integration tests for post-processing (LivePostPipeline) with a real
|
||||
# Hatchet instance. These tests currently only cover the live pipeline.
|
||||
# Post-processing events (WAVEFORM, FINAL_*, DURATION, STATUS=ended, mp3)
|
||||
# are now dispatched via Hatchet and tested in test_hatchet_live_post_pipeline.py.
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
@@ -49,7 +53,7 @@ class ThreadedUvicorn:
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def appserver(tmpdir, setup_database, celery_session_app, celery_session_worker):
|
||||
def appserver(tmpdir, setup_database):
|
||||
import threading
|
||||
|
||||
from reflector.app import app
|
||||
@@ -119,8 +123,6 @@ def appserver(tmpdir, setup_database, celery_session_app, celery_session_worker)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_database")
|
||||
@pytest.mark.usefixtures("celery_session_app")
|
||||
@pytest.mark.usefixtures("celery_session_worker")
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_rtc_and_websocket(
|
||||
tmpdir,
|
||||
@@ -133,10 +135,18 @@ async def test_transcript_rtc_and_websocket(
|
||||
fake_mp3_upload,
|
||||
appserver,
|
||||
client,
|
||||
monkeypatch,
|
||||
mock_hatchet_client,
|
||||
):
|
||||
# goal: start the server, exchange RTC, receive websocket events
|
||||
# because of that, we need to start the server in a thread
|
||||
# to be able to connect with aiortc
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
|
||||
server, host, port = appserver
|
||||
|
||||
# create a transcript
|
||||
@@ -201,35 +211,30 @@ async def test_transcript_rtc_and_websocket(
|
||||
stream_client.channel.send(json.dumps({"cmd": "STOP"}))
|
||||
await stream_client.stop()
|
||||
|
||||
# wait the processing to finish
|
||||
timeout = 120
|
||||
# Wait for live pipeline to flush (it dispatches post-processing to Hatchet)
|
||||
timeout = 30
|
||||
while True:
|
||||
# fetch the transcript and check if it is ended
|
||||
resp = await client.get(f"/transcripts/{tid}")
|
||||
assert resp.status_code == 200
|
||||
if resp.json()["status"] in ("ended", "error"):
|
||||
if resp.json()["status"] in ("processing", "ended", "error"):
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
timeout -= 1
|
||||
if timeout < 0:
|
||||
raise TimeoutError("Timeout while waiting for transcript to be ended")
|
||||
|
||||
if resp.json()["status"] != "ended":
|
||||
raise TimeoutError("Transcript processing failed")
|
||||
raise TimeoutError("Timeout waiting for live pipeline to finish")
|
||||
|
||||
# stop websocket task
|
||||
websocket_task.cancel()
|
||||
|
||||
# check events
|
||||
# check live pipeline events
|
||||
assert len(events) > 0
|
||||
from pprint import pprint
|
||||
|
||||
pprint(events)
|
||||
|
||||
# get events list
|
||||
eventnames = [e["event"] for e in events]
|
||||
|
||||
# check events
|
||||
# Live pipeline produces TRANSCRIPT and TOPIC events during RTC
|
||||
assert "TRANSCRIPT" in eventnames
|
||||
ev = events[eventnames.index("TRANSCRIPT")]
|
||||
assert ev["data"]["text"].startswith("Hello world.")
|
||||
@@ -242,50 +247,18 @@ async def test_transcript_rtc_and_websocket(
|
||||
assert ev["data"]["transcript"].startswith("Hello world.")
|
||||
assert ev["data"]["timestamp"] == 0.0
|
||||
|
||||
assert "FINAL_LONG_SUMMARY" in eventnames
|
||||
ev = events[eventnames.index("FINAL_LONG_SUMMARY")]
|
||||
assert ev["data"]["long_summary"] == "LLM LONG SUMMARY"
|
||||
|
||||
assert "FINAL_SHORT_SUMMARY" in eventnames
|
||||
ev = events[eventnames.index("FINAL_SHORT_SUMMARY")]
|
||||
assert ev["data"]["short_summary"] == "LLM SHORT SUMMARY"
|
||||
|
||||
assert "FINAL_TITLE" in eventnames
|
||||
ev = events[eventnames.index("FINAL_TITLE")]
|
||||
assert ev["data"]["title"] == "Llm Title"
|
||||
|
||||
assert "WAVEFORM" in eventnames
|
||||
ev = events[eventnames.index("WAVEFORM")]
|
||||
assert isinstance(ev["data"]["waveform"], list)
|
||||
assert len(ev["data"]["waveform"]) >= 250
|
||||
waveform_resp = await client.get(f"/transcripts/{tid}/audio/waveform")
|
||||
assert waveform_resp.status_code == 200
|
||||
assert waveform_resp.headers["content-type"] == "application/json"
|
||||
assert isinstance(waveform_resp.json()["data"], list)
|
||||
assert len(waveform_resp.json()["data"]) >= 250
|
||||
|
||||
# check status order
|
||||
# Live pipeline status progression
|
||||
statuses = [e["data"]["value"] for e in events if e["event"] == "STATUS"]
|
||||
assert "recording" in statuses
|
||||
assert "processing" in statuses
|
||||
assert statuses.index("recording") < statuses.index("processing")
|
||||
assert statuses.index("processing") < statuses.index("ended")
|
||||
|
||||
# ensure the last event received is ended
|
||||
assert events[-1]["event"] == "STATUS"
|
||||
assert events[-1]["data"]["value"] == "ended"
|
||||
|
||||
# check on the latest response that the audio duration is > 0
|
||||
assert resp.json()["duration"] > 0
|
||||
assert "DURATION" in eventnames
|
||||
|
||||
# check that audio/mp3 is available
|
||||
audio_resp = await client.get(f"/transcripts/{tid}/audio/mp3")
|
||||
assert audio_resp.status_code == 200
|
||||
assert audio_resp.headers["Content-Type"] == "audio/mpeg"
|
||||
# Post-processing (WAVEFORM, FINAL_*, DURATION, mp3, STATUS=ended) is now
|
||||
# dispatched to Hatchet via LivePostPipeline — not tested here.
|
||||
# See test_hatchet_live_post_pipeline.py for post-processing tests.
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_database")
|
||||
@pytest.mark.usefixtures("celery_session_app")
|
||||
@pytest.mark.usefixtures("celery_session_worker")
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_rtc_and_websocket_and_fr(
|
||||
tmpdir,
|
||||
@@ -298,11 +271,19 @@ async def test_transcript_rtc_and_websocket_and_fr(
|
||||
fake_mp3_upload,
|
||||
appserver,
|
||||
client,
|
||||
monkeypatch,
|
||||
mock_hatchet_client,
|
||||
):
|
||||
# goal: start the server, exchange RTC, receive websocket events
|
||||
# because of that, we need to start the server in a thread
|
||||
# to be able to connect with aiortc
|
||||
# with target french language
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
|
||||
server, host, port = appserver
|
||||
|
||||
# create a transcript
|
||||
@@ -368,42 +349,34 @@ async def test_transcript_rtc_and_websocket_and_fr(
|
||||
# instead of waiting a long time, we just send a STOP
|
||||
stream_client.channel.send(json.dumps({"cmd": "STOP"}))
|
||||
|
||||
# wait the processing to finish
|
||||
await asyncio.sleep(2)
|
||||
|
||||
await stream_client.stop()
|
||||
|
||||
# wait the processing to finish
|
||||
timeout = 120
|
||||
# Wait for live pipeline to flush
|
||||
timeout = 30
|
||||
while True:
|
||||
# fetch the transcript and check if it is ended
|
||||
resp = await client.get(f"/transcripts/{tid}")
|
||||
assert resp.status_code == 200
|
||||
if resp.json()["status"] == "ended":
|
||||
if resp.json()["status"] in ("processing", "ended", "error"):
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
timeout -= 1
|
||||
if timeout < 0:
|
||||
raise TimeoutError("Timeout while waiting for transcript to be ended")
|
||||
|
||||
if resp.json()["status"] != "ended":
|
||||
raise TimeoutError("Transcript processing failed")
|
||||
|
||||
await asyncio.sleep(2)
|
||||
raise TimeoutError("Timeout waiting for live pipeline to finish")
|
||||
|
||||
# stop websocket task
|
||||
websocket_task.cancel()
|
||||
|
||||
# check events
|
||||
# check live pipeline events
|
||||
assert len(events) > 0
|
||||
from pprint import pprint
|
||||
|
||||
pprint(events)
|
||||
|
||||
# get events list
|
||||
eventnames = [e["event"] for e in events]
|
||||
|
||||
# check events
|
||||
# Live pipeline produces TRANSCRIPT with translation
|
||||
assert "TRANSCRIPT" in eventnames
|
||||
ev = events[eventnames.index("TRANSCRIPT")]
|
||||
assert ev["data"]["text"].startswith("Hello world.")
|
||||
@@ -416,23 +389,11 @@ async def test_transcript_rtc_and_websocket_and_fr(
|
||||
assert ev["data"]["transcript"].startswith("Hello world.")
|
||||
assert ev["data"]["timestamp"] == 0.0
|
||||
|
||||
assert "FINAL_LONG_SUMMARY" in eventnames
|
||||
ev = events[eventnames.index("FINAL_LONG_SUMMARY")]
|
||||
assert ev["data"]["long_summary"] == "LLM LONG SUMMARY"
|
||||
|
||||
assert "FINAL_SHORT_SUMMARY" in eventnames
|
||||
ev = events[eventnames.index("FINAL_SHORT_SUMMARY")]
|
||||
assert ev["data"]["short_summary"] == "LLM SHORT SUMMARY"
|
||||
|
||||
assert "FINAL_TITLE" in eventnames
|
||||
ev = events[eventnames.index("FINAL_TITLE")]
|
||||
assert ev["data"]["title"] == "Llm Title"
|
||||
|
||||
# check status order
|
||||
# Live pipeline status progression
|
||||
statuses = [e["data"]["value"] for e in events if e["event"] == "STATUS"]
|
||||
assert "recording" in statuses
|
||||
assert "processing" in statuses
|
||||
assert statuses.index("recording") < statuses.index("processing")
|
||||
assert statuses.index("processing") < statuses.index("ended")
|
||||
|
||||
# ensure the last event received is ended
|
||||
assert events[-1]["event"] == "STATUS"
|
||||
assert events[-1]["data"]["value"] == "ended"
|
||||
# Post-processing (FINAL_*, STATUS=ended) is now dispatched to Hatchet
|
||||
# via LivePostPipeline — not tested here.
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
import pytest
|
||||
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_create_default_translation(client):
|
||||
async def test_transcript_create_default_translation(monkeypatch, client):
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post("/transcripts", json={"name": "test en"})
|
||||
assert response.status_code == 200
|
||||
assert response.json()["name"] == "test en"
|
||||
@@ -18,7 +23,10 @@ async def test_transcript_create_default_translation(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_create_en_fr_translation(client):
|
||||
async def test_transcript_create_en_fr_translation(monkeypatch, client):
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post(
|
||||
"/transcripts", json={"name": "test en/fr", "target_language": "fr"}
|
||||
)
|
||||
@@ -36,7 +44,10 @@ async def test_transcript_create_en_fr_translation(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_create_fr_en_translation(client):
|
||||
async def test_transcript_create_fr_en_translation(monkeypatch, client):
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
response = await client.post(
|
||||
"/transcripts", json={"name": "test fr/en", "source_language": "fr"}
|
||||
)
|
||||
|
||||
@@ -1,12 +1,7 @@
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_database")
|
||||
@pytest.mark.usefixtures("celery_session_app")
|
||||
@pytest.mark.usefixtures("celery_session_worker")
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_upload_file(
|
||||
tmpdir,
|
||||
@@ -16,7 +11,14 @@ async def test_transcript_upload_file(
|
||||
dummy_file_diarization,
|
||||
dummy_storage,
|
||||
client,
|
||||
monkeypatch,
|
||||
mock_hatchet_client,
|
||||
):
|
||||
from reflector.settings import settings
|
||||
|
||||
monkeypatch.setattr(
|
||||
settings, "PUBLIC_MODE", True
|
||||
) # public mode: allow anonymous transcript creation for this test
|
||||
# create a transcript
|
||||
response = await client.post("/transcripts", json={"name": "test"})
|
||||
assert response.status_code == 200
|
||||
@@ -37,27 +39,16 @@ async def test_transcript_upload_file(
|
||||
assert response.status_code == 200
|
||||
assert response.json()["status"] == "ok"
|
||||
|
||||
# wait the processing to finish (max 1 minute)
|
||||
timeout_seconds = 60
|
||||
start_time = time.monotonic()
|
||||
while (time.monotonic() - start_time) < timeout_seconds:
|
||||
# fetch the transcript and check if it is ended
|
||||
resp = await client.get(f"/transcripts/{tid}")
|
||||
assert resp.status_code == 200
|
||||
if resp.json()["status"] in ("ended", "error"):
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
else:
|
||||
return pytest.fail(f"Processing timed out after {timeout_seconds} seconds")
|
||||
# Verify Hatchet workflow was dispatched for file processing
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
|
||||
# check the transcript is ended
|
||||
transcript = resp.json()
|
||||
assert transcript["status"] == "ended"
|
||||
assert transcript["short_summary"] == "LLM SHORT SUMMARY"
|
||||
assert transcript["title"] == "Llm Title"
|
||||
HatchetClientManager.start_workflow.assert_called_once_with(
|
||||
"FilePipeline",
|
||||
{"transcript_id": tid},
|
||||
additional_metadata={"transcript_id": tid},
|
||||
)
|
||||
|
||||
# check topics and transcript
|
||||
response = await client.get(f"/transcripts/{tid}/topics")
|
||||
assert response.status_code == 200
|
||||
assert len(response.json()) == 1
|
||||
assert "Hello world. How are you today?" in response.json()[0]["transcript"]
|
||||
# Verify transcript status was updated to "uploaded"
|
||||
resp = await client.get(f"/transcripts/{tid}")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["status"] == "uploaded"
|
||||
|
||||
264
server/tests/test_transcripts_video.py
Normal file
264
server/tests/test_transcripts_video.py
Normal file
@@ -0,0 +1,264 @@
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_returns_404_when_no_meeting(authenticated_client, client):
|
||||
"""Test that video URL returns 404 when transcript has no meeting."""
|
||||
response = await client.post("/transcripts", json={"name": "no-meeting"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
|
||||
response = await client.get(f"/transcripts/{tid}/video/url")
|
||||
assert response.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_returns_404_when_no_cloud_video(authenticated_client, client):
|
||||
"""Test that video URL returns 404 when meeting has no cloud video."""
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings
|
||||
|
||||
meeting_id = "test-meeting-no-video"
|
||||
await get_database().execute(
|
||||
meetings.insert().values(
|
||||
id=meeting_id,
|
||||
room_name="No Video Meeting",
|
||||
room_url="https://example.com",
|
||||
host_room_url="https://example.com/host",
|
||||
start_date=datetime.now(timezone.utc),
|
||||
end_date=datetime.now(timezone.utc) + timedelta(hours=1),
|
||||
room_id=None,
|
||||
)
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.add(
|
||||
name="with-meeting",
|
||||
source_kind=SourceKind.ROOM,
|
||||
meeting_id=meeting_id,
|
||||
user_id="randomuserid",
|
||||
)
|
||||
|
||||
response = await client.get(f"/transcripts/{transcript.id}/video/url")
|
||||
assert response.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_returns_presigned_url(authenticated_client, client):
|
||||
"""Test that video URL returns a presigned URL when cloud video exists."""
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings
|
||||
|
||||
meeting_id = "test-meeting-with-video"
|
||||
await get_database().execute(
|
||||
meetings.insert().values(
|
||||
id=meeting_id,
|
||||
room_name="Video Meeting",
|
||||
room_url="https://example.com",
|
||||
host_room_url="https://example.com/host",
|
||||
start_date=datetime.now(timezone.utc),
|
||||
end_date=datetime.now(timezone.utc) + timedelta(hours=1),
|
||||
room_id=None,
|
||||
daily_composed_video_s3_key="recordings/video.mp4",
|
||||
daily_composed_video_duration=120,
|
||||
)
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.add(
|
||||
name="with-video",
|
||||
source_kind=SourceKind.ROOM,
|
||||
meeting_id=meeting_id,
|
||||
user_id="randomuserid",
|
||||
)
|
||||
|
||||
with patch("reflector.views.transcripts_video.get_source_storage") as mock_storage:
|
||||
mock_instance = AsyncMock()
|
||||
mock_instance.get_file_url = AsyncMock(
|
||||
return_value="https://s3.example.com/presigned-url"
|
||||
)
|
||||
mock_storage.return_value = mock_instance
|
||||
|
||||
response = await client.get(f"/transcripts/{transcript.id}/video/url")
|
||||
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["url"] == "https://s3.example.com/presigned-url"
|
||||
assert data["duration"] == 120
|
||||
assert data["content_type"] == "video/mp4"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcript_get_includes_video_fields(authenticated_client, client):
|
||||
"""Test that transcript GET response includes has_cloud_video field."""
|
||||
response = await client.post("/transcripts", json={"name": "video-fields"})
|
||||
assert response.status_code == 200
|
||||
tid = response.json()["id"]
|
||||
|
||||
response = await client.get(f"/transcripts/{tid}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["has_cloud_video"] is False
|
||||
assert data["cloud_video_duration"] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_requires_authentication(client):
|
||||
"""Test that video URL endpoint returns 401 for unauthenticated requests."""
|
||||
response = await client.get("/transcripts/any-id/video/url")
|
||||
assert response.status_code == 401
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_presigned_params(authenticated_client, client):
|
||||
"""Test that presigned URL is generated with short expiry and inline disposition."""
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings
|
||||
|
||||
meeting_id = "test-meeting-presigned-params"
|
||||
await get_database().execute(
|
||||
meetings.insert().values(
|
||||
id=meeting_id,
|
||||
room_name="Presigned Params Meeting",
|
||||
room_url="https://example.com",
|
||||
host_room_url="https://example.com/host",
|
||||
start_date=datetime.now(timezone.utc),
|
||||
end_date=datetime.now(timezone.utc) + timedelta(hours=1),
|
||||
room_id=None,
|
||||
daily_composed_video_s3_key="recordings/video.mp4",
|
||||
daily_composed_video_duration=60,
|
||||
)
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.add(
|
||||
name="presigned-params",
|
||||
source_kind=SourceKind.ROOM,
|
||||
meeting_id=meeting_id,
|
||||
user_id="randomuserid",
|
||||
)
|
||||
|
||||
with patch("reflector.views.transcripts_video.get_source_storage") as mock_storage:
|
||||
mock_instance = AsyncMock()
|
||||
mock_instance.get_file_url = AsyncMock(
|
||||
return_value="https://s3.example.com/presigned-url"
|
||||
)
|
||||
mock_storage.return_value = mock_instance
|
||||
|
||||
await client.get(f"/transcripts/{transcript.id}/video/url")
|
||||
|
||||
mock_instance.get_file_url.assert_called_once_with(
|
||||
"recordings/video.mp4",
|
||||
operation="get_object",
|
||||
expires_in=900,
|
||||
extra_params={
|
||||
"ResponseContentDisposition": "inline",
|
||||
"ResponseContentType": "video/mp4",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
async def _create_meeting_with_video(meeting_id):
|
||||
"""Helper to create a meeting with cloud video."""
|
||||
from reflector.db import get_database
|
||||
from reflector.db.meetings import meetings
|
||||
|
||||
await get_database().execute(
|
||||
meetings.insert().values(
|
||||
id=meeting_id,
|
||||
room_name="Video Meeting",
|
||||
room_url="https://example.com",
|
||||
host_room_url="https://example.com/host",
|
||||
start_date=datetime.now(timezone.utc),
|
||||
end_date=datetime.now(timezone.utc) + timedelta(hours=1),
|
||||
room_id=None,
|
||||
daily_composed_video_s3_key="recordings/video.mp4",
|
||||
daily_composed_video_duration=60,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_private_transcript_denies_non_owner(
|
||||
authenticated_client, client
|
||||
):
|
||||
"""Authenticated non-owner cannot access video for a private transcript."""
|
||||
meeting_id = "test-meeting-private-deny"
|
||||
await _create_meeting_with_video(meeting_id)
|
||||
|
||||
transcript = await transcripts_controller.add(
|
||||
name="private-video",
|
||||
source_kind=SourceKind.ROOM,
|
||||
meeting_id=meeting_id,
|
||||
user_id="other-owner",
|
||||
share_mode="private",
|
||||
)
|
||||
|
||||
with patch("reflector.views.transcripts_video.get_source_storage") as mock_storage:
|
||||
mock_instance = AsyncMock()
|
||||
mock_instance.get_file_url = AsyncMock(
|
||||
return_value="https://s3.example.com/url"
|
||||
)
|
||||
mock_storage.return_value = mock_instance
|
||||
|
||||
response = await client.get(f"/transcripts/{transcript.id}/video/url")
|
||||
|
||||
assert response.status_code == 403
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_public_transcript_allows_authenticated_non_owner(
|
||||
authenticated_client, client
|
||||
):
|
||||
"""Authenticated non-owner can access video for a public transcript."""
|
||||
meeting_id = "test-meeting-public-allow"
|
||||
await _create_meeting_with_video(meeting_id)
|
||||
|
||||
transcript = await transcripts_controller.add(
|
||||
name="public-video",
|
||||
source_kind=SourceKind.ROOM,
|
||||
meeting_id=meeting_id,
|
||||
user_id="other-owner",
|
||||
share_mode="public",
|
||||
)
|
||||
|
||||
with patch("reflector.views.transcripts_video.get_source_storage") as mock_storage:
|
||||
mock_instance = AsyncMock()
|
||||
mock_instance.get_file_url = AsyncMock(
|
||||
return_value="https://s3.example.com/url"
|
||||
)
|
||||
mock_storage.return_value = mock_instance
|
||||
|
||||
response = await client.get(f"/transcripts/{transcript.id}/video/url")
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_video_url_semi_private_allows_authenticated_non_owner(
|
||||
authenticated_client, client
|
||||
):
|
||||
"""Authenticated non-owner can access video for a semi-private transcript."""
|
||||
meeting_id = "test-meeting-semi-private-allow"
|
||||
await _create_meeting_with_video(meeting_id)
|
||||
|
||||
transcript = await transcripts_controller.add(
|
||||
name="semi-private-video",
|
||||
source_kind=SourceKind.ROOM,
|
||||
meeting_id=meeting_id,
|
||||
user_id="other-owner",
|
||||
share_mode="semi-private",
|
||||
)
|
||||
|
||||
with patch("reflector.views.transcripts_video.get_source_storage") as mock_storage:
|
||||
mock_instance = AsyncMock()
|
||||
mock_instance.get_file_url = AsyncMock(
|
||||
return_value="https://s3.example.com/url"
|
||||
)
|
||||
mock_storage.return_value = mock_instance
|
||||
|
||||
response = await client.get(f"/transcripts/{transcript.id}/video/url")
|
||||
|
||||
assert response.status_code == 200
|
||||
@@ -141,33 +141,19 @@ async def test_user_ws_accepts_valid_token_and_receives_events(appserver_ws_user
|
||||
await asyncio.sleep(0.2)
|
||||
|
||||
# Emit an event to the user's room via a standard HTTP action
|
||||
# Use a real HTTP request to the server with the JWT token so that
|
||||
# current_user_optional_if_public_mode is exercised without dependency overrides
|
||||
from httpx import AsyncClient
|
||||
|
||||
from reflector.app import app
|
||||
from reflector.auth import current_user, current_user_optional
|
||||
|
||||
# Override auth dependencies so HTTP request is performed as the same user
|
||||
# Use the internal user.id (not the Authentik UID)
|
||||
app.dependency_overrides[current_user] = lambda: {
|
||||
"sub": user.id,
|
||||
"email": "user-abc@example.com",
|
||||
}
|
||||
app.dependency_overrides[current_user_optional] = lambda: {
|
||||
"sub": user.id,
|
||||
"email": "user-abc@example.com",
|
||||
}
|
||||
|
||||
# Use in-memory client (global singleton makes it share ws_manager)
|
||||
async with AsyncClient(app=app, base_url=f"http://{host}:{port}/v1") as ac:
|
||||
# Create a transcript as this user so that the server publishes TRANSCRIPT_CREATED to user room
|
||||
resp = await ac.post("/transcripts", json={"name": "WS Test"})
|
||||
async with AsyncClient(base_url=f"http://{host}:{port}/v1") as ac:
|
||||
resp = await ac.post(
|
||||
"/transcripts",
|
||||
json={"name": "WS Test"},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
|
||||
# Receive the published event
|
||||
msg = await ws.receive_json()
|
||||
assert msg["event"] == "TRANSCRIPT_CREATED"
|
||||
assert "id" in msg["data"]
|
||||
|
||||
# Clean overrides
|
||||
del app.dependency_overrides[current_user]
|
||||
del app.dependency_overrides[current_user_optional]
|
||||
|
||||
109
server/uv.lock
generated
109
server/uv.lock
generated
@@ -188,6 +188,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aiosmtplib"
|
||||
version = "5.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e7/ad/240a7ce4e50713b111dff8b781a898d8d4770e5d6ad4899103f84c86005c/aiosmtplib-5.1.0.tar.gz", hash = "sha256:2504a23b2b63c9de6bc4ea719559a38996dba68f73f6af4eb97be20ee4c5e6c4", size = 66176, upload-time = "2026-01-25T01:51:11.408Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/37/82/70f2c452acd7ed18c558c8ace9a8cf4fdcc70eae9a41749b5bdc53eb6f45/aiosmtplib-5.1.0-py3-none-any.whl", hash = "sha256:368029440645b486b69db7029208a7a78c6691b90d24a5332ddba35d9109d55b", size = 27778, upload-time = "2026-01-25T01:51:10.026Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aiosqlite"
|
||||
version = "0.21.0"
|
||||
@@ -404,7 +413,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "black"
|
||||
version = "24.3.0"
|
||||
version = "26.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "click" },
|
||||
@@ -412,18 +421,21 @@ dependencies = [
|
||||
{ name = "packaging" },
|
||||
{ name = "pathspec" },
|
||||
{ name = "platformdirs" },
|
||||
{ name = "pytokens" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/8f/5f/bac24a952668c7482cfdb4ebf91ba57a796c9da8829363a772040c1a3312/black-24.3.0.tar.gz", hash = "sha256:a0c9c4a0771afc6919578cec71ce82a3e31e054904e7197deacbc9382671c41f", size = 634292, upload-time = "2024-03-15T19:35:43.699Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e1/c5/61175d618685d42b005847464b8fb4743a67b1b8fdb75e50e5a96c31a27a/black-26.3.1.tar.gz", hash = "sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07", size = 666155, upload-time = "2026-03-12T03:36:03.593Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/68/df/ceea5828be9c4931cb5a75b7e8fb02971f57524da7a16dfec0d4d575327f/black-24.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f1373a7808a8f135b774039f61d59e4be7eb56b2513d3d2f02a8b9365b8a8a9", size = 1571235, upload-time = "2024-03-15T19:45:27.77Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/46/5f/30398c5056cb72f883b32b6520ad00042a9d0454b693f70509867db03a80/black-24.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aadf7a02d947936ee418777e0247ea114f78aff0d0959461057cae8a04f20597", size = 1414926, upload-time = "2024-03-15T19:43:52.993Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6b/59/498885b279e890f656ea4300a2671c964acb6d97994ea626479c2e5501b4/black-24.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c02e4ea2ae09d16314d30912a58ada9a5c4fdfedf9512d23326128ac08ac3d", size = 1725920, upload-time = "2024-03-15T19:38:13.052Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8f/b0/4bef40c808cc615187db983b75bacdca1c110a229d41ba9887549fac529c/black-24.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:bf21b7b230718a5f08bd32d5e4f1db7fc8788345c8aea1d155fc17852b3410f5", size = 1372608, upload-time = "2024-03-15T19:39:34.973Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b6/c6/1d174efa9ff02b22d0124c73fc5f4d4fb006d0d9a081aadc354d05754a13/black-24.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2818cf72dfd5d289e48f37ccfa08b460bf469e67fb7c4abb07edc2e9f16fb63f", size = 1600822, upload-time = "2024-03-15T19:45:20.337Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/ed/704731afffe460b8ff0672623b40fce9fe569f2ee617c15857e4d4440a3a/black-24.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4acf672def7eb1725f41f38bf6bf425c8237248bb0804faa3965c036f7672d11", size = 1429987, upload-time = "2024-03-15T19:45:00.637Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/05/8dd038e30caadab7120176d4bc109b7ca2f4457f12eef746b0560a583458/black-24.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7ed6668cbbfcd231fa0dc1b137d3e40c04c7f786e626b405c62bcd5db5857e4", size = 1755319, upload-time = "2024-03-15T19:38:24.009Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/9d/e5fa1ff4ef1940be15a64883c0bb8d2fcf626efec996eab4ae5a8c691d2c/black-24.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:56f52cfbd3dabe2798d76dbdd299faa046a901041faf2cf33288bc4e6dae57b5", size = 1385180, upload-time = "2024-03-15T19:39:37.014Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/ea/31770a7e49f3eedfd8cd7b35e78b3a3aaad860400f8673994bc988318135/black-24.3.0-py3-none-any.whl", hash = "sha256:41622020d7120e01d377f74249e677039d20e6344ff5851de8a10f11f513bf93", size = 201493, upload-time = "2024-03-15T19:35:41.572Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/17/57/5f11c92861f9c92eb9dddf515530bc2d06db843e44bdcf1c83c1427824bc/black-26.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:28ef38aee69e4b12fda8dba75e21f9b4f979b490c8ac0baa7cb505369ac9e1ff", size = 1851987, upload-time = "2026-03-12T03:40:06.248Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/aa/340a1463660bf6831f9e39646bf774086dbd8ca7fc3cded9d59bbdf4ad0a/black-26.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf9bf162ed91a26f1adba8efda0b573bc6924ec1408a52cc6f82cb73ec2b142c", size = 1689499, upload-time = "2026-03-12T03:40:07.642Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/01/b726c93d717d72733da031d2de10b92c9fa4c8d0c67e8a8a372076579279/black-26.3.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:474c27574d6d7037c1bc875a81d9be0a9a4f9ee95e62800dab3cfaadbf75acd5", size = 1754369, upload-time = "2026-03-12T03:40:09.279Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/09/61e91881ca291f150cfc9eb7ba19473c2e59df28859a11a88248b5cbbc4d/black-26.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5e9d0d86df21f2e1677cc4bd090cd0e446278bcbbe49bf3659c308c3e402843e", size = 1413613, upload-time = "2026-03-12T03:40:10.943Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/16/73/544f23891b22e7efe4d8f812371ab85b57f6a01b2fc45e3ba2e52ba985b8/black-26.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:9a5e9f45e5d5e1c5b5c29b3bd4265dcc90e8b92cf4534520896ed77f791f4da5", size = 1219719, upload-time = "2026-03-12T03:40:12.597Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/f8/da5eae4fc75e78e6dceb60624e1b9662ab00d6b452996046dfa9b8a6025b/black-26.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1", size = 1895920, upload-time = "2026-03-12T03:40:13.921Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/9f/04e6f26534da2e1629b2b48255c264cabf5eedc5141d04516d9d68a24111/black-26.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f", size = 1718499, upload-time = "2026-03-12T03:40:15.239Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/91/a5935b2a63e31b331060c4a9fdb5a6c725840858c599032a6f3aac94055f/black-26.3.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7", size = 1794994, upload-time = "2026-03-12T03:40:17.124Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e7/0a/86e462cdd311a3c2a8ece708d22aba17d0b2a0d5348ca34b40cdcbea512e/black-26.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983", size = 1420867, upload-time = "2026-03-12T03:40:18.83Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5b/e5/22515a19cb7eaee3440325a6b0d95d2c0e88dd180cb011b12ae488e031d1/black-26.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb", size = 1230124, upload-time = "2026-03-12T03:40:20.425Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8e/0d/52d98722666d6fc6c3dd4c76df339501d6efd40e0ff95e6186a7b7f0befd/black-26.3.1-py3-none-any.whl", hash = "sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b", size = 207542, upload-time = "2026-03-12T03:36:01.668Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2252,7 +2264,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "nltk"
|
||||
version = "3.9.3"
|
||||
version = "3.9.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "click" },
|
||||
@@ -2260,9 +2272,9 @@ dependencies = [
|
||||
{ name = "regex" },
|
||||
{ name = "tqdm" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e1/8f/915e1c12df07c70ed779d18ab83d065718a926e70d3ea33eb0cd66ffb7c0/nltk-3.9.3.tar.gz", hash = "sha256:cb5945d6424a98d694c2b9a0264519fab4363711065a46aa0ae7a2195b92e71f", size = 2923673, upload-time = "2026-02-24T12:05:53.833Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/74/a1/b3b4adf15585a5bc4c357adde150c01ebeeb642173ded4d871e89468767c/nltk-3.9.4.tar.gz", hash = "sha256:ed03bc098a40481310320808b2db712d95d13ca65b27372f8a403949c8b523d0", size = 2946864, upload-time = "2026-03-24T06:13:40.641Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/7e/9af5a710a1236e4772de8dfcc6af942a561327bb9f42b5b4a24d0cf100fd/nltk-3.9.3-py3-none-any.whl", hash = "sha256:60b3db6e9995b3dd976b1f0fa7dec22069b2677e759c28eb69b62ddd44870522", size = 1525385, upload-time = "2026-02-24T12:05:46.54Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9d/91/04e965f8e717ba0ab4bdca5c112deeab11c9e750d94c4d4602f050295d39/nltk-3.9.4-py3-none-any.whl", hash = "sha256:f2fa301c3a12718ce4a0e9305c5675299da5ad9e26068218b69d692fda84828f", size = 1552087, upload-time = "2026-03-24T06:13:38.47Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2433,11 +2445,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pathspec"
|
||||
version = "0.12.1"
|
||||
version = "1.0.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2915,11 +2927,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pyjwt"
|
||||
version = "2.11.0"
|
||||
version = "2.12.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
@@ -2951,15 +2963,15 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pyopenssl"
|
||||
version = "25.3.0"
|
||||
version = "26.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/8e/11/a62e1d33b373da2b2c2cd9eb508147871c80f12b1cacde3c5d314922afdd/pyopenssl-26.0.0.tar.gz", hash = "sha256:f293934e52936f2e3413b89c6ce36df66a0b34ae1ea3a053b8c5020ff2f513fc", size = 185534, upload-time = "2026-03-15T14:28:26.353Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fb/7d/d4f7d908fa8415571771b30669251d57c3cf313b36a856e6d7548ae01619/pyopenssl-26.0.0-py3-none-any.whl", hash = "sha256:df94d28498848b98cc1c0ffb8ef1e71e40210d3b0a8064c9d29571ed2904bf81", size = 57969, upload-time = "2026-03-15T14:28:24.864Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2973,11 +2985,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pypdf"
|
||||
version = "6.7.5"
|
||||
version = "6.9.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f6/52/37cc0aa9e9d1bf7729a737a0d83f8b3f851c8eb137373d9f71eafb0a3405/pypdf-6.7.5.tar.gz", hash = "sha256:40bb2e2e872078655f12b9b89e2f900888bb505e88a82150b64f9f34fa25651d", size = 5304278, upload-time = "2026-03-02T09:05:21.464Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/31/83/691bdb309306232362503083cb15777491045dd54f45393a317dc7d8082f/pypdf-6.9.2.tar.gz", hash = "sha256:7f850faf2b0d4ab936582c05da32c52214c2b089d61a316627b5bfb5b0dab46c", size = 5311837, upload-time = "2026-03-23T14:53:27.983Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/05/89/336673efd0a88956562658aba4f0bbef7cb92a6fbcbcaf94926dbc82b408/pypdf-6.7.5-py3-none-any.whl", hash = "sha256:07ba7f1d6e6d9aa2a17f5452e320a84718d4ce863367f7ede2fd72280349ab13", size = 331421, upload-time = "2026-03-02T09:05:19.722Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a5/7e/c85f41243086a8fe5d1baeba527cb26a1918158a565932b41e0f7c0b32e9/pypdf-6.9.2-py3-none-any.whl", hash = "sha256:662cf29bcb419a36a1365232449624ab40b7c2d0cfc28e54f42eeecd1fd7e844", size = 333744, upload-time = "2026-03-23T14:53:26.573Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3159,6 +3171,25 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytokens"
|
||||
version = "0.4.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b6/34/b4e015b99031667a7b960f888889c5bd34ef585c85e1cb56a594b92836ac/pytokens-0.4.1.tar.gz", hash = "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a", size = 23015, upload-time = "2026-01-30T01:03:45.924Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/92/790ebe03f07b57e53b10884c329b9a1a308648fc083a6d4a39a10a28c8fc/pytokens-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440", size = 160864, upload-time = "2026-01-30T01:02:57.882Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/13/25/a4f555281d975bfdd1eba731450e2fe3a95870274da73fb12c40aeae7625/pytokens-0.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc", size = 248565, upload-time = "2026-01-30T01:02:59.912Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/17/50/bc0394b4ad5b1601be22fa43652173d47e4c9efbf0044c62e9a59b747c56/pytokens-0.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d", size = 260824, upload-time = "2026-01-30T01:03:01.471Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/54/3e04f9d92a4be4fc6c80016bc396b923d2a6933ae94b5f557c939c460ee0/pytokens-0.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16", size = 264075, upload-time = "2026-01-30T01:03:04.143Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/1b/44b0326cb5470a4375f37988aea5d61b5cc52407143303015ebee94abfd6/pytokens-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6", size = 103323, upload-time = "2026-01-30T01:03:05.412Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/41/5d/e44573011401fb82e9d51e97f1290ceb377800fb4eed650b96f4753b499c/pytokens-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083", size = 160663, upload-time = "2026-01-30T01:03:06.473Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f0/e6/5bbc3019f8e6f21d09c41f8b8654536117e5e211a85d89212d59cbdab381/pytokens-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1", size = 255626, upload-time = "2026-01-30T01:03:08.177Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/3c/2d5297d82286f6f3d92770289fd439956b201c0a4fc7e72efb9b2293758e/pytokens-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1", size = 269779, upload-time = "2026-01-30T01:03:09.756Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/20/01/7436e9ad693cebda0551203e0bf28f7669976c60ad07d6402098208476de/pytokens-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9", size = 268076, upload-time = "2026-01-30T01:03:10.957Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2e/df/533c82a3c752ba13ae7ef238b7f8cdd272cf1475f03c63ac6cf3fcfb00b6/pytokens-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68", size = 103552, upload-time = "2026-01-30T01:03:12.066Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/78/397db326746f0a342855b81216ae1f0a32965deccfd7c830a2dbc66d2483/pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", size = 13729, upload-time = "2026-01-30T01:03:45.029Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytorch-lightning"
|
||||
version = "2.5.6"
|
||||
@@ -3321,10 +3352,12 @@ dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
{ name = "aiohttp-cors" },
|
||||
{ name = "aiortc" },
|
||||
{ name = "aiosmtplib" },
|
||||
{ name = "alembic" },
|
||||
{ name = "av" },
|
||||
{ name = "celery" },
|
||||
{ name = "databases", extra = ["aiosqlite", "asyncpg"] },
|
||||
{ name = "email-validator" },
|
||||
{ name = "fastapi", extra = ["standard"] },
|
||||
{ name = "fastapi-pagination" },
|
||||
{ name = "hatchet-sdk" },
|
||||
@@ -3400,10 +3433,12 @@ requires-dist = [
|
||||
{ name = "aiohttp", specifier = ">=3.9.0" },
|
||||
{ name = "aiohttp-cors", specifier = ">=0.7.0" },
|
||||
{ name = "aiortc", specifier = ">=1.5.0" },
|
||||
{ name = "aiosmtplib", specifier = ">=3.0.0" },
|
||||
{ name = "alembic", specifier = ">=1.11.3" },
|
||||
{ name = "av", specifier = ">=15.0.0" },
|
||||
{ name = "celery", specifier = ">=5.3.4" },
|
||||
{ name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
|
||||
{ name = "email-validator", specifier = ">=2.0.0" },
|
||||
{ name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },
|
||||
{ name = "fastapi-pagination", specifier = ">=0.14.2" },
|
||||
{ name = "hatchet-sdk", specifier = "==1.22.16" },
|
||||
@@ -3508,7 +3543,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.32.4"
|
||||
version = "2.33.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
@@ -3516,9 +3551,9 @@ dependencies = [
|
||||
{ name = "idna" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258, upload-time = "2025-06-09T16:43:07.34Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/34/64/8860370b167a9721e8956ae116825caff829224fbca0ca6e7bf8ddef8430/requests-2.33.0.tar.gz", hash = "sha256:c7ebc5e8b0f21837386ad0e1c8fe8b829fa5f544d8df3b2253bff14ef29d7652", size = 134232, upload-time = "2026-03-25T15:10:41.586Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4268,10 +4303,10 @@ dependencies = [
|
||||
{ name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9276857d241c6de257af765c0f51fc011af38cb725401495121b280913007cf" },
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4573c6042950c20278e3608a9a38050ba0bc72e0049e1bbfd249caf859a8029b" },
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ddef94bf181e6447cbb05f38beaca8f6c5bb8d2b9ddced1aa3452025b9fc70d3" },
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:862e2e40bf09d865e5df080a84c1a39bbcef40e43140f4b1737eb3a389d3b38f" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9276857d241c6de257af765c0f51fc011af38cb725401495121b280913007cf" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4573c6042950c20278e3608a9a38050ba0bc72e0049e1bbfd249caf859a8029b" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ddef94bf181e6447cbb05f38beaca8f6c5bb8d2b9ddced1aa3452025b9fc70d3" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:862e2e40bf09d865e5df080a84c1a39bbcef40e43140f4b1737eb3a389d3b38f" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4294,10 +4329,10 @@ dependencies = [
|
||||
{ name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation == 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'CPython' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e54bd7fc9472019308097d99102df9acee22aa2451ae808d27840bc874320292" },
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:db37df7eee906f8fe0a639fdc673f3541cb2e173169b16d4133447eb922d1938" },
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9377faee65a290578280ac7f4884c3586253dac2ca28c60f458ff6efe86a6b05" },
|
||||
{ url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:9b302192b570657c1cc787a4d487ae4bbb7f2aab1c01b1fcc46757e7f86f391e" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e54bd7fc9472019308097d99102df9acee22aa2451ae808d27840bc874320292" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:db37df7eee906f8fe0a639fdc673f3541cb2e173169b16d4133447eb922d1938" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9377faee65a290578280ac7f4884c3586253dac2ca28c60f458ff6efe86a6b05" },
|
||||
{ url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:9b302192b570657c1cc787a4d487ae4bbb7f2aab1c01b1fcc46757e7f86f391e" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user