Compare commits

..

1 Commits

Author SHA1 Message Date
opencode
47cb75ee39 feat: add background information field to room model
- Add background_information field to Room database table and model
- Create database migration for the new field
- Update API schemas (CreateRoom, UpdateRoom) to handle background_information
- Integrate room context into AI summarization prompts
- Add background_information field to frontend room form
- Update TypeScript types from regenerated OpenAPI spec

The background information will be used to provide context for AI-generated
summaries, helping create more appropriate and relevant meeting summaries.

🤖 Generated with [opencode](https://opencode.ai)

Co-Authored-By: opencode <noreply@opencode.ai>
2025-07-29 01:53:13 +00:00
419 changed files with 20537 additions and 56203 deletions

View File

@@ -1,21 +1,19 @@
<!--- Provide a general summary of your changes in the Title above -->
## ⚠️ Insert the PR TITLE replacing this text ⚠️
## Description
<!--- Describe your changes in detail -->
⚠️ Describe your PR replacing this text. Post screenshots or videos whenever possible. ⚠️
## Related Issue
<!--- This project only accepts pull requests related to open issues -->
<!--- If suggesting a new feature or change, please discuss it in an issue first -->
<!--- If fixing a bug, there should be an issue describing it with steps to reproduce -->
<!--- Please link to the issue here: -->
### Checklist
## Motivation and Context
<!--- Why is this change required? What problem does it solve? -->
<!--- If it fixes an open issue, please link to the issue here. -->
- [ ] My branch is updated with main (mandatory)
- [ ] I wrote unit tests for this (if applies)
- [ ] I have included migrations and tested them locally (if applies)
- [ ] I have manually tested this feature locally
## How Has This Been Tested?
<!--- Please describe in detail how you tested your changes. -->
<!--- Include details of your testing environment, and the tests you ran to -->
<!--- see how your change affects other areas of the code, etc. -->
> IMPORTANT: Remember that you are responsible for merging this PR after it's been reviewed, and once deployed
> you should perform manual testing to make sure everything went smoothly.
### Urgency
- [ ] Urgent (deploy ASAP)
- [ ] Non-urgent (deploying in next release is ok)
## Screenshots (if appropriate):

View File

@@ -0,0 +1,19 @@
name: Conventional commit PR
on: [pull_request]
jobs:
cog_check_job:
runs-on: ubuntu-latest
name: check conventional commit compliance
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# pick the pr HEAD instead of the merge commit
ref: ${{ github.event.pull_request.head.sha }}
- name: Conventional commit check
uses: cocogitto/cocogitto-action@v3
with:
check-latest-tag-only: true

View File

@@ -2,8 +2,6 @@ name: Test Database Migrations
on:
push:
branches:
- main
paths:
- "server/migrations/**"
- "server/reflector/db/**"
@@ -19,43 +17,10 @@ on:
jobs:
test-migrations:
runs-on: ubuntu-latest
concurrency:
group: db-ubuntu-latest-${{ github.ref }}
cancel-in-progress: true
services:
postgres:
image: postgres:17
env:
POSTGRES_USER: reflector
POSTGRES_PASSWORD: reflector
POSTGRES_DB: reflector
ports:
- 5432:5432
options: >-
--health-cmd pg_isready -h 127.0.0.1 -p 5432
--health-interval 10s
--health-timeout 5s
--health-retries 5
env:
DATABASE_URL: postgresql://reflector:reflector@localhost:5432/reflector
steps:
- uses: actions/checkout@v4
- name: Install PostgreSQL client
run: sudo apt-get update && sudo apt-get install -y postgresql-client | cat
- name: Wait for Postgres
run: |
for i in {1..30}; do
if pg_isready -h localhost -p 5432; then
echo "Postgres is ready"
break
fi
echo "Waiting for Postgres... ($i)" && sleep 1
done
- name: Install uv
uses: astral-sh/setup-uv@v3
with:

View File

@@ -8,30 +8,18 @@ env:
ECR_REPOSITORY: reflector
jobs:
build:
strategy:
matrix:
include:
- platform: linux/amd64
runner: linux-amd64
arch: amd64
- platform: linux/arm64
runner: linux-arm64
arch: arm64
runs-on: ${{ matrix.runner }}
deploy:
runs-on: ubuntu-latest
permissions:
deployments: write
contents: read
outputs:
registry: ${{ steps.login-ecr.outputs.registry }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
uses: aws-actions/configure-aws-credentials@0e613a0980cbf65ed5b322eb7a1e075d28913a83
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -39,52 +27,21 @@ jobs:
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
uses: aws-actions/amazon-ecr-login@62f4f872db3836360b72999f4b87f1ff13310f3a
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Build and push ${{ matrix.arch }}
uses: docker/build-push-action@v5
- name: Build and push
id: docker_build
uses: docker/build-push-action@v4
with:
context: server
platforms: ${{ matrix.platform }}
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}:latest-${{ matrix.arch }}
cache-from: type=gha,scope=${{ matrix.arch }}
cache-to: type=gha,mode=max,scope=${{ matrix.arch }}
github-token: ${{ secrets.GHA_CACHE_TOKEN }}
provenance: false
create-manifest:
runs-on: ubuntu-latest
needs: [build]
permissions:
deployments: write
contents: read
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Login to Amazon ECR
uses: aws-actions/amazon-ecr-login@v2
- name: Create and push multi-arch manifest
run: |
# Get the registry URL (since we can't easily access job outputs in matrix)
ECR_REGISTRY=$(aws ecr describe-registry --query 'registryId' --output text).dkr.ecr.${{ env.AWS_REGION }}.amazonaws.com
docker manifest create \
$ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest \
$ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest-amd64 \
$ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest-arm64
docker manifest push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest
echo "✅ Multi-arch manifest pushed: $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest"
tags: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -1,57 +0,0 @@
name: Build and Push Frontend Docker Image
on:
push:
branches:
- main
paths:
- 'www/**'
- '.github/workflows/docker-frontend.yml'
workflow_dispatch:
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}-frontend
jobs:
build-and-push:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=branch
type=sha,prefix={{branch}}-
type=raw,value=latest,enable={{is_default_branch}}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: ./www
file: ./www/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
platforms: linux/amd64,linux/arm64

View File

@@ -1,24 +0,0 @@
name: pre-commit
on:
pull_request:
push:
branches: [main]
jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/setup-python@v5
- uses: pnpm/action-setup@v4
with:
version: 10
- uses: actions/setup-node@v4
with:
node-version: 22
cache: "pnpm"
cache-dependency-path: "www/pnpm-lock.yaml"
- name: Install dependencies
run: cd www && pnpm install --frozen-lockfile
- uses: pre-commit/action@v3.0.1

View File

@@ -1,45 +0,0 @@
name: Test Next Server
on:
pull_request:
paths:
- "www/**"
push:
branches:
- main
paths:
- "www/**"
jobs:
test-next-server:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./www
steps:
- uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '20'
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
version: 8
- name: Setup Node.js cache
uses: actions/setup-node@v4
with:
node-version: '20'
cache: 'pnpm'
cache-dependency-path: './www/pnpm-lock.yaml'
- name: Install dependencies
run: pnpm install
- name: Run tests
run: pnpm test

View File

@@ -5,17 +5,12 @@ on:
paths:
- "server/**"
push:
branches:
- main
paths:
- "server/**"
jobs:
pytest:
runs-on: ubuntu-latest
concurrency:
group: pytest-${{ github.ref }}
cancel-in-progress: true
services:
redis:
image: redis:6
@@ -24,47 +19,29 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
working-directory: server
- name: Tests
run: |
cd server
uv run -m pytest -v tests
docker-amd64:
runs-on: linux-amd64
concurrency:
group: docker-amd64-${{ github.ref }}
cancel-in-progress: true
docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build AMD64
uses: docker/build-push-action@v6
uses: docker/setup-buildx-action@v2
- name: Build and push
id: docker_build
uses: docker/build-push-action@v4
with:
context: server
platforms: linux/amd64
cache-from: type=gha,scope=amd64
cache-to: type=gha,mode=max,scope=amd64
github-token: ${{ secrets.GHA_CACHE_TOKEN }}
docker-arm64:
runs-on: linux-arm64
concurrency:
group: docker-arm64-${{ github.ref }}
cancel-in-progress: true
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build ARM64
uses: docker/build-push-action@v6
with:
context: server
platforms: linux/arm64
cache-from: type=gha,scope=arm64
cache-to: type=gha,mode=max,scope=arm64
github-token: ${{ secrets.GHA_CACHE_TOKEN }}
platforms: linux/amd64,linux/arm64
cache-from: type=gha
cache-to: type=gha,mode=max

5
.gitignore vendored
View File

@@ -13,8 +13,3 @@ restart-dev.sh
data/
www/REFACTOR.md
www/reload-frontend
server/test.sqlite
CLAUDE.local.md
www/.env.development
www/.env.production
.playwright-mcp

View File

@@ -1 +0,0 @@
b9d891d3424f371642cb032ecfd0e2564470a72c:server/tests/test_transcripts_recording_deletion.py:generic-api-key:15

View File

@@ -3,10 +3,10 @@
repos:
- repo: local
hooks:
- id: format
name: run format
- id: yarn-format
name: run yarn format
language: system
entry: bash -c 'cd www && pnpm format'
entry: bash -c 'cd www && yarn format'
pass_filenames: false
files: ^www/
@@ -15,20 +15,25 @@ repos:
hooks:
- id: debug-statements
- id: trailing-whitespace
exclude: ^server/trials
- id: detect-private-key
- repo: https://github.com/psf/black
rev: 24.1.1
hooks:
- id: black
files: ^server/(reflector|tests)/
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
name: isort (python)
files: ^server/(gpu|evaluate|reflector)/
args: [ "--profile", "black", "--filter-files" ]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.2
rev: v0.6.5
hooks:
- id: ruff
args:
- --fix
# Uses select rules from server/pyproject.toml
files: ^server/
- id: ruff-format
files: ^server/
- repo: https://github.com/gitleaks/gitleaks
rev: v8.28.0
hooks:
- id: gitleaks
files: ^server/(reflector|tests)/

View File

@@ -1,214 +1,5 @@
# Changelog
## [0.14.0](https://github.com/Monadical-SAS/reflector/compare/v0.13.1...v0.14.0) (2025-10-08)
### Features
* Add calendar event data to transcript webhook payload ([#689](https://github.com/Monadical-SAS/reflector/issues/689)) ([5f6910e](https://github.com/Monadical-SAS/reflector/commit/5f6910e5131b7f28f86c9ecdcc57fed8412ee3cd))
* container build for www / github ([#672](https://github.com/Monadical-SAS/reflector/issues/672)) ([969bd84](https://github.com/Monadical-SAS/reflector/commit/969bd84fcc14851d1a101412a0ba115f1b7cde82))
* docker-compose for production frontend ([#664](https://github.com/Monadical-SAS/reflector/issues/664)) ([5bf64b5](https://github.com/Monadical-SAS/reflector/commit/5bf64b5a41f64535e22849b4bb11734d4dbb4aae))
### Bug Fixes
* restore feature boolean logic ([#671](https://github.com/Monadical-SAS/reflector/issues/671)) ([3660884](https://github.com/Monadical-SAS/reflector/commit/36608849ec64e953e3be456172502762e3c33df9))
* security review ([#656](https://github.com/Monadical-SAS/reflector/issues/656)) ([5d98754](https://github.com/Monadical-SAS/reflector/commit/5d98754305c6c540dd194dda268544f6d88bfaf8))
* update transcript list on reprocess ([#676](https://github.com/Monadical-SAS/reflector/issues/676)) ([9a71af1](https://github.com/Monadical-SAS/reflector/commit/9a71af145ee9b833078c78d0c684590ab12e9f0e))
* upgrade nemo toolkit ([#678](https://github.com/Monadical-SAS/reflector/issues/678)) ([eef6dc3](https://github.com/Monadical-SAS/reflector/commit/eef6dc39037329b65804297786d852dddb0557f9))
## [0.13.1](https://github.com/Monadical-SAS/reflector/compare/v0.13.0...v0.13.1) (2025-09-22)
### Bug Fixes
* TypeError on not all arguments converted during string formatting in logger ([#667](https://github.com/Monadical-SAS/reflector/issues/667)) ([565a629](https://github.com/Monadical-SAS/reflector/commit/565a62900f5a02fc946b68f9269a42190ed70ab6))
## [0.13.0](https://github.com/Monadical-SAS/reflector/compare/v0.12.1...v0.13.0) (2025-09-19)
### Features
* room form edit with enter ([#662](https://github.com/Monadical-SAS/reflector/issues/662)) ([47716f6](https://github.com/Monadical-SAS/reflector/commit/47716f6e5ddee952609d2fa0ffabdfa865286796))
### Bug Fixes
* invalid cleanup call ([#660](https://github.com/Monadical-SAS/reflector/issues/660)) ([0abcebf](https://github.com/Monadical-SAS/reflector/commit/0abcebfc9491f87f605f21faa3e53996fafedd9a))
## [0.12.1](https://github.com/Monadical-SAS/reflector/compare/v0.12.0...v0.12.1) (2025-09-17)
### Bug Fixes
* production blocked because having existing meeting with room_id null ([#657](https://github.com/Monadical-SAS/reflector/issues/657)) ([870e860](https://github.com/Monadical-SAS/reflector/commit/870e8605171a27155a9cbee215eeccb9a8d6c0a2))
## [0.12.0](https://github.com/Monadical-SAS/reflector/compare/v0.11.0...v0.12.0) (2025-09-17)
### Features
* calendar integration ([#608](https://github.com/Monadical-SAS/reflector/issues/608)) ([6f680b5](https://github.com/Monadical-SAS/reflector/commit/6f680b57954c688882c4ed49f40f161c52a00a24))
* self-hosted gpu api ([#636](https://github.com/Monadical-SAS/reflector/issues/636)) ([ab859d6](https://github.com/Monadical-SAS/reflector/commit/ab859d65a6bded904133a163a081a651b3938d42))
### Bug Fixes
* ignore player hotkeys for text inputs ([#646](https://github.com/Monadical-SAS/reflector/issues/646)) ([fa049e8](https://github.com/Monadical-SAS/reflector/commit/fa049e8d068190ce7ea015fd9fcccb8543f54a3f))
## [0.11.0](https://github.com/Monadical-SAS/reflector/compare/v0.10.0...v0.11.0) (2025-09-16)
### Features
* remove profanity filter that was there for conference ([#652](https://github.com/Monadical-SAS/reflector/issues/652)) ([b42f7cf](https://github.com/Monadical-SAS/reflector/commit/b42f7cfc606783afcee792590efcc78b507468ab))
### Bug Fixes
* zulip and consent handler on the file pipeline ([#645](https://github.com/Monadical-SAS/reflector/issues/645)) ([5f143fe](https://github.com/Monadical-SAS/reflector/commit/5f143fe3640875dcb56c26694254a93189281d17))
* zulip stream and topic selection in share dialog ([#644](https://github.com/Monadical-SAS/reflector/issues/644)) ([c546e69](https://github.com/Monadical-SAS/reflector/commit/c546e69739e68bb74fbc877eb62609928e5b8de6))
## [0.10.0](https://github.com/Monadical-SAS/reflector/compare/v0.9.0...v0.10.0) (2025-09-11)
### Features
* replace nextjs-config with environment variables ([#632](https://github.com/Monadical-SAS/reflector/issues/632)) ([369ecdf](https://github.com/Monadical-SAS/reflector/commit/369ecdff13f3862d926a9c0b87df52c9d94c4dde))
### Bug Fixes
* anonymous users transcript permissions ([#621](https://github.com/Monadical-SAS/reflector/issues/621)) ([f81fe99](https://github.com/Monadical-SAS/reflector/commit/f81fe9948a9237b3e0001b2d8ca84f54d76878f9))
* auth post ([#624](https://github.com/Monadical-SAS/reflector/issues/624)) ([cde99ca](https://github.com/Monadical-SAS/reflector/commit/cde99ca2716f84ba26798f289047732f0448742e))
* auth post ([#626](https://github.com/Monadical-SAS/reflector/issues/626)) ([3b85ff3](https://github.com/Monadical-SAS/reflector/commit/3b85ff3bdf4fb053b103070646811bc990c0e70a))
* auth post ([#627](https://github.com/Monadical-SAS/reflector/issues/627)) ([962038e](https://github.com/Monadical-SAS/reflector/commit/962038ee3f2a555dc3c03856be0e4409456e0996))
* missing follow_redirects=True on modal endpoint ([#630](https://github.com/Monadical-SAS/reflector/issues/630)) ([fc363bd](https://github.com/Monadical-SAS/reflector/commit/fc363bd49b17b075e64f9186e5e0185abc325ea7))
* sync backend and frontend token refresh logic ([#614](https://github.com/Monadical-SAS/reflector/issues/614)) ([5a5b323](https://github.com/Monadical-SAS/reflector/commit/5a5b3233820df9536da75e87ce6184a983d4713a))
## [0.9.0](https://github.com/Monadical-SAS/reflector/compare/v0.8.2...v0.9.0) (2025-09-06)
### Features
* frontend openapi react query ([#606](https://github.com/Monadical-SAS/reflector/issues/606)) ([c4d2825](https://github.com/Monadical-SAS/reflector/commit/c4d2825c81f81ad8835629fbf6ea8c7383f8c31b))
### Bug Fixes
* align whisper transcriber api with parakeet ([#602](https://github.com/Monadical-SAS/reflector/issues/602)) ([0663700](https://github.com/Monadical-SAS/reflector/commit/0663700a615a4af69a03c96c410f049e23ec9443))
* kv use tls explicit ([#610](https://github.com/Monadical-SAS/reflector/issues/610)) ([08d88ec](https://github.com/Monadical-SAS/reflector/commit/08d88ec349f38b0d13e0fa4cb73486c8dfd31836))
* source kind for file processing ([#601](https://github.com/Monadical-SAS/reflector/issues/601)) ([dc82f8b](https://github.com/Monadical-SAS/reflector/commit/dc82f8bb3bdf3ab3d4088e592a30fd63907319e1))
* token refresh locking ([#613](https://github.com/Monadical-SAS/reflector/issues/613)) ([7f5a4c9](https://github.com/Monadical-SAS/reflector/commit/7f5a4c9ddc7fd098860c8bdda2ca3b57f63ded2f))
## [0.8.2](https://github.com/Monadical-SAS/reflector/compare/v0.8.1...v0.8.2) (2025-08-29)
### Bug Fixes
* search-logspam ([#593](https://github.com/Monadical-SAS/reflector/issues/593)) ([695d1a9](https://github.com/Monadical-SAS/reflector/commit/695d1a957d4cd862753049f9beed88836cabd5ab))
## [0.8.1](https://github.com/Monadical-SAS/reflector/compare/v0.8.0...v0.8.1) (2025-08-29)
### Bug Fixes
* make webhook secret/url allowing null ([#590](https://github.com/Monadical-SAS/reflector/issues/590)) ([84a3812](https://github.com/Monadical-SAS/reflector/commit/84a381220bc606231d08d6f71d4babc818fa3c75))
## [0.8.0](https://github.com/Monadical-SAS/reflector/compare/v0.7.3...v0.8.0) (2025-08-29)
### Features
* **cleanup:** add automatic data retention for public instances ([#574](https://github.com/Monadical-SAS/reflector/issues/574)) ([6f0c7c1](https://github.com/Monadical-SAS/reflector/commit/6f0c7c1a5e751713366886c8e764c2009e12ba72))
* **rooms:** add webhook for transcript completion ([#578](https://github.com/Monadical-SAS/reflector/issues/578)) ([88ed7cf](https://github.com/Monadical-SAS/reflector/commit/88ed7cfa7804794b9b54cad4c3facc8a98cf85fd))
### Bug Fixes
* file pipeline status reporting and websocket updates ([#589](https://github.com/Monadical-SAS/reflector/issues/589)) ([9dfd769](https://github.com/Monadical-SAS/reflector/commit/9dfd76996f851cc52be54feea078adbc0816dc57))
* Igor/evaluation ([#575](https://github.com/Monadical-SAS/reflector/issues/575)) ([124ce03](https://github.com/Monadical-SAS/reflector/commit/124ce03bf86044c18313d27228a25da4bc20c9c5))
* optimize parakeet transcription batching algorithm ([#577](https://github.com/Monadical-SAS/reflector/issues/577)) ([7030e0f](https://github.com/Monadical-SAS/reflector/commit/7030e0f23649a8cf6c1eb6d5889684a41ce849ec))
## [0.7.3](https://github.com/Monadical-SAS/reflector/compare/v0.7.2...v0.7.3) (2025-08-22)
### Bug Fixes
* cleaned repo, and get git-leaks clean ([359280d](https://github.com/Monadical-SAS/reflector/commit/359280dd340433ba4402ed69034094884c825e67))
* restore previous behavior on live pipeline + audio downscaler ([#561](https://github.com/Monadical-SAS/reflector/issues/561)) ([9265d20](https://github.com/Monadical-SAS/reflector/commit/9265d201b590d23c628c5f19251b70f473859043))
## [0.7.2](https://github.com/Monadical-SAS/reflector/compare/v0.7.1...v0.7.2) (2025-08-21)
### Bug Fixes
* docker image not loading libgomp.so.1 for torch ([#560](https://github.com/Monadical-SAS/reflector/issues/560)) ([773fccd](https://github.com/Monadical-SAS/reflector/commit/773fccd93e887c3493abc2e4a4864dddce610177))
* include shared rooms to search ([#558](https://github.com/Monadical-SAS/reflector/issues/558)) ([499eced](https://github.com/Monadical-SAS/reflector/commit/499eced3360b84fb3a90e1c8a3b554290d21adc2))
## [0.7.1](https://github.com/Monadical-SAS/reflector/compare/v0.7.0...v0.7.1) (2025-08-21)
### Bug Fixes
* webvtt db null expectation mismatch ([#556](https://github.com/Monadical-SAS/reflector/issues/556)) ([e67ad1a](https://github.com/Monadical-SAS/reflector/commit/e67ad1a4a2054467bfeb1e0258fbac5868aaaf21))
## [0.7.0](https://github.com/Monadical-SAS/reflector/compare/v0.6.1...v0.7.0) (2025-08-21)
### Features
* delete recording with transcript ([#547](https://github.com/Monadical-SAS/reflector/issues/547)) ([99cc984](https://github.com/Monadical-SAS/reflector/commit/99cc9840b3f5de01e0adfbfae93234042d706d13))
* pipeline improvement with file processing, parakeet, silero-vad ([#540](https://github.com/Monadical-SAS/reflector/issues/540)) ([bcc29c9](https://github.com/Monadical-SAS/reflector/commit/bcc29c9e0050ae215f89d460e9d645aaf6a5e486))
* postgresql migration and removal of sqlite in pytest ([#546](https://github.com/Monadical-SAS/reflector/issues/546)) ([cd1990f](https://github.com/Monadical-SAS/reflector/commit/cd1990f8f0fe1503ef5069512f33777a73a93d7f))
* search backend ([#537](https://github.com/Monadical-SAS/reflector/issues/537)) ([5f9b892](https://github.com/Monadical-SAS/reflector/commit/5f9b89260c9ef7f3c921319719467df22830453f))
* search frontend ([#551](https://github.com/Monadical-SAS/reflector/issues/551)) ([3657242](https://github.com/Monadical-SAS/reflector/commit/365724271ca6e615e3425125a69ae2b46ce39285))
### Bug Fixes
* evaluation cli event wrap ([#536](https://github.com/Monadical-SAS/reflector/issues/536)) ([941c3db](https://github.com/Monadical-SAS/reflector/commit/941c3db0bdacc7b61fea412f3746cc5a7cb67836))
* use structlog not logging ([#550](https://github.com/Monadical-SAS/reflector/issues/550)) ([27e2f81](https://github.com/Monadical-SAS/reflector/commit/27e2f81fda5232e53edc729d3e99c5ef03adbfe9))
## [0.6.1](https://github.com/Monadical-SAS/reflector/compare/v0.6.0...v0.6.1) (2025-08-06)
### Bug Fixes
* delayed waveform loading ([#538](https://github.com/Monadical-SAS/reflector/issues/538)) ([ef64146](https://github.com/Monadical-SAS/reflector/commit/ef64146325d03f64dd9a1fe40234fb3e7e957ae2))
## [0.6.0](https://github.com/Monadical-SAS/reflector/compare/v0.5.0...v0.6.0) (2025-08-05)
### ⚠ BREAKING CHANGES
* Configuration keys have changed. Update your .env file:
- TRANSCRIPT_MODAL_API_KEY → TRANSCRIPT_API_KEY
- LLM_MODAL_API_KEY → (removed, use TRANSCRIPT_API_KEY)
- Add DIARIZATION_API_KEY and TRANSLATE_API_KEY if using those services
### Features
* implement service-specific Modal API keys with auto processor pattern ([#528](https://github.com/Monadical-SAS/reflector/issues/528)) ([650befb](https://github.com/Monadical-SAS/reflector/commit/650befb291c47a1f49e94a01ab37d8fdfcd2b65d))
* use llamaindex everywhere ([#525](https://github.com/Monadical-SAS/reflector/issues/525)) ([3141d17](https://github.com/Monadical-SAS/reflector/commit/3141d172bc4d3b3d533370c8e6e351ea762169bf))
### Miscellaneous Chores
* **main:** release 0.6.0 ([ecdbf00](https://github.com/Monadical-SAS/reflector/commit/ecdbf003ea2476c3e95fd231adaeb852f2943df0))
## [0.5.0](https://github.com/Monadical-SAS/reflector/compare/v0.4.0...v0.5.0) (2025-07-31)
### Features
* new summary using phi-4 and llama-index ([#519](https://github.com/Monadical-SAS/reflector/issues/519)) ([1bf9ce0](https://github.com/Monadical-SAS/reflector/commit/1bf9ce07c12f87f89e68a1dbb3b2c96c5ee62466))
### Bug Fixes
* remove unused settings and utils files ([#522](https://github.com/Monadical-SAS/reflector/issues/522)) ([2af4790](https://github.com/Monadical-SAS/reflector/commit/2af4790e4be9e588f282fbc1bb171c88a03d6479))
## [0.4.0](https://github.com/Monadical-SAS/reflector/compare/v0.3.2...v0.4.0) (2025-07-25)

View File

@@ -62,28 +62,29 @@ uv run python -m reflector.tools.process path/to/audio.wav
**Setup:**
```bash
# Install dependencies
pnpm install
yarn install
# Copy configuration templates
cp .env_template .env
cp config-template.ts config.ts
```
**Development:**
```bash
# Start development server
pnpm dev
yarn dev
# Generate TypeScript API client from OpenAPI spec
pnpm openapi
yarn openapi
# Lint code
pnpm lint
yarn lint
# Format code
pnpm format
yarn format
# Build for production
pnpm build
yarn build
```
### Docker Compose (Full Stack)
@@ -143,15 +144,13 @@ All endpoints prefixed `/v1/`:
**Backend** (`server/.env`):
- `DATABASE_URL` - Database connection string
- `REDIS_URL` - Redis broker for Celery
- `TRANSCRIPT_BACKEND=modal` + `TRANSCRIPT_MODAL_API_KEY` - Modal.com transcription
- `DIARIZATION_BACKEND=modal` + `DIARIZATION_MODAL_API_KEY` - Modal.com diarization
- `TRANSLATION_BACKEND=modal` + `TRANSLATION_MODAL_API_KEY` - Modal.com translation
- `MODAL_TOKEN_ID`, `MODAL_TOKEN_SECRET` - Modal.com GPU processing
- `WHEREBY_API_KEY` - Video platform integration
- `REFLECTOR_AUTH_BACKEND` - Authentication method (none, jwt)
**Frontend** (`www/.env`):
- `NEXTAUTH_URL`, `NEXTAUTH_SECRET` - Authentication configuration
- `REFLECTOR_API_URL` - Backend API endpoint
- `NEXT_PUBLIC_REFLECTOR_API_URL` - Backend API endpoint
- `REFLECTOR_DOMAIN_CONFIG` - Feature flags and domain settings
## Testing Strategy
@@ -173,7 +172,3 @@ Modal.com integration for scalable ML processing:
- **Audio Routing**: Use BlackHole (Mac) for merging multiple audio sources
- **WebRTC**: Ensure proper CORS configuration for cross-origin streaming
- **Database**: Run `uv run alembic upgrade head` after pulling schema changes
## Pipeline/worker related info
If you need to do any worker/pipeline related work, search for "Pipeline" classes and their "create" or "build" methods to find the main processor sequence. Look for task orchestration patterns (like "chord", "group", or "chain") to identify the post-processing flow with parallel execution chains. This will give you abstract vision on how processing pipeling is organized.

View File

@@ -1,345 +0,0 @@
# Multi-Provider Video Platform Implementation - Coder Briefing
## Your Mission
Implement multi-provider video platform support in Reflector, allowing the system to work with both Whereby and Daily.co video conferencing providers. The goal is to abstract the current Whereby-only implementation and add Daily.co as a second provider, with the ability to switch between them via environment variables.
**Branch:** `igor/dailico-2` (you're already on it)
**Estimated Time:** 12-16 hours (senior engineer)
**Complexity:** Medium-High (requires careful integration with existing codebase)
---
## What You Have
### 1. **PLAN.md** - Your Technical Specification (2,452 lines)
- Complete step-by-step implementation guide
- All code examples you need
- Architecture diagrams and design rationale
- Testing strategy and success metrics
- **Read this first** to understand the overall approach
### 2. **IMPLEMENTATION_GUIDE.md** - Your Practical Guide
- What to copy vs. adapt vs. rewrite
- Common pitfalls and how to avoid them
- Verification checklists for each phase
- Decision trees for implementation choices
- **Use this as your day-to-day reference**
### 3. **Reference Implementation** - `./reflector-dailyco-reference/`
- Working implementation from 2.5 months ago
- Good architecture and patterns
- **BUT:** 91 commits behind current main, DO NOT merge directly
- Use for inspiration and code patterns only
---
## Critical Context: Why Not Just Merge?
The reference branch (`origin/igor/feat-dailyco`) was started on August 1, 2025 and is now severely diverged from main:
- **91 commits behind main**
- Main has 12x more changes (45,840 insertions vs 3,689)
- Main added: calendar integration, webhooks, full-text search, React Query migration, security fixes
- Reference removed: features that main still has and needs
**Merging would be a disaster.** We're implementing fresh on current main, using the reference for validated patterns.
---
## High-Level Approach
### Phase 1: Analysis (2 hours)
- Study current Whereby integration
- Define abstraction requirements
- Create standard data models
### Phase 2: Abstraction Layer (4-5 hours)
- Build platform abstraction (base class, registry, factory)
- Extract Whereby into the abstraction
- Update database schema (add `platform` field)
- Integrate into rooms.py **without breaking calendar/webhooks**
### Phase 3: Daily.co Implementation (4-5 hours)
- Implement Daily.co client
- Add webhook handler
- Create frontend components (rewrite API calls for React Query)
- Add recording processing
### Phase 4: Testing (2-3 hours)
- Unit tests for platform abstraction
- Integration tests for webhooks
- Manual testing with both providers
---
## Key Files You'll Touch
### Backend (New)
```
server/reflector/video_platforms/
├── __init__.py
├── base.py ← Abstract base class
├── models.py ← Platform, MeetingData, VideoPlatformConfig
├── registry.py ← Platform registration system
├── factory.py ← Client creation and config
├── whereby.py ← Whereby client wrapper
├── daily.py ← Daily.co client
└── mock.py ← Mock client for testing
server/reflector/views/daily.py ← Daily.co webhooks
server/tests/test_video_platforms.py ← Platform tests
server/tests/test_daily_webhook.py ← Webhook tests
```
### Backend (Modified - Careful!)
```
server/reflector/settings.py ← Add Daily.co settings
server/reflector/db/rooms.py ← Add platform field, PRESERVE calendar fields
server/reflector/db/meetings.py ← Add platform field
server/reflector/views/rooms.py ← Integrate abstraction, PRESERVE calendar/webhooks
server/reflector/worker/process.py ← Add process_recording_from_url task
server/reflector/app.py ← Register daily router
server/env.example ← Document new env vars
```
### Frontend (New)
```
www/app/[roomName]/components/
├── RoomContainer.tsx ← Platform router
├── DailyRoom.tsx ← Daily.co component (rewrite API calls!)
└── WherebyRoom.tsx ← Extract existing logic
```
### Frontend (Modified)
```
www/app/[roomName]/page.tsx ← Use RoomContainer
www/package.json ← Add @daily-co/daily-js
```
### Database
```
server/migrations/versions/XXXXXX_add_platform_support.py ← Generate fresh migration
```
---
## Critical Warnings ⚠️
### 1. **DO NOT Copy Database Migrations**
The reference migration has the wrong `down_revision` and is based on old schema.
```bash
# Instead:
cd server
uv run alembic revision -m "add_platform_support"
# Then edit the generated file
```
### 2. **DO NOT Remove Main's Features**
Main has calendar integration, webhooks, ICS sync that reference doesn't have.
When modifying `rooms.py`, only change meeting creation logic, preserve everything else.
### 3. **DO NOT Copy Frontend API Calls**
Reference uses old OpenAPI client. Main uses React Query.
Check how main currently makes API calls and replicate that pattern.
### 4. **DO NOT Copy package.json/migrations**
These files are severely outdated in reference.
### 5. **Preserve Type Safety**
Use `TYPE_CHECKING` imports to avoid circular dependencies:
```python
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from reflector.db.rooms import Room
```
---
## How to Start
### Day 1 Morning: Setup & Understanding (2-3 hours)
```bash
# 1. Verify you're on the right branch
git branch
# Should show: igor/dailico-2
# 2. Read the docs (in order)
# - PLAN.md (skim to understand scope, read Phase 1 carefully)
# - IMPLEMENTATION_GUIDE.md (read fully, bookmark it)
# 3. Study current Whereby integration
cat server/reflector/views/rooms.py | grep -A 20 "whereby"
cat www/app/[roomName]/page.tsx
# 4. Check reference implementation structure
ls -la reflector-dailyco-reference/server/reflector/video_platforms/
```
### Day 1 Afternoon: Phase 1 Execution (2-3 hours)
```bash
# 5. Copy video_platforms directory from reference
cp -r reflector-dailyco-reference/server/reflector/video_platforms/ \
server/reflector/
# 6. Review and fix imports
cd server
uv run ruff check reflector/video_platforms/
# 7. Add settings to settings.py (see PLAN.md Phase 2.7)
# 8. Test imports work
uv run python -c "from reflector.video_platforms import create_platform_client; print('OK')"
```
### Day 2: Phase 2 - Database & Integration (4-5 hours)
```bash
# 9. Generate migration
uv run alembic revision -m "add_platform_support"
# Edit the file following PLAN.md Phase 2.8
# 10. Update Room/Meeting models
# Add platform field, PRESERVE all existing fields
# 11. Integrate into rooms.py
# Carefully modify meeting creation, preserve calendar/webhooks
# 12. Add Daily.co webhook handler
cp reflector-dailyco-reference/server/reflector/views/daily.py \
server/reflector/views/
# Register in app.py
```
### Day 3: Phase 3 - Frontend & Testing (4-5 hours)
```bash
# 13. Create frontend components
mkdir -p www/app/[roomName]/components
# 14. Add Daily.co dependency
cd www
pnpm add @daily-co/daily-js@^0.81.0
# 15. Create RoomContainer, DailyRoom, WherebyRoom
# IMPORTANT: Rewrite API calls using React Query patterns
# 16. Regenerate types
pnpm openapi
# 17. Copy and adapt tests
cp reflector-dailyco-reference/server/tests/test_*.py server/tests/
# 18. Run tests
cd server
REDIS_HOST=localhost \
CELERY_BROKER_URL=redis://localhost:6379/1 \
uv run pytest tests/test_video_platforms.py -v
```
---
## Verification Checklist
After implementation, all of these must pass:
**Backend:**
- [ ] `cd server && uv run ruff check .` passes
- [ ] `uv run alembic upgrade head` works cleanly
- [ ] `uv run pytest tests/test_video_platforms.py` passes
- [ ] Can import: `from reflector.video_platforms import create_platform_client`
- [ ] Settings has all Daily.co variables
**Frontend:**
- [ ] `cd www && pnpm lint` passes
- [ ] No TypeScript errors
- [ ] `pnpm openapi` generates platform field
- [ ] No `@ts-ignore` for platform field
**Integration:**
- [ ] Whereby meetings still work (existing flow unchanged)
- [ ] Calendar/webhook features still work in rooms.py
- [ ] env.example documents all new variables
---
## When You're Stuck
### Check These Resources:
1. **PLAN.md** - Detailed code examples for your exact scenario
2. **IMPLEMENTATION_GUIDE.md** - Common pitfalls section
3. **Reference code** - See how it was solved before
4. **Git diff** - Compare reference to your implementation
### Compare Files:
```bash
# See what reference did
diff reflector-dailyco-reference/server/reflector/views/rooms.py \
server/reflector/views/rooms.py
# See what changed in main since reference branch
git log --oneline --since="2025-08-01" -- server/reflector/views/rooms.py
```
### Common Issues:
- **Circular imports:** Use `TYPE_CHECKING` pattern
- **Tests fail with postgres error:** Use `REDIS_HOST=localhost` env vars
- **Frontend API calls broken:** Check current React Query patterns in main
- **Migrations fail:** Ensure you generated fresh, not copied
---
## Success Looks Like
When you're done:
- ✅ All tests pass
- ✅ Linting passes
- ✅ Can create Whereby meetings (unchanged behavior)
- ✅ Can create Daily.co meetings (with env vars)
- ✅ Calendar/webhooks still work
- ✅ Frontend has no TypeScript errors
- ✅ Platform selection via environment variables works
---
## Communication
If you need clarification on requirements, have questions about architecture decisions, or find issues with the spec, document them clearly with:
- What you expected
- What you found
- Your proposed solution
The PLAN.md document is comprehensive but you may find edge cases. Use your engineering judgment and document decisions.
---
## Final Notes
**This is not a simple copy-paste job.** You're doing careful integration work where you need to:
- Understand the abstraction pattern (PLAN.md)
- Preserve all of main's features
- Adapt reference code to current patterns
- Think about edge cases and testing
Take your time with Phase 2 (rooms.py integration) - that's where most bugs will come from if you accidentally break calendar/webhook features.
**Good luck! You've got comprehensive specs, working reference code, and a clean starting point. You can do this.**
---
## Quick Reference
```bash
# Your workspace
├── PLAN.md ← Complete technical spec (read first)
├── IMPLEMENTATION_GUIDE.md ← Practical guide (bookmark this)
├── CODER_BRIEFING.md ← This file
└── reflector-dailyco-reference/ ← Reference implementation (inspiration only)
# Key commands
cd server && uv run ruff check . # Lint backend
cd www && pnpm lint # Lint frontend
cd server && uv run alembic revision -m "..." # Create migration
cd www && pnpm openapi # Regenerate types
cd server && uv run pytest -v # Run tests
```

View File

@@ -1,489 +0,0 @@
# Daily.co Implementation Guide
## Overview
Implement multi-provider video platform support (Whereby + Daily.co) following PLAN.md.
## Reference Code Location
- **Reference branch:** `origin/igor/feat-dailyco` (on remote)
- **Worktree location:** `./reflector-dailyco-reference/`
- **Status:** Reference only - DO NOT merge or copy directly
## What Exists in Reference Branch (For Inspiration)
### ✅ Can Use As Reference (Well-Implemented)
```
server/reflector/video_platforms/
├── base.py ← Platform abstraction (good design, copy-safe)
├── models.py ← Data models (copy-safe)
├── registry.py ← Registry pattern (copy-safe)
├── factory.py ← Factory pattern (needs settings updates)
├── whereby.py ← Whereby client (needs adaptation)
├── daily.py ← Daily.co client (needs adaptation)
└── mock.py ← Mock client (copy-safe for tests)
server/reflector/views/daily.py ← Webhook handler (needs adaptation)
server/tests/test_video_platforms.py ← Tests (good reference)
server/tests/test_daily_webhook.py ← Tests (good reference)
www/app/[roomName]/components/
├── RoomContainer.tsx ← Platform router (needs React Query)
├── DailyRoom.tsx ← Daily component (needs React Query)
└── WherebyRoom.tsx ← Whereby extraction (needs React Query)
```
### ⚠️ Needs Significant Changes (Use Logic Only)
- `server/reflector/db/rooms.py` - Reference removed calendar/webhook fields that main has
- `server/reflector/db/meetings.py` - Same issue (missing user_id handling differences)
- `server/reflector/views/rooms.py` - Main has calendar integration, webhooks, ICS sync
- `server/reflector/worker/process.py` - Main has different recording flow
- Migration files - Must regenerate against current main schema
### ❌ Do NOT Use (Outdated/Incompatible)
- `package.json`/`pnpm-lock.yaml` - Main uses different dependency versions
- Frontend API client calls - Main uses React Query (reference uses old OpenAPI client)
- Database migrations - Must create new ones from scratch
- Any files that delete features present in main (search, calendar, webhooks)
## Key Differences: Reference vs Current Main
| Aspect | Reference Branch | Current Main | Action Required |
|--------|------------------|--------------|-----------------|
| **API client** | Old OpenAPI generated | React Query hooks | Rewrite all API calls |
| **Database schema** | Simplified (removed features) | Has calendar, webhooks, full-text search | Merge carefully, preserve main features |
| **Settings** | Aug 2025 structure | Current structure | Adapt carefully |
| **Migrations** | Branched from Aug 1 | Current main (91+ commits ahead) | Regenerate from scratch |
| **Frontend deps** | `@daily-co/daily-js@0.81.0` | Check current versions | Update to compatible versions |
| **Package manager** | yarn | pnpm (maybe both?) | Use what main uses |
## Branch Divergence Analysis
**The reference branch is 91 commits behind main and severely diverged:**
- Reference: 8 commits, 3,689 insertions, 425 deletions
- Main since divergence: 320 files changed, 45,840 insertions, 16,827 deletions
- **Main has 12x more changes**
**Major features in main that reference lacks:**
1. Calendar integration (ICS sync with rooms)
2. Self-hosted GPU API infrastructure
3. Frontend OpenAPI React Query migration
4. Full-text search (backend + frontend)
5. Webhook system for room events
6. Environment variable migration
7. Security fixes and auth improvements
8. Docker production frontend
9. Meeting user ID removal (schema change)
10. NextJS version upgrades
**High conflict risk files:**
- `server/reflector/views/rooms.py` - 12x more changes in main
- `server/reflector/db/rooms.py` - Main added 7+ fields
- `www/package.json` - NextJS major version bump
- Database migrations - 20+ new migrations in main
## Implementation Approach
### Phase 1: Copy Clean Abstractions (1-2 hours)
**Files to copy directly from reference:**
```bash
# Core abstraction (review but mostly safe to copy)
cp -r reflector-dailyco-reference/server/reflector/video_platforms/ \
server/reflector/
# BUT review each file for:
# - Import paths (make sure they match current main)
# - Settings references (adapt to current settings.py)
# - Type imports (ensure no circular dependencies)
```
**After copying, immediately:**
```bash
cd server
# Check for issues
uv run ruff check reflector/video_platforms/
# Fix any import errors or type issues
```
### Phase 2: Adapt to Current Main (2-3 hours)
**2.1 Settings Integration**
File: `server/reflector/settings.py`
Add at the appropriate location (near existing Whereby settings):
```python
# Daily.co API Integration (NEW)
DAILY_API_KEY: str | None = None
DAILY_WEBHOOK_SECRET: str | None = None
DAILY_SUBDOMAIN: str | None = None
AWS_DAILY_S3_BUCKET: str | None = None
AWS_DAILY_S3_REGION: str = "us-west-2"
AWS_DAILY_ROLE_ARN: str | None = None
# Platform Migration Feature Flags (NEW)
DAILY_MIGRATION_ENABLED: bool = False # Conservative default
DAILY_MIGRATION_ROOM_IDS: list[str] = []
DEFAULT_VIDEO_PLATFORM: Literal["whereby", "daily"] = "whereby"
```
**2.2 Database Migration**
⚠️ **CRITICAL: Do NOT copy migration from reference**
Generate new migration:
```bash
cd server
uv run alembic revision -m "add_platform_support"
```
Edit the generated migration file to add `platform` column:
```python
def upgrade():
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.add_column(
sa.Column("platform", sa.String(), nullable=False, server_default="whereby")
)
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.add_column(
sa.Column("platform", sa.String(), nullable=False, server_default="whereby")
)
```
**2.3 Update Database Models**
File: `server/reflector/db/rooms.py`
Add platform field (preserve all existing fields from main):
```python
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from reflector.video_platforms.models import Platform
class Room:
# ... ALL existing fields from main (calendar, webhooks, etc.) ...
# NEW: Platform field
platform: "Platform" = sqlalchemy.Column(
sqlalchemy.String,
nullable=False,
server_default="whereby",
)
```
File: `server/reflector/db/meetings.py`
Same approach - add platform field, preserve everything from main.
**2.4 Integrate Platform Abstraction into rooms.py**
⚠️ **This is the most delicate part - main has calendar/webhook features**
File: `server/reflector/views/rooms.py`
Strategy:
1. Add imports at top
2. Modify meeting creation logic only
3. Preserve all calendar/webhook/ICS logic from main
```python
# Add imports
from reflector.video_platforms import (
create_platform_client,
get_platform_for_room,
)
# In create_meeting endpoint:
# OLD: Direct Whereby API calls
# NEW: Platform abstraction
# Find the meeting creation section and replace:
platform = get_platform_for_room(room.id)
client = create_platform_client(platform)
meeting_data = await client.create_meeting(
room_name_prefix=room.name,
end_date=meeting_data.end_date,
room=room,
)
# Then create Meeting record with meeting_data.platform, meeting_data.meeting_id, etc.
```
**2.5 Add Daily.co Webhook Handler**
Copy from reference, minimal changes needed:
```bash
cp reflector-dailyco-reference/server/reflector/views/daily.py \
server/reflector/views/
```
Register in `server/reflector/app.py`:
```python
from reflector.views import daily
app.include_router(daily.router, prefix="/v1/daily", tags=["daily"])
```
**2.6 Add Recording Processing Task**
File: `server/reflector/worker/process.py`
Add the `process_recording_from_url` task from reference (copy the function).
### Phase 3: Frontend Adaptation (3-4 hours)
**3.1 Determine Current API Client Pattern**
First, check how main currently makes API calls:
```bash
cd www
grep -r "api\." app/ | head -20
# Look for patterns like: api.v1Something()
```
**3.2 Create Components**
Copy component structure from reference but **rewrite all API calls**:
```bash
mkdir -p www/app/[roomName]/components
```
Files to create:
- `RoomContainer.tsx` - Platform router (mostly copy-safe, just fix imports)
- `DailyRoom.tsx` - Needs React Query API calls
- `WherebyRoom.tsx` - Extract current room page logic
**Example React Query pattern** (adapt to your actual API):
```typescript
import { api } from '@/app/api/client'
// In DailyRoom.tsx
const handleConsent = async () => {
try {
await api.v1MeetingAudioConsent({
path: { meeting_id: meeting.id },
body: { consent: true },
})
// ...
} catch (error) {
// ...
}
}
```
**3.3 Add Daily.co Dependency**
Check current package manager:
```bash
cd www
ls package-lock.json yarn.lock pnpm-lock.yaml
```
Then install:
```bash
# If using pnpm
pnpm add @daily-co/daily-js@^0.81.0
# If using yarn
yarn add @daily-co/daily-js@^0.81.0
```
**3.4 Update TypeScript Types**
After backend changes, regenerate types:
```bash
cd www
pnpm openapi # or yarn openapi
```
This should pick up the new `platform` field on Meeting type.
### Phase 4: Testing (2-3 hours)
**4.1 Copy Test Structure**
```bash
cp reflector-dailyco-reference/server/tests/test_video_platforms.py \
server/tests/
cp reflector-dailyco-reference/server/tests/test_daily_webhook.py \
server/tests/
```
**4.2 Fix Test Imports and Fixtures**
Update imports to match current test infrastructure:
- Check `server/tests/conftest.py` for fixture patterns
- Update database access patterns if changed
- Fix any import errors
**4.3 Run Tests**
```bash
cd server
# Run with environment variables for Mac
REDIS_HOST=localhost \
CELERY_BROKER_URL=redis://localhost:6379/1 \
CELERY_RESULT_BACKEND=redis://localhost:6379/1 \
uv run pytest tests/test_video_platforms.py -v
```
### Phase 5: Environment Configuration
**Update `server/env.example`:**
Add at the end:
```bash
# Daily.co API Integration
DAILY_API_KEY=your-daily-api-key
DAILY_WEBHOOK_SECRET=your-daily-webhook-secret
DAILY_SUBDOMAIN=your-subdomain
AWS_DAILY_S3_BUCKET=your-daily-bucket
AWS_DAILY_S3_REGION=us-west-2
AWS_DAILY_ROLE_ARN=arn:aws:iam::ACCOUNT:role/DailyRecording
# Platform Selection
DAILY_MIGRATION_ENABLED=false # Master switch
DAILY_MIGRATION_ROOM_IDS=[] # Specific room IDs
DEFAULT_VIDEO_PLATFORM=whereby # Default platform
```
## Decision Tree: Copy vs Adapt vs Rewrite
```
┌─ Is it pure abstraction logic? (base.py, registry.py, models.py)
│ YES → Copy directly, review imports
│ NO → Continue ↓
├─ Does it touch database models?
│ YES → Adapt carefully, preserve main's fields
│ NO → Continue ↓
├─ Does it make API calls on frontend?
│ YES → Rewrite using React Query
│ NO → Continue ↓
├─ Is it a database migration?
│ YES → Generate fresh from current schema
│ NO → Continue ↓
└─ Does it touch rooms.py or core business logic?
YES → Merge carefully, preserve calendar/webhooks
NO → Safe to adapt from reference
```
## Verification Checklist
After each phase, verify:
**Phase 1 (Abstraction Layer):**
- [ ] `uv run ruff check server/reflector/video_platforms/` passes
- [ ] No circular import errors
- [ ] Can import `from reflector.video_platforms import create_platform_client`
**Phase 2 (Backend Integration):**
- [ ] `uv run ruff check server/` passes
- [ ] Migration file generated (not copied)
- [ ] Room and Meeting models have platform field
- [ ] rooms.py still has calendar/webhook features
**Phase 3 (Frontend):**
- [ ] `pnpm lint` passes
- [ ] No TypeScript errors
- [ ] No `@ts-ignore` for platform field
- [ ] API calls use React Query patterns
**Phase 4 (Testing):**
- [ ] Tests can be collected: `pytest tests/test_video_platforms.py --collect-only`
- [ ] Database fixtures work
- [ ] Mock platform works
**Phase 5 (Config):**
- [ ] env.example has Daily.co variables
- [ ] settings.py has all new variables
- [ ] No duplicate variable definitions
## Common Pitfalls
### 1. Database Schema Conflicts
**Problem:** Reference removed fields that main has (calendar, webhooks)
**Solution:** Always preserve main's fields, only add platform field
### 2. Migration Conflicts
**Problem:** Reference migration has wrong `down_revision`
**Solution:** Always generate fresh migration from current main
### 3. Frontend API Calls
**Problem:** Reference uses old API client patterns
**Solution:** Check current main's API usage, replicate that pattern
### 4. Import Errors
**Problem:** Circular imports with TYPE_CHECKING
**Solution:** Use `if TYPE_CHECKING:` for Room/Meeting imports in video_platforms
### 5. Test Database Issues
**Problem:** Tests fail with "could not translate host name 'postgres'"
**Solution:** Use environment variables: `REDIS_HOST=localhost DATABASE_URL=...`
### 6. Preserved Features Broken
**Problem:** Calendar/webhook features stop working
**Solution:** Carefully review rooms.py diff, only change meeting creation, not calendar logic
## File Modification Summary
**New files (can copy):**
- `server/reflector/video_platforms/*.py` (entire directory)
- `server/reflector/views/daily.py`
- `server/tests/test_video_platforms.py`
- `server/tests/test_daily_webhook.py`
- `www/app/[roomName]/components/RoomContainer.tsx`
- `www/app/[roomName]/components/DailyRoom.tsx`
- `www/app/[roomName]/components/WherebyRoom.tsx`
**Modified files (careful merging):**
- `server/reflector/settings.py` - Add Daily.co settings
- `server/reflector/db/rooms.py` - Add platform field
- `server/reflector/db/meetings.py` - Add platform field
- `server/reflector/views/rooms.py` - Integrate platform abstraction
- `server/reflector/worker/process.py` - Add process_recording_from_url
- `server/reflector/app.py` - Register daily router
- `server/env.example` - Add Daily.co variables
- `www/app/[roomName]/page.tsx` - Use RoomContainer
- `www/package.json` - Add @daily-co/daily-js
**Generated files (do not copy):**
- `server/migrations/versions/XXXXXX_add_platform_support.py` - Generate fresh
## Success Metrics
Implementation is complete when:
- [ ] All tests pass (including new platform tests)
- [ ] Linting passes (ruff, pnpm lint)
- [ ] Migration applies cleanly: `uv run alembic upgrade head`
- [ ] Can create Whereby meeting (existing flow unchanged)
- [ ] Can create Daily.co meeting (with env vars set)
- [ ] Frontend loads without TypeScript errors
- [ ] No features from main were accidentally removed
## Getting Help
**Reference documentation locations:**
- Implementation plan: `PLAN.md`
- Reference implementation: `./reflector-dailyco-reference/`
- Current main codebase: `./ ` (current directory)
**Compare implementations:**
```bash
# Compare specific files
diff reflector-dailyco-reference/server/reflector/video_platforms/base.py \
server/reflector/video_platforms/base.py
# See what changed in rooms.py between reference branch point and now
git log --oneline --since="2025-08-01" -- server/reflector/views/rooms.py
```
**Key insight:** The reference branch validates the approach and provides working code patterns, but you're implementing fresh against current main to avoid merge conflicts and preserve all new features.

2517
PLAN.md

File diff suppressed because it is too large Load Diff

View File

@@ -1,60 +1,43 @@
<div align="center">
<img width="100" alt="image" src="https://github.com/user-attachments/assets/66fb367b-2c89-4516-9912-f47ac59c6a7f"/>
# Reflector
Reflector is an AI-powered audio transcription and meeting analysis platform that provides real-time transcription, speaker diarization, translation and summarization for audio content and live meetings. It works 100% with local models (whisper/parakeet, pyannote, seamless-m4t, and your local llm like phi-4).
Reflector Audio Management and Analysis is a cutting-edge web application under development by Monadical. It utilizes AI to record meetings, providing a permanent record with transcripts, translations, and automated summaries.
[![Tests](https://github.com/monadical-sas/reflector/actions/workflows/test_server.yml/badge.svg?branch=main&event=push)](https://github.com/monadical-sas/reflector/actions/workflows/test_server.yml)
[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://opensource.org/licenses/MIT)
</div>
[![Tests](https://github.com/monadical-sas/cubbi/actions/workflows/pytests.yml/badge.svg?branch=main&event=push)](https://github.com/monadical-sas/cubbi/actions/workflows/pytests.yml)
[![License: MIT](https://img.shields.io/badge/license-AGPL--v3-green.svg)](https://opensource.org/licenses/AGPL-v3)
</div>
## Screenshots
<table>
<tr>
<td>
<a href="https://github.com/user-attachments/assets/21f5597c-2930-4899-a154-f7bd61a59e97">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/21f5597c-2930-4899-a154-f7bd61a59e97" />
<a href="https://github.com/user-attachments/assets/3a976930-56c1-47ef-8c76-55d3864309e3">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/3a976930-56c1-47ef-8c76-55d3864309e3" />
</a>
</td>
<td>
<a href="https://github.com/user-attachments/assets/f6b9399a-5e51-4bae-b807-59128d0a940c">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/f6b9399a-5e51-4bae-b807-59128d0a940c" />
<a href="https://github.com/user-attachments/assets/bfe3bde3-08af-4426-a9a1-11ad5cd63b33">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/bfe3bde3-08af-4426-a9a1-11ad5cd63b33" />
</a>
</td>
<td>
<a href="https://github.com/user-attachments/assets/a42ce460-c1fd-4489-a995-270516193897">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/a42ce460-c1fd-4489-a995-270516193897" />
</a>
</td>
<td>
<a href="https://github.com/user-attachments/assets/21929f6d-c309-42fe-9c11-f1299e50fbd4">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/21929f6d-c309-42fe-9c11-f1299e50fbd4" />
<a href="https://github.com/user-attachments/assets/7b60c9d0-efe4-474f-a27b-ea13bd0fabdc">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/7b60c9d0-efe4-474f-a27b-ea13bd0fabdc" />
</a>
</td>
</tr>
</table>
## What is Reflector?
Reflector is a web application that utilizes local models to process audio content, providing:
- **Real-time Transcription**: Convert speech to text using [Whisper](https://github.com/openai/whisper) (multi-language) or [Parakeet](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) (English) models
- **Speaker Diarization**: Identify and label different speakers using [Pyannote](https://github.com/pyannote/pyannote-audio) 3.1
- **Live Translation**: Translate audio content in real-time to many languages with [Facebook Seamless-M4T](https://github.com/facebookresearch/seamless_communication)
- **Topic Detection & Summarization**: Extract key topics and generate concise summaries using LLMs
- **Meeting Recording**: Create permanent records of meetings with searchable transcripts
Currently we provide [modal.com](https://modal.com/) gpu template to deploy.
## Background
The project architecture consists of three primary components:
- **Back-End**: Python server that offers an API and data persistence, found in `server/`.
- **Front-End**: NextJS React project hosted on Vercel, located in `www/`.
- **GPU implementation**: Providing services such as speech-to-text transcription, topic generation, automated summaries, and translations.
- **Back-End**: Python server that offers an API and data persistence, found in `server/`.
- **GPU implementation**: Providing services such as speech-to-text transcription, topic generation, automated summaries, and translations. Most reliable option is Modal deployment
It also uses authentik for authentication if activated.
It also uses authentik for authentication if activated, and Vercel for deployment and configuration of the front-end.
## Contribution Guidelines
@@ -89,25 +72,24 @@ Note: We currently do not have instructions for Windows users.
## Installation
*Note: we're working toward better installation, theses instructions are not accurate for now*
### Frontend
Start with `cd www`.
Start with `cd backend`.
**Installation**
```bash
pnpm install
cp .env.example .env
yarn install
cp .env_template .env
cp config-template.ts config.ts
```
Then, fill in the environment variables in `.env` as needed. If you are unsure on how to proceed, ask in Zulip.
Then, fill in the environment variables in `.env` and the configuration in `config.ts` as needed. If you are unsure on how to proceed, ask in Zulip.
**Run in development mode**
```bash
pnpm dev
yarn dev
```
Then (after completing server setup and starting it) open [http://localhost:3000](http://localhost:3000) to view it in the browser.
@@ -117,7 +99,7 @@ Then (after completing server setup and starting it) open [http://localhost:3000
To generate the TypeScript files from the openapi.json file, make sure the python server is running, then run:
```bash
pnpm openapi
yarn openapi
```
### Backend
@@ -167,41 +149,3 @@ You can manually process an audio file by calling the process tool:
```bash
uv run python -m reflector.tools.process path/to/audio.wav
```
## Build-time env variables
Next.js projects are more used to NEXT_PUBLIC_ prefixed buildtime vars. We don't have those for the reason we need to serve a ccustomizable prebuild docker container.
Instead, all the variables are runtime. Variables needed to the frontend are served to the frontend app at initial render.
It also means there's no static prebuild and no static files to serve for js/html.
## Feature Flags
Reflector uses environment variable-based feature flags to control application functionality. These flags allow you to enable or disable features without code changes.
### Available Feature Flags
| Feature Flag | Environment Variable |
|-------------|---------------------|
| `requireLogin` | `FEATURE_REQUIRE_LOGIN` |
| `privacy` | `FEATURE_PRIVACY` |
| `browse` | `FEATURE_BROWSE` |
| `sendToZulip` | `FEATURE_SEND_TO_ZULIP` |
| `rooms` | `FEATURE_ROOMS` |
### Setting Feature Flags
Feature flags are controlled via environment variables using the pattern `FEATURE_{FEATURE_NAME}` where `{FEATURE_NAME}` is the SCREAMING_SNAKE_CASE version of the feature name.
**Examples:**
```bash
# Enable user authentication requirement
FEATURE_REQUIRE_LOGIN=true
# Disable browse functionality
FEATURE_BROWSE=false
# Enable Zulip integration
FEATURE_SEND_TO_ZULIP=true
```

View File

@@ -6,7 +6,6 @@ services:
- 1250:1250
volumes:
- ./server/:/app/
- /app/.venv
env_file:
- ./server/.env
environment:
@@ -17,7 +16,6 @@ services:
context: server
volumes:
- ./server/:/app/
- /app/.venv
env_file:
- ./server/.env
environment:
@@ -28,7 +26,6 @@ services:
context: server
volumes:
- ./server/:/app/
- /app/.venv
env_file:
- ./server/.env
environment:
@@ -39,19 +36,16 @@ services:
ports:
- 6379:6379
web:
image: node:22-alpine
image: node:18
ports:
- "3000:3000"
command: sh -c "corepack enable && pnpm install && pnpm dev"
command: sh -c "yarn install && yarn dev"
restart: unless-stopped
working_dir: /app
volumes:
- ./www:/app/
- /app/node_modules
env_file:
- ./www/.env.local
environment:
- NODE_ENV=development
postgres:
image: postgres:17

View File

@@ -1,39 +0,0 @@
# Production Docker Compose configuration for Frontend
# Usage: docker compose -f docker-compose.prod.yml up -d
services:
web:
build:
context: ./www
dockerfile: Dockerfile
image: reflector-frontend:latest
environment:
- KV_URL=${KV_URL:-redis://redis:6379}
- SITE_URL=${SITE_URL}
- API_URL=${API_URL}
- WEBSOCKET_URL=${WEBSOCKET_URL}
- NEXTAUTH_URL=${NEXTAUTH_URL:-http://localhost:3000}
- NEXTAUTH_SECRET=${NEXTAUTH_SECRET:-changeme-in-production}
- AUTHENTIK_ISSUER=${AUTHENTIK_ISSUER}
- AUTHENTIK_CLIENT_ID=${AUTHENTIK_CLIENT_ID}
- AUTHENTIK_CLIENT_SECRET=${AUTHENTIK_CLIENT_SECRET}
- AUTHENTIK_REFRESH_TOKEN_URL=${AUTHENTIK_REFRESH_TOKEN_URL}
- SENTRY_DSN=${SENTRY_DSN}
- SENTRY_IGNORE_API_RESOLUTION_ERROR=${SENTRY_IGNORE_API_RESOLUTION_ERROR:-1}
depends_on:
- redis
restart: unless-stopped
redis:
image: redis:7.2-alpine
restart: unless-stopped
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 30s
timeout: 3s
retries: 3
volumes:
- redis_data:/data
volumes:
redis_data:

View File

@@ -1,33 +0,0 @@
# OS / Editor
.DS_Store
.vscode/
.idea/
# Python
__pycache__/
*.py[cod]
*$py.class
# Logs
*.log
# Env and secrets
.env
.env.*
*.env
*.secret
# Build / dist
build/
dist/
.eggs/
*.egg-info/
# Coverage / test
.pytest_cache/
.coverage*
htmlcov/
# Modal local state (if any)
modal_mounts/
.modal_cache/

View File

@@ -1,171 +0,0 @@
# Reflector GPU implementation - Transcription and LLM
This repository hold an API for the GPU implementation of the Reflector API service,
and use [Modal.com](https://modal.com)
- `reflector_diarizer.py` - Diarization API
- `reflector_transcriber.py` - Transcription API (Whisper)
- `reflector_transcriber_parakeet.py` - Transcription API (NVIDIA Parakeet)
- `reflector_translator.py` - Translation API
## Modal.com deployment
Create a modal secret, and name it `reflector-gpu`.
It should contain an `REFLECTOR_APIKEY` environment variable with a value.
The deployment is done using [Modal.com](https://modal.com) service.
```
$ modal deploy reflector_transcriber.py
...
└── 🔨 Created web => https://xxxx--reflector-transcriber-web.modal.run
$ modal deploy reflector_transcriber_parakeet.py
...
└── 🔨 Created web => https://xxxx--reflector-transcriber-parakeet-web.modal.run
$ modal deploy reflector_llm.py
...
└── 🔨 Created web => https://xxxx--reflector-llm-web.modal.run
```
Then in your reflector api configuration `.env`, you can set these keys:
```
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=https://xxxx--reflector-transcriber-web.modal.run
TRANSCRIPT_MODAL_API_KEY=REFLECTOR_APIKEY
DIARIZATION_BACKEND=modal
DIARIZATION_URL=https://xxxx--reflector-diarizer-web.modal.run
DIARIZATION_MODAL_API_KEY=REFLECTOR_APIKEY
TRANSLATION_BACKEND=modal
TRANSLATION_URL=https://xxxx--reflector-translator-web.modal.run
TRANSLATION_MODAL_API_KEY=REFLECTOR_APIKEY
```
## API
Authentication must be passed with the `Authorization` header, using the `bearer` scheme.
```
Authorization: bearer <REFLECTOR_APIKEY>
```
### LLM
`POST /llm`
**request**
```
{
"prompt": "xxx"
}
```
**response**
```
{
"text": "xxx completed"
}
```
### Transcription
#### Parakeet Transcriber (`reflector_transcriber_parakeet.py`)
NVIDIA Parakeet is a state-of-the-art ASR model optimized for real-time transcription with superior word-level timestamps.
**GPU Configuration:**
- **A10G GPU** - Used for `/v1/audio/transcriptions` endpoint (small files, live transcription)
- Higher concurrency (max_inputs=10)
- Optimized for multiple small audio files
- Supports batch processing for efficiency
- **L40S GPU** - Used for `/v1/audio/transcriptions-from-url` endpoint (large files)
- Lower concurrency but more powerful processing
- Optimized for single large audio files
- VAD-based chunking for long-form audio
##### `/v1/audio/transcriptions` - Small file transcription
**request** (multipart/form-data)
- `file` or `files[]` - audio file(s) to transcribe
- `model` - model name (default: `nvidia/parakeet-tdt-0.6b-v2`)
- `language` - language code (default: `en`)
- `batch` - whether to use batch processing for multiple files (default: `true`)
**response**
```json
{
"text": "transcribed text",
"words": [
{"word": "hello", "start": 0.0, "end": 0.5},
{"word": "world", "start": 0.5, "end": 1.0}
],
"filename": "audio.mp3"
}
```
For multiple files with batch=true:
```json
{
"results": [
{
"filename": "audio1.mp3",
"text": "transcribed text",
"words": [...]
},
{
"filename": "audio2.mp3",
"text": "transcribed text",
"words": [...]
}
]
}
```
##### `/v1/audio/transcriptions-from-url` - Large file transcription
**request** (application/json)
```json
{
"audio_file_url": "https://example.com/audio.mp3",
"model": "nvidia/parakeet-tdt-0.6b-v2",
"language": "en",
"timestamp_offset": 0.0
}
```
**response**
```json
{
"text": "transcribed text from large file",
"words": [
{"word": "hello", "start": 0.0, "end": 0.5},
{"word": "world", "start": 0.5, "end": 1.0}
]
}
```
**Supported file types:** mp3, mp4, mpeg, mpga, m4a, wav, webm
#### Whisper Transcriber (`reflector_transcriber.py`)
`POST /transcribe`
**request** (multipart/form-data)
- `file` - audio file
- `language` - language code (e.g. `en`)
**response**
```
{
"text": "xxx",
"words": [
{"text": "xxx", "start": 0.0, "end": 1.0}
]
}
```

View File

@@ -1,253 +0,0 @@
"""
Reflector GPU backend - diarizer
===================================
"""
import os
import uuid
from typing import Mapping, NewType
from urllib.parse import urlparse
import modal
PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.1"
MODEL_DIR = "/root/diarization_models"
UPLOADS_PATH = "/uploads"
SUPPORTED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
DiarizerUniqFilename = NewType("DiarizerUniqFilename", str)
AudioFileExtension = NewType("AudioFileExtension", str)
app = modal.App(name="reflector-diarizer")
# Volume for temporary file uploads
upload_volume = modal.Volume.from_name("diarizer-uploads", create_if_missing=True)
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
parsed_url = urlparse(url)
url_path = parsed_url.path
for ext in SUPPORTED_FILE_EXTENSIONS:
if url_path.lower().endswith(f".{ext}"):
return AudioFileExtension(ext)
content_type = headers.get("content-type", "").lower()
if "audio/mpeg" in content_type or "audio/mp3" in content_type:
return AudioFileExtension("mp3")
if "audio/wav" in content_type:
return AudioFileExtension("wav")
if "audio/mp4" in content_type:
return AudioFileExtension("mp4")
raise ValueError(
f"Unsupported audio format for URL: {url}. "
f"Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
)
def download_audio_to_volume(
audio_file_url: str,
) -> tuple[DiarizerUniqFilename, AudioFileExtension]:
import requests
from fastapi import HTTPException
print(f"Checking audio file at: {audio_file_url}")
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(status_code=404, detail="Audio file not found")
print(f"Downloading audio file from: {audio_file_url}")
response = requests.get(audio_file_url, allow_redirects=True)
if response.status_code != 200:
print(f"Download failed with status {response.status_code}: {response.text}")
raise HTTPException(
status_code=response.status_code,
detail=f"Failed to download audio file: {response.status_code}",
)
audio_suffix = detect_audio_format(audio_file_url, response.headers)
unique_filename = DiarizerUniqFilename(f"{uuid.uuid4()}.{audio_suffix}")
file_path = f"{UPLOADS_PATH}/{unique_filename}"
print(f"Writing file to: {file_path} (size: {len(response.content)} bytes)")
with open(file_path, "wb") as f:
f.write(response.content)
upload_volume.commit()
print(f"File saved as: {unique_filename}")
return unique_filename, audio_suffix
def migrate_cache_llm():
"""
XXX The cache for model files in Transformers v4.22.0 has been updated.
Migrating your old cache. This is a one-time only operation. You can
interrupt this and resume the migration later on by calling
`transformers.utils.move_cache()`.
"""
from transformers.utils.hub import move_cache
print("Moving LLM cache")
move_cache(cache_dir=MODEL_DIR, new_cache_dir=MODEL_DIR)
print("LLM cache moved")
def download_pyannote_audio():
from pyannote.audio import Pipeline
Pipeline.from_pretrained(
PYANNOTE_MODEL_NAME,
cache_dir=MODEL_DIR,
use_auth_token=os.environ["HF_TOKEN"],
)
diarizer_image = (
modal.Image.debian_slim(python_version="3.10.8")
.pip_install(
"pyannote.audio==3.1.0",
"requests",
"onnx",
"torchaudio",
"onnxruntime-gpu",
"torch==2.0.0",
"transformers==4.34.0",
"sentencepiece",
"protobuf",
"numpy",
"huggingface_hub",
"hf-transfer",
)
.run_function(
download_pyannote_audio,
secrets=[modal.Secret.from_name("hf_token")],
)
.run_function(migrate_cache_llm)
.env(
{
"LD_LIBRARY_PATH": (
"/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib/:"
"/opt/conda/lib/python3.10/site-packages/nvidia/cublas/lib/"
)
}
)
)
@app.cls(
gpu="A100",
timeout=60 * 30,
image=diarizer_image,
volumes={UPLOADS_PATH: upload_volume},
enable_memory_snapshot=True,
experimental_options={"enable_gpu_snapshot": True},
secrets=[
modal.Secret.from_name("hf_token"),
],
)
@modal.concurrent(max_inputs=1)
class Diarizer:
@modal.enter(snap=True)
def enter(self):
import torch
from pyannote.audio import Pipeline
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
print(f"Using device: {self.device}")
self.diarization_pipeline = Pipeline.from_pretrained(
PYANNOTE_MODEL_NAME,
cache_dir=MODEL_DIR,
use_auth_token=os.environ["HF_TOKEN"],
)
self.diarization_pipeline.to(torch.device(self.device))
@modal.method()
def diarize(self, filename: str, timestamp: float = 0.0):
import torchaudio
upload_volume.reload()
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
print(f"Diarizing audio from: {file_path}")
waveform, sample_rate = torchaudio.load(file_path)
diarization = self.diarization_pipeline(
{"waveform": waveform, "sample_rate": sample_rate}
)
words = []
for diarization_segment, _, speaker in diarization.itertracks(yield_label=True):
words.append(
{
"start": round(timestamp + diarization_segment.start, 3),
"end": round(timestamp + diarization_segment.end, 3),
"speaker": int(speaker[-2:]),
}
)
print("Diarization complete")
return {"diarization": words}
# -------------------------------------------------------------------
# Web API
# -------------------------------------------------------------------
@app.function(
timeout=60 * 10,
scaledown_window=60 * 3,
secrets=[
modal.Secret.from_name("reflector-gpu"),
],
volumes={UPLOADS_PATH: upload_volume},
image=diarizer_image,
)
@modal.concurrent(max_inputs=40)
@modal.asgi_app()
def web():
from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel
diarizerstub = Diarizer()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
class DiarizationResponse(BaseModel):
result: dict
@app.post("/diarize", dependencies=[Depends(apikey_auth)])
def diarize(audio_file_url: str, timestamp: float = 0.0) -> DiarizationResponse:
unique_filename, audio_suffix = download_audio_to_volume(audio_file_url)
try:
func = diarizerstub.diarize.spawn(
filename=unique_filename, timestamp=timestamp
)
result = func.get()
return result
finally:
try:
file_path = f"{UPLOADS_PATH}/{unique_filename}"
print(f"Deleting file: {file_path}")
os.remove(file_path)
upload_volume.commit()
except Exception as e:
print(f"Error cleaning up {unique_filename}: {e}")
return app

View File

@@ -1,608 +0,0 @@
import os
import sys
import threading
import uuid
from typing import Generator, Mapping, NamedTuple, NewType, TypedDict
from urllib.parse import urlparse
import modal
MODEL_NAME = "large-v2"
MODEL_COMPUTE_TYPE: str = "float16"
MODEL_NUM_WORKERS: int = 1
MINUTES = 60 # seconds
SAMPLERATE = 16000
UPLOADS_PATH = "/uploads"
CACHE_PATH = "/models"
SUPPORTED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
VAD_CONFIG = {
"batch_max_duration": 30.0,
"silence_padding": 0.5,
"window_size": 512,
}
WhisperUniqFilename = NewType("WhisperUniqFilename", str)
AudioFileExtension = NewType("AudioFileExtension", str)
app = modal.App("reflector-transcriber")
model_cache = modal.Volume.from_name("models", create_if_missing=True)
upload_volume = modal.Volume.from_name("whisper-uploads", create_if_missing=True)
class TimeSegment(NamedTuple):
"""Represents a time segment with start and end times."""
start: float
end: float
class AudioSegment(NamedTuple):
"""Represents an audio segment with timing and audio data."""
start: float
end: float
audio: any
class TranscriptResult(NamedTuple):
"""Represents a transcription result with text and word timings."""
text: str
words: list["WordTiming"]
class WordTiming(TypedDict):
"""Represents a word with its timing information."""
word: str
start: float
end: float
def download_model():
from faster_whisper import download_model
model_cache.reload()
download_model(MODEL_NAME, cache_dir=CACHE_PATH)
model_cache.commit()
image = (
modal.Image.debian_slim(python_version="3.12")
.env(
{
"HF_HUB_ENABLE_HF_TRANSFER": "1",
"LD_LIBRARY_PATH": (
"/usr/local/lib/python3.12/site-packages/nvidia/cudnn/lib/:"
"/opt/conda/lib/python3.12/site-packages/nvidia/cublas/lib/"
),
}
)
.apt_install("ffmpeg")
.pip_install(
"huggingface_hub==0.27.1",
"hf-transfer==0.1.9",
"torch==2.5.1",
"faster-whisper==1.1.1",
"fastapi==0.115.12",
"requests",
"librosa==0.10.1",
"numpy<2",
"silero-vad==5.1.0",
)
.run_function(download_model, volumes={CACHE_PATH: model_cache})
)
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
parsed_url = urlparse(url)
url_path = parsed_url.path
for ext in SUPPORTED_FILE_EXTENSIONS:
if url_path.lower().endswith(f".{ext}"):
return AudioFileExtension(ext)
content_type = headers.get("content-type", "").lower()
if "audio/mpeg" in content_type or "audio/mp3" in content_type:
return AudioFileExtension("mp3")
if "audio/wav" in content_type:
return AudioFileExtension("wav")
if "audio/mp4" in content_type:
return AudioFileExtension("mp4")
raise ValueError(
f"Unsupported audio format for URL: {url}. "
f"Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
)
def download_audio_to_volume(
audio_file_url: str,
) -> tuple[WhisperUniqFilename, AudioFileExtension]:
import requests
from fastapi import HTTPException
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(status_code=404, detail="Audio file not found")
response = requests.get(audio_file_url, allow_redirects=True)
response.raise_for_status()
audio_suffix = detect_audio_format(audio_file_url, response.headers)
unique_filename = WhisperUniqFilename(f"{uuid.uuid4()}.{audio_suffix}")
file_path = f"{UPLOADS_PATH}/{unique_filename}"
with open(file_path, "wb") as f:
f.write(response.content)
upload_volume.commit()
return unique_filename, audio_suffix
def pad_audio(audio_array, sample_rate: int = SAMPLERATE):
"""Add 0.5s of silence if audio is shorter than the silence_padding window.
Whisper does not require this strictly, but aligning behavior with Parakeet
avoids edge-case crashes on extremely short inputs and makes comparisons easier.
"""
import numpy as np
audio_duration = len(audio_array) / sample_rate
if audio_duration < VAD_CONFIG["silence_padding"]:
silence_samples = int(sample_rate * VAD_CONFIG["silence_padding"])
silence = np.zeros(silence_samples, dtype=np.float32)
return np.concatenate([audio_array, silence])
return audio_array
@app.cls(
gpu="A10G",
timeout=5 * MINUTES,
scaledown_window=5 * MINUTES,
image=image,
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
)
@modal.concurrent(max_inputs=10)
class TranscriberWhisperLive:
"""Live transcriber class for small audio segments (A10G).
Mirrors the Parakeet live class API but uses Faster-Whisper under the hood.
"""
@modal.enter()
def enter(self):
import faster_whisper
import torch
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
self.model = faster_whisper.WhisperModel(
MODEL_NAME,
device=self.device,
compute_type=MODEL_COMPUTE_TYPE,
num_workers=MODEL_NUM_WORKERS,
download_root=CACHE_PATH,
local_files_only=True,
)
print(f"Model is on device: {self.device}")
@modal.method()
def transcribe_segment(
self,
filename: str,
language: str = "en",
):
"""Transcribe a single uploaded audio file by filename."""
upload_volume.reload()
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
with self.lock:
with NoStdStreams():
segments, _ = self.model.transcribe(
file_path,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(segment.text for segment in segments).strip()
words = [
{
"word": word.word,
"start": round(float(word.start), 2),
"end": round(float(word.end), 2),
}
for segment in segments
for word in segment.words
]
return {"text": text, "words": words}
@modal.method()
def transcribe_batch(
self,
filenames: list[str],
language: str = "en",
):
"""Transcribe multiple uploaded audio files and return per-file results."""
upload_volume.reload()
results = []
for filename in filenames:
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"Batch file not found: {file_path}")
with self.lock:
with NoStdStreams():
segments, _ = self.model.transcribe(
file_path,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(seg.text for seg in segments).strip()
words = [
{
"word": w.word,
"start": round(float(w.start), 2),
"end": round(float(w.end), 2),
}
for seg in segments
for w in seg.words
]
results.append(
{
"filename": filename,
"text": text,
"words": words,
}
)
return results
@app.cls(
gpu="L40S",
timeout=15 * MINUTES,
image=image,
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
)
class TranscriberWhisperFile:
"""File transcriber for larger/longer audio, using VAD-driven batching (L40S)."""
@modal.enter()
def enter(self):
import faster_whisper
import torch
from silero_vad import load_silero_vad
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
self.model = faster_whisper.WhisperModel(
MODEL_NAME,
device=self.device,
compute_type=MODEL_COMPUTE_TYPE,
num_workers=MODEL_NUM_WORKERS,
download_root=CACHE_PATH,
local_files_only=True,
)
self.vad_model = load_silero_vad(onnx=False)
@modal.method()
def transcribe_segment(
self, filename: str, timestamp_offset: float = 0.0, language: str = "en"
):
import librosa
import numpy as np
from silero_vad import VADIterator
def vad_segments(
audio_array,
sample_rate: int = SAMPLERATE,
window_size: int = VAD_CONFIG["window_size"],
) -> Generator[TimeSegment, None, None]:
"""Generate speech segments as TimeSegment using Silero VAD."""
iterator = VADIterator(self.vad_model, sampling_rate=sample_rate)
start = None
for i in range(0, len(audio_array), window_size):
chunk = audio_array[i : i + window_size]
if len(chunk) < window_size:
chunk = np.pad(
chunk, (0, window_size - len(chunk)), mode="constant"
)
speech = iterator(chunk)
if not speech:
continue
if "start" in speech:
start = speech["start"]
continue
if "end" in speech and start is not None:
end = speech["end"]
yield TimeSegment(
start / float(SAMPLERATE), end / float(SAMPLERATE)
)
start = None
iterator.reset_states()
upload_volume.reload()
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
audio_array, _sr = librosa.load(file_path, sr=SAMPLERATE, mono=True)
# Batch segments up to ~30s windows by merging contiguous VAD segments
merged_batches: list[TimeSegment] = []
batch_start = None
batch_end = None
max_duration = VAD_CONFIG["batch_max_duration"]
for segment in vad_segments(audio_array):
seg_start, seg_end = segment.start, segment.end
if batch_start is None:
batch_start, batch_end = seg_start, seg_end
continue
if seg_end - batch_start <= max_duration:
batch_end = seg_end
else:
merged_batches.append(TimeSegment(batch_start, batch_end))
batch_start, batch_end = seg_start, seg_end
if batch_start is not None and batch_end is not None:
merged_batches.append(TimeSegment(batch_start, batch_end))
all_text = []
all_words = []
for segment in merged_batches:
start_time, end_time = segment.start, segment.end
s_idx = int(start_time * SAMPLERATE)
e_idx = int(end_time * SAMPLERATE)
segment = audio_array[s_idx:e_idx]
segment = pad_audio(segment, SAMPLERATE)
with self.lock:
segments, _ = self.model.transcribe(
segment,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(seg.text for seg in segments).strip()
words = [
{
"word": w.word,
"start": round(float(w.start) + start_time + timestamp_offset, 2),
"end": round(float(w.end) + start_time + timestamp_offset, 2),
}
for seg in segments
for w in seg.words
]
if text:
all_text.append(text)
all_words.extend(words)
return {"text": " ".join(all_text), "words": all_words}
def detect_audio_format(url: str, headers: dict) -> str:
from urllib.parse import urlparse
from fastapi import HTTPException
url_path = urlparse(url).path
for ext in SUPPORTED_FILE_EXTENSIONS:
if url_path.lower().endswith(f".{ext}"):
return ext
content_type = headers.get("content-type", "").lower()
if "audio/mpeg" in content_type or "audio/mp3" in content_type:
return "mp3"
if "audio/wav" in content_type:
return "wav"
if "audio/mp4" in content_type:
return "mp4"
raise HTTPException(
status_code=400,
detail=(
f"Unsupported audio format for URL. Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
),
)
def download_audio_to_volume(audio_file_url: str) -> tuple[str, str]:
import requests
from fastapi import HTTPException
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(status_code=404, detail="Audio file not found")
response = requests.get(audio_file_url, allow_redirects=True)
response.raise_for_status()
audio_suffix = detect_audio_format(audio_file_url, response.headers)
unique_filename = f"{uuid.uuid4()}.{audio_suffix}"
file_path = f"{UPLOADS_PATH}/{unique_filename}"
with open(file_path, "wb") as f:
f.write(response.content)
upload_volume.commit()
return unique_filename, audio_suffix
@app.function(
scaledown_window=60,
timeout=600,
secrets=[
modal.Secret.from_name("reflector-gpu"),
],
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
image=image,
)
@modal.concurrent(max_inputs=40)
@modal.asgi_app()
def web():
from fastapi import (
Body,
Depends,
FastAPI,
Form,
HTTPException,
UploadFile,
status,
)
from fastapi.security import OAuth2PasswordBearer
transcriber_live = TranscriberWhisperLive()
transcriber_file = TranscriberWhisperFile()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey == os.environ["REFLECTOR_GPU_APIKEY"]:
return
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
class TranscriptResponse(dict):
pass
@app.post("/v1/audio/transcriptions", dependencies=[Depends(apikey_auth)])
def transcribe(
file: UploadFile = None,
files: list[UploadFile] | None = None,
model: str = Form(MODEL_NAME),
language: str = Form("en"),
batch: bool = Form(False),
):
if not file and not files:
raise HTTPException(
status_code=400, detail="Either 'file' or 'files' parameter is required"
)
if batch and not files:
raise HTTPException(
status_code=400, detail="Batch transcription requires 'files'"
)
upload_files = [file] if file else files
uploaded_filenames: list[str] = []
for upload_file in upload_files:
audio_suffix = upload_file.filename.split(".")[-1]
if audio_suffix not in SUPPORTED_FILE_EXTENSIONS:
raise HTTPException(
status_code=400,
detail=(
f"Unsupported audio format. Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
),
)
unique_filename = f"{uuid.uuid4()}.{audio_suffix}"
file_path = f"{UPLOADS_PATH}/{unique_filename}"
with open(file_path, "wb") as f:
content = upload_file.file.read()
f.write(content)
uploaded_filenames.append(unique_filename)
upload_volume.commit()
try:
if batch and len(upload_files) > 1:
func = transcriber_live.transcribe_batch.spawn(
filenames=uploaded_filenames,
language=language,
)
results = func.get()
return {"results": results}
results = []
for filename in uploaded_filenames:
func = transcriber_live.transcribe_segment.spawn(
filename=filename,
language=language,
)
result = func.get()
result["filename"] = filename
results.append(result)
return {"results": results} if len(results) > 1 else results[0]
finally:
for filename in uploaded_filenames:
try:
file_path = f"{UPLOADS_PATH}/{filename}"
os.remove(file_path)
except Exception:
pass
upload_volume.commit()
@app.post("/v1/audio/transcriptions-from-url", dependencies=[Depends(apikey_auth)])
def transcribe_from_url(
audio_file_url: str = Body(
..., description="URL of the audio file to transcribe"
),
model: str = Body(MODEL_NAME),
language: str = Body("en"),
timestamp_offset: float = Body(0.0),
):
unique_filename, _audio_suffix = download_audio_to_volume(audio_file_url)
try:
func = transcriber_file.transcribe_segment.spawn(
filename=unique_filename,
timestamp_offset=timestamp_offset,
language=language,
)
result = func.get()
return result
finally:
try:
file_path = f"{UPLOADS_PATH}/{unique_filename}"
os.remove(file_path)
upload_volume.commit()
except Exception:
pass
return app
class NoStdStreams:
def __init__(self):
self.devnull = open(os.devnull, "w")
def __enter__(self):
self._stdout, self._stderr = sys.stdout, sys.stderr
self._stdout.flush()
self._stderr.flush()
sys.stdout, sys.stderr = self.devnull, self.devnull
def __exit__(self, exc_type, exc_value, traceback):
sys.stdout, sys.stderr = self._stdout, self._stderr
self.devnull.close()

View File

@@ -1,658 +0,0 @@
import logging
import os
import sys
import threading
import uuid
from typing import Generator, Mapping, NamedTuple, NewType, TypedDict
from urllib.parse import urlparse
import modal
MODEL_NAME = "nvidia/parakeet-tdt-0.6b-v2"
SUPPORTED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
SAMPLERATE = 16000
UPLOADS_PATH = "/uploads"
CACHE_PATH = "/cache"
VAD_CONFIG = {
"batch_max_duration": 30.0,
"silence_padding": 0.5,
"window_size": 512,
}
ParakeetUniqFilename = NewType("ParakeetUniqFilename", str)
AudioFileExtension = NewType("AudioFileExtension", str)
class TimeSegment(NamedTuple):
"""Represents a time segment with start and end times."""
start: float
end: float
class AudioSegment(NamedTuple):
"""Represents an audio segment with timing and audio data."""
start: float
end: float
audio: any
class TranscriptResult(NamedTuple):
"""Represents a transcription result with text and word timings."""
text: str
words: list["WordTiming"]
class WordTiming(TypedDict):
"""Represents a word with its timing information."""
word: str
start: float
end: float
app = modal.App("reflector-transcriber-parakeet")
# Volume for caching model weights
model_cache = modal.Volume.from_name("parakeet-model-cache", create_if_missing=True)
# Volume for temporary file uploads
upload_volume = modal.Volume.from_name("parakeet-uploads", create_if_missing=True)
image = (
modal.Image.from_registry(
"nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04", add_python="3.12"
)
.env(
{
"HF_HUB_ENABLE_HF_TRANSFER": "1",
"HF_HOME": "/cache",
"DEBIAN_FRONTEND": "noninteractive",
"CXX": "g++",
"CC": "g++",
}
)
.apt_install("ffmpeg")
.pip_install(
"hf_transfer==0.1.9",
"huggingface_hub[hf-xet]==0.31.2",
"nemo_toolkit[asr]==2.5.0",
"cuda-python==12.8.0",
"fastapi==0.115.12",
"numpy<2",
"librosa==0.10.1",
"requests",
"silero-vad==5.1.0",
"torch",
)
.entrypoint([]) # silence chatty logs by container on start
)
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
parsed_url = urlparse(url)
url_path = parsed_url.path
for ext in SUPPORTED_FILE_EXTENSIONS:
if url_path.lower().endswith(f".{ext}"):
return AudioFileExtension(ext)
content_type = headers.get("content-type", "").lower()
if "audio/mpeg" in content_type or "audio/mp3" in content_type:
return AudioFileExtension("mp3")
if "audio/wav" in content_type:
return AudioFileExtension("wav")
if "audio/mp4" in content_type:
return AudioFileExtension("mp4")
raise ValueError(
f"Unsupported audio format for URL: {url}. "
f"Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
)
def download_audio_to_volume(
audio_file_url: str,
) -> tuple[ParakeetUniqFilename, AudioFileExtension]:
import requests
from fastapi import HTTPException
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(status_code=404, detail="Audio file not found")
response = requests.get(audio_file_url, allow_redirects=True)
response.raise_for_status()
audio_suffix = detect_audio_format(audio_file_url, response.headers)
unique_filename = ParakeetUniqFilename(f"{uuid.uuid4()}.{audio_suffix}")
file_path = f"{UPLOADS_PATH}/{unique_filename}"
with open(file_path, "wb") as f:
f.write(response.content)
upload_volume.commit()
return unique_filename, audio_suffix
def pad_audio(audio_array, sample_rate: int = SAMPLERATE):
"""Add 0.5 seconds of silence if audio is less than 500ms.
This is a workaround for a Parakeet bug where very short audio (<500ms) causes:
ValueError: `char_offsets`: [] and `processed_tokens`: [157, 834, 834, 841]
have to be of the same length
See: https://github.com/NVIDIA/NeMo/issues/8451
"""
import numpy as np
audio_duration = len(audio_array) / sample_rate
if audio_duration < 0.5:
silence_samples = int(sample_rate * 0.5)
silence = np.zeros(silence_samples, dtype=np.float32)
return np.concatenate([audio_array, silence])
return audio_array
@app.cls(
gpu="A10G",
timeout=600,
scaledown_window=300,
image=image,
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
enable_memory_snapshot=True,
experimental_options={"enable_gpu_snapshot": True},
)
@modal.concurrent(max_inputs=10)
class TranscriberParakeetLive:
@modal.enter(snap=True)
def enter(self):
import nemo.collections.asr as nemo_asr
logging.getLogger("nemo_logger").setLevel(logging.CRITICAL)
self.lock = threading.Lock()
self.model = nemo_asr.models.ASRModel.from_pretrained(model_name=MODEL_NAME)
device = next(self.model.parameters()).device
print(f"Model is on device: {device}")
@modal.method()
def transcribe_segment(
self,
filename: str,
):
import librosa
upload_volume.reload()
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
audio_array, sample_rate = librosa.load(file_path, sr=SAMPLERATE, mono=True)
padded_audio = pad_audio(audio_array, sample_rate)
with self.lock:
with NoStdStreams():
(output,) = self.model.transcribe([padded_audio], timestamps=True)
text = output.text.strip()
words: list[WordTiming] = [
WordTiming(
# XXX the space added here is to match the output of whisper
# whisper add space to each words, while parakeet don't
word=word_info["word"] + " ",
start=round(word_info["start"], 2),
end=round(word_info["end"], 2),
)
for word_info in output.timestamp["word"]
]
return {"text": text, "words": words}
@modal.method()
def transcribe_batch(
self,
filenames: list[str],
):
import librosa
upload_volume.reload()
results = []
audio_arrays = []
# Load all audio files with padding
for filename in filenames:
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"Batch file not found: {file_path}")
audio_array, sample_rate = librosa.load(file_path, sr=SAMPLERATE, mono=True)
padded_audio = pad_audio(audio_array, sample_rate)
audio_arrays.append(padded_audio)
with self.lock:
with NoStdStreams():
outputs = self.model.transcribe(audio_arrays, timestamps=True)
# Process results for each file
for i, (filename, output) in enumerate(zip(filenames, outputs)):
text = output.text.strip()
words: list[WordTiming] = [
WordTiming(
word=word_info["word"] + " ",
start=round(word_info["start"], 2),
end=round(word_info["end"], 2),
)
for word_info in output.timestamp["word"]
]
results.append(
{
"filename": filename,
"text": text,
"words": words,
}
)
return results
# L40S class for file transcription (bigger files)
@app.cls(
gpu="L40S",
timeout=900,
image=image,
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
enable_memory_snapshot=True,
experimental_options={"enable_gpu_snapshot": True},
)
class TranscriberParakeetFile:
@modal.enter(snap=True)
def enter(self):
import nemo.collections.asr as nemo_asr
import torch
from silero_vad import load_silero_vad
logging.getLogger("nemo_logger").setLevel(logging.CRITICAL)
self.model = nemo_asr.models.ASRModel.from_pretrained(model_name=MODEL_NAME)
device = next(self.model.parameters()).device
print(f"Model is on device: {device}")
torch.set_num_threads(1)
self.vad_model = load_silero_vad(onnx=False)
print("Silero VAD initialized")
@modal.method()
def transcribe_segment(
self,
filename: str,
timestamp_offset: float = 0.0,
):
import librosa
import numpy as np
from silero_vad import VADIterator
def load_and_convert_audio(file_path):
audio_array, sample_rate = librosa.load(file_path, sr=SAMPLERATE, mono=True)
return audio_array
def vad_segment_generator(
audio_array,
) -> Generator[TimeSegment, None, None]:
"""Generate speech segments using VAD with start/end sample indices"""
vad_iterator = VADIterator(self.vad_model, sampling_rate=SAMPLERATE)
window_size = VAD_CONFIG["window_size"]
start = None
for i in range(0, len(audio_array), window_size):
chunk = audio_array[i : i + window_size]
if len(chunk) < window_size:
chunk = np.pad(
chunk, (0, window_size - len(chunk)), mode="constant"
)
speech_dict = vad_iterator(chunk)
if not speech_dict:
continue
if "start" in speech_dict:
start = speech_dict["start"]
continue
if "end" in speech_dict and start is not None:
end = speech_dict["end"]
start_time = start / float(SAMPLERATE)
end_time = end / float(SAMPLERATE)
yield TimeSegment(start_time, end_time)
start = None
vad_iterator.reset_states()
def batch_speech_segments(
segments: Generator[TimeSegment, None, None], max_duration: int
) -> Generator[TimeSegment, None, None]:
"""
Input segments:
[0-2] [3-5] [6-8] [10-11] [12-15] [17-19] [20-22]
↓ (max_duration=10)
Output batches:
[0-8] [10-19] [20-22]
Note: silences are kept for better transcription, previous implementation was
passing segments separatly, but the output was less accurate.
"""
batch_start_time = None
batch_end_time = None
for segment in segments:
start_time, end_time = segment.start, segment.end
if batch_start_time is None or batch_end_time is None:
batch_start_time = start_time
batch_end_time = end_time
continue
total_duration = end_time - batch_start_time
if total_duration <= max_duration:
batch_end_time = end_time
continue
yield TimeSegment(batch_start_time, batch_end_time)
batch_start_time = start_time
batch_end_time = end_time
if batch_start_time is None or batch_end_time is None:
return
yield TimeSegment(batch_start_time, batch_end_time)
def batch_segment_to_audio_segment(
segments: Generator[TimeSegment, None, None],
audio_array,
) -> Generator[AudioSegment, None, None]:
"""Extract audio segments and apply padding for Parakeet compatibility.
Uses pad_audio to ensure segments are at least 0.5s long, preventing
Parakeet crashes. This padding may cause slight timing overlaps between
segments, which are corrected by enforce_word_timing_constraints.
"""
for segment in segments:
start_time, end_time = segment.start, segment.end
start_sample = int(start_time * SAMPLERATE)
end_sample = int(end_time * SAMPLERATE)
audio_segment = audio_array[start_sample:end_sample]
padded_segment = pad_audio(audio_segment, SAMPLERATE)
yield AudioSegment(start_time, end_time, padded_segment)
def transcribe_batch(model, audio_segments: list) -> list:
with NoStdStreams():
outputs = model.transcribe(audio_segments, timestamps=True)
return outputs
def enforce_word_timing_constraints(
words: list[WordTiming],
) -> list[WordTiming]:
"""Enforce that word end times don't exceed the start time of the next word.
Due to silence padding added in batch_segment_to_audio_segment for better
transcription accuracy, word timings from different segments may overlap.
This function ensures there are no overlaps by adjusting end times.
"""
if len(words) <= 1:
return words
enforced_words = []
for i, word in enumerate(words):
enforced_word = word.copy()
if i < len(words) - 1:
next_start = words[i + 1]["start"]
if enforced_word["end"] > next_start:
enforced_word["end"] = next_start
enforced_words.append(enforced_word)
return enforced_words
def emit_results(
results: list,
segments_info: list[AudioSegment],
) -> Generator[TranscriptResult, None, None]:
"""Yield transcribed text and word timings from model output, adjusting timestamps to absolute positions."""
for i, (output, segment) in enumerate(zip(results, segments_info)):
start_time, end_time = segment.start, segment.end
text = output.text.strip()
words: list[WordTiming] = [
WordTiming(
word=word_info["word"] + " ",
start=round(
word_info["start"] + start_time + timestamp_offset, 2
),
end=round(word_info["end"] + start_time + timestamp_offset, 2),
)
for word_info in output.timestamp["word"]
]
yield TranscriptResult(text, words)
upload_volume.reload()
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
audio_array = load_and_convert_audio(file_path)
total_duration = len(audio_array) / float(SAMPLERATE)
all_text_parts: list[str] = []
all_words: list[WordTiming] = []
raw_segments = vad_segment_generator(audio_array)
speech_segments = batch_speech_segments(
raw_segments,
VAD_CONFIG["batch_max_duration"],
)
audio_segments = batch_segment_to_audio_segment(speech_segments, audio_array)
for batch in audio_segments:
audio_segment = batch.audio
results = transcribe_batch(self.model, [audio_segment])
for result in emit_results(
results,
[batch],
):
if not result.text:
continue
all_text_parts.append(result.text)
all_words.extend(result.words)
all_words = enforce_word_timing_constraints(all_words)
combined_text = " ".join(all_text_parts)
return {"text": combined_text, "words": all_words}
@app.function(
scaledown_window=60,
timeout=600,
secrets=[
modal.Secret.from_name("reflector-gpu"),
],
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
image=image,
)
@modal.concurrent(max_inputs=40)
@modal.asgi_app()
def web():
import os
import uuid
from fastapi import (
Body,
Depends,
FastAPI,
Form,
HTTPException,
UploadFile,
status,
)
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel
transcriber_live = TranscriberParakeetLive()
transcriber_file = TranscriberParakeetFile()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey == os.environ["REFLECTOR_GPU_APIKEY"]:
return
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
class TranscriptResponse(BaseModel):
result: dict
@app.post("/v1/audio/transcriptions", dependencies=[Depends(apikey_auth)])
def transcribe(
file: UploadFile = None,
files: list[UploadFile] | None = None,
model: str = Form(MODEL_NAME),
language: str = Form("en"),
batch: bool = Form(False),
):
# Parakeet only supports English
if language != "en":
raise HTTPException(
status_code=400,
detail=f"Parakeet model only supports English. Got language='{language}'",
)
# Handle both single file and multiple files
if not file and not files:
raise HTTPException(
status_code=400, detail="Either 'file' or 'files' parameter is required"
)
if batch and not files:
raise HTTPException(
status_code=400, detail="Batch transcription requires 'files'"
)
upload_files = [file] if file else files
# Upload files to volume
uploaded_filenames = []
for upload_file in upload_files:
audio_suffix = upload_file.filename.split(".")[-1]
assert audio_suffix in SUPPORTED_FILE_EXTENSIONS
# Generate unique filename
unique_filename = f"{uuid.uuid4()}.{audio_suffix}"
file_path = f"{UPLOADS_PATH}/{unique_filename}"
print(f"Writing file to: {file_path}")
with open(file_path, "wb") as f:
content = upload_file.file.read()
f.write(content)
uploaded_filenames.append(unique_filename)
upload_volume.commit()
try:
# Use A10G live transcriber for per-file transcription
if batch and len(upload_files) > 1:
# Use batch transcription
func = transcriber_live.transcribe_batch.spawn(
filenames=uploaded_filenames,
)
results = func.get()
return {"results": results}
# Per-file transcription
results = []
for filename in uploaded_filenames:
func = transcriber_live.transcribe_segment.spawn(
filename=filename,
)
result = func.get()
result["filename"] = filename
results.append(result)
return {"results": results} if len(results) > 1 else results[0]
finally:
for filename in uploaded_filenames:
try:
file_path = f"{UPLOADS_PATH}/{filename}"
print(f"Deleting file: {file_path}")
os.remove(file_path)
except Exception as e:
print(f"Error deleting {filename}: {e}")
upload_volume.commit()
@app.post("/v1/audio/transcriptions-from-url", dependencies=[Depends(apikey_auth)])
def transcribe_from_url(
audio_file_url: str = Body(
..., description="URL of the audio file to transcribe"
),
model: str = Body(MODEL_NAME),
language: str = Body("en", description="Language code (only 'en' supported)"),
timestamp_offset: float = Body(0.0),
):
# Parakeet only supports English
if language != "en":
raise HTTPException(
status_code=400,
detail=f"Parakeet model only supports English. Got language='{language}'",
)
unique_filename, audio_suffix = download_audio_to_volume(audio_file_url)
try:
func = transcriber_file.transcribe_segment.spawn(
filename=unique_filename,
timestamp_offset=timestamp_offset,
)
result = func.get()
return result
finally:
try:
file_path = f"{UPLOADS_PATH}/{unique_filename}"
print(f"Deleting file: {file_path}")
os.remove(file_path)
upload_volume.commit()
except Exception as e:
print(f"Error cleaning up {unique_filename}: {e}")
return app
class NoStdStreams:
def __init__(self):
self.devnull = open(os.devnull, "w")
def __enter__(self):
self._stdout, self._stderr = sys.stdout, sys.stderr
self._stdout.flush()
self._stderr.flush()
sys.stdout, sys.stderr = self.devnull, self.devnull
def __exit__(self, exc_type, exc_value, traceback):
sys.stdout, sys.stderr = self._stdout, self._stderr
self.devnull.close()

View File

@@ -1,2 +0,0 @@
REFLECTOR_GPU_APIKEY=
HF_TOKEN=

View File

@@ -1,38 +0,0 @@
cache/
# OS / Editor
.DS_Store
.vscode/
.idea/
# Python
__pycache__/
*.py[cod]
*$py.class
# Env and secrets
.env
*.env
*.secret
HF_TOKEN
REFLECTOR_GPU_APIKEY
# Virtual env / uv
.venv/
venv/
ENV/
uv/
# Build / dist
build/
dist/
.eggs/
*.egg-info/
# Coverage / test
.pytest_cache/
.coverage*
htmlcov/
# Logs
*.log

View File

@@ -1,46 +0,0 @@
FROM python:3.12-slim
ENV PYTHONUNBUFFERED=1 \
UV_LINK_MODE=copy \
UV_NO_CACHE=1
WORKDIR /tmp
RUN apt-get update \
&& apt-get install -y \
ffmpeg \
curl \
ca-certificates \
gnupg \
wget \
&& apt-get clean
# Add NVIDIA CUDA repo for Debian 12 (bookworm) and install cuDNN 9 for CUDA 12
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb /cuda-keyring.deb
RUN dpkg -i /cuda-keyring.deb \
&& rm /cuda-keyring.deb \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
cuda-cudart-12-6 \
libcublas-12-6 \
libcudnn9-cuda-12 \
libcudnn9-dev-cuda-12 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
RUN mkdir -p /app
WORKDIR /app
COPY pyproject.toml uv.lock /app/
COPY ./app /app/app
COPY ./main.py /app/
COPY ./runserver.sh /app/
EXPOSE 8000
CMD ["sh", "/app/runserver.sh"]

View File

@@ -1,73 +0,0 @@
# Self-hosted Model API
Run transcription, translation, and diarization services compatible with Reflector's GPU Model API. Works on CPU or GPU.
Environment variables
- REFLECTOR_GPU_APIKEY: Optional Bearer token. If unset, auth is disabled.
- HF_TOKEN: Optional. Required for diarization to download pyannote pipelines
Requirements
- FFmpeg must be installed and on PATH (used for URL-based and segmented transcription)
- Python 3.12+
- NVIDIA GPU optional. If available, it will be used automatically
Local run
Set env vars in self_hosted/.env file
uv sync
uv run uvicorn main:app --host 0.0.0.0 --port 8000
Authentication
- If REFLECTOR_GPU_APIKEY is set, include header: Authorization: Bearer <key>
Endpoints
- POST /v1/audio/transcriptions
- multipart/form-data
- fields: file (single file) OR files[] (multiple files), language, batch (true/false)
- response: single { text, words, filename } or { results: [ ... ] }
- POST /v1/audio/transcriptions-from-url
- application/json
- body: { audio_file_url, language, timestamp_offset }
- response: { text, words }
- POST /translate
- text: query parameter
- body (application/json): { source_language, target_language }
- response: { text: { <src>: original, <tgt>: translated } }
- POST /diarize
- query parameters: audio_file_url, timestamp (optional)
- requires HF_TOKEN to be set (for pyannote)
- response: { diarization: [ { start, end, speaker } ] }
OpenAPI docs
- Visit /docs when the server is running
Docker
- Not yet provided in this directory. A Dockerfile will be added later. For now, use Local run above
Conformance tests
# From this directory
TRANSCRIPT_URL=http://localhost:8000 \
TRANSCRIPT_API_KEY=dev-key \
uv run -m pytest -m model_api --no-cov ../../server/tests/test_model_api_transcript.py
TRANSLATION_URL=http://localhost:8000 \
TRANSLATION_API_KEY=dev-key \
uv run -m pytest -m model_api --no-cov ../../server/tests/test_model_api_translation.py
DIARIZATION_URL=http://localhost:8000 \
DIARIZATION_API_KEY=dev-key \
uv run -m pytest -m model_api --no-cov ../../server/tests/test_model_api_diarization.py

View File

@@ -1,19 +0,0 @@
import os
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
required_key = os.environ.get("REFLECTOR_GPU_APIKEY")
if not required_key:
return
if apikey == required_key:
return
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)

View File

@@ -1,12 +0,0 @@
from pathlib import Path
SUPPORTED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
SAMPLE_RATE = 16000
VAD_CONFIG = {
"batch_max_duration": 30.0,
"silence_padding": 0.5,
"window_size": 512,
}
# App-level paths
UPLOADS_PATH = Path("/tmp/whisper-uploads")

View File

@@ -1,30 +0,0 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI
from .routers.diarization import router as diarization_router
from .routers.transcription import router as transcription_router
from .routers.translation import router as translation_router
from .services.transcriber import WhisperService
from .services.diarizer import PyannoteDiarizationService
from .utils import ensure_dirs
@asynccontextmanager
async def lifespan(app: FastAPI):
ensure_dirs()
whisper_service = WhisperService()
whisper_service.load()
app.state.whisper = whisper_service
diarization_service = PyannoteDiarizationService()
diarization_service.load()
app.state.diarizer = diarization_service
yield
def create_app() -> FastAPI:
app = FastAPI(lifespan=lifespan)
app.include_router(transcription_router)
app.include_router(translation_router)
app.include_router(diarization_router)
return app

View File

@@ -1,30 +0,0 @@
from typing import List
from fastapi import APIRouter, Depends, Request
from pydantic import BaseModel
from ..auth import apikey_auth
from ..services.diarizer import PyannoteDiarizationService
from ..utils import download_audio_file
router = APIRouter(tags=["diarization"])
class DiarizationSegment(BaseModel):
start: float
end: float
speaker: int
class DiarizationResponse(BaseModel):
diarization: List[DiarizationSegment]
@router.post(
"/diarize", dependencies=[Depends(apikey_auth)], response_model=DiarizationResponse
)
def diarize(request: Request, audio_file_url: str, timestamp: float = 0.0):
with download_audio_file(audio_file_url) as (file_path, _ext):
file_path = str(file_path)
diarizer: PyannoteDiarizationService = request.app.state.diarizer
return diarizer.diarize_file(file_path, timestamp=timestamp)

View File

@@ -1,109 +0,0 @@
import uuid
from typing import Optional, Union
from fastapi import APIRouter, Body, Depends, Form, HTTPException, Request, UploadFile
from pydantic import BaseModel
from pathlib import Path
from ..auth import apikey_auth
from ..config import SUPPORTED_FILE_EXTENSIONS, UPLOADS_PATH
from ..services.transcriber import MODEL_NAME
from ..utils import cleanup_uploaded_files, download_audio_file
router = APIRouter(prefix="/v1/audio", tags=["transcription"])
class WordTiming(BaseModel):
word: str
start: float
end: float
class TranscriptResult(BaseModel):
text: str
words: list[WordTiming]
filename: Optional[str] = None
class TranscriptBatchResponse(BaseModel):
results: list[TranscriptResult]
@router.post(
"/transcriptions",
dependencies=[Depends(apikey_auth)],
response_model=Union[TranscriptResult, TranscriptBatchResponse],
)
def transcribe(
request: Request,
file: UploadFile = None,
files: list[UploadFile] | None = None,
model: str = Form(MODEL_NAME),
language: str = Form("en"),
batch: bool = Form(False),
):
service = request.app.state.whisper
if not file and not files:
raise HTTPException(
status_code=400, detail="Either 'file' or 'files' parameter is required"
)
if batch and not files:
raise HTTPException(
status_code=400, detail="Batch transcription requires 'files'"
)
upload_files = [file] if file else files
uploaded_paths: list[Path] = []
with cleanup_uploaded_files(uploaded_paths):
for upload_file in upload_files:
audio_suffix = upload_file.filename.split(".")[-1].lower()
if audio_suffix not in SUPPORTED_FILE_EXTENSIONS:
raise HTTPException(
status_code=400,
detail=(
f"Unsupported audio format. Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
),
)
unique_filename = f"{uuid.uuid4()}.{audio_suffix}"
file_path = UPLOADS_PATH / unique_filename
with open(file_path, "wb") as f:
content = upload_file.file.read()
f.write(content)
uploaded_paths.append(file_path)
if batch and len(upload_files) > 1:
results = []
for path in uploaded_paths:
result = service.transcribe_file(str(path), language=language)
result["filename"] = path.name
results.append(result)
return {"results": results}
results = []
for path in uploaded_paths:
result = service.transcribe_file(str(path), language=language)
result["filename"] = path.name
results.append(result)
return {"results": results} if len(results) > 1 else results[0]
@router.post(
"/transcriptions-from-url",
dependencies=[Depends(apikey_auth)],
response_model=TranscriptResult,
)
def transcribe_from_url(
request: Request,
audio_file_url: str = Body(..., description="URL of the audio file to transcribe"),
model: str = Body(MODEL_NAME),
language: str = Body("en"),
timestamp_offset: float = Body(0.0),
):
service = request.app.state.whisper
with download_audio_file(audio_file_url) as (file_path, _ext):
file_path = str(file_path)
result = service.transcribe_vad_url_segment(
file_path=file_path, timestamp_offset=timestamp_offset, language=language
)
return result

View File

@@ -1,28 +0,0 @@
from typing import Dict
from fastapi import APIRouter, Body, Depends
from pydantic import BaseModel
from ..auth import apikey_auth
from ..services.translator import TextTranslatorService
router = APIRouter(tags=["translation"])
translator = TextTranslatorService()
class TranslationResponse(BaseModel):
text: Dict[str, str]
@router.post(
"/translate",
dependencies=[Depends(apikey_auth)],
response_model=TranslationResponse,
)
def translate(
text: str,
source_language: str = Body("en"),
target_language: str = Body("fr"),
):
return translator.translate(text, source_language, target_language)

View File

@@ -1,42 +0,0 @@
import os
import threading
import torch
import torchaudio
from pyannote.audio import Pipeline
class PyannoteDiarizationService:
def __init__(self):
self._pipeline = None
self._device = "cpu"
self._lock = threading.Lock()
def load(self):
self._device = "cuda" if torch.cuda.is_available() else "cpu"
self._pipeline = Pipeline.from_pretrained(
"pyannote/speaker-diarization-3.1",
use_auth_token=os.environ.get("HF_TOKEN"),
)
self._pipeline.to(torch.device(self._device))
def diarize_file(self, file_path: str, timestamp: float = 0.0) -> dict:
if self._pipeline is None:
self.load()
waveform, sample_rate = torchaudio.load(file_path)
with self._lock:
diarization = self._pipeline(
{"waveform": waveform, "sample_rate": sample_rate}
)
words = []
for diarization_segment, _, speaker in diarization.itertracks(yield_label=True):
words.append(
{
"start": round(timestamp + diarization_segment.start, 3),
"end": round(timestamp + diarization_segment.end, 3),
"speaker": int(speaker[-2:])
if speaker and speaker[-2:].isdigit()
else 0,
}
)
return {"diarization": words}

View File

@@ -1,208 +0,0 @@
import os
import shutil
import subprocess
import threading
from typing import Generator
import faster_whisper
import librosa
import numpy as np
import torch
from fastapi import HTTPException
from silero_vad import VADIterator, load_silero_vad
from ..config import SAMPLE_RATE, VAD_CONFIG
# Whisper configuration (service-local defaults)
MODEL_NAME = "large-v2"
# None delegates compute type to runtime: float16 on CUDA, int8 on CPU
MODEL_COMPUTE_TYPE = None
MODEL_NUM_WORKERS = 1
CACHE_PATH = os.path.join(os.path.expanduser("~"), ".cache", "reflector-whisper")
from ..utils import NoStdStreams
class WhisperService:
def __init__(self):
self.model = None
self.device = "cpu"
self.lock = threading.Lock()
def load(self):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = MODEL_COMPUTE_TYPE or (
"float16" if self.device == "cuda" else "int8"
)
self.model = faster_whisper.WhisperModel(
MODEL_NAME,
device=self.device,
compute_type=compute_type,
num_workers=MODEL_NUM_WORKERS,
download_root=CACHE_PATH,
)
def pad_audio(self, audio_array, sample_rate: int = SAMPLE_RATE):
audio_duration = len(audio_array) / sample_rate
if audio_duration < VAD_CONFIG["silence_padding"]:
silence_samples = int(sample_rate * VAD_CONFIG["silence_padding"])
silence = np.zeros(silence_samples, dtype=np.float32)
return np.concatenate([audio_array, silence])
return audio_array
def enforce_word_timing_constraints(self, words: list[dict]) -> list[dict]:
if len(words) <= 1:
return words
enforced: list[dict] = []
for i, word in enumerate(words):
current = dict(word)
if i < len(words) - 1:
next_start = words[i + 1]["start"]
if current["end"] > next_start:
current["end"] = next_start
enforced.append(current)
return enforced
def transcribe_file(self, file_path: str, language: str = "en") -> dict:
input_for_model: str | "object" = file_path
try:
audio_array, _sample_rate = librosa.load(
file_path, sr=SAMPLE_RATE, mono=True
)
if len(audio_array) / float(SAMPLE_RATE) < VAD_CONFIG["silence_padding"]:
input_for_model = self.pad_audio(audio_array, SAMPLE_RATE)
except Exception:
pass
with self.lock:
with NoStdStreams():
segments, _ = self.model.transcribe(
input_for_model,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(segment.text for segment in segments).strip()
words = [
{
"word": word.word,
"start": round(float(word.start), 2),
"end": round(float(word.end), 2),
}
for segment in segments
for word in segment.words
]
words = self.enforce_word_timing_constraints(words)
return {"text": text, "words": words}
def transcribe_vad_url_segment(
self, file_path: str, timestamp_offset: float = 0.0, language: str = "en"
) -> dict:
def load_audio_via_ffmpeg(input_path: str, sample_rate: int) -> np.ndarray:
ffmpeg_bin = shutil.which("ffmpeg") or "ffmpeg"
cmd = [
ffmpeg_bin,
"-nostdin",
"-threads",
"1",
"-i",
input_path,
"-f",
"f32le",
"-acodec",
"pcm_f32le",
"-ac",
"1",
"-ar",
str(sample_rate),
"pipe:1",
]
try:
proc = subprocess.run(
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
)
except Exception as e:
raise HTTPException(status_code=400, detail=f"ffmpeg failed: {e}")
audio = np.frombuffer(proc.stdout, dtype=np.float32)
return audio
def vad_segments(
audio_array,
sample_rate: int = SAMPLE_RATE,
window_size: int = VAD_CONFIG["window_size"],
) -> Generator[tuple[float, float], None, None]:
vad_model = load_silero_vad(onnx=False)
iterator = VADIterator(vad_model, sampling_rate=sample_rate)
start = None
for i in range(0, len(audio_array), window_size):
chunk = audio_array[i : i + window_size]
if len(chunk) < window_size:
chunk = np.pad(
chunk, (0, window_size - len(chunk)), mode="constant"
)
speech = iterator(chunk)
if not speech:
continue
if "start" in speech:
start = speech["start"]
continue
if "end" in speech and start is not None:
end = speech["end"]
yield (start / float(SAMPLE_RATE), end / float(SAMPLE_RATE))
start = None
iterator.reset_states()
audio_array = load_audio_via_ffmpeg(file_path, SAMPLE_RATE)
merged_batches: list[tuple[float, float]] = []
batch_start = None
batch_end = None
max_duration = VAD_CONFIG["batch_max_duration"]
for seg_start, seg_end in vad_segments(audio_array):
if batch_start is None:
batch_start, batch_end = seg_start, seg_end
continue
if seg_end - batch_start <= max_duration:
batch_end = seg_end
else:
merged_batches.append((batch_start, batch_end))
batch_start, batch_end = seg_start, seg_end
if batch_start is not None and batch_end is not None:
merged_batches.append((batch_start, batch_end))
all_text = []
all_words = []
for start_time, end_time in merged_batches:
s_idx = int(start_time * SAMPLE_RATE)
e_idx = int(end_time * SAMPLE_RATE)
segment = audio_array[s_idx:e_idx]
segment = self.pad_audio(segment, SAMPLE_RATE)
with self.lock:
segments, _ = self.model.transcribe(
segment,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(seg.text for seg in segments).strip()
words = [
{
"word": w.word,
"start": round(float(w.start) + start_time + timestamp_offset, 2),
"end": round(float(w.end) + start_time + timestamp_offset, 2),
}
for seg in segments
for w in seg.words
]
if text:
all_text.append(text)
all_words.extend(words)
all_words = self.enforce_word_timing_constraints(all_words)
return {"text": " ".join(all_text), "words": all_words}

View File

@@ -1,44 +0,0 @@
import threading
from transformers import MarianMTModel, MarianTokenizer, pipeline
class TextTranslatorService:
"""Simple text-to-text translator using HuggingFace MarianMT models.
This mirrors the modal translator API shape but uses text translation only.
"""
def __init__(self):
self._pipeline = None
self._lock = threading.Lock()
def load(self, source_language: str = "en", target_language: str = "fr"):
# Pick a default MarianMT model pair if available; fall back to Helsinki-NLP en->fr
model_name = self._resolve_model_name(source_language, target_language)
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
self._pipeline = pipeline("translation", model=model, tokenizer=tokenizer)
def _resolve_model_name(self, src: str, tgt: str) -> str:
# Minimal mapping; extend as needed
pair = (src.lower(), tgt.lower())
mapping = {
("en", "fr"): "Helsinki-NLP/opus-mt-en-fr",
("fr", "en"): "Helsinki-NLP/opus-mt-fr-en",
("en", "es"): "Helsinki-NLP/opus-mt-en-es",
("es", "en"): "Helsinki-NLP/opus-mt-es-en",
("en", "de"): "Helsinki-NLP/opus-mt-en-de",
("de", "en"): "Helsinki-NLP/opus-mt-de-en",
}
return mapping.get(pair, "Helsinki-NLP/opus-mt-en-fr")
def translate(self, text: str, source_language: str, target_language: str) -> dict:
if self._pipeline is None:
self.load(source_language, target_language)
with self._lock:
results = self._pipeline(
text, src_lang=source_language, tgt_lang=target_language
)
translated = results[0]["translation_text"] if results else ""
return {"text": {source_language: text, target_language: translated}}

View File

@@ -1,107 +0,0 @@
import logging
import os
import sys
import uuid
from contextlib import contextmanager
from typing import Mapping
from urllib.parse import urlparse
from pathlib import Path
import requests
from fastapi import HTTPException
from .config import SUPPORTED_FILE_EXTENSIONS, UPLOADS_PATH
logger = logging.getLogger(__name__)
class NoStdStreams:
def __init__(self):
self.devnull = open(os.devnull, "w")
def __enter__(self):
self._stdout, self._stderr = sys.stdout, sys.stderr
self._stdout.flush()
self._stderr.flush()
sys.stdout, sys.stderr = self.devnull, self.devnull
def __exit__(self, exc_type, exc_value, traceback):
sys.stdout, sys.stderr = self._stdout, self._stderr
self.devnull.close()
def ensure_dirs():
UPLOADS_PATH.mkdir(parents=True, exist_ok=True)
def detect_audio_format(url: str, headers: Mapping[str, str]) -> str:
url_path = urlparse(url).path
for ext in SUPPORTED_FILE_EXTENSIONS:
if url_path.lower().endswith(f".{ext}"):
return ext
content_type = headers.get("content-type", "").lower()
if "audio/mpeg" in content_type or "audio/mp3" in content_type:
return "mp3"
if "audio/wav" in content_type:
return "wav"
if "audio/mp4" in content_type:
return "mp4"
raise HTTPException(
status_code=400,
detail=(
f"Unsupported audio format for URL. Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
),
)
def download_audio_to_uploads(audio_file_url: str) -> tuple[Path, str]:
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(status_code=404, detail="Audio file not found")
response = requests.get(audio_file_url, allow_redirects=True)
response.raise_for_status()
audio_suffix = detect_audio_format(audio_file_url, response.headers)
unique_filename = f"{uuid.uuid4()}.{audio_suffix}"
file_path: Path = UPLOADS_PATH / unique_filename
with open(file_path, "wb") as f:
f.write(response.content)
return file_path, audio_suffix
@contextmanager
def download_audio_file(audio_file_url: str):
"""Download an audio file to UPLOADS_PATH and remove it after use.
Yields (file_path: Path, audio_suffix: str).
"""
file_path, audio_suffix = download_audio_to_uploads(audio_file_url)
try:
yield file_path, audio_suffix
finally:
try:
file_path.unlink(missing_ok=True)
except Exception as e:
logger.error("Error deleting temporary file %s: %s", file_path, e)
@contextmanager
def cleanup_uploaded_files(file_paths: list[Path]):
"""Ensure provided file paths are removed after use.
The provided list can be populated inside the context; all present entries
at exit will be deleted.
"""
try:
yield file_paths
finally:
for path in list(file_paths):
try:
path.unlink(missing_ok=True)
except Exception as e:
logger.error("Error deleting temporary file %s: %s", path, e)

View File

@@ -1,10 +0,0 @@
services:
reflector_gpu:
build:
context: .
ports:
- "8000:8000"
env_file:
- .env
volumes:
- ./cache:/root/.cache

View File

@@ -1,3 +0,0 @@
from app.factory import create_app
app = create_app()

View File

@@ -1,19 +0,0 @@
[project]
name = "reflector-gpu"
version = "0.1.0"
description = "Self-hosted GPU service for speech transcription, diarization, and translation via FastAPI."
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"fastapi[standard]>=0.116.1",
"uvicorn[standard]>=0.30.0",
"torch>=2.3.0",
"faster-whisper>=1.1.0",
"librosa==0.10.1",
"numpy<2",
"silero-vad==5.1.0",
"transformers>=4.35.0",
"sentencepiece",
"pyannote.audio==3.1.0",
"torchaudio>=2.3.0",
]

View File

@@ -1,17 +0,0 @@
#!/bin/sh
set -e
export PATH="/root/.local/bin:$PATH"
cd /app
# Install Python dependencies at runtime (first run or when FORCE_SYNC=1)
if [ ! -d "/app/.venv" ] || [ "$FORCE_SYNC" = "1" ]; then
echo "[startup] Installing Python dependencies with uv..."
uv sync --compile-bytecode --locked
else
echo "[startup] Using existing virtual environment at /app/.venv"
fi
exec uv run uvicorn main:app --host 0.0.0.0 --port 8000

3013
gpu/self_hosted/uv.lock generated

File diff suppressed because it is too large Load Diff

16
server/.env_template Normal file
View File

@@ -0,0 +1,16 @@
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
TRANSCRIPT_MODAL_API_KEY=***REMOVED***
LLM_BACKEND=modal
LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
LLM_MODAL_API_KEY=***REMOVED***
TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
BASE_URL=https://xxxxx.ngrok.app
DIARIZATION_ENABLED=false
SQS_POLLING_TIMEOUT_SECONDS=60

3
server/.gitignore vendored
View File

@@ -176,8 +176,7 @@ artefacts/
audio_*.wav
# ignore local database
*.sqlite3
*.db
reflector.sqlite3
data/
dump.rdb

View File

@@ -1,613 +0,0 @@
# Daily.co Integration Test Plan
## ✅ IMPLEMENTATION STATUS: Real Transcription Active
**This test validates Daily.co multitrack recording integration with REAL transcription/diarization.**
The implementation includes complete audio processing pipeline:
- **Multitrack recordings** from Daily.co S3 (separate audio stream per participant)
- **PyAV-based audio mixdown** with PTS-based track alignment
- **Real transcription** via Modal GPU backend (Whisper)
- **Real diarization** via Modal GPU backend (speaker identification)
- **Per-track transcription** with timestamp synchronization
- **Complete database entities** (recording, transcript, topics, participants, words)
**Processing pipeline** (`PipelineMainMultitrack`):
1. Download all audio tracks from Daily.co S3
2. Align tracks by PTS (presentation timestamp) to handle late joiners
3. Mix tracks into single audio file for unified playback
4. Transcribe each track individually with proper offset handling
5. Perform diarization on mixed audio
6. Generate topics, summaries, and word-level timestamps
7. Convert audio to MP3 and generate waveform visualization
**Note:** A stub processor (`process_daily_recording`) exists for testing webhook flow without GPU costs, but the production code path uses `process_multitrack_recording` with full ML pipeline.
---
## Prerequisites
**1. Environment Variables** (check in `.env.development.local`):
```bash
# Daily.co API Configuration
DAILY_API_KEY=<key>
DAILY_SUBDOMAIN=monadical
DAILY_WEBHOOK_SECRET=<base64-encoded-secret>
AWS_DAILY_S3_BUCKET=reflector-dailyco-local
AWS_DAILY_S3_REGION=us-east-1
AWS_DAILY_ROLE_ARN=arn:aws:iam::950402358378:role/DailyCo
DAILY_MIGRATION_ENABLED=true
DAILY_MIGRATION_ROOM_IDS=["552640fd-16f2-4162-9526-8cf40cd2357e"]
# Transcription/Diarization Backend (Required for real processing)
DIARIZATION_BACKEND=modal
DIARIZATION_MODAL_API_KEY=<modal-api-key>
# TRANSCRIPTION_BACKEND is not explicitly set (uses default/modal)
```
**2. Services Running:**
```bash
docker compose ps # server, postgres, redis, worker, beat should be UP
```
**IMPORTANT:** Worker and beat services MUST be running for transcription processing:
```bash
docker compose up -d worker beat
```
**3. ngrok Tunnel for Webhooks:**
```bash
# Start ngrok (if not already running)
ngrok http 1250 --log=stdout > /tmp/ngrok.log 2>&1 &
# Get public URL
curl -s http://localhost:4040/api/tunnels | python3 -c "import sys, json; data=json.load(sys.stdin); print(data['tunnels'][0]['public_url'])"
```
**Current ngrok URL:** `https://0503947384a3.ngrok-free.app` (as of last registration)
**4. Webhook Created:**
```bash
cd server
uv run python scripts/recreate_daily_webhook.py https://0503947384a3.ngrok-free.app/v1/daily/webhook
# Verify: "Created webhook <uuid> (state: ACTIVE)"
```
**Current webhook status:** ✅ ACTIVE (webhook ID: dad5ad16-ceca-488e-8fc5-dae8650b51d0)
---
## Test 1: Database Configuration
**Check room platform:**
```bash
docker-compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT id, name, platform, recording_type FROM room WHERE name = 'test2';"
```
**Expected:**
```
id: 552640fd-16f2-4162-9526-8cf40cd2357e
name: test2
platform: whereby # DB value (overridden by env var DAILY_MIGRATION_ROOM_IDS)
recording_type: cloud
```
**Clear old meetings:**
```bash
docker-compose exec -T postgres psql -U reflector -d reflector -c \
"UPDATE meeting SET is_active = false WHERE room_id = '552640fd-16f2-4162-9526-8cf40cd2357e';"
```
---
## Test 2: Meeting Creation with Auto-Recording
**Create meeting:**
```bash
curl -s -X POST http://localhost:1250/v1/rooms/test2/meeting \
-H "Content-Type: application/json" \
-d '{"allow_duplicated":false}' | python3 -m json.tool
```
**Expected Response:**
```json
{
"room_name": "test2-YYYYMMDDHHMMSS", // Includes "test2" prefix!
"room_url": "https://monadical.daily.co/test2-...?t=<JWT_TOKEN>", // Has token!
"platform": "daily",
"recording_type": "cloud" // DB value (Whereby-specific)
}
```
**Decode token to verify auto-recording:**
```bash
# Extract token from room_url, decode JWT payload
echo "<token>" | python3 -c "
import sys, json, base64
token = sys.stdin.read().strip()
payload = token.split('.')[1] + '=' * (4 - len(token.split('.')[1]) % 4)
print(json.dumps(json.loads(base64.b64decode(payload)), indent=2))
"
```
**Expected token payload:**
```json
{
"r": "test2-YYYYMMDDHHMMSS", // Room name
"sr": true, // start_recording: true ✅
"d": "...", // Domain ID
"iat": 1234567890
}
```
---
## Test 3: Daily.co API Verification
**Check room configuration:**
```bash
ROOM_NAME="<from previous step>"
curl -s -X GET "https://api.daily.co/v1/rooms/$ROOM_NAME" \
-H "Authorization: Bearer $DAILY_API_KEY" | python3 -m json.tool
```
**Expected config:**
```json
{
"config": {
"enable_recording": "raw-tracks", // ✅
"recordings_bucket": {
"bucket_name": "reflector-dailyco-local",
"bucket_region": "us-east-1",
"assume_role_arn": "arn:aws:iam::950402358378:role/DailyCo"
}
}
}
```
---
## Test 4: Browser UI Test (Playwright MCP)
**Using Claude Code MCP tools:**
**Load room:**
```
Use: mcp__playwright__browser_navigate
Input: {"url": "http://localhost:3000/test2"}
Then wait 12 seconds for iframe to load
```
**Verify Daily.co iframe loaded:**
```
Use: mcp__playwright__browser_snapshot
Expected in snapshot:
- iframe element with src containing "monadical.daily.co"
- Daily.co pre-call UI visible
```
**Take screenshot:**
```
Use: mcp__playwright__browser_take_screenshot
Input: {"filename": "test2-before-join.png"}
Expected: Daily.co pre-call UI with "Join" button visible
```
**Join meeting:**
```
Note: Daily.co iframe interaction requires clicking inside iframe.
Use: mcp__playwright__browser_click
Input: {"element": "Join button in Daily.co iframe", "ref": "<ref-from-snapshot>"}
Then wait 5 seconds for call to connect
```
**Verify in-call:**
```
Use: mcp__playwright__browser_take_screenshot
Input: {"filename": "test2-in-call.png"}
Expected: "Waiting for others to join" or participant video visible
```
**Leave meeting:**
```
Use: mcp__playwright__browser_click
Input: {"element": "Leave button in Daily.co iframe", "ref": "<ref-from-snapshot>"}
```
---
**Alternative: JavaScript snippets (for manual testing):**
```javascript
await page.goto('http://localhost:3000/test2');
await new Promise(f => setTimeout(f, 12000)); // Wait for load
// Verify iframe
const iframes = document.querySelectorAll('iframe');
// Expected: 1 iframe with src containing "monadical.daily.co"
// Screenshot
await page.screenshot({ path: 'test2-before-join.png' });
// Join
await page.locator('iframe').contentFrame().getByRole('button', { name: 'Join' }).click();
await new Promise(f => setTimeout(f, 5000));
// In-call screenshot
await page.screenshot({ path: 'test2-in-call.png' });
// Leave
await page.locator('iframe').contentFrame().getByRole('button', { name: 'Leave' }).click();
```
---
## Test 5: Webhook Verification
**Check server logs for webhooks:**
```bash
docker-compose logs --since 15m server 2>&1 | grep -i "participant joined\|recording started"
```
**Expected logs:**
```
[info] Participant joined | meeting_id=... | num_clients=1 | recording_type=cloud | recording_trigger=automatic-2nd-participant
[info] Recording started | meeting_id=... | recording_id=... | platform=daily
```
**Check Daily.co webhook delivery logs:**
```bash
curl -s -X GET "https://api.daily.co/v1/logs/webhooks?limit=20" \
-H "Authorization: Bearer $DAILY_API_KEY" | python3 -c "
import sys, json
logs = json.load(sys.stdin)
for log in logs[:10]:
req = json.loads(log['request'])
room = req.get('payload', {}).get('room') or req.get('payload', {}).get('room_name', 'N/A')
print(f\"{req['type']:30s} | room: {room:30s} | status: {log['status']}\")
"
```
**Expected output:**
```
participant.joined | room: test2-YYYYMMDDHHMMSS | status: 200
recording.started | room: test2-YYYYMMDDHHMMSS | status: 200
participant.left | room: test2-YYYYMMDDHHMMSS | status: 200
recording.ready-to-download | room: test2-YYYYMMDDHHMMSS | status: 200
```
**Check database updated:**
```bash
docker-compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT room_name, num_clients FROM meeting WHERE room_name LIKE 'test2-%' ORDER BY end_date DESC LIMIT 1;"
```
**Expected:**
```
room_name: test2-YYYYMMDDHHMMSS
num_clients: 0 // After participant left
```
---
## Test 6: Recording in S3
**List recent recordings:**
```bash
curl -s -X GET "https://api.daily.co/v1/recordings" \
-H "Authorization: Bearer $DAILY_API_KEY" | python3 -c "
import sys, json
data = json.load(sys.stdin)
for rec in data.get('data', [])[:5]:
if 'test2-' in rec.get('room_name', ''):
print(f\"Room: {rec['room_name']}\")
print(f\"Status: {rec['status']}\")
print(f\"Duration: {rec.get('duration', 0)}s\")
print(f\"S3 key: {rec.get('s3key', 'N/A')}\")
print(f\"Tracks: {len(rec.get('tracks', []))} files\")
for track in rec.get('tracks', []):
print(f\" - {track['type']}: {track['s3Key'].split('/')[-1]} ({track['size']} bytes)\")
print()
"
```
**Expected output:**
```
Room: test2-20251009192341
Status: finished
Duration: ~30-120s
S3 key: monadical/test2-20251009192341/1760037914930
Tracks: 2 files
- audio: 1760037914930-<uuid>-cam-audio-1760037915265 (~400 KB)
- video: 1760037914930-<uuid>-cam-video-1760037915269 (~10-30 MB)
```
**Verify S3 path structure:**
- `monadical/` - Daily.co subdomain
- `test2-20251009192341/` - Reflector room name + timestamp
- `<timestamp>-<participant-uuid>-<media-type>-<track-start>.webm` - Individual track files
---
## Test 7: Database Check - Recording and Transcript
**Check recording created:**
```bash
docker-compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT id, bucket_name, object_key, status, meeting_id, recorded_at
FROM recording
ORDER BY recorded_at DESC LIMIT 1;"
```
**Expected:**
```
id: <recording-id-from-webhook>
bucket_name: reflector-dailyco-local
object_key: monadical/test2-<timestamp>/<recording-timestamp>-<uuid>-cam-audio-<track-start>.webm
status: completed
meeting_id: <meeting-id>
recorded_at: <recent-timestamp>
```
**Check transcript created:**
```bash
docker compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT id, title, status, duration, recording_id, meeting_id, room_id
FROM transcript
ORDER BY created_at DESC LIMIT 1;"
```
**Expected (REAL transcription):**
```
id: <transcript-id>
title: <AI-generated title based on actual conversation content>
status: uploaded (audio file processed and available)
duration: <actual meeting duration in seconds>
recording_id: <same-as-recording-id-above>
meeting_id: <meeting-id>
room_id: 552640fd-16f2-4162-9526-8cf40cd2357e
```
**Note:** Title and content will reflect the ACTUAL conversation, not mock data. Processing time depends on recording length and GPU backend availability (Modal).
**Verify audio file exists:**
```bash
ls -lh data/<transcript-id>/upload.webm
```
**Expected:**
```
-rw-r--r-- 1 user staff ~100-200K Oct 10 18:48 upload.webm
```
**Check transcript topics (REAL transcription):**
```bash
TRANSCRIPT_ID=$(docker compose exec -T postgres psql -U reflector -d reflector -t -c \
"SELECT id FROM transcript ORDER BY created_at DESC LIMIT 1;")
docker compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT
jsonb_array_length(topics) as num_topics,
jsonb_array_length(participants) as num_participants,
short_summary,
title
FROM transcript
WHERE id = '$TRANSCRIPT_ID';"
```
**Expected (REAL data):**
```
num_topics: <varies based on conversation>
num_participants: <actual number of participants who spoke>
short_summary: <AI-generated summary of actual conversation>
title: <AI-generated title based on content>
```
**Check topics contain actual transcription:**
```bash
docker compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT topics->0->'title', topics->0->'summary', topics->0->'transcript'
FROM transcript
ORDER BY created_at DESC LIMIT 1;" | head -20
```
**Expected output:** Will contain the ACTUAL transcribed conversation from the Daily.co meeting, not mock data.
**Check participants:**
```bash
docker compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT participants FROM transcript ORDER BY created_at DESC LIMIT 1;" \
| python3 -c "import sys, json; data=json.loads(sys.stdin.read()); print(json.dumps(data, indent=2))"
```
**Expected (REAL diarization):**
```json
[
{
"id": "<uuid>",
"speaker": 0,
"name": "Speaker 1"
},
{
"id": "<uuid>",
"speaker": 1,
"name": "Speaker 2"
}
]
```
**Note:** Speaker names will be generic ("Speaker 1", "Speaker 2", etc.) as determined by the diarization backend. Number of participants depends on how many actually spoke during the meeting.
**Check word-level data:**
```bash
docker compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT jsonb_array_length(topics->0->'words') as num_words_first_topic
FROM transcript
ORDER BY created_at DESC LIMIT 1;"
```
**Expected:**
```
num_words_first_topic: <varies based on actual conversation length and topic chunking>
```
**Verify speaker diarization in words:**
```bash
docker compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT
topics->0->'words'->0->>'text' as first_word,
topics->0->'words'->0->>'speaker' as speaker,
topics->0->'words'->0->>'start' as start_time,
topics->0->'words'->0->>'end' as end_time
FROM transcript
ORDER BY created_at DESC LIMIT 1;"
```
**Expected (REAL transcription):**
```
first_word: <actual first word from transcription>
speaker: 0, 1, 2, ... (actual speaker ID from diarization)
start_time: <actual timestamp in seconds>
end_time: <actual end timestamp>
```
**Note:** All timestamps and speaker IDs are from real transcription/diarization, synchronized across tracks.
---
## Test 8: Recording Type Verification
**Check what Daily.co received:**
```bash
curl -s -X GET "https://api.daily.co/v1/rooms/test2-<timestamp>" \
-H "Authorization: Bearer $DAILY_API_KEY" | python3 -m json.tool | grep "enable_recording"
```
**Expected:**
```json
"enable_recording": "raw-tracks"
```
**NOT:** `"enable_recording": "cloud"` (that would be wrong - we want raw tracks)
---
## Troubleshooting
### Issue: No webhooks received
**Check webhook state:**
```bash
curl -s -X GET "https://api.daily.co/v1/webhooks" \
-H "Authorization: Bearer $DAILY_API_KEY" | python3 -m json.tool
```
**If state is FAILED:**
```bash
cd server
uv run python scripts/recreate_daily_webhook.py https://<ngrok-url>/v1/daily/webhook
```
### Issue: Webhooks return 422
**Check server logs:**
```bash
docker-compose logs --tail=50 server | grep "Failed to parse webhook event"
```
**Common cause:** Event structure mismatch. Daily.co events use:
```json
{
"version": "1.0.0",
"type": "participant.joined",
"payload": {...}, // NOT "data"
"event_ts": 123.456 // NOT "ts"
}
```
### Issue: Recording not starting
1. **Check token has `sr: true`:**
- Decode JWT token from room_url query param
- Should contain `"sr": true`
2. **Check Daily.co room config:**
- `enable_recording` must be set (not false)
- For raw-tracks: must be exactly `"raw-tracks"`
3. **Check participant actually joined:**
- Logs should show "Participant joined"
- Must click "Join" button, not just pre-call screen
### Issue: Recording in S3 but wrong format
**Daily.co recording types:**
- `"cloud"` → Single MP4 file (`download_link` in webhook)
- `"raw-tracks"` → Multiple WebM files (`tracks` array in webhook)
- `"raw-tracks-audio-only"` → Only audio WebM files
**Current implementation:** Always uses `"raw-tracks"` (better for transcription)
---
## Quick Validation Commands
**One-liner to verify everything:**
```bash
# 1. Check room exists
docker-compose exec -T postgres psql -U reflector -d reflector -c \
"SELECT name, platform FROM room WHERE name = 'test2';" && \
# 2. Create meeting
MEETING=$(curl -s -X POST http://localhost:1250/v1/rooms/test2/meeting \
-H "Content-Type: application/json" -d '{"allow_duplicated":false}') && \
echo "$MEETING" | python3 -c "import sys,json; m=json.load(sys.stdin); print(f'Room: {m[\"room_name\"]}\nURL: {m[\"room_url\"][:80]}...')" && \
# 3. Check Daily.co config
ROOM_NAME=$(echo "$MEETING" | python3 -c "import sys,json; print(json.load(sys.stdin)['room_name'])") && \
curl -s -X GET "https://api.daily.co/v1/rooms/$ROOM_NAME" \
-H "Authorization: Bearer $DAILY_API_KEY" | python3 -c "import sys,json; print(f'Recording: {json.load(sys.stdin)[\"config\"][\"enable_recording\"]}')"
```
**Expected output:**
```
name: test2, platform: whereby
Room: test2-20251009192341
URL: https://monadical.daily.co/test2-20251009192341?t=eyJhbGc...
Recording: raw-tracks
```
---
## Success Criteria Checklist
- [x] Room name includes Reflector room prefix (`test2-...`)
- [x] Meeting URL contains JWT token (`?t=...`)
- [x] Token has `sr: true` (auto-recording enabled)
- [x] Daily.co room config: `enable_recording: "raw-tracks"`
- [x] Browser loads Daily.co interface (not Whereby)
- [x] Recording auto-starts when participant joins
- [x] Webhooks received: participant.joined, recording.started, participant.left, recording.ready-to-download
- [x] Recording status: `finished`
- [x] S3 contains 2 files: audio (.webm) and video (.webm)
- [x] S3 path: `monadical/test2-{timestamp}/{recording-start-ts}-{participant-uuid}-cam-{audio|video}-{track-start-ts}`
- [x] Database `num_clients` increments/decrements correctly
- [x] **Database recording entry created** with correct S3 path and status `completed`
- [ ] **Database transcript entry created** with status `uploaded`
- [ ] **Audio file downloaded** to `data/{transcript_id}/upload.webm`
- [ ] **Transcript has REAL data**: AI-generated title based on conversation
- [ ] **Transcript has topics** generated from actual content
- [ ] **Transcript has participants** with proper speaker diarization
- [ ] **Topics contain word-level data** with accurate timestamps and speaker IDs
- [ ] **Total duration** matches actual meeting length
- [ ] **MP3 and waveform files generated** by file processing pipeline
- [ ] **Frontend transcript page loads** without "Failed to load audio" error
- [ ] **Audio player functional** with working playback and waveform visualization
- [ ] **Multitrack processing completed** without errors in worker logs
- [ ] **Modal GPU backends accessible** (transcription and diarization)

View File

@@ -1,12 +1,11 @@
FROM python:3.12-slim
ENV PYTHONUNBUFFERED=1 \
UV_LINK_MODE=copy \
UV_NO_CACHE=1
UV_LINK_MODE=copy
# builder install base dependencies
WORKDIR /tmp
RUN apt-get update && apt-get install -y curl ffmpeg && apt-get clean
RUN apt-get update && apt-get install -y curl && apt-get clean
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"
@@ -14,8 +13,8 @@ ENV PATH="/root/.local/bin/:$PATH"
# install application dependencies
RUN mkdir -p /app
WORKDIR /app
COPY pyproject.toml uv.lock README.md /app/
RUN uv sync --compile-bytecode --locked
COPY pyproject.toml uv.lock /app/
RUN touch README.md && env uv sync --compile-bytecode --locked
# pre-download nltk packages
RUN uv run python -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')"
@@ -27,15 +26,4 @@ COPY migrations /app/migrations
COPY reflector /app/reflector
WORKDIR /app
# Create symlink for libgomp if it doesn't exist (for ARM64 compatibility)
RUN if [ "$(uname -m)" = "aarch64" ] && [ ! -f /usr/lib/libgomp.so.1 ]; then \
LIBGOMP_PATH=$(find /app/.venv/lib -path "*/torch.libs/libgomp*.so.*" 2>/dev/null | head -n1); \
if [ -n "$LIBGOMP_PATH" ]; then \
ln -sf "$LIBGOMP_PATH" /usr/lib/libgomp.so.1; \
fi \
fi
# Pre-check just to make sure the image will not fail
RUN uv run python -c "import silero_vad.model"
CMD ["./runserver.sh"]

View File

@@ -20,25 +20,3 @@ Polls SQS every 60 seconds via /server/reflector/worker/process.py:24-62:
# Every 60 seconds, check for new recordings
sqs = boto3.client("sqs", ...)
response = sqs.receive_message(QueueUrl=queue_url, ...)
# Requeue
```bash
uv run /app/requeue_uploaded_file.py TRANSCRIPT_ID
```
## Pipeline Management
### Continue stuck pipeline from final summaries (identify_participants) step:
```bash
uv run python -c "from reflector.pipelines.main_live_pipeline import task_pipeline_final_summaries; result = task_pipeline_final_summaries.delay(transcript_id='TRANSCRIPT_ID'); print(f'Task queued: {result.id}')"
```
### Run full post-processing pipeline (continues to completion):
```bash
uv run python -c "from reflector.pipelines.main_live_pipeline import pipeline_post; pipeline_post(transcript_id='TRANSCRIPT_ID')"
```
.

View File

@@ -1,95 +0,0 @@
# Data Retention and Cleanup
## Overview
For public instances of Reflector, a data retention policy is automatically enforced to delete anonymous user data after a configurable period (default: 7 days). This ensures compliance with privacy expectations and prevents unbounded storage growth.
## Configuration
### Environment Variables
- `PUBLIC_MODE` (bool): Must be set to `true` to enable automatic cleanup
- `PUBLIC_DATA_RETENTION_DAYS` (int): Number of days to retain anonymous data (default: 7)
### What Gets Deleted
When data reaches the retention period, the following items are automatically removed:
1. **Transcripts** from anonymous users (where `user_id` is NULL):
- Database records
- Local files (audio.wav, audio.mp3, audio.json waveform)
- Storage files (cloud storage if configured)
## Automatic Cleanup
### Celery Beat Schedule
When `PUBLIC_MODE=true`, a Celery beat task runs daily at 3 AM to clean up old data:
```python
# Automatically scheduled when PUBLIC_MODE=true
"cleanup_old_public_data": {
"task": "reflector.worker.cleanup.cleanup_old_public_data",
"schedule": crontab(hour=3, minute=0), # Daily at 3 AM
}
```
### Running the Worker
Ensure both Celery worker and beat scheduler are running:
```bash
# Start Celery worker
uv run celery -A reflector.worker.app worker --loglevel=info
# Start Celery beat scheduler (in another terminal)
uv run celery -A reflector.worker.app beat
```
## Manual Cleanup
For testing or manual intervention, use the cleanup tool:
```bash
# Delete data older than 7 days (default)
uv run python -m reflector.tools.cleanup_old_data
# Delete data older than 30 days
uv run python -m reflector.tools.cleanup_old_data --days 30
```
Note: The manual tool uses the same implementation as the Celery worker task to ensure consistency.
## Important Notes
1. **User Data Deletion**: Only anonymous data (where `user_id` is NULL) is deleted. Authenticated user data is preserved.
2. **Storage Cleanup**: The system properly cleans up both local files and cloud storage when configured.
3. **Error Handling**: If individual deletions fail, the cleanup continues and logs errors. Failed deletions are reported in the task output.
4. **Public Instance Only**: The automatic cleanup task only runs when `PUBLIC_MODE=true` to prevent accidental data loss in private deployments.
## Testing
Run the cleanup tests:
```bash
uv run pytest tests/test_cleanup.py -v
```
## Monitoring
Check Celery logs for cleanup task execution:
```bash
# Look for cleanup task logs
grep "cleanup_old_public_data" celery.log
grep "Starting cleanup of old public data" celery.log
```
Task statistics are logged after each run:
- Number of transcripts deleted
- Number of meetings deleted
- Number of orphaned recordings deleted
- Any errors encountered

View File

@@ -1,194 +0,0 @@
## Reflector GPU Transcription API (Specification)
This document defines the Reflector GPU transcription API that all implementations must adhere to. Current implementations include NVIDIA Parakeet (NeMo) and Whisper (faster-whisper), both deployed on Modal.com. The API surface and response shapes are OpenAI/Whisper-compatible, so clients can switch implementations by changing only the base URL.
### Base URL and Authentication
- Example base URLs (Modal web endpoints):
- Parakeet: `https://<account>--reflector-transcriber-parakeet-web.modal.run`
- Whisper: `https://<account>--reflector-transcriber-web.modal.run`
- All endpoints are served under `/v1` and require a Bearer token:
```
Authorization: Bearer <REFLECTOR_GPU_APIKEY>
```
Note: To switch implementations, deploy the desired variant and point `TRANSCRIPT_URL` to its base URL. The API is identical.
### Supported file types
`mp3, mp4, mpeg, mpga, m4a, wav, webm`
### Models and languages
- Parakeet (NVIDIA NeMo): default `nvidia/parakeet-tdt-0.6b-v2`
- Language support: only `en`. Other languages return HTTP 400.
- Whisper (faster-whisper): default `large-v2` (or deployment-specific)
- Language support: multilingual (per Whisper model capabilities).
Note: The `model` parameter is accepted by all implementations for interface parity. Some backends may treat it as informational.
### Endpoints
#### POST /v1/audio/transcriptions
Transcribe one or more uploaded audio files.
Request: multipart/form-data
- `file` (File) — optional. Single file to transcribe.
- `files` (File[]) — optional. One or more files to transcribe.
- `model` (string) — optional. Defaults to the implementation-specific model (see above).
- `language` (string) — optional, defaults to `en`.
- Parakeet: only `en` is accepted; other values return HTTP 400
- Whisper: model-dependent; typically multilingual
- `batch` (boolean) — optional, defaults to `false`.
Notes:
- Provide either `file` or `files`, not both. If neither is provided, HTTP 400.
- `batch` requires `files`; using `batch=true` without `files` returns HTTP 400.
- Response shape for multiple files is the same regardless of `batch`.
- Files sent to this endpoint are processed in a single pass (no VAD/chunking). This is intended for short clips (roughly ≤ 30s; depends on GPU memory/model). For longer audio, prefer `/v1/audio/transcriptions-from-url` which supports VAD-based chunking.
Responses
Single file response:
```json
{
"text": "transcribed text",
"words": [
{ "word": "hello", "start": 0.0, "end": 0.5 },
{ "word": "world", "start": 0.5, "end": 1.0 }
],
"filename": "audio.mp3"
}
```
Multiple files response:
```json
{
"results": [
{"filename": "a1.mp3", "text": "...", "words": [...]},
{"filename": "a2.mp3", "text": "...", "words": [...]}]
}
```
Notes:
- Word objects always include keys: `word`, `start`, `end`.
- Some implementations may include a trailing space in `word` to match Whisper tokenization behavior; clients should trim if needed.
Example curl (single file):
```bash
curl -X POST \
-H "Authorization: Bearer $REFLECTOR_GPU_APIKEY" \
-F "file=@/path/to/audio.mp3" \
-F "language=en" \
"$BASE_URL/v1/audio/transcriptions"
```
Example curl (multiple files, batch):
```bash
curl -X POST \
-H "Authorization: Bearer $REFLECTOR_GPU_APIKEY" \
-F "files=@/path/a1.mp3" -F "files=@/path/a2.mp3" \
-F "batch=true" -F "language=en" \
"$BASE_URL/v1/audio/transcriptions"
```
#### POST /v1/audio/transcriptions-from-url
Transcribe a single remote audio file by URL.
Request: application/json
Body parameters:
- `audio_file_url` (string) — required. URL of the audio file to transcribe.
- `model` (string) — optional. Defaults to the implementation-specific model (see above).
- `language` (string) — optional, defaults to `en`. Parakeet only accepts `en`.
- `timestamp_offset` (number) — optional, defaults to `0.0`. Added to each word's `start`/`end` in the response.
```json
{
"audio_file_url": "https://example.com/audio.mp3",
"model": "nvidia/parakeet-tdt-0.6b-v2",
"language": "en",
"timestamp_offset": 0.0
}
```
Response:
```json
{
"text": "transcribed text",
"words": [
{ "word": "hello", "start": 10.0, "end": 10.5 },
{ "word": "world", "start": 10.5, "end": 11.0 }
]
}
```
Notes:
- `timestamp_offset` is added to each words `start`/`end` in the response.
- Implementations may perform VAD-based chunking and batching for long-form audio; word timings are adjusted accordingly.
Example curl:
```bash
curl -X POST \
-H "Authorization: Bearer $REFLECTOR_GPU_APIKEY" \
-H "Content-Type: application/json" \
-d '{
"audio_file_url": "https://example.com/audio.mp3",
"language": "en",
"timestamp_offset": 0
}' \
"$BASE_URL/v1/audio/transcriptions-from-url"
```
### Error handling
- 400 Bad Request
- Parakeet: `language` other than `en`
- Missing required parameters (`file`/`files` for upload; `audio_file_url` for URL endpoint)
- Unsupported file extension
- 401 Unauthorized
- Missing or invalid Bearer token
- 404 Not Found
- `audio_file_url` does not exist
### Implementation details
- GPUs: A10G for small-file/live, L40S for large-file URL transcription (subject to deployment)
- VAD chunking and segment batching; word timings adjusted and overlapping ends constrained
- Pads very short segments (< 0.5s) to avoid model crashes on some backends
### Server configuration (Reflector API)
Set the Reflector server to use the Modal backend and point `TRANSCRIPT_URL` to your chosen deployment:
```
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=https://<account>--reflector-transcriber-parakeet-web.modal.run
TRANSCRIPT_MODAL_API_KEY=<REFLECTOR_GPU_APIKEY>
```
### Conformance tests
Use the pytest-based conformance tests to validate any new implementation (including self-hosted) against this spec:
```
TRANSCRIPT_URL=https://<your-deployment-base> \
TRANSCRIPT_MODAL_API_KEY=your-api-key \
uv run -m pytest -m model_api --no-cov server/tests/test_model_api_transcript.py
```

View File

@@ -1,233 +0,0 @@
# Reflector Webhook Documentation
## Overview
Reflector supports webhook notifications to notify external systems when transcript processing is completed. Webhooks can be configured per room and are triggered automatically after a transcript is successfully processed.
## Configuration
Webhooks are configured at the room level with two fields:
- `webhook_url`: The HTTPS endpoint to receive webhook notifications
- `webhook_secret`: Optional secret key for HMAC signature verification (auto-generated if not provided)
## Events
### `transcript.completed`
Triggered when a transcript has been fully processed, including transcription, diarization, summarization, topic detection and calendar event integration.
### `test`
A test event that can be triggered manually to verify webhook configuration.
## Webhook Request Format
### Headers
All webhook requests include the following headers:
| Header | Description | Example |
|--------|-------------|---------|
| `Content-Type` | Always `application/json` | `application/json` |
| `User-Agent` | Identifies Reflector as the source | `Reflector-Webhook/1.0` |
| `X-Webhook-Event` | The event type | `transcript.completed` or `test` |
| `X-Webhook-Retry` | Current retry attempt number | `0`, `1`, `2`... |
| `X-Webhook-Signature` | HMAC signature (if secret configured) | `t=1735306800,v1=abc123...` |
### Signature Verification
If a webhook secret is configured, Reflector includes an HMAC-SHA256 signature in the `X-Webhook-Signature` header to verify the webhook authenticity.
The signature format is: `t={timestamp},v1={signature}`
To verify the signature:
1. Extract the timestamp and signature from the header
2. Create the signed payload: `{timestamp}.{request_body}`
3. Compute HMAC-SHA256 of the signed payload using your webhook secret
4. Compare the computed signature with the received signature
Example verification (Python):
```python
import hmac
import hashlib
def verify_webhook_signature(payload: bytes, signature_header: str, secret: str) -> bool:
# Parse header: "t=1735306800,v1=abc123..."
parts = dict(part.split("=") for part in signature_header.split(","))
timestamp = parts["t"]
received_signature = parts["v1"]
# Create signed payload
signed_payload = f"{timestamp}.{payload.decode('utf-8')}"
# Compute expected signature
expected_signature = hmac.new(
secret.encode("utf-8"),
signed_payload.encode("utf-8"),
hashlib.sha256
).hexdigest()
# Compare signatures
return hmac.compare_digest(expected_signature, received_signature)
```
## Event Payloads
### `transcript.completed` Event
This event includes a convenient URL for accessing the transcript:
- `frontend_url`: Direct link to view the transcript in the web interface
```json
{
"event": "transcript.completed",
"event_id": "transcript.completed-abc-123-def-456",
"timestamp": "2025-08-27T12:34:56.789012Z",
"transcript": {
"id": "abc-123-def-456",
"room_id": "room-789",
"created_at": "2025-08-27T12:00:00Z",
"duration": 1800.5,
"title": "Q3 Product Planning Meeting",
"short_summary": "Team discussed Q3 product roadmap, prioritizing mobile app features and API improvements.",
"long_summary": "The product team met to finalize the Q3 roadmap. Key decisions included...",
"webvtt": "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\n<v Speaker 1>Welcome everyone to today's meeting...",
"topics": [
{
"title": "Introduction and Agenda",
"summary": "Meeting kickoff with agenda review",
"timestamp": 0.0,
"duration": 120.0,
"webvtt": "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\n<v Speaker 1>Welcome everyone..."
},
{
"title": "Mobile App Features Discussion",
"summary": "Team reviewed proposed mobile app features for Q3",
"timestamp": 120.0,
"duration": 600.0,
"webvtt": "WEBVTT\n\n00:02:00.000 --> 00:02:10.000\n<v Speaker 2>Let's talk about the mobile app..."
}
],
"participants": [
{
"id": "participant-1",
"name": "John Doe",
"speaker": "Speaker 1"
},
{
"id": "participant-2",
"name": "Jane Smith",
"speaker": "Speaker 2"
}
],
"source_language": "en",
"target_language": "en",
"status": "completed",
"frontend_url": "https://app.reflector.com/transcripts/abc-123-def-456"
},
"room": {
"id": "room-789",
"name": "Product Team Room"
},
"calendar_event": {
"id": "calendar-event-123",
"ics_uid": "event-123",
"title": "Q3 Product Planning Meeting",
"start_time": "2025-08-27T12:00:00Z",
"end_time": "2025-08-27T12:30:00Z",
"description": "Team discussed Q3 product roadmap, prioritizing mobile app features and API improvements.",
"location": "Conference Room 1",
"attendees": [
{
"id": "participant-1",
"name": "John Doe",
"speaker": "Speaker 1"
},
{
"id": "participant-2",
"name": "Jane Smith",
"speaker": "Speaker 2"
}
]
}
}
```
### `test` Event
```json
{
"event": "test",
"event_id": "test.2025-08-27T12:34:56.789012Z",
"timestamp": "2025-08-27T12:34:56.789012Z",
"message": "This is a test webhook from Reflector",
"room": {
"id": "room-789",
"name": "Product Team Room"
}
}
```
## Retry Policy
Webhooks are delivered with automatic retry logic to handle transient failures. When a webhook delivery fails due to server errors or network issues, Reflector will automatically retry the delivery multiple times over an extended period.
### Retry Mechanism
Reflector implements an exponential backoff strategy for webhook retries:
- **Initial retry delay**: 60 seconds after the first failure
- **Exponential backoff**: Each subsequent retry waits approximately twice as long as the previous one
- **Maximum retry interval**: 1 hour (backoff is capped at this duration)
- **Maximum retry attempts**: 30 attempts total
- **Total retry duration**: Retries continue for approximately 24 hours
### How Retries Work
When a webhook fails, Reflector will:
1. Wait 60 seconds, then retry (attempt #1)
2. If it fails again, wait ~2 minutes, then retry (attempt #2)
3. Continue doubling the wait time up to a maximum of 1 hour between attempts
4. Keep retrying at 1-hour intervals until successful or 30 attempts are exhausted
The `X-Webhook-Retry` header indicates the current retry attempt number (0 for the initial attempt, 1 for first retry, etc.), allowing your endpoint to track retry attempts.
### Retry Behavior by HTTP Status Code
| Status Code | Behavior |
|-------------|----------|
| 2xx (Success) | No retry, webhook marked as delivered |
| 4xx (Client Error) | No retry, request is considered permanently failed |
| 5xx (Server Error) | Automatic retry with exponential backoff |
| Network/Timeout Error | Automatic retry with exponential backoff |
**Important Notes:**
- Webhooks timeout after 30 seconds. If your endpoint takes longer to respond, it will be considered a timeout error and retried.
- During the retry period (~24 hours), you may receive the same webhook multiple times if your endpoint experiences intermittent failures.
- There is no mechanism to manually retry failed webhooks after the retry period expires.
## Testing Webhooks
You can test your webhook configuration before processing transcripts:
```http
POST /v1/rooms/{room_id}/webhook/test
```
Response:
```json
{
"success": true,
"status_code": 200,
"message": "Webhook test successful",
"response_preview": "OK"
}
```
Or in case of failure:
```json
{
"success": false,
"error": "Webhook request timed out (10 seconds)"
}
```

View File

@@ -20,24 +20,24 @@ AUTH_JWT_AUDIENCE=
## Using local whisper
#TRANSCRIPT_BACKEND=whisper
#WHISPER_MODEL_SIZE=tiny
## Using serverless modal.com (require reflector-gpu-modal deployed)
#TRANSCRIPT_BACKEND=modal
#TRANSCRIPT_URL=https://xxxxx--reflector-transcriber-web.modal.run
#TRANSLATE_URL=https://xxxxx--reflector-translator-web.modal.run
#TRANSCRIPT_MODAL_API_KEY=xxxxx
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-parakeet-web.modal.run
TRANSCRIPT_MODAL_API_KEY=
TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
TRANSCRIPT_MODAL_API_KEY=***REMOVED***
## =======================================================
## Translation backend
## Transcription backend
##
## Only available in modal atm
## =======================================================
TRANSLATION_BACKEND=modal
TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
#TRANSLATION_MODAL_API_KEY=xxxxx
## =======================================================
## LLM backend
@@ -47,11 +47,28 @@ TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
## llm backend implementation
## =======================================================
## Context size for summary generation (tokens)
# LLM_MODEL=microsoft/phi-4
LLM_CONTEXT_WINDOW=16000
LLM_URL=
LLM_API_KEY=sk-
## Using serverless modal.com (require reflector-gpu-modal deployed)
LLM_BACKEND=modal
LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
LLM_MODAL_API_KEY=***REMOVED***
ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
## Using OpenAI
#LLM_BACKEND=openai
#LLM_OPENAI_KEY=xxx
#LLM_OPENAI_MODEL=gpt-3.5-turbo
## Using GPT4ALL
#LLM_BACKEND=openai
#LLM_URL=http://localhost:4891/v1/completions
#LLM_OPENAI_MODEL="GPT4All Falcon"
## Default LLM MODEL NAME
#DEFAULT_LLM=lmsys/vicuna-13b-v1.5
## Cache directory to store models
CACHE_DIR=data
## =======================================================
## Diarization
@@ -60,9 +77,7 @@ LLM_API_KEY=sk-
## To allow diarization, you need to expose expose the files to be dowloded by the pipeline
## =======================================================
DIARIZATION_ENABLED=false
DIARIZATION_BACKEND=modal
DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
#DIARIZATION_MODAL_API_KEY=xxxxx
## =======================================================
@@ -71,27 +86,3 @@ DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
## Sentry DSN configuration
#SENTRY_DSN=
## =======================================================
## Video Platform Configuration
## =======================================================
## Whereby
#WHEREBY_API_KEY=your-whereby-api-key
#WHEREBY_WEBHOOK_SECRET=your-whereby-webhook-secret
#AWS_WHEREBY_ACCESS_KEY_ID=your-aws-key
#AWS_WHEREBY_ACCESS_KEY_SECRET=your-aws-secret
#AWS_PROCESS_RECORDING_QUEUE_URL=https://sqs.us-west-2.amazonaws.com/...
## Daily.co
#DAILY_API_KEY=your-daily-api-key
#DAILY_WEBHOOK_SECRET=your-daily-webhook-secret
#DAILY_SUBDOMAIN=your-subdomain
#AWS_DAILY_S3_BUCKET=your-daily-bucket
#AWS_DAILY_S3_REGION=us-west-2
#AWS_DAILY_ROLE_ARN=arn:aws:iam::ACCOUNT:role/DailyRecording
## Platform Selection
#DAILY_MIGRATION_ENABLED=false # Enable Daily.co support
#DAILY_MIGRATION_ROOM_IDS=[] # Specific rooms to use Daily
#DEFAULT_VIDEO_PLATFORM=whereby # Default platform for new rooms

View File

@@ -0,0 +1,81 @@
# Reflector GPU implementation - Transcription and LLM
This repository hold an API for the GPU implementation of the Reflector API service,
and use [Modal.com](https://modal.com)
- `reflector_llm.py` - LLM API
- `reflector_transcriber.py` - Transcription API
## Modal.com deployment
Create a modal secret, and name it `reflector-gpu`.
It should contain an `REFLECTOR_APIKEY` environment variable with a value.
The deployment is done using [Modal.com](https://modal.com) service.
```
$ modal deploy reflector_transcriber.py
...
└── 🔨 Created web => https://xxxx--reflector-transcriber-web.modal.run
$ modal deploy reflector_llm.py
...
└── 🔨 Created web => https://xxxx--reflector-llm-web.modal.run
```
Then in your reflector api configuration `.env`, you can set theses keys:
```
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=https://xxxx--reflector-transcriber-web.modal.run
TRANSCRIPT_MODAL_API_KEY=REFLECTOR_APIKEY
LLM_BACKEND=modal
LLM_URL=https://xxxx--reflector-llm-web.modal.run
LLM_MODAL_API_KEY=REFLECTOR_APIKEY
```
## API
Authentication must be passed with the `Authorization` header, using the `bearer` scheme.
```
Authorization: bearer <REFLECTOR_APIKEY>
```
### LLM
`POST /llm`
**request**
```
{
"prompt": "xxx"
}
```
**response**
```
{
"text": "xxx completed"
}
```
### Transcription
`POST /transcribe`
**request** (multipart/form-data)
- `file` - audio file
- `language` - language code (e.g. `en`)
**response**
```
{
"text": "xxx",
"words": [
{"text": "xxx", "start": 0.0, "end": 1.0}
]
}
```

View File

@@ -0,0 +1,187 @@
"""
Reflector GPU backend - diarizer
===================================
"""
import os
import modal.gpu
from modal import App, Image, Secret, asgi_app, enter, method
from pydantic import BaseModel
PYANNOTE_MODEL_NAME: str = "pyannote/speaker-diarization-3.1"
MODEL_DIR = "/root/diarization_models"
app = App(name="reflector-diarizer")
def migrate_cache_llm():
"""
XXX The cache for model files in Transformers v4.22.0 has been updated.
Migrating your old cache. This is a one-time only operation. You can
interrupt this and resume the migration later on by calling
`transformers.utils.move_cache()`.
"""
from transformers.utils.hub import move_cache
print("Moving LLM cache")
move_cache(cache_dir=MODEL_DIR, new_cache_dir=MODEL_DIR)
print("LLM cache moved")
def download_pyannote_audio():
from pyannote.audio import Pipeline
Pipeline.from_pretrained(
PYANNOTE_MODEL_NAME,
cache_dir=MODEL_DIR,
use_auth_token=os.environ["HF_TOKEN"],
)
diarizer_image = (
Image.debian_slim(python_version="3.10.8")
.pip_install(
"pyannote.audio==3.1.0",
"requests",
"onnx",
"torchaudio",
"onnxruntime-gpu",
"torch==2.0.0",
"transformers==4.34.0",
"sentencepiece",
"protobuf",
"numpy",
"huggingface_hub",
"hf-transfer",
)
.run_function(
download_pyannote_audio, secrets=[Secret.from_name("my-huggingface-secret")]
)
.run_function(migrate_cache_llm)
.env(
{
"LD_LIBRARY_PATH": (
"/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib/:"
"/opt/conda/lib/python3.10/site-packages/nvidia/cublas/lib/"
)
}
)
)
@app.cls(
gpu=modal.gpu.A100(size="40GB"),
timeout=60 * 30,
scaledown_window=60,
allow_concurrent_inputs=1,
image=diarizer_image,
)
class Diarizer:
@enter()
def enter(self):
import torch
from pyannote.audio import Pipeline
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
self.diarization_pipeline = Pipeline.from_pretrained(
PYANNOTE_MODEL_NAME, cache_dir=MODEL_DIR
)
self.diarization_pipeline.to(torch.device(self.device))
@method()
def diarize(self, audio_data: str, audio_suffix: str, timestamp: float):
import tempfile
import torchaudio
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
fp.write(audio_data)
print("Diarizing audio")
waveform, sample_rate = torchaudio.load(fp.name)
diarization = self.diarization_pipeline(
{"waveform": waveform, "sample_rate": sample_rate}
)
words = []
for diarization_segment, _, speaker in diarization.itertracks(
yield_label=True
):
words.append(
{
"start": round(timestamp + diarization_segment.start, 3),
"end": round(timestamp + diarization_segment.end, 3),
"speaker": int(speaker[-2:]),
}
)
print("Diarization complete")
return {"diarization": words}
# -------------------------------------------------------------------
# Web API
# -------------------------------------------------------------------
@app.function(
timeout=60 * 10,
scaledown_window=60 * 3,
allow_concurrent_inputs=40,
secrets=[
Secret.from_name("reflector-gpu"),
],
image=diarizer_image,
)
@asgi_app()
def web():
import requests
from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
diarizerstub = Diarizer()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
def validate_audio_file(audio_file_url: str):
# Check if the audio file exists
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(
status_code=response.status_code,
detail="The audio file does not exist.",
)
class DiarizationResponse(BaseModel):
result: dict
@app.post(
"/diarize", dependencies=[Depends(apikey_auth), Depends(validate_audio_file)]
)
def diarize(
audio_file_url: str, timestamp: float = 0.0
) -> HTTPException | DiarizationResponse:
# Currently the uploaded files are in mp3 format
audio_suffix = "mp3"
print("Downloading audio file")
response = requests.get(audio_file_url, allow_redirects=True)
print("Audio file downloaded successfully")
func = diarizerstub.diarize.spawn(
audio_data=response.content, audio_suffix=audio_suffix, timestamp=timestamp
)
result = func.get()
return result
return app

View File

@@ -0,0 +1,214 @@
"""
Reflector GPU backend - LLM
===========================
"""
import json
import os
import threading
from typing import Optional
import modal
from modal import App, Image, Secret, asgi_app, enter, exit, method
# LLM
LLM_MODEL: str = "lmsys/vicuna-13b-v1.5"
LLM_LOW_CPU_MEM_USAGE: bool = True
LLM_TORCH_DTYPE: str = "bfloat16"
LLM_MAX_NEW_TOKENS: int = 300
IMAGE_MODEL_DIR = "/root/llm_models"
app = App(name="reflector-llm")
def download_llm():
from huggingface_hub import snapshot_download
print("Downloading LLM model")
snapshot_download(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)
print("LLM model downloaded")
def migrate_cache_llm():
"""
XXX The cache for model files in Transformers v4.22.0 has been updated.
Migrating your old cache. This is a one-time only operation. You can
interrupt this and resume the migration later on by calling
`transformers.utils.move_cache()`.
"""
from transformers.utils.hub import move_cache
print("Moving LLM cache")
move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)
print("LLM cache moved")
llm_image = (
Image.debian_slim(python_version="3.10.8")
.apt_install("git")
.pip_install(
"transformers",
"torch",
"sentencepiece",
"protobuf",
"jsonformer==0.12.0",
"accelerate==0.21.0",
"einops==0.6.1",
"hf-transfer~=0.1",
"huggingface_hub==0.16.4",
)
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.run_function(download_llm)
.run_function(migrate_cache_llm)
)
@app.cls(
gpu="A100",
timeout=60 * 5,
scaledown_window=60 * 5,
allow_concurrent_inputs=15,
image=llm_image,
)
class LLM:
@enter()
def enter(self):
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
print("Instance llm model")
model = AutoModelForCausalLM.from_pretrained(
LLM_MODEL,
torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
cache_dir=IMAGE_MODEL_DIR,
local_files_only=True,
)
# JSONFormer doesn't yet support generation configs
print("Instance llm generation config")
model.config.max_new_tokens = LLM_MAX_NEW_TOKENS
# generation configuration
gen_cfg = GenerationConfig.from_model_config(model.config)
gen_cfg.max_new_tokens = LLM_MAX_NEW_TOKENS
# load tokenizer
print("Instance llm tokenizer")
tokenizer = AutoTokenizer.from_pretrained(
LLM_MODEL, cache_dir=IMAGE_MODEL_DIR, local_files_only=True
)
# move model to gpu
print("Move llm model to GPU")
model = model.cuda()
print("Warmup llm done")
self.model = model
self.tokenizer = tokenizer
self.gen_cfg = gen_cfg
self.GenerationConfig = GenerationConfig
self.lock = threading.Lock()
@exit()
def exit():
print("Exit llm")
@method()
def generate(
self, prompt: str, gen_schema: str | None, gen_cfg: str | None
) -> dict:
"""
Perform a generation action using the LLM
"""
print(f"Generate {prompt=}")
if gen_cfg:
gen_cfg = self.GenerationConfig.from_dict(json.loads(gen_cfg))
else:
gen_cfg = self.gen_cfg
# If a gen_schema is given, conform to gen_schema
with self.lock:
if gen_schema:
import jsonformer
print(f"Schema {gen_schema=}")
jsonformer_llm = jsonformer.Jsonformer(
model=self.model,
tokenizer=self.tokenizer,
json_schema=json.loads(gen_schema),
prompt=prompt,
max_string_token_length=gen_cfg.max_new_tokens,
)
response = jsonformer_llm()
else:
# If no gen_schema, perform prompt only generation
# tokenize prompt
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
self.model.device
)
output = self.model.generate(input_ids, generation_config=gen_cfg)
# decode output
response = self.tokenizer.decode(
output[0].cpu(), skip_special_tokens=True
)
response = response[len(prompt) :]
print(f"Generated {response=}")
return {"text": response}
# -------------------------------------------------------------------
# Web API
# -------------------------------------------------------------------
@app.function(
scaledown_window=60 * 10,
timeout=60 * 5,
allow_concurrent_inputs=45,
secrets=[
Secret.from_name("reflector-gpu"),
],
)
@asgi_app()
def web():
from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel
llmstub = LLM()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
class LLMRequest(BaseModel):
prompt: str
gen_schema: Optional[dict] = None
gen_cfg: Optional[dict] = None
@app.post("/llm", dependencies=[Depends(apikey_auth)])
def llm(
req: LLMRequest,
):
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None
gen_cfg = json.dumps(req.gen_cfg) if req.gen_cfg else None
func = llmstub.generate.spawn(
prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg
)
result = func.get()
return result
return app

View File

@@ -0,0 +1,220 @@
"""
Reflector GPU backend - LLM
===========================
"""
import json
import os
import threading
from typing import Optional
import modal
from modal import App, Image, Secret, asgi_app, enter, exit, method
# LLM
LLM_MODEL: str = "HuggingFaceH4/zephyr-7b-alpha"
LLM_LOW_CPU_MEM_USAGE: bool = True
LLM_TORCH_DTYPE: str = "bfloat16"
LLM_MAX_NEW_TOKENS: int = 300
IMAGE_MODEL_DIR = "/root/llm_models/zephyr"
app = App(name="reflector-llm-zephyr")
def download_llm():
from huggingface_hub import snapshot_download
print("Downloading LLM model")
snapshot_download(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)
print("LLM model downloaded")
def migrate_cache_llm():
"""
XXX The cache for model files in Transformers v4.22.0 has been updated.
Migrating your old cache. This is a one-time only operation. You can
interrupt this and resume the migration later on by calling
`transformers.utils.move_cache()`.
"""
from transformers.utils.hub import move_cache
print("Moving LLM cache")
move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)
print("LLM cache moved")
llm_image = (
Image.debian_slim(python_version="3.10.8")
.apt_install("git")
.pip_install(
"transformers==4.34.0",
"torch",
"sentencepiece",
"protobuf",
"jsonformer==0.12.0",
"accelerate==0.21.0",
"einops==0.6.1",
"hf-transfer~=0.1",
"huggingface_hub==0.16.4",
)
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.run_function(download_llm)
.run_function(migrate_cache_llm)
)
@app.cls(
gpu="A10G",
timeout=60 * 5,
scaledown_window=60 * 5,
allow_concurrent_inputs=10,
image=llm_image,
)
class LLM:
@enter()
def enter(self):
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
print("Instance llm model")
model = AutoModelForCausalLM.from_pretrained(
LLM_MODEL,
torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
cache_dir=IMAGE_MODEL_DIR,
local_files_only=True,
)
# JSONFormer doesn't yet support generation configs
print("Instance llm generation config")
model.config.max_new_tokens = LLM_MAX_NEW_TOKENS
# generation configuration
gen_cfg = GenerationConfig.from_model_config(model.config)
gen_cfg.max_new_tokens = LLM_MAX_NEW_TOKENS
# load tokenizer
print("Instance llm tokenizer")
tokenizer = AutoTokenizer.from_pretrained(
LLM_MODEL, cache_dir=IMAGE_MODEL_DIR, local_files_only=True
)
gen_cfg.pad_token_id = tokenizer.eos_token_id
gen_cfg.eos_token_id = tokenizer.eos_token_id
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id
# move model to gpu
print("Move llm model to GPU")
model = model.cuda()
print("Warmup llm done")
self.model = model
self.tokenizer = tokenizer
self.gen_cfg = gen_cfg
self.GenerationConfig = GenerationConfig
self.lock = threading.Lock()
@exit()
def exit():
print("Exit llm")
@method()
def generate(
self, prompt: str, gen_schema: str | None, gen_cfg: str | None
) -> dict:
"""
Perform a generation action using the LLM
"""
print(f"Generate {prompt=}")
if gen_cfg:
gen_cfg = self.GenerationConfig.from_dict(json.loads(gen_cfg))
gen_cfg.pad_token_id = self.tokenizer.eos_token_id
gen_cfg.eos_token_id = self.tokenizer.eos_token_id
else:
gen_cfg = self.gen_cfg
# If a gen_schema is given, conform to gen_schema
with self.lock:
if gen_schema:
import jsonformer
print(f"Schema {gen_schema=}")
jsonformer_llm = jsonformer.Jsonformer(
model=self.model,
tokenizer=self.tokenizer,
json_schema=json.loads(gen_schema),
prompt=prompt,
max_string_token_length=gen_cfg.max_new_tokens,
)
response = jsonformer_llm()
else:
# If no gen_schema, perform prompt only generation
# tokenize prompt
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
self.model.device
)
output = self.model.generate(input_ids, generation_config=gen_cfg)
# decode output
response = self.tokenizer.decode(
output[0].cpu(), skip_special_tokens=True
)
response = response[len(prompt) :]
response = {"long_summary": response}
print(f"Generated {response=}")
return {"text": response}
# -------------------------------------------------------------------
# Web API
# -------------------------------------------------------------------
@app.function(
scaledown_window=60 * 10,
timeout=60 * 5,
allow_concurrent_inputs=30,
secrets=[
Secret.from_name("reflector-gpu"),
],
)
@asgi_app()
def web():
from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel
llmstub = LLM()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
class LLMRequest(BaseModel):
prompt: str
gen_schema: Optional[dict] = None
gen_cfg: Optional[dict] = None
@app.post("/llm", dependencies=[Depends(apikey_auth)])
def llm(
req: LLMRequest,
):
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None
gen_cfg = json.dumps(req.gen_cfg) if req.gen_cfg else None
func = llmstub.generate.spawn(
prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg
)
result = func.get()
return result
return app

View File

@@ -0,0 +1,161 @@
import os
import tempfile
import threading
import modal
from pydantic import BaseModel
MODELS_DIR = "/models"
MODEL_NAME = "large-v2"
MODEL_COMPUTE_TYPE: str = "float16"
MODEL_NUM_WORKERS: int = 1
MINUTES = 60 # seconds
volume = modal.Volume.from_name("models", create_if_missing=True)
app = modal.App("reflector-transcriber")
def download_model():
from faster_whisper import download_model
volume.reload()
download_model(MODEL_NAME, cache_dir=MODELS_DIR)
volume.commit()
image = (
modal.Image.debian_slim(python_version="3.12")
.pip_install(
"huggingface_hub==0.27.1",
"hf-transfer==0.1.9",
"torch==2.5.1",
"faster-whisper==1.1.1",
)
.env(
{
"HF_HUB_ENABLE_HF_TRANSFER": "1",
"LD_LIBRARY_PATH": (
"/usr/local/lib/python3.12/site-packages/nvidia/cudnn/lib/:"
"/opt/conda/lib/python3.12/site-packages/nvidia/cublas/lib/"
),
}
)
.run_function(download_model, volumes={MODELS_DIR: volume})
)
@app.cls(
gpu="A10G",
timeout=5 * MINUTES,
scaledown_window=5 * MINUTES,
allow_concurrent_inputs=6,
image=image,
volumes={MODELS_DIR: volume},
)
class Transcriber:
@modal.enter()
def enter(self):
import faster_whisper
import torch
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
self.model = faster_whisper.WhisperModel(
MODEL_NAME,
device=self.device,
compute_type=MODEL_COMPUTE_TYPE,
num_workers=MODEL_NUM_WORKERS,
download_root=MODELS_DIR,
local_files_only=True,
)
@modal.method()
def transcribe_segment(
self,
audio_data: str,
audio_suffix: str,
language: str,
):
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
fp.write(audio_data)
with self.lock:
segments, _ = self.model.transcribe(
fp.name,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(segment.text for segment in segments)
words = [
{"word": word.word, "start": word.start, "end": word.end}
for segment in segments
for word in segment.words
]
return {"text": text, "words": words}
@app.function(
scaledown_window=60,
timeout=60,
allow_concurrent_inputs=40,
secrets=[
modal.Secret.from_name("reflector-gpu"),
],
volumes={MODELS_DIR: volume},
)
@modal.asgi_app()
def web():
from fastapi import Body, Depends, FastAPI, HTTPException, UploadFile, status
from fastapi.security import OAuth2PasswordBearer
from typing_extensions import Annotated
transcriber = Transcriber()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
supported_file_types = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
class TranscriptResponse(BaseModel):
result: dict
@app.post("/v1/audio/transcriptions", dependencies=[Depends(apikey_auth)])
def transcribe(
file: UploadFile,
model: str = "whisper-1",
language: Annotated[str, Body(...)] = "en",
) -> TranscriptResponse:
audio_data = file.file.read()
audio_suffix = file.filename.split(".")[-1]
assert audio_suffix in supported_file_types
func = transcriber.transcribe_segment.spawn(
audio_data=audio_data,
audio_suffix=audio_suffix,
language=language,
)
result = func.get()
return result
return app

View File

@@ -0,0 +1,171 @@
# # Run an OpenAI-Compatible vLLM Server
import modal
MODELS_DIR = "/llamas"
MODEL_NAME = "NousResearch/Hermes-3-Llama-3.1-8B"
N_GPU = 1
def download_llm():
from huggingface_hub import snapshot_download
print("Downloading LLM model")
snapshot_download(
MODEL_NAME,
local_dir=f"{MODELS_DIR}/{MODEL_NAME}",
ignore_patterns=[
"*.pt",
"*.bin",
"*.pth",
"original/*",
], # Ensure safetensors
)
print("LLM model downloaded")
def move_cache():
from transformers.utils import move_cache as transformers_move_cache
transformers_move_cache()
vllm_image = (
modal.Image.debian_slim(python_version="3.10")
.pip_install("vllm==0.5.3post1")
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.pip_install(
# "accelerate==0.34.2",
"einops==0.8.0",
"hf-transfer~=0.1",
)
.run_function(download_llm)
.run_function(move_cache)
.pip_install(
"bitsandbytes>=0.42.9",
)
)
app = modal.App("reflector-vllm-hermes3")
@app.function(
image=vllm_image,
gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
timeout=60 * 5,
scaledown_window=60 * 5,
allow_concurrent_inputs=100,
secrets=[
modal.Secret.from_name("reflector-gpu"),
],
)
@modal.asgi_app()
def serve():
import os
import fastapi
import vllm.entrypoints.openai.api_server as api_server
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.usage.usage_lib import UsageContext
TOKEN = os.environ["REFLECTOR_GPU_APIKEY"]
# create a fastAPI app that uses vLLM's OpenAI-compatible router
web_app = fastapi.FastAPI(
title=f"OpenAI-compatible {MODEL_NAME} server",
description="Run an OpenAI-compatible LLM server with vLLM on modal.com",
version="0.0.1",
docs_url="/docs",
)
# security: CORS middleware for external requests
http_bearer = fastapi.security.HTTPBearer(
scheme_name="Bearer Token",
description="See code for authentication details.",
)
web_app.add_middleware(
fastapi.middleware.cors.CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# security: inject dependency on authed routes
async def is_authenticated(api_key: str = fastapi.Security(http_bearer)):
if api_key.credentials != TOKEN:
raise fastapi.HTTPException(
status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
detail="Invalid authentication credentials",
)
return {"username": "authenticated_user"}
router = fastapi.APIRouter(dependencies=[fastapi.Depends(is_authenticated)])
# wrap vllm's router in auth router
router.include_router(api_server.router)
# add authed vllm to our fastAPI app
web_app.include_router(router)
engine_args = AsyncEngineArgs(
model=MODELS_DIR + "/" + MODEL_NAME,
tensor_parallel_size=N_GPU,
gpu_memory_utilization=0.90,
# max_model_len=8096,
enforce_eager=False, # capture the graph for faster inference, but slower cold starts (30s > 20s)
# --- 4 bits load
# quantization="bitsandbytes",
# load_format="bitsandbytes",
)
engine = AsyncLLMEngine.from_engine_args(
engine_args, usage_context=UsageContext.OPENAI_API_SERVER
)
model_config = get_model_config(engine)
request_logger = RequestLogger(max_log_len=2048)
api_server.openai_serving_chat = OpenAIServingChat(
engine,
model_config=model_config,
served_model_names=[MODEL_NAME],
chat_template=None,
response_role="assistant",
lora_modules=[],
prompt_adapters=[],
request_logger=request_logger,
)
api_server.openai_serving_completion = OpenAIServingCompletion(
engine,
model_config=model_config,
served_model_names=[MODEL_NAME],
lora_modules=[],
prompt_adapters=[],
request_logger=request_logger,
)
return web_app
def get_model_config(engine):
import asyncio
try: # adapted from vLLM source -- https://github.com/vllm-project/vllm/blob/507ef787d85dec24490069ffceacbd6b161f4f72/vllm/entrypoints/openai/api_server.py#L235C1-L247C1
event_loop = asyncio.get_running_loop()
except RuntimeError:
event_loop = None
if event_loop is not None and event_loop.is_running():
# If the current is instanced by Ray Serve,
# there is already a running event loop
model_config = event_loop.run_until_complete(engine.get_model_config())
else:
# When using single vLLM without engine_use_ray
model_config = asyncio.run(engine.get_model_config())
return model_config

16
server/migration.load Normal file
View File

@@ -0,0 +1,16 @@
LOAD DATABASE
FROM sqlite:///app/reflector.sqlite3
INTO pgsql://reflector:reflector@postgres:5432/reflector
WITH
include drop,
create tables,
create indexes,
reset sequences,
preserve index names,
prefetch rows = 10
SET
work_mem to '512MB',
maintenance_work_mem to '1024MB'
CAST
column transcript.duration to float using (lambda (val) (when val (format nil "~f" val)))
;

View File

@@ -1,3 +1 @@
Generic single-database configuration.
Both data migrations and schema migrations must be in migrations.
Generic single-database configuration.

View File

@@ -1,10 +1,9 @@
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config, pool
from reflector.db import metadata
from reflector.settings import settings
from sqlalchemy import engine_from_config, pool
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.

View File

@@ -1,36 +0,0 @@
"""Add webhook fields to rooms
Revision ID: 0194f65cd6d3
Revises: 5a8907fd1d78
Create Date: 2025-08-27 09:03:19.610995
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "0194f65cd6d3"
down_revision: Union[str, None] = "5a8907fd1d78"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.add_column(sa.Column("webhook_url", sa.String(), nullable=True))
batch_op.add_column(sa.Column("webhook_secret", sa.String(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.drop_column("webhook_secret")
batch_op.drop_column("webhook_url")
# ### end Alembic commands ###

View File

@@ -0,0 +1,26 @@
"""add_room_background_information
Revision ID: 082fa608201c
Revises: b7df9609542c
Create Date: 2025-07-29 01:41:37.912195
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '082fa608201c'
down_revision: Union[str, None] = 'b7df9609542c'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.add_column('room', sa.Column('background_information', sa.Text(), nullable=True))
def downgrade() -> None:
op.drop_column('room', 'background_information')

View File

@@ -8,6 +8,7 @@ Create Date: 2024-09-24 16:12:56.944133
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -1,64 +0,0 @@
"""add_long_summary_to_search_vector
Revision ID: 0ab2d7ffaa16
Revises: b1c33bd09963
Create Date: 2025-08-15 13:27:52.680211
"""
from typing import Sequence, Union
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "0ab2d7ffaa16"
down_revision: Union[str, None] = "b1c33bd09963"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Drop the existing search vector column and index
op.drop_index("idx_transcript_search_vector_en", table_name="transcript")
op.drop_column("transcript", "search_vector_en")
# Recreate the search vector column with long_summary included
op.execute("""
ALTER TABLE transcript ADD COLUMN search_vector_en tsvector
GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(long_summary, '')), 'B') ||
setweight(to_tsvector('english', coalesce(webvtt, '')), 'C')
) STORED
""")
# Recreate the GIN index for the search vector
op.create_index(
"idx_transcript_search_vector_en",
"transcript",
["search_vector_en"],
postgresql_using="gin",
)
def downgrade() -> None:
# Drop the updated search vector column and index
op.drop_index("idx_transcript_search_vector_en", table_name="transcript")
op.drop_column("transcript", "search_vector_en")
# Recreate the original search vector column without long_summary
op.execute("""
ALTER TABLE transcript ADD COLUMN search_vector_en tsvector
GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(webvtt, '')), 'B')
) STORED
""")
# Recreate the GIN index for the search vector
op.create_index(
"idx_transcript_search_vector_en",
"transcript",
["search_vector_en"],
postgresql_using="gin",
)

View File

@@ -1,25 +0,0 @@
"""add_webvtt_field_to_transcript
Revision ID: 0bc0f3ff0111
Revises: b7df9609542c
Create Date: 2025-08-05 19:36:41.740957
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0bc0f3ff0111"
down_revision: Union[str, None] = "b7df9609542c"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.add_column("transcript", sa.Column("webvtt", sa.Text(), nullable=True))
def downgrade() -> None:
op.drop_column("transcript", "webvtt")

View File

@@ -1,36 +0,0 @@
"""remove user_id from meeting table
Revision ID: 0ce521cda2ee
Revises: 6dec9fb5b46c
Create Date: 2025-09-10 12:40:55.688899
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "0ce521cda2ee"
down_revision: Union[str, None] = "6dec9fb5b46c"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.drop_column("user_id")
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.add_column(
sa.Column("user_id", sa.VARCHAR(), autoincrement=False, nullable=True)
)
# ### end Alembic commands ###

View File

@@ -5,11 +5,11 @@ Revises: f819277e5169
Create Date: 2023-11-07 11:12:21.614198
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "0fea6d96b096"

View File

@@ -1,46 +0,0 @@
"""add_full_text_search
Revision ID: 116b2f287eab
Revises: 0bc0f3ff0111
Create Date: 2025-08-07 11:27:38.473517
"""
from typing import Sequence, Union
from alembic import op
revision: str = "116b2f287eab"
down_revision: Union[str, None] = "0bc0f3ff0111"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
conn = op.get_bind()
if conn.dialect.name != "postgresql":
return
op.execute("""
ALTER TABLE transcript ADD COLUMN search_vector_en tsvector
GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(webvtt, '')), 'B')
) STORED
""")
op.create_index(
"idx_transcript_search_vector_en",
"transcript",
["search_vector_en"],
postgresql_using="gin",
)
def downgrade() -> None:
conn = op.get_bind()
if conn.dialect.name != "postgresql":
return
op.drop_index("idx_transcript_search_vector_en", table_name="transcript")
op.drop_column("transcript", "search_vector_en")

View File

@@ -5,26 +5,26 @@ Revises: 0fea6d96b096
Create Date: 2023-11-30 15:56:03.341466
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "125031f7cb78"
down_revision: Union[str, None] = "0fea6d96b096"
revision: str = '125031f7cb78'
down_revision: Union[str, None] = '0fea6d96b096'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("transcript", sa.Column("participants", sa.JSON(), nullable=True))
op.add_column('transcript', sa.Column('participants', sa.JSON(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("transcript", "participants")
op.drop_column('transcript', 'participants')
# ### end Alembic commands ###

View File

@@ -1,50 +0,0 @@
"""add_platform_support
Revision ID: 1e49625677e4
Revises: dc035ff72fd5
Create Date: 2025-10-08 13:17:29.943612
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "1e49625677e4"
down_revision: Union[str, None] = "dc035ff72fd5"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Add platform field with default 'whereby' for backward compatibility."""
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.add_column(
sa.Column(
"platform",
sa.String(),
nullable=False,
server_default="whereby",
)
)
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.add_column(
sa.Column(
"platform",
sa.String(),
nullable=False,
server_default="whereby",
)
)
def downgrade() -> None:
"""Remove platform field."""
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.drop_column("platform")
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.drop_column("platform")

View File

@@ -5,7 +5,6 @@ Revises: f819277e5169
Create Date: 2025-06-17 14:00:03.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
@@ -20,16 +19,16 @@ depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.create_table(
"meeting_consent",
sa.Column("id", sa.String(), nullable=False),
sa.Column("meeting_id", sa.String(), nullable=False),
sa.Column("user_id", sa.String(), nullable=True),
sa.Column("consent_given", sa.Boolean(), nullable=False),
sa.Column("consent_timestamp", sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(["meeting_id"], ["meeting.id"]),
'meeting_consent',
sa.Column('id', sa.String(), nullable=False),
sa.Column('meeting_id', sa.String(), nullable=False),
sa.Column('user_id', sa.String(), nullable=True),
sa.Column('consent_given', sa.Boolean(), nullable=False),
sa.Column('consent_timestamp', sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint('id'),
sa.ForeignKeyConstraint(['meeting_id'], ['meeting.id']),
)
def downgrade() -> None:
op.drop_table("meeting_consent")
op.drop_table('meeting_consent')

View File

@@ -5,7 +5,6 @@ Revises: 20250617140003
Create Date: 2025-06-18 14:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
@@ -23,4 +22,4 @@ def upgrade() -> None:
def downgrade() -> None:
op.drop_column("transcript", "audio_deleted")
op.drop_column("transcript", "audio_deleted")

View File

@@ -1,32 +0,0 @@
"""clean up orphaned room_id references in meeting table
Revision ID: 2ae3db106d4e
Revises: def1b5867d4c
Create Date: 2025-09-11 10:35:15.759967
"""
from typing import Sequence, Union
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "2ae3db106d4e"
down_revision: Union[str, None] = "def1b5867d4c"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Set room_id to NULL for meetings that reference non-existent rooms
op.execute("""
UPDATE meeting
SET room_id = NULL
WHERE room_id IS NOT NULL
AND room_id NOT IN (SELECT id FROM room WHERE id IS NOT NULL)
""")
def downgrade() -> None:
# Cannot restore orphaned references - no operation needed
pass

View File

@@ -5,40 +5,36 @@ Revises: ccd68dc784ff
Create Date: 2025-07-15 16:53:40.397394
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "2cf0b60a9d34"
down_revision: Union[str, None] = "ccd68dc784ff"
revision: str = '2cf0b60a9d34'
down_revision: Union[str, None] = 'ccd68dc784ff'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.alter_column(
"duration",
existing_type=sa.INTEGER(),
type_=sa.Float(),
existing_nullable=True,
)
with op.batch_alter_table('transcript', schema=None) as batch_op:
batch_op.alter_column('duration',
existing_type=sa.INTEGER(),
type_=sa.Float(),
existing_nullable=True)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.alter_column(
"duration",
existing_type=sa.Float(),
type_=sa.INTEGER(),
existing_nullable=True,
)
with op.batch_alter_table('transcript', schema=None) as batch_op:
batch_op.alter_column('duration',
existing_type=sa.Float(),
type_=sa.INTEGER(),
existing_nullable=True)
# ### end Alembic commands ###

View File

@@ -5,17 +5,17 @@ Revises: 9920ecfe2735
Create Date: 2023-11-02 19:53:09.116240
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table, column
from sqlalchemy import select
from sqlalchemy.sql import column, table
# revision identifiers, used by Alembic.
revision: str = "38a927dcb099"
down_revision: Union[str, None] = "9920ecfe2735"
revision: str = '38a927dcb099'
down_revision: Union[str, None] = '9920ecfe2735'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

View File

@@ -5,13 +5,13 @@ Revises: 38a927dcb099
Create Date: 2023-11-10 18:12:17.886522
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table, column
from sqlalchemy import select
from sqlalchemy.sql import column, table
# revision identifiers, used by Alembic.
revision: str = "4814901632bc"
@@ -24,10 +24,8 @@ def upgrade() -> None:
# for all the transcripts, calculate the duration from the mp3
# and update the duration column
from pathlib import Path
import av
from reflector.settings import settings
import av
bind = op.get_bind()
transcript = table(

View File

@@ -5,11 +5,14 @@ Revises:
Create Date: 2023-08-29 10:54:45.142974
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "543ed284d69a"
revision: str = '543ed284d69a'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

View File

@@ -1,50 +0,0 @@
"""add cascade delete to meeting consent foreign key
Revision ID: 5a8907fd1d78
Revises: 0ab2d7ffaa16
Create Date: 2025-08-26 17:26:50.945491
"""
from typing import Sequence, Union
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "5a8907fd1d78"
down_revision: Union[str, None] = "0ab2d7ffaa16"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
batch_op.drop_constraint(
batch_op.f("meeting_consent_meeting_id_fkey"), type_="foreignkey"
)
batch_op.create_foreign_key(
batch_op.f("meeting_consent_meeting_id_fkey"),
"meeting",
["meeting_id"],
["id"],
ondelete="CASCADE",
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
batch_op.drop_constraint(
batch_op.f("meeting_consent_meeting_id_fkey"), type_="foreignkey"
)
batch_op.create_foreign_key(
batch_op.f("meeting_consent_meeting_id_fkey"),
"meeting",
["meeting_id"],
["id"],
)
# ### end Alembic commands ###

View File

@@ -1,53 +0,0 @@
"""remove_one_active_meeting_per_room_constraint
Revision ID: 6025e9b2bef2
Revises: 2ae3db106d4e
Create Date: 2025-08-18 18:45:44.418392
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "6025e9b2bef2"
down_revision: Union[str, None] = "2ae3db106d4e"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Remove the unique constraint that prevents multiple active meetings per room
# This is needed to support calendar integration with overlapping meetings
# Check if index exists before trying to drop it
from alembic import context
if context.get_context().dialect.name == "postgresql":
conn = op.get_bind()
result = conn.execute(
sa.text(
"SELECT 1 FROM pg_indexes WHERE indexname = 'idx_one_active_meeting_per_room'"
)
)
if result.fetchone():
op.drop_index("idx_one_active_meeting_per_room", table_name="meeting")
else:
# For SQLite, just try to drop it
try:
op.drop_index("idx_one_active_meeting_per_room", table_name="meeting")
except:
pass
def downgrade() -> None:
# Restore the unique constraint
op.create_index(
"idx_one_active_meeting_per_room",
"meeting",
["room_id"],
unique=True,
postgresql_where=sa.text("is_active = true"),
sqlite_where=sa.text("is_active = 1"),
)

View File

@@ -1,28 +0,0 @@
"""webhook url and secret null by default
Revision ID: 61882a919591
Revises: 0194f65cd6d3
Create Date: 2025-08-29 11:46:36.738091
"""
from typing import Sequence, Union
# revision identifiers, used by Alembic.
revision: str = "61882a919591"
down_revision: Union[str, None] = "0194f65cd6d3"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###

View File

@@ -8,8 +8,9 @@ Create Date: 2025-06-27 09:04:21.006823
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "62dea3db63a5"
@@ -32,7 +33,7 @@ def upgrade() -> None:
sa.Column("user_id", sa.String(), nullable=True),
sa.Column("room_id", sa.String(), nullable=True),
sa.Column(
"is_locked", sa.Boolean(), server_default=sa.text("false"), nullable=False
"is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False
),
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
sa.Column(
@@ -53,15 +54,12 @@ def upgrade() -> None:
sa.Column("user_id", sa.String(), nullable=False),
sa.Column("created_at", sa.DateTime(), nullable=False),
sa.Column(
"zulip_auto_post",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
"zulip_auto_post", sa.Boolean(), server_default=sa.text("0"), nullable=False
),
sa.Column("zulip_stream", sa.String(), nullable=True),
sa.Column("zulip_topic", sa.String(), nullable=True),
sa.Column(
"is_locked", sa.Boolean(), server_default=sa.text("false"), nullable=False
"is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False
),
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
sa.Column(

View File

@@ -1,35 +0,0 @@
"""make meeting room_id required and add foreign key
Revision ID: 6dec9fb5b46c
Revises: 61882a919591
Create Date: 2025-09-10 10:47:06.006819
"""
from typing import Sequence, Union
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "6dec9fb5b46c"
down_revision: Union[str, None] = "61882a919591"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.create_foreign_key(
None, "room", ["room_id"], ["id"], ondelete="CASCADE"
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.drop_constraint("meeting_room_id_fkey", type_="foreignkey")
# ### end Alembic commands ###

View File

@@ -20,14 +20,11 @@ depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
sourcekind_enum = sa.Enum("room", "live", "file", name="sourcekind")
sourcekind_enum.create(op.get_bind())
op.add_column(
"transcript",
sa.Column(
"source_kind",
sourcekind_enum,
sa.Enum("ROOM", "LIVE", "FILE", name="sourcekind"),
nullable=True,
),
)
@@ -46,8 +43,6 @@ def upgrade() -> None:
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("transcript", "source_kind")
sourcekind_enum = sa.Enum(name="sourcekind")
sourcekind_enum.drop(op.get_bind())
# ### end Alembic commands ###

View File

@@ -5,28 +5,26 @@ Revises: 62dea3db63a5
Create Date: 2024-09-06 14:02:06.649665
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "764ce6db4388"
down_revision: Union[str, None] = "62dea3db63a5"
revision: str = '764ce6db4388'
down_revision: Union[str, None] = '62dea3db63a5'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"transcript", sa.Column("zulip_message_id", sa.Integer(), nullable=True)
)
op.add_column('transcript', sa.Column('zulip_message_id', sa.Integer(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("transcript", "zulip_message_id")
op.drop_column('transcript', 'zulip_message_id')
# ### end Alembic commands ###

View File

@@ -1,106 +0,0 @@
"""populate_webvtt_from_topics
Revision ID: 8120ebc75366
Revises: 116b2f287eab
Create Date: 2025-08-11 19:11:01.316947
"""
import json
from typing import Sequence, Union
from alembic import op
from sqlalchemy import text
# revision identifiers, used by Alembic.
revision: str = "8120ebc75366"
down_revision: Union[str, None] = "116b2f287eab"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def topics_to_webvtt(topics):
"""Convert topics list to WebVTT format string."""
if not topics:
return None
lines = ["WEBVTT", ""]
for topic in topics:
start_time = format_timestamp(topic.get("start"))
end_time = format_timestamp(topic.get("end"))
text = topic.get("text", "").strip()
if start_time and end_time and text:
lines.append(f"{start_time} --> {end_time}")
lines.append(text)
lines.append("")
return "\n".join(lines).strip()
def format_timestamp(seconds):
"""Format seconds to WebVTT timestamp format (HH:MM:SS.mmm)."""
if seconds is None:
return None
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = seconds % 60
return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
def upgrade() -> None:
"""Populate WebVTT field for all transcripts with topics."""
# Get connection
connection = op.get_bind()
# Query all transcripts with topics
result = connection.execute(
text("SELECT id, topics FROM transcript WHERE topics IS NOT NULL")
)
rows = result.fetchall()
print(f"Found {len(rows)} transcripts with topics")
updated_count = 0
error_count = 0
for row in rows:
transcript_id = row[0]
topics_data = row[1]
if not topics_data:
continue
try:
# Parse JSON if it's a string
if isinstance(topics_data, str):
topics_data = json.loads(topics_data)
# Convert topics to WebVTT format
webvtt_content = topics_to_webvtt(topics_data)
if webvtt_content:
# Update the webvtt field
connection.execute(
text("UPDATE transcript SET webvtt = :webvtt WHERE id = :id"),
{"webvtt": webvtt_content, "id": transcript_id},
)
updated_count += 1
print(f"✓ Updated transcript {transcript_id}")
except Exception as e:
error_count += 1
print(f"✗ Error updating transcript {transcript_id}: {e}")
print(f"\nMigration complete!")
print(f" Updated: {updated_count}")
print(f" Errors: {error_count}")
def downgrade() -> None:
"""Clear WebVTT field for all transcripts."""
op.execute(text("UPDATE transcript SET webvtt = NULL"))

View File

@@ -9,6 +9,8 @@ Create Date: 2025-07-15 19:30:19.876332
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "88d292678ba2"
@@ -19,7 +21,7 @@ depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
import json
import re
from sqlalchemy import text
# Get database connection
@@ -56,9 +58,7 @@ def upgrade() -> None:
fixed_events = json.dumps(jevents)
assert "NaN" not in fixed_events
except (json.JSONDecodeError, AssertionError) as e:
print(
f"Warning: Invalid JSON for transcript {transcript_id}, skipping: {e}"
)
print(f"Warning: Invalid JSON for transcript {transcript_id}, skipping: {e}")
continue
# Update the record with fixed JSON

View File

@@ -5,13 +5,13 @@ Revises: 99365b0cd87b
Create Date: 2023-11-02 18:55:17.019498
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table, column
from sqlalchemy import select
from sqlalchemy.sql import column, table
# revision identifiers, used by Alembic.
revision: str = "9920ecfe2735"

View File

@@ -8,8 +8,8 @@ Create Date: 2023-09-01 20:19:47.216334
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "99365b0cd87b"
@@ -22,7 +22,7 @@ def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.execute(
"UPDATE transcript SET events = "
'REPLACE(events::text, \'"event": "SUMMARY"\', \'"event": "LONG_SUMMARY"\')::json;'
'REPLACE(events, \'"event": "SUMMARY"\', \'"event": "LONG_SUMMARY"\');'
)
op.alter_column("transcript", "summary", new_column_name="long_summary")
op.add_column("transcript", sa.Column("title", sa.String(), nullable=True))
@@ -34,7 +34,7 @@ def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.execute(
"UPDATE transcript SET events = "
'REPLACE(events::text, \'"event": "LONG_SUMMARY"\', \'"event": "SUMMARY"\')::json;'
'REPLACE(events, \'"event": "LONG_SUMMARY"\', \'"event": "SUMMARY"\');'
)
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.alter_column("long_summary", nullable=True, new_column_name="summary")

View File

@@ -1,121 +0,0 @@
"""datetime timezone
Revision ID: 9f5c78d352d6
Revises: 8120ebc75366
Create Date: 2025-08-13 19:18:27.113593
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = "9f5c78d352d6"
down_revision: Union[str, None] = "8120ebc75366"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.alter_column(
"start_date",
existing_type=postgresql.TIMESTAMP(),
type_=sa.DateTime(timezone=True),
existing_nullable=True,
)
batch_op.alter_column(
"end_date",
existing_type=postgresql.TIMESTAMP(),
type_=sa.DateTime(timezone=True),
existing_nullable=True,
)
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
batch_op.alter_column(
"consent_timestamp",
existing_type=postgresql.TIMESTAMP(),
type_=sa.DateTime(timezone=True),
existing_nullable=False,
)
with op.batch_alter_table("recording", schema=None) as batch_op:
batch_op.alter_column(
"recorded_at",
existing_type=postgresql.TIMESTAMP(),
type_=sa.DateTime(timezone=True),
existing_nullable=False,
)
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.alter_column(
"created_at",
existing_type=postgresql.TIMESTAMP(),
type_=sa.DateTime(timezone=True),
existing_nullable=False,
)
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.alter_column(
"created_at",
existing_type=postgresql.TIMESTAMP(),
type_=sa.DateTime(timezone=True),
existing_nullable=True,
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.alter_column(
"created_at",
existing_type=sa.DateTime(timezone=True),
type_=postgresql.TIMESTAMP(),
existing_nullable=True,
)
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.alter_column(
"created_at",
existing_type=sa.DateTime(timezone=True),
type_=postgresql.TIMESTAMP(),
existing_nullable=False,
)
with op.batch_alter_table("recording", schema=None) as batch_op:
batch_op.alter_column(
"recorded_at",
existing_type=sa.DateTime(timezone=True),
type_=postgresql.TIMESTAMP(),
existing_nullable=False,
)
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
batch_op.alter_column(
"consent_timestamp",
existing_type=sa.DateTime(timezone=True),
type_=postgresql.TIMESTAMP(),
existing_nullable=False,
)
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.alter_column(
"end_date",
existing_type=sa.DateTime(timezone=True),
type_=postgresql.TIMESTAMP(),
existing_nullable=True,
)
batch_op.alter_column(
"start_date",
existing_type=sa.DateTime(timezone=True),
type_=postgresql.TIMESTAMP(),
existing_nullable=True,
)
# ### end Alembic commands ###

View File

@@ -25,7 +25,7 @@ def upgrade() -> None:
sa.Column(
"is_shared",
sa.Boolean(),
server_default=sa.text("false"),
server_default=sa.text("0"),
nullable=False,
),
)

View File

@@ -9,6 +9,8 @@ Create Date: 2025-07-15 20:09:40.253018
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision: str = "a9c9c229ee36"

View File

@@ -5,37 +5,30 @@ Revises: 6ea59639f30e
Create Date: 2025-01-28 10:06:50.446233
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "b0e5f7876032"
down_revision: Union[str, None] = "6ea59639f30e"
revision: str = 'b0e5f7876032'
down_revision: Union[str, None] = '6ea59639f30e'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.add_column(
sa.Column(
"is_active",
sa.Boolean(),
server_default=sa.text("true"),
nullable=False,
)
)
with op.batch_alter_table('meeting', schema=None) as batch_op:
batch_op.add_column(sa.Column('is_active', sa.Boolean(), server_default=sa.text('1'), nullable=False))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting", schema=None) as batch_op:
batch_op.drop_column("is_active")
with op.batch_alter_table('meeting', schema=None) as batch_op:
batch_op.drop_column('is_active')
# ### end Alembic commands ###

View File

@@ -1,41 +0,0 @@
"""add_search_optimization_indexes
Revision ID: b1c33bd09963
Revises: 9f5c78d352d6
Create Date: 2025-08-14 17:26:02.117408
"""
from typing import Sequence, Union
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "b1c33bd09963"
down_revision: Union[str, None] = "9f5c78d352d6"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Add indexes for actual search filtering patterns used in frontend
# Based on /browse page filters: room_id and source_kind
# Index for room_id + created_at (for room-specific searches with date ordering)
op.create_index(
"idx_transcript_room_id_created_at",
"transcript",
["room_id", "created_at"],
if_not_exists=True,
)
# Index for source_kind alone (actively used filter in frontend)
op.create_index(
"idx_transcript_source_kind", "transcript", ["source_kind"], if_not_exists=True
)
def downgrade() -> None:
# Remove the indexes in reverse order
op.drop_index("idx_transcript_source_kind", "transcript", if_exists=True)
op.drop_index("idx_transcript_room_id_created_at", "transcript", if_exists=True)

View File

@@ -8,8 +8,9 @@ Create Date: 2025-06-27 08:57:16.306940
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "b3df9681cae9"

View File

@@ -8,8 +8,9 @@ Create Date: 2024-10-11 13:45:28.914902
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "b469348df210"

View File

@@ -5,15 +5,15 @@ Revises: d7fbb74b673b
Create Date: 2025-07-25 16:27:06.959868
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "b7df9609542c"
down_revision: Union[str, None] = "d7fbb74b673b"
revision: str = 'b7df9609542c'
down_revision: Union[str, None] = 'd7fbb74b673b'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
@@ -22,14 +22,14 @@ def upgrade() -> None:
# Create a partial unique index that ensures only one active meeting per room
# This works for both PostgreSQL and SQLite
op.create_index(
"idx_one_active_meeting_per_room",
"meeting",
["room_id"],
'idx_one_active_meeting_per_room',
'meeting',
['room_id'],
unique=True,
postgresql_where=sa.text("is_active = true"),
sqlite_where=sa.text("is_active = 1"),
postgresql_where=sa.text('is_active = true'),
sqlite_where=sa.text('is_active = 1')
)
def downgrade() -> None:
op.drop_index("idx_one_active_meeting_per_room", table_name="meeting")
op.drop_index('idx_one_active_meeting_per_room', table_name='meeting')

View File

@@ -5,31 +5,25 @@ Revises: 125031f7cb78
Create Date: 2023-12-13 15:37:51.303970
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "b9348748bbbc"
down_revision: Union[str, None] = "125031f7cb78"
revision: str = 'b9348748bbbc'
down_revision: Union[str, None] = '125031f7cb78'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"transcript",
sa.Column(
"reviewed", sa.Boolean(), server_default=sa.text("false"), nullable=False
),
)
op.add_column('transcript', sa.Column('reviewed', sa.Boolean(), server_default=sa.text('0'), nullable=False))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("transcript", "reviewed")
op.drop_column('transcript', 'reviewed')
# ### end Alembic commands ###

View File

@@ -9,6 +9,8 @@ Create Date: 2025-07-15 11:48:42.854741
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "ccd68dc784ff"

Some files were not shown because too many files have changed in this diff Show More