Compare commits

..

1 Commits

Author SHA1 Message Date
Igor Loskutov
d428f9fa93 file upload is file 2025-08-22 11:48:21 -04:00
141 changed files with 7629 additions and 10749 deletions

View File

@@ -2,8 +2,6 @@ name: Test Database Migrations
on:
push:
branches:
- main
paths:
- "server/migrations/**"
- "server/reflector/db/**"
@@ -19,9 +17,6 @@ on:
jobs:
test-migrations:
runs-on: ubuntu-latest
concurrency:
group: db-ubuntu-latest-${{ github.ref }}
cancel-in-progress: true
services:
postgres:
image: postgres:17

View File

@@ -1,45 +0,0 @@
name: Test Next Server
on:
pull_request:
paths:
- "www/**"
push:
branches:
- main
paths:
- "www/**"
jobs:
test-next-server:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./www
steps:
- uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '20'
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
version: 8
- name: Setup Node.js cache
uses: actions/setup-node@v4
with:
node-version: '20'
cache: 'pnpm'
cache-dependency-path: './www/pnpm-lock.yaml'
- name: Install dependencies
run: pnpm install
- name: Run tests
run: pnpm test

View File

@@ -5,17 +5,12 @@ on:
paths:
- "server/**"
push:
branches:
- main
paths:
- "server/**"
jobs:
pytest:
runs-on: ubuntu-latest
concurrency:
group: pytest-${{ github.ref }}
cancel-in-progress: true
services:
redis:
image: redis:6
@@ -35,9 +30,6 @@ jobs:
docker-amd64:
runs-on: linux-amd64
concurrency:
group: docker-amd64-${{ github.ref }}
cancel-in-progress: true
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
@@ -53,9 +45,6 @@ jobs:
docker-arm64:
runs-on: linux-arm64
concurrency:
group: docker-arm64-${{ github.ref }}
cancel-in-progress: true
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx

5
.gitignore vendored
View File

@@ -14,7 +14,4 @@ data/
www/REFACTOR.md
www/reload-frontend
server/test.sqlite
CLAUDE.local.md
www/.env.development
www/.env.production
.playwright-mcp
CLAUDE.local.md

View File

@@ -1 +0,0 @@
b9d891d3424f371642cb032ecfd0e2564470a72c:server/tests/test_transcripts_recording_deletion.py:generic-api-key:15

View File

@@ -27,8 +27,3 @@ repos:
files: ^server/
- id: ruff-format
files: ^server/
- repo: https://github.com/gitleaks/gitleaks
rev: v8.28.0
hooks:
- id: gitleaks

View File

@@ -1,57 +1,5 @@
# Changelog
## [0.9.0](https://github.com/Monadical-SAS/reflector/compare/v0.8.2...v0.9.0) (2025-09-06)
### Features
* frontend openapi react query ([#606](https://github.com/Monadical-SAS/reflector/issues/606)) ([c4d2825](https://github.com/Monadical-SAS/reflector/commit/c4d2825c81f81ad8835629fbf6ea8c7383f8c31b))
### Bug Fixes
* align whisper transcriber api with parakeet ([#602](https://github.com/Monadical-SAS/reflector/issues/602)) ([0663700](https://github.com/Monadical-SAS/reflector/commit/0663700a615a4af69a03c96c410f049e23ec9443))
* kv use tls explicit ([#610](https://github.com/Monadical-SAS/reflector/issues/610)) ([08d88ec](https://github.com/Monadical-SAS/reflector/commit/08d88ec349f38b0d13e0fa4cb73486c8dfd31836))
* source kind for file processing ([#601](https://github.com/Monadical-SAS/reflector/issues/601)) ([dc82f8b](https://github.com/Monadical-SAS/reflector/commit/dc82f8bb3bdf3ab3d4088e592a30fd63907319e1))
* token refresh locking ([#613](https://github.com/Monadical-SAS/reflector/issues/613)) ([7f5a4c9](https://github.com/Monadical-SAS/reflector/commit/7f5a4c9ddc7fd098860c8bdda2ca3b57f63ded2f))
## [0.8.2](https://github.com/Monadical-SAS/reflector/compare/v0.8.1...v0.8.2) (2025-08-29)
### Bug Fixes
* search-logspam ([#593](https://github.com/Monadical-SAS/reflector/issues/593)) ([695d1a9](https://github.com/Monadical-SAS/reflector/commit/695d1a957d4cd862753049f9beed88836cabd5ab))
## [0.8.1](https://github.com/Monadical-SAS/reflector/compare/v0.8.0...v0.8.1) (2025-08-29)
### Bug Fixes
* make webhook secret/url allowing null ([#590](https://github.com/Monadical-SAS/reflector/issues/590)) ([84a3812](https://github.com/Monadical-SAS/reflector/commit/84a381220bc606231d08d6f71d4babc818fa3c75))
## [0.8.0](https://github.com/Monadical-SAS/reflector/compare/v0.7.3...v0.8.0) (2025-08-29)
### Features
* **cleanup:** add automatic data retention for public instances ([#574](https://github.com/Monadical-SAS/reflector/issues/574)) ([6f0c7c1](https://github.com/Monadical-SAS/reflector/commit/6f0c7c1a5e751713366886c8e764c2009e12ba72))
* **rooms:** add webhook for transcript completion ([#578](https://github.com/Monadical-SAS/reflector/issues/578)) ([88ed7cf](https://github.com/Monadical-SAS/reflector/commit/88ed7cfa7804794b9b54cad4c3facc8a98cf85fd))
### Bug Fixes
* file pipeline status reporting and websocket updates ([#589](https://github.com/Monadical-SAS/reflector/issues/589)) ([9dfd769](https://github.com/Monadical-SAS/reflector/commit/9dfd76996f851cc52be54feea078adbc0816dc57))
* Igor/evaluation ([#575](https://github.com/Monadical-SAS/reflector/issues/575)) ([124ce03](https://github.com/Monadical-SAS/reflector/commit/124ce03bf86044c18313d27228a25da4bc20c9c5))
* optimize parakeet transcription batching algorithm ([#577](https://github.com/Monadical-SAS/reflector/issues/577)) ([7030e0f](https://github.com/Monadical-SAS/reflector/commit/7030e0f23649a8cf6c1eb6d5889684a41ce849ec))
## [0.7.3](https://github.com/Monadical-SAS/reflector/compare/v0.7.2...v0.7.3) (2025-08-22)
### Bug Fixes
* cleaned repo, and get git-leaks clean ([359280d](https://github.com/Monadical-SAS/reflector/commit/359280dd340433ba4402ed69034094884c825e67))
* restore previous behavior on live pipeline + audio downscaler ([#561](https://github.com/Monadical-SAS/reflector/issues/561)) ([9265d20](https://github.com/Monadical-SAS/reflector/commit/9265d201b590d23c628c5f19251b70f473859043))
## [0.7.2](https://github.com/Monadical-SAS/reflector/compare/v0.7.1...v0.7.2) (2025-08-21)

View File

@@ -1,60 +1,43 @@
<div align="center">
<img width="100" alt="image" src="https://github.com/user-attachments/assets/66fb367b-2c89-4516-9912-f47ac59c6a7f"/>
# Reflector
Reflector is an AI-powered audio transcription and meeting analysis platform that provides real-time transcription, speaker diarization, translation and summarization for audio content and live meetings. It works 100% with local models (whisper/parakeet, pyannote, seamless-m4t, and your local llm like phi-4).
Reflector Audio Management and Analysis is a cutting-edge web application under development by Monadical. It utilizes AI to record meetings, providing a permanent record with transcripts, translations, and automated summaries.
[![Tests](https://github.com/monadical-sas/reflector/actions/workflows/test_server.yml/badge.svg?branch=main&event=push)](https://github.com/monadical-sas/reflector/actions/workflows/test_server.yml)
[![Tests](https://github.com/monadical-sas/reflector/actions/workflows/pytests.yml/badge.svg?branch=main&event=push)](https://github.com/monadical-sas/reflector/actions/workflows/pytests.yml)
[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://opensource.org/licenses/MIT)
</div>
</div>
## Screenshots
<table>
<tr>
<td>
<a href="https://github.com/user-attachments/assets/21f5597c-2930-4899-a154-f7bd61a59e97">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/21f5597c-2930-4899-a154-f7bd61a59e97" />
<a href="https://github.com/user-attachments/assets/3a976930-56c1-47ef-8c76-55d3864309e3">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/3a976930-56c1-47ef-8c76-55d3864309e3" />
</a>
</td>
<td>
<a href="https://github.com/user-attachments/assets/f6b9399a-5e51-4bae-b807-59128d0a940c">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/f6b9399a-5e51-4bae-b807-59128d0a940c" />
<a href="https://github.com/user-attachments/assets/bfe3bde3-08af-4426-a9a1-11ad5cd63b33">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/bfe3bde3-08af-4426-a9a1-11ad5cd63b33" />
</a>
</td>
<td>
<a href="https://github.com/user-attachments/assets/a42ce460-c1fd-4489-a995-270516193897">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/a42ce460-c1fd-4489-a995-270516193897" />
</a>
</td>
<td>
<a href="https://github.com/user-attachments/assets/21929f6d-c309-42fe-9c11-f1299e50fbd4">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/21929f6d-c309-42fe-9c11-f1299e50fbd4" />
<a href="https://github.com/user-attachments/assets/7b60c9d0-efe4-474f-a27b-ea13bd0fabdc">
<img width="700" alt="image" src="https://github.com/user-attachments/assets/7b60c9d0-efe4-474f-a27b-ea13bd0fabdc" />
</a>
</td>
</tr>
</table>
## What is Reflector?
Reflector is a web application that utilizes local models to process audio content, providing:
- **Real-time Transcription**: Convert speech to text using [Whisper](https://github.com/openai/whisper) (multi-language) or [Parakeet](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) (English) models
- **Speaker Diarization**: Identify and label different speakers using [Pyannote](https://github.com/pyannote/pyannote-audio) 3.1
- **Live Translation**: Translate audio content in real-time to many languages with [Facebook Seamless-M4T](https://github.com/facebookresearch/seamless_communication)
- **Topic Detection & Summarization**: Extract key topics and generate concise summaries using LLMs
- **Meeting Recording**: Create permanent records of meetings with searchable transcripts
Currently we provide [modal.com](https://modal.com/) gpu template to deploy.
## Background
The project architecture consists of three primary components:
- **Back-End**: Python server that offers an API and data persistence, found in `server/`.
- **Front-End**: NextJS React project hosted on Vercel, located in `www/`.
- **GPU implementation**: Providing services such as speech-to-text transcription, topic generation, automated summaries, and translations.
- **Back-End**: Python server that offers an API and data persistence, found in `server/`.
- **GPU implementation**: Providing services such as speech-to-text transcription, topic generation, automated summaries, and translations. Most reliable option is Modal deployment
It also uses authentik for authentication if activated.
It also uses authentik for authentication if activated, and Vercel for deployment and configuration of the front-end.
## Contribution Guidelines
@@ -89,8 +72,6 @@ Note: We currently do not have instructions for Windows users.
## Installation
*Note: we're working toward better installation, theses instructions are not accurate for now*
### Frontend
Start with `cd www`.

View File

@@ -6,7 +6,6 @@ services:
- 1250:1250
volumes:
- ./server/:/app/
- /app/.venv
env_file:
- ./server/.env
environment:
@@ -17,7 +16,6 @@ services:
context: server
volumes:
- ./server/:/app/
- /app/.venv
env_file:
- ./server/.env
environment:
@@ -28,7 +26,6 @@ services:
context: server
volumes:
- ./server/:/app/
- /app/.venv
env_file:
- ./server/.env
environment:

View File

@@ -1,95 +0,0 @@
# Data Retention and Cleanup
## Overview
For public instances of Reflector, a data retention policy is automatically enforced to delete anonymous user data after a configurable period (default: 7 days). This ensures compliance with privacy expectations and prevents unbounded storage growth.
## Configuration
### Environment Variables
- `PUBLIC_MODE` (bool): Must be set to `true` to enable automatic cleanup
- `PUBLIC_DATA_RETENTION_DAYS` (int): Number of days to retain anonymous data (default: 7)
### What Gets Deleted
When data reaches the retention period, the following items are automatically removed:
1. **Transcripts** from anonymous users (where `user_id` is NULL):
- Database records
- Local files (audio.wav, audio.mp3, audio.json waveform)
- Storage files (cloud storage if configured)
## Automatic Cleanup
### Celery Beat Schedule
When `PUBLIC_MODE=true`, a Celery beat task runs daily at 3 AM to clean up old data:
```python
# Automatically scheduled when PUBLIC_MODE=true
"cleanup_old_public_data": {
"task": "reflector.worker.cleanup.cleanup_old_public_data",
"schedule": crontab(hour=3, minute=0), # Daily at 3 AM
}
```
### Running the Worker
Ensure both Celery worker and beat scheduler are running:
```bash
# Start Celery worker
uv run celery -A reflector.worker.app worker --loglevel=info
# Start Celery beat scheduler (in another terminal)
uv run celery -A reflector.worker.app beat
```
## Manual Cleanup
For testing or manual intervention, use the cleanup tool:
```bash
# Delete data older than 7 days (default)
uv run python -m reflector.tools.cleanup_old_data
# Delete data older than 30 days
uv run python -m reflector.tools.cleanup_old_data --days 30
```
Note: The manual tool uses the same implementation as the Celery worker task to ensure consistency.
## Important Notes
1. **User Data Deletion**: Only anonymous data (where `user_id` is NULL) is deleted. Authenticated user data is preserved.
2. **Storage Cleanup**: The system properly cleans up both local files and cloud storage when configured.
3. **Error Handling**: If individual deletions fail, the cleanup continues and logs errors. Failed deletions are reported in the task output.
4. **Public Instance Only**: The automatic cleanup task only runs when `PUBLIC_MODE=true` to prevent accidental data loss in private deployments.
## Testing
Run the cleanup tests:
```bash
uv run pytest tests/test_cleanup.py -v
```
## Monitoring
Check Celery logs for cleanup task execution:
```bash
# Look for cleanup task logs
grep "cleanup_old_public_data" celery.log
grep "Starting cleanup of old public data" celery.log
```
Task statistics are logged after each run:
- Number of transcripts deleted
- Number of meetings deleted
- Number of orphaned recordings deleted
- Any errors encountered

View File

@@ -1,194 +0,0 @@
## Reflector GPU Transcription API (Specification)
This document defines the Reflector GPU transcription API that all implementations must adhere to. Current implementations include NVIDIA Parakeet (NeMo) and Whisper (faster-whisper), both deployed on Modal.com. The API surface and response shapes are OpenAI/Whisper-compatible, so clients can switch implementations by changing only the base URL.
### Base URL and Authentication
- Example base URLs (Modal web endpoints):
- Parakeet: `https://<account>--reflector-transcriber-parakeet-web.modal.run`
- Whisper: `https://<account>--reflector-transcriber-web.modal.run`
- All endpoints are served under `/v1` and require a Bearer token:
```
Authorization: Bearer <REFLECTOR_GPU_APIKEY>
```
Note: To switch implementations, deploy the desired variant and point `TRANSCRIPT_URL` to its base URL. The API is identical.
### Supported file types
`mp3, mp4, mpeg, mpga, m4a, wav, webm`
### Models and languages
- Parakeet (NVIDIA NeMo): default `nvidia/parakeet-tdt-0.6b-v2`
- Language support: only `en`. Other languages return HTTP 400.
- Whisper (faster-whisper): default `large-v2` (or deployment-specific)
- Language support: multilingual (per Whisper model capabilities).
Note: The `model` parameter is accepted by all implementations for interface parity. Some backends may treat it as informational.
### Endpoints
#### POST /v1/audio/transcriptions
Transcribe one or more uploaded audio files.
Request: multipart/form-data
- `file` (File) — optional. Single file to transcribe.
- `files` (File[]) — optional. One or more files to transcribe.
- `model` (string) — optional. Defaults to the implementation-specific model (see above).
- `language` (string) — optional, defaults to `en`.
- Parakeet: only `en` is accepted; other values return HTTP 400
- Whisper: model-dependent; typically multilingual
- `batch` (boolean) — optional, defaults to `false`.
Notes:
- Provide either `file` or `files`, not both. If neither is provided, HTTP 400.
- `batch` requires `files`; using `batch=true` without `files` returns HTTP 400.
- Response shape for multiple files is the same regardless of `batch`.
- Files sent to this endpoint are processed in a single pass (no VAD/chunking). This is intended for short clips (roughly ≤ 30s; depends on GPU memory/model). For longer audio, prefer `/v1/audio/transcriptions-from-url` which supports VAD-based chunking.
Responses
Single file response:
```json
{
"text": "transcribed text",
"words": [
{ "word": "hello", "start": 0.0, "end": 0.5 },
{ "word": "world", "start": 0.5, "end": 1.0 }
],
"filename": "audio.mp3"
}
```
Multiple files response:
```json
{
"results": [
{"filename": "a1.mp3", "text": "...", "words": [...]},
{"filename": "a2.mp3", "text": "...", "words": [...]}]
}
```
Notes:
- Word objects always include keys: `word`, `start`, `end`.
- Some implementations may include a trailing space in `word` to match Whisper tokenization behavior; clients should trim if needed.
Example curl (single file):
```bash
curl -X POST \
-H "Authorization: Bearer $REFLECTOR_GPU_APIKEY" \
-F "file=@/path/to/audio.mp3" \
-F "language=en" \
"$BASE_URL/v1/audio/transcriptions"
```
Example curl (multiple files, batch):
```bash
curl -X POST \
-H "Authorization: Bearer $REFLECTOR_GPU_APIKEY" \
-F "files=@/path/a1.mp3" -F "files=@/path/a2.mp3" \
-F "batch=true" -F "language=en" \
"$BASE_URL/v1/audio/transcriptions"
```
#### POST /v1/audio/transcriptions-from-url
Transcribe a single remote audio file by URL.
Request: application/json
Body parameters:
- `audio_file_url` (string) — required. URL of the audio file to transcribe.
- `model` (string) — optional. Defaults to the implementation-specific model (see above).
- `language` (string) — optional, defaults to `en`. Parakeet only accepts `en`.
- `timestamp_offset` (number) — optional, defaults to `0.0`. Added to each word's `start`/`end` in the response.
```json
{
"audio_file_url": "https://example.com/audio.mp3",
"model": "nvidia/parakeet-tdt-0.6b-v2",
"language": "en",
"timestamp_offset": 0.0
}
```
Response:
```json
{
"text": "transcribed text",
"words": [
{ "word": "hello", "start": 10.0, "end": 10.5 },
{ "word": "world", "start": 10.5, "end": 11.0 }
]
}
```
Notes:
- `timestamp_offset` is added to each words `start`/`end` in the response.
- Implementations may perform VAD-based chunking and batching for long-form audio; word timings are adjusted accordingly.
Example curl:
```bash
curl -X POST \
-H "Authorization: Bearer $REFLECTOR_GPU_APIKEY" \
-H "Content-Type: application/json" \
-d '{
"audio_file_url": "https://example.com/audio.mp3",
"language": "en",
"timestamp_offset": 0
}' \
"$BASE_URL/v1/audio/transcriptions-from-url"
```
### Error handling
- 400 Bad Request
- Parakeet: `language` other than `en`
- Missing required parameters (`file`/`files` for upload; `audio_file_url` for URL endpoint)
- Unsupported file extension
- 401 Unauthorized
- Missing or invalid Bearer token
- 404 Not Found
- `audio_file_url` does not exist
### Implementation details
- GPUs: A10G for small-file/live, L40S for large-file URL transcription (subject to deployment)
- VAD chunking and segment batching; word timings adjusted and overlapping ends constrained
- Pads very short segments (< 0.5s) to avoid model crashes on some backends
### Server configuration (Reflector API)
Set the Reflector server to use the Modal backend and point `TRANSCRIPT_URL` to your chosen deployment:
```
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=https://<account>--reflector-transcriber-parakeet-web.modal.run
TRANSCRIPT_MODAL_API_KEY=<REFLECTOR_GPU_APIKEY>
```
### Conformance tests
Use the pytest-based conformance tests to validate any new implementation (including self-hosted) against this spec:
```
TRANSCRIPT_URL=https://<your-deployment-base> \
TRANSCRIPT_MODAL_API_KEY=your-api-key \
uv run -m pytest -m gpu_modal --no-cov server/tests/test_gpu_modal_transcript.py
```

View File

@@ -1,212 +0,0 @@
# Reflector Webhook Documentation
## Overview
Reflector supports webhook notifications to notify external systems when transcript processing is completed. Webhooks can be configured per room and are triggered automatically after a transcript is successfully processed.
## Configuration
Webhooks are configured at the room level with two fields:
- `webhook_url`: The HTTPS endpoint to receive webhook notifications
- `webhook_secret`: Optional secret key for HMAC signature verification (auto-generated if not provided)
## Events
### `transcript.completed`
Triggered when a transcript has been fully processed, including transcription, diarization, summarization, and topic detection.
### `test`
A test event that can be triggered manually to verify webhook configuration.
## Webhook Request Format
### Headers
All webhook requests include the following headers:
| Header | Description | Example |
|--------|-------------|---------|
| `Content-Type` | Always `application/json` | `application/json` |
| `User-Agent` | Identifies Reflector as the source | `Reflector-Webhook/1.0` |
| `X-Webhook-Event` | The event type | `transcript.completed` or `test` |
| `X-Webhook-Retry` | Current retry attempt number | `0`, `1`, `2`... |
| `X-Webhook-Signature` | HMAC signature (if secret configured) | `t=1735306800,v1=abc123...` |
### Signature Verification
If a webhook secret is configured, Reflector includes an HMAC-SHA256 signature in the `X-Webhook-Signature` header to verify the webhook authenticity.
The signature format is: `t={timestamp},v1={signature}`
To verify the signature:
1. Extract the timestamp and signature from the header
2. Create the signed payload: `{timestamp}.{request_body}`
3. Compute HMAC-SHA256 of the signed payload using your webhook secret
4. Compare the computed signature with the received signature
Example verification (Python):
```python
import hmac
import hashlib
def verify_webhook_signature(payload: bytes, signature_header: str, secret: str) -> bool:
# Parse header: "t=1735306800,v1=abc123..."
parts = dict(part.split("=") for part in signature_header.split(","))
timestamp = parts["t"]
received_signature = parts["v1"]
# Create signed payload
signed_payload = f"{timestamp}.{payload.decode('utf-8')}"
# Compute expected signature
expected_signature = hmac.new(
secret.encode("utf-8"),
signed_payload.encode("utf-8"),
hashlib.sha256
).hexdigest()
# Compare signatures
return hmac.compare_digest(expected_signature, received_signature)
```
## Event Payloads
### `transcript.completed` Event
This event includes a convenient URL for accessing the transcript:
- `frontend_url`: Direct link to view the transcript in the web interface
```json
{
"event": "transcript.completed",
"event_id": "transcript.completed-abc-123-def-456",
"timestamp": "2025-08-27T12:34:56.789012Z",
"transcript": {
"id": "abc-123-def-456",
"room_id": "room-789",
"created_at": "2025-08-27T12:00:00Z",
"duration": 1800.5,
"title": "Q3 Product Planning Meeting",
"short_summary": "Team discussed Q3 product roadmap, prioritizing mobile app features and API improvements.",
"long_summary": "The product team met to finalize the Q3 roadmap. Key decisions included...",
"webvtt": "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\n<v Speaker 1>Welcome everyone to today's meeting...",
"topics": [
{
"title": "Introduction and Agenda",
"summary": "Meeting kickoff with agenda review",
"timestamp": 0.0,
"duration": 120.0,
"webvtt": "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\n<v Speaker 1>Welcome everyone..."
},
{
"title": "Mobile App Features Discussion",
"summary": "Team reviewed proposed mobile app features for Q3",
"timestamp": 120.0,
"duration": 600.0,
"webvtt": "WEBVTT\n\n00:02:00.000 --> 00:02:10.000\n<v Speaker 2>Let's talk about the mobile app..."
}
],
"participants": [
{
"id": "participant-1",
"name": "John Doe",
"speaker": "Speaker 1"
},
{
"id": "participant-2",
"name": "Jane Smith",
"speaker": "Speaker 2"
}
],
"source_language": "en",
"target_language": "en",
"status": "completed",
"frontend_url": "https://app.reflector.com/transcripts/abc-123-def-456"
},
"room": {
"id": "room-789",
"name": "Product Team Room"
}
}
```
### `test` Event
```json
{
"event": "test",
"event_id": "test.2025-08-27T12:34:56.789012Z",
"timestamp": "2025-08-27T12:34:56.789012Z",
"message": "This is a test webhook from Reflector",
"room": {
"id": "room-789",
"name": "Product Team Room"
}
}
```
## Retry Policy
Webhooks are delivered with automatic retry logic to handle transient failures. When a webhook delivery fails due to server errors or network issues, Reflector will automatically retry the delivery multiple times over an extended period.
### Retry Mechanism
Reflector implements an exponential backoff strategy for webhook retries:
- **Initial retry delay**: 60 seconds after the first failure
- **Exponential backoff**: Each subsequent retry waits approximately twice as long as the previous one
- **Maximum retry interval**: 1 hour (backoff is capped at this duration)
- **Maximum retry attempts**: 30 attempts total
- **Total retry duration**: Retries continue for approximately 24 hours
### How Retries Work
When a webhook fails, Reflector will:
1. Wait 60 seconds, then retry (attempt #1)
2. If it fails again, wait ~2 minutes, then retry (attempt #2)
3. Continue doubling the wait time up to a maximum of 1 hour between attempts
4. Keep retrying at 1-hour intervals until successful or 30 attempts are exhausted
The `X-Webhook-Retry` header indicates the current retry attempt number (0 for the initial attempt, 1 for first retry, etc.), allowing your endpoint to track retry attempts.
### Retry Behavior by HTTP Status Code
| Status Code | Behavior |
|-------------|----------|
| 2xx (Success) | No retry, webhook marked as delivered |
| 4xx (Client Error) | No retry, request is considered permanently failed |
| 5xx (Server Error) | Automatic retry with exponential backoff |
| Network/Timeout Error | Automatic retry with exponential backoff |
**Important Notes:**
- Webhooks timeout after 30 seconds. If your endpoint takes longer to respond, it will be considered a timeout error and retried.
- During the retry period (~24 hours), you may receive the same webhook multiple times if your endpoint experiences intermittent failures.
- There is no mechanism to manually retry failed webhooks after the retry period expires.
## Testing Webhooks
You can test your webhook configuration before processing transcripts:
```http
POST /v1/rooms/{room_id}/webhook/test
```
Response:
```json
{
"success": true,
"status_code": 200,
"message": "Webhook test successful",
"response_preview": "OK"
}
```
Or in case of failure:
```json
{
"success": false,
"error": "Webhook request timed out (10 seconds)"
}
```

View File

@@ -1,78 +1,41 @@
import os
import sys
import tempfile
import threading
import uuid
from typing import Generator, Mapping, NamedTuple, NewType, TypedDict
from urllib.parse import urlparse
import modal
from pydantic import BaseModel
MODELS_DIR = "/models"
MODEL_NAME = "large-v2"
MODEL_COMPUTE_TYPE: str = "float16"
MODEL_NUM_WORKERS: int = 1
MINUTES = 60 # seconds
SAMPLERATE = 16000
UPLOADS_PATH = "/uploads"
CACHE_PATH = "/models"
SUPPORTED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
VAD_CONFIG = {
"batch_max_duration": 30.0,
"silence_padding": 0.5,
"window_size": 512,
}
WhisperUniqFilename = NewType("WhisperUniqFilename", str)
AudioFileExtension = NewType("AudioFileExtension", str)
volume = modal.Volume.from_name("models", create_if_missing=True)
app = modal.App("reflector-transcriber")
model_cache = modal.Volume.from_name("models", create_if_missing=True)
upload_volume = modal.Volume.from_name("whisper-uploads", create_if_missing=True)
class TimeSegment(NamedTuple):
"""Represents a time segment with start and end times."""
start: float
end: float
class AudioSegment(NamedTuple):
"""Represents an audio segment with timing and audio data."""
start: float
end: float
audio: any
class TranscriptResult(NamedTuple):
"""Represents a transcription result with text and word timings."""
text: str
words: list["WordTiming"]
class WordTiming(TypedDict):
"""Represents a word with its timing information."""
word: str
start: float
end: float
def download_model():
from faster_whisper import download_model
model_cache.reload()
volume.reload()
download_model(MODEL_NAME, cache_dir=CACHE_PATH)
download_model(MODEL_NAME, cache_dir=MODELS_DIR)
model_cache.commit()
volume.commit()
image = (
modal.Image.debian_slim(python_version="3.12")
.pip_install(
"huggingface_hub==0.27.1",
"hf-transfer==0.1.9",
"torch==2.5.1",
"faster-whisper==1.1.1",
)
.env(
{
"HF_HUB_ENABLE_HF_TRANSFER": "1",
@@ -82,98 +45,19 @@ image = (
),
}
)
.apt_install("ffmpeg")
.pip_install(
"huggingface_hub==0.27.1",
"hf-transfer==0.1.9",
"torch==2.5.1",
"faster-whisper==1.1.1",
"fastapi==0.115.12",
"requests",
"librosa==0.10.1",
"numpy<2",
"silero-vad==5.1.0",
)
.run_function(download_model, volumes={CACHE_PATH: model_cache})
.run_function(download_model, volumes={MODELS_DIR: volume})
)
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
parsed_url = urlparse(url)
url_path = parsed_url.path
for ext in SUPPORTED_FILE_EXTENSIONS:
if url_path.lower().endswith(f".{ext}"):
return AudioFileExtension(ext)
content_type = headers.get("content-type", "").lower()
if "audio/mpeg" in content_type or "audio/mp3" in content_type:
return AudioFileExtension("mp3")
if "audio/wav" in content_type:
return AudioFileExtension("wav")
if "audio/mp4" in content_type:
return AudioFileExtension("mp4")
raise ValueError(
f"Unsupported audio format for URL: {url}. "
f"Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
)
def download_audio_to_volume(
audio_file_url: str,
) -> tuple[WhisperUniqFilename, AudioFileExtension]:
import requests
from fastapi import HTTPException
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(status_code=404, detail="Audio file not found")
response = requests.get(audio_file_url, allow_redirects=True)
response.raise_for_status()
audio_suffix = detect_audio_format(audio_file_url, response.headers)
unique_filename = WhisperUniqFilename(f"{uuid.uuid4()}.{audio_suffix}")
file_path = f"{UPLOADS_PATH}/{unique_filename}"
with open(file_path, "wb") as f:
f.write(response.content)
upload_volume.commit()
return unique_filename, audio_suffix
def pad_audio(audio_array, sample_rate: int = SAMPLERATE):
"""Add 0.5s of silence if audio is shorter than the silence_padding window.
Whisper does not require this strictly, but aligning behavior with Parakeet
avoids edge-case crashes on extremely short inputs and makes comparisons easier.
"""
import numpy as np
audio_duration = len(audio_array) / sample_rate
if audio_duration < VAD_CONFIG["silence_padding"]:
silence_samples = int(sample_rate * VAD_CONFIG["silence_padding"])
silence = np.zeros(silence_samples, dtype=np.float32)
return np.concatenate([audio_array, silence])
return audio_array
@app.cls(
gpu="A10G",
timeout=5 * MINUTES,
scaledown_window=5 * MINUTES,
allow_concurrent_inputs=6,
image=image,
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
volumes={MODELS_DIR: volume},
)
@modal.concurrent(max_inputs=10)
class TranscriberWhisperLive:
"""Live transcriber class for small audio segments (A10G).
Mirrors the Parakeet live class API but uses Faster-Whisper under the hood.
"""
class Transcriber:
@modal.enter()
def enter(self):
import faster_whisper
@@ -187,200 +71,23 @@ class TranscriberWhisperLive:
device=self.device,
compute_type=MODEL_COMPUTE_TYPE,
num_workers=MODEL_NUM_WORKERS,
download_root=CACHE_PATH,
download_root=MODELS_DIR,
local_files_only=True,
)
print(f"Model is on device: {self.device}")
@modal.method()
def transcribe_segment(
self,
filename: str,
language: str = "en",
audio_data: str,
audio_suffix: str,
language: str,
):
"""Transcribe a single uploaded audio file by filename."""
upload_volume.reload()
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
with self.lock:
with NoStdStreams():
segments, _ = self.model.transcribe(
file_path,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(segment.text for segment in segments).strip()
words = [
{
"word": word.word,
"start": round(float(word.start), 2),
"end": round(float(word.end), 2),
}
for segment in segments
for word in segment.words
]
return {"text": text, "words": words}
@modal.method()
def transcribe_batch(
self,
filenames: list[str],
language: str = "en",
):
"""Transcribe multiple uploaded audio files and return per-file results."""
upload_volume.reload()
results = []
for filename in filenames:
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"Batch file not found: {file_path}")
with self.lock:
with NoStdStreams():
segments, _ = self.model.transcribe(
file_path,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters={"min_silence_duration_ms": 500},
)
segments = list(segments)
text = "".join(seg.text for seg in segments).strip()
words = [
{
"word": w.word,
"start": round(float(w.start), 2),
"end": round(float(w.end), 2),
}
for seg in segments
for w in seg.words
]
results.append(
{
"filename": filename,
"text": text,
"words": words,
}
)
return results
@app.cls(
gpu="L40S",
timeout=15 * MINUTES,
image=image,
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
)
class TranscriberWhisperFile:
"""File transcriber for larger/longer audio, using VAD-driven batching (L40S)."""
@modal.enter()
def enter(self):
import faster_whisper
import torch
from silero_vad import load_silero_vad
self.lock = threading.Lock()
self.use_gpu = torch.cuda.is_available()
self.device = "cuda" if self.use_gpu else "cpu"
self.model = faster_whisper.WhisperModel(
MODEL_NAME,
device=self.device,
compute_type=MODEL_COMPUTE_TYPE,
num_workers=MODEL_NUM_WORKERS,
download_root=CACHE_PATH,
local_files_only=True,
)
self.vad_model = load_silero_vad(onnx=False)
@modal.method()
def transcribe_segment(
self, filename: str, timestamp_offset: float = 0.0, language: str = "en"
):
import librosa
import numpy as np
from silero_vad import VADIterator
def vad_segments(
audio_array,
sample_rate: int = SAMPLERATE,
window_size: int = VAD_CONFIG["window_size"],
) -> Generator[TimeSegment, None, None]:
"""Generate speech segments as TimeSegment using Silero VAD."""
iterator = VADIterator(self.vad_model, sampling_rate=sample_rate)
start = None
for i in range(0, len(audio_array), window_size):
chunk = audio_array[i : i + window_size]
if len(chunk) < window_size:
chunk = np.pad(
chunk, (0, window_size - len(chunk)), mode="constant"
)
speech = iterator(chunk)
if not speech:
continue
if "start" in speech:
start = speech["start"]
continue
if "end" in speech and start is not None:
end = speech["end"]
yield TimeSegment(
start / float(SAMPLERATE), end / float(SAMPLERATE)
)
start = None
iterator.reset_states()
upload_volume.reload()
file_path = f"{UPLOADS_PATH}/{filename}"
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
audio_array, _sr = librosa.load(file_path, sr=SAMPLERATE, mono=True)
# Batch segments up to ~30s windows by merging contiguous VAD segments
merged_batches: list[TimeSegment] = []
batch_start = None
batch_end = None
max_duration = VAD_CONFIG["batch_max_duration"]
for segment in vad_segments(audio_array):
seg_start, seg_end = segment.start, segment.end
if batch_start is None:
batch_start, batch_end = seg_start, seg_end
continue
if seg_end - batch_start <= max_duration:
batch_end = seg_end
else:
merged_batches.append(TimeSegment(batch_start, batch_end))
batch_start, batch_end = seg_start, seg_end
if batch_start is not None and batch_end is not None:
merged_batches.append(TimeSegment(batch_start, batch_end))
all_text = []
all_words = []
for segment in merged_batches:
start_time, end_time = segment.start, segment.end
s_idx = int(start_time * SAMPLERATE)
e_idx = int(end_time * SAMPLERATE)
segment = audio_array[s_idx:e_idx]
segment = pad_audio(segment, SAMPLERATE)
with tempfile.NamedTemporaryFile("wb+", suffix=f".{audio_suffix}") as fp:
fp.write(audio_data)
with self.lock:
segments, _ = self.model.transcribe(
segment,
fp.name,
language=language,
beam_size=5,
word_timestamps=True,
@@ -389,220 +96,66 @@ class TranscriberWhisperFile:
)
segments = list(segments)
text = "".join(seg.text for seg in segments).strip()
text = "".join(segment.text for segment in segments)
words = [
{
"word": w.word,
"start": round(float(w.start) + start_time + timestamp_offset, 2),
"end": round(float(w.end) + start_time + timestamp_offset, 2),
}
for seg in segments
for w in seg.words
{"word": word.word, "start": word.start, "end": word.end}
for segment in segments
for word in segment.words
]
if text:
all_text.append(text)
all_words.extend(words)
return {"text": " ".join(all_text), "words": all_words}
def detect_audio_format(url: str, headers: dict) -> str:
from urllib.parse import urlparse
from fastapi import HTTPException
url_path = urlparse(url).path
for ext in SUPPORTED_FILE_EXTENSIONS:
if url_path.lower().endswith(f".{ext}"):
return ext
content_type = headers.get("content-type", "").lower()
if "audio/mpeg" in content_type or "audio/mp3" in content_type:
return "mp3"
if "audio/wav" in content_type:
return "wav"
if "audio/mp4" in content_type:
return "mp4"
raise HTTPException(
status_code=400,
detail=(
f"Unsupported audio format for URL. Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
),
)
def download_audio_to_volume(audio_file_url: str) -> tuple[str, str]:
import requests
from fastapi import HTTPException
response = requests.head(audio_file_url, allow_redirects=True)
if response.status_code == 404:
raise HTTPException(status_code=404, detail="Audio file not found")
response = requests.get(audio_file_url, allow_redirects=True)
response.raise_for_status()
audio_suffix = detect_audio_format(audio_file_url, response.headers)
unique_filename = f"{uuid.uuid4()}.{audio_suffix}"
file_path = f"{UPLOADS_PATH}/{unique_filename}"
with open(file_path, "wb") as f:
f.write(response.content)
upload_volume.commit()
return unique_filename, audio_suffix
return {"text": text, "words": words}
@app.function(
scaledown_window=60,
timeout=600,
timeout=60,
allow_concurrent_inputs=40,
secrets=[
modal.Secret.from_name("reflector-gpu"),
],
volumes={CACHE_PATH: model_cache, UPLOADS_PATH: upload_volume},
image=image,
volumes={MODELS_DIR: volume},
)
@modal.concurrent(max_inputs=40)
@modal.asgi_app()
def web():
from fastapi import (
Body,
Depends,
FastAPI,
Form,
HTTPException,
UploadFile,
status,
)
from fastapi import Body, Depends, FastAPI, HTTPException, UploadFile, status
from fastapi.security import OAuth2PasswordBearer
from typing_extensions import Annotated
transcriber_live = TranscriberWhisperLive()
transcriber_file = TranscriberWhisperFile()
transcriber = Transcriber()
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey == os.environ["REFLECTOR_GPU_APIKEY"]:
return
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
supported_file_types = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
class TranscriptResponse(dict):
pass
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
class TranscriptResponse(BaseModel):
result: dict
@app.post("/v1/audio/transcriptions", dependencies=[Depends(apikey_auth)])
def transcribe(
file: UploadFile = None,
files: list[UploadFile] | None = None,
model: str = Form(MODEL_NAME),
language: str = Form("en"),
batch: bool = Form(False),
):
if not file and not files:
raise HTTPException(
status_code=400, detail="Either 'file' or 'files' parameter is required"
)
if batch and not files:
raise HTTPException(
status_code=400, detail="Batch transcription requires 'files'"
)
file: UploadFile,
model: str = "whisper-1",
language: Annotated[str, Body(...)] = "en",
) -> TranscriptResponse:
audio_data = file.file.read()
audio_suffix = file.filename.split(".")[-1]
assert audio_suffix in supported_file_types
upload_files = [file] if file else files
uploaded_filenames: list[str] = []
for upload_file in upload_files:
audio_suffix = upload_file.filename.split(".")[-1]
if audio_suffix not in SUPPORTED_FILE_EXTENSIONS:
raise HTTPException(
status_code=400,
detail=(
f"Unsupported audio format. Supported extensions: {', '.join(SUPPORTED_FILE_EXTENSIONS)}"
),
)
unique_filename = f"{uuid.uuid4()}.{audio_suffix}"
file_path = f"{UPLOADS_PATH}/{unique_filename}"
with open(file_path, "wb") as f:
content = upload_file.file.read()
f.write(content)
uploaded_filenames.append(unique_filename)
upload_volume.commit()
try:
if batch and len(upload_files) > 1:
func = transcriber_live.transcribe_batch.spawn(
filenames=uploaded_filenames,
language=language,
)
results = func.get()
return {"results": results}
results = []
for filename in uploaded_filenames:
func = transcriber_live.transcribe_segment.spawn(
filename=filename,
language=language,
)
result = func.get()
result["filename"] = filename
results.append(result)
return {"results": results} if len(results) > 1 else results[0]
finally:
for filename in uploaded_filenames:
try:
file_path = f"{UPLOADS_PATH}/{filename}"
os.remove(file_path)
except Exception:
pass
upload_volume.commit()
@app.post("/v1/audio/transcriptions-from-url", dependencies=[Depends(apikey_auth)])
def transcribe_from_url(
audio_file_url: str = Body(
..., description="URL of the audio file to transcribe"
),
model: str = Body(MODEL_NAME),
language: str = Body("en"),
timestamp_offset: float = Body(0.0),
):
unique_filename, _audio_suffix = download_audio_to_volume(audio_file_url)
try:
func = transcriber_file.transcribe_segment.spawn(
filename=unique_filename,
timestamp_offset=timestamp_offset,
language=language,
)
result = func.get()
return result
finally:
try:
file_path = f"{UPLOADS_PATH}/{unique_filename}"
os.remove(file_path)
upload_volume.commit()
except Exception:
pass
func = transcriber.transcribe_segment.spawn(
audio_data=audio_data,
audio_suffix=audio_suffix,
language=language,
)
result = func.get()
return result
return app
class NoStdStreams:
def __init__(self):
self.devnull = open(os.devnull, "w")
def __enter__(self):
self._stdout, self._stderr = sys.stdout, sys.stderr
self._stdout.flush()
self._stderr.flush()
sys.stdout, sys.stderr = self.devnull, self.devnull
def __exit__(self, exc_type, exc_value, traceback):
sys.stdout, sys.stderr = self._stdout, self._stderr
self.devnull.close()

View File

@@ -3,7 +3,7 @@ import os
import sys
import threading
import uuid
from typing import Generator, Mapping, NamedTuple, NewType, TypedDict
from typing import Mapping, NewType
from urllib.parse import urlparse
import modal
@@ -14,7 +14,10 @@ SAMPLERATE = 16000
UPLOADS_PATH = "/uploads"
CACHE_PATH = "/cache"
VAD_CONFIG = {
"batch_max_duration": 30.0,
"max_segment_duration": 30.0,
"batch_max_files": 10,
"batch_max_duration": 5.0,
"min_segment_duration": 0.02,
"silence_padding": 0.5,
"window_size": 512,
}
@@ -22,37 +25,6 @@ VAD_CONFIG = {
ParakeetUniqFilename = NewType("ParakeetUniqFilename", str)
AudioFileExtension = NewType("AudioFileExtension", str)
class TimeSegment(NamedTuple):
"""Represents a time segment with start and end times."""
start: float
end: float
class AudioSegment(NamedTuple):
"""Represents an audio segment with timing and audio data."""
start: float
end: float
audio: any
class TranscriptResult(NamedTuple):
"""Represents a transcription result with text and word timings."""
text: str
words: list["WordTiming"]
class WordTiming(TypedDict):
"""Represents a word with its timing information."""
word: str
start: float
end: float
app = modal.App("reflector-transcriber-parakeet")
# Volume for caching model weights
@@ -198,14 +170,12 @@ class TranscriberParakeetLive:
(output,) = self.model.transcribe([padded_audio], timestamps=True)
text = output.text.strip()
words: list[WordTiming] = [
WordTiming(
# XXX the space added here is to match the output of whisper
# whisper add space to each words, while parakeet don't
word=word_info["word"] + " ",
start=round(word_info["start"], 2),
end=round(word_info["end"], 2),
)
words = [
{
"word": word_info["word"],
"start": round(word_info["start"], 2),
"end": round(word_info["end"], 2),
}
for word_info in output.timestamp["word"]
]
@@ -241,12 +211,12 @@ class TranscriberParakeetLive:
for i, (filename, output) in enumerate(zip(filenames, outputs)):
text = output.text.strip()
words: list[WordTiming] = [
WordTiming(
word=word_info["word"] + " ",
start=round(word_info["start"], 2),
end=round(word_info["end"], 2),
)
words = [
{
"word": word_info["word"],
"start": round(word_info["start"], 2),
"end": round(word_info["end"], 2),
}
for word_info in output.timestamp["word"]
]
@@ -301,9 +271,7 @@ class TranscriberParakeetFile:
audio_array, sample_rate = librosa.load(file_path, sr=SAMPLERATE, mono=True)
return audio_array
def vad_segment_generator(
audio_array,
) -> Generator[TimeSegment, None, None]:
def vad_segment_generator(audio_array):
"""Generate speech segments using VAD with start/end sample indices"""
vad_iterator = VADIterator(self.vad_model, sampling_rate=SAMPLERATE)
window_size = VAD_CONFIG["window_size"]
@@ -329,121 +297,107 @@ class TranscriberParakeetFile:
start_time = start / float(SAMPLERATE)
end_time = end / float(SAMPLERATE)
yield TimeSegment(start_time, end_time)
# Extract the actual audio segment
audio_segment = audio_array[start:end]
yield (start_time, end_time, audio_segment)
start = None
vad_iterator.reset_states()
def batch_speech_segments(
segments: Generator[TimeSegment, None, None], max_duration: int
) -> Generator[TimeSegment, None, None]:
"""
Input segments:
[0-2] [3-5] [6-8] [10-11] [12-15] [17-19] [20-22]
def vad_segment_filter(segments):
"""Filter VAD segments by duration and chunk large segments"""
min_dur = VAD_CONFIG["min_segment_duration"]
max_dur = VAD_CONFIG["max_segment_duration"]
↓ (max_duration=10)
for start_time, end_time, audio_segment in segments:
segment_duration = end_time - start_time
Output batches:
[0-8] [10-19] [20-22]
Note: silences are kept for better transcription, previous implementation was
passing segments separatly, but the output was less accurate.
"""
batch_start_time = None
batch_end_time = None
for segment in segments:
start_time, end_time = segment.start, segment.end
if batch_start_time is None or batch_end_time is None:
batch_start_time = start_time
batch_end_time = end_time
# Skip very small segments
if segment_duration < min_dur:
continue
total_duration = end_time - batch_start_time
if total_duration <= max_duration:
batch_end_time = end_time
# If segment is within max duration, yield as-is
if segment_duration <= max_dur:
yield (start_time, end_time, audio_segment)
continue
yield TimeSegment(batch_start_time, batch_end_time)
batch_start_time = start_time
batch_end_time = end_time
# Chunk large segments into smaller pieces
chunk_samples = int(max_dur * SAMPLERATE)
current_start = start_time
if batch_start_time is None or batch_end_time is None:
return
for chunk_offset in range(0, len(audio_segment), chunk_samples):
chunk_audio = audio_segment[
chunk_offset : chunk_offset + chunk_samples
]
if len(chunk_audio) == 0:
break
yield TimeSegment(batch_start_time, batch_end_time)
chunk_duration = len(chunk_audio) / float(SAMPLERATE)
chunk_end = current_start + chunk_duration
def batch_segment_to_audio_segment(
segments: Generator[TimeSegment, None, None],
audio_array,
) -> Generator[AudioSegment, None, None]:
"""Extract audio segments and apply padding for Parakeet compatibility.
# Only yield chunks that meet minimum duration
if chunk_duration >= min_dur:
yield (current_start, chunk_end, chunk_audio)
Uses pad_audio to ensure segments are at least 0.5s long, preventing
Parakeet crashes. This padding may cause slight timing overlaps between
segments, which are corrected by enforce_word_timing_constraints.
"""
for segment in segments:
start_time, end_time = segment.start, segment.end
start_sample = int(start_time * SAMPLERATE)
end_sample = int(end_time * SAMPLERATE)
audio_segment = audio_array[start_sample:end_sample]
current_start = chunk_end
padded_segment = pad_audio(audio_segment, SAMPLERATE)
def batch_segments(segments, max_files=10, max_duration=5.0):
batch = []
batch_duration = 0.0
yield AudioSegment(start_time, end_time, padded_segment)
for start_time, end_time, audio_segment in segments:
segment_duration = end_time - start_time
def transcribe_batch(model, audio_segments: list) -> list:
if segment_duration < VAD_CONFIG["silence_padding"]:
silence_samples = int(
(VAD_CONFIG["silence_padding"] - segment_duration) * SAMPLERATE
)
padding = np.zeros(silence_samples, dtype=np.float32)
audio_segment = np.concatenate([audio_segment, padding])
segment_duration = VAD_CONFIG["silence_padding"]
batch.append((start_time, end_time, audio_segment))
batch_duration += segment_duration
if len(batch) >= max_files or batch_duration >= max_duration:
yield batch
batch = []
batch_duration = 0.0
if batch:
yield batch
def transcribe_batch(model, audio_segments):
with NoStdStreams():
outputs = model.transcribe(audio_segments, timestamps=True)
return outputs
def enforce_word_timing_constraints(
words: list[WordTiming],
) -> list[WordTiming]:
"""Enforce that word end times don't exceed the start time of the next word.
Due to silence padding added in batch_segment_to_audio_segment for better
transcription accuracy, word timings from different segments may overlap.
This function ensures there are no overlaps by adjusting end times.
"""
if len(words) <= 1:
return words
enforced_words = []
for i, word in enumerate(words):
enforced_word = word.copy()
if i < len(words) - 1:
next_start = words[i + 1]["start"]
if enforced_word["end"] > next_start:
enforced_word["end"] = next_start
enforced_words.append(enforced_word)
return enforced_words
def emit_results(
results: list,
segments_info: list[AudioSegment],
) -> Generator[TranscriptResult, None, None]:
results,
segments_info,
batch_index,
total_batches,
):
"""Yield transcribed text and word timings from model output, adjusting timestamps to absolute positions."""
for i, (output, segment) in enumerate(zip(results, segments_info)):
start_time, end_time = segment.start, segment.end
for i, (output, (start_time, end_time, _)) in enumerate(
zip(results, segments_info)
):
text = output.text.strip()
words: list[WordTiming] = [
WordTiming(
word=word_info["word"] + " ",
start=round(
words = [
{
"word": word_info["word"],
"start": round(
word_info["start"] + start_time + timestamp_offset, 2
),
end=round(word_info["end"] + start_time + timestamp_offset, 2),
)
"end": round(
word_info["end"] + start_time + timestamp_offset, 2
),
}
for word_info in output.timestamp["word"]
]
yield TranscriptResult(text, words)
yield text, words
upload_volume.reload()
@@ -453,31 +407,41 @@ class TranscriberParakeetFile:
audio_array = load_and_convert_audio(file_path)
total_duration = len(audio_array) / float(SAMPLERATE)
processed_duration = 0.0
all_text_parts: list[str] = []
all_words: list[WordTiming] = []
all_text_parts = []
all_words = []
raw_segments = vad_segment_generator(audio_array)
speech_segments = batch_speech_segments(
raw_segments,
filtered_segments = vad_segment_filter(raw_segments)
batches = batch_segments(
filtered_segments,
VAD_CONFIG["batch_max_files"],
VAD_CONFIG["batch_max_duration"],
)
audio_segments = batch_segment_to_audio_segment(speech_segments, audio_array)
for batch in audio_segments:
audio_segment = batch.audio
results = transcribe_batch(self.model, [audio_segment])
batch_index = 0
total_batches = max(
1, int(total_duration / VAD_CONFIG["batch_max_duration"]) + 1
)
for result in emit_results(
for batch in batches:
batch_index += 1
audio_segments = [seg[2] for seg in batch]
results = transcribe_batch(self.model, audio_segments)
for text, words in emit_results(
results,
[batch],
batch,
batch_index,
total_batches,
):
if not result.text:
if not text:
continue
all_text_parts.append(result.text)
all_words.extend(result.words)
all_text_parts.append(text)
all_words.extend(words)
all_words = enforce_word_timing_constraints(all_words)
processed_duration += sum(len(seg[2]) / float(SAMPLERATE) for seg in batch)
combined_text = " ".join(all_text_parts)
return {"text": combined_text, "words": all_words}

View File

@@ -1,36 +0,0 @@
"""Add webhook fields to rooms
Revision ID: 0194f65cd6d3
Revises: 5a8907fd1d78
Create Date: 2025-08-27 09:03:19.610995
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "0194f65cd6d3"
down_revision: Union[str, None] = "5a8907fd1d78"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.add_column(sa.Column("webhook_url", sa.String(), nullable=True))
batch_op.add_column(sa.Column("webhook_secret", sa.String(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("room", schema=None) as batch_op:
batch_op.drop_column("webhook_secret")
batch_op.drop_column("webhook_url")
# ### end Alembic commands ###

View File

@@ -1,50 +0,0 @@
"""add cascade delete to meeting consent foreign key
Revision ID: 5a8907fd1d78
Revises: 0ab2d7ffaa16
Create Date: 2025-08-26 17:26:50.945491
"""
from typing import Sequence, Union
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "5a8907fd1d78"
down_revision: Union[str, None] = "0ab2d7ffaa16"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
batch_op.drop_constraint(
batch_op.f("meeting_consent_meeting_id_fkey"), type_="foreignkey"
)
batch_op.create_foreign_key(
batch_op.f("meeting_consent_meeting_id_fkey"),
"meeting",
["meeting_id"],
["id"],
ondelete="CASCADE",
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
batch_op.drop_constraint(
batch_op.f("meeting_consent_meeting_id_fkey"), type_="foreignkey"
)
batch_op.create_foreign_key(
batch_op.f("meeting_consent_meeting_id_fkey"),
"meeting",
["meeting_id"],
["id"],
)
# ### end Alembic commands ###

View File

@@ -1,28 +0,0 @@
"""webhook url and secret null by default
Revision ID: 61882a919591
Revises: 0194f65cd6d3
Create Date: 2025-08-29 11:46:36.738091
"""
from typing import Sequence, Union
# revision identifiers, used by Alembic.
revision: str = "61882a919591"
down_revision: Union[str, None] = "0194f65cd6d3"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###

View File

@@ -1,27 +0,0 @@
import asyncio
import functools
from reflector.db import get_database
def asynctask(f):
@functools.wraps(f)
def wrapper(*args, **kwargs):
async def run_with_db():
database = get_database()
await database.connect()
try:
return await f(*args, **kwargs)
finally:
await database.disconnect()
coro = run_with_db()
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
if loop and loop.is_running():
return loop.run_until_complete(coro)
return asyncio.run(coro)
return wrapper

View File

@@ -54,12 +54,7 @@ meeting_consent = sa.Table(
"meeting_consent",
metadata,
sa.Column("id", sa.String, primary_key=True),
sa.Column(
"meeting_id",
sa.String,
sa.ForeignKey("meeting.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id"), nullable=False),
sa.Column("user_id", sa.String),
sa.Column("consent_given", sa.Boolean, nullable=False),
sa.Column("consent_timestamp", sa.DateTime(timezone=True), nullable=False),

View File

@@ -1,4 +1,3 @@
import secrets
from datetime import datetime, timezone
from sqlite3 import IntegrityError
from typing import Literal
@@ -41,8 +40,6 @@ rooms = sqlalchemy.Table(
sqlalchemy.Column(
"is_shared", sqlalchemy.Boolean, nullable=False, server_default=false()
),
sqlalchemy.Column("webhook_url", sqlalchemy.String, nullable=True),
sqlalchemy.Column("webhook_secret", sqlalchemy.String, nullable=True),
sqlalchemy.Index("idx_room_is_shared", "is_shared"),
)
@@ -62,8 +59,6 @@ class Room(BaseModel):
"none", "prompt", "automatic", "automatic-2nd-participant"
] = "automatic-2nd-participant"
is_shared: bool = False
webhook_url: str | None = None
webhook_secret: str | None = None
class RoomController:
@@ -112,15 +107,10 @@ class RoomController:
recording_type: str,
recording_trigger: str,
is_shared: bool,
webhook_url: str = "",
webhook_secret: str = "",
):
"""
Add a new room
"""
if webhook_url and not webhook_secret:
webhook_secret = secrets.token_urlsafe(32)
room = Room(
name=name,
user_id=user_id,
@@ -132,8 +122,6 @@ class RoomController:
recording_type=recording_type,
recording_trigger=recording_trigger,
is_shared=is_shared,
webhook_url=webhook_url,
webhook_secret=webhook_secret,
)
query = rooms.insert().values(**room.model_dump())
try:
@@ -146,9 +134,6 @@ class RoomController:
"""
Update a room fields with key/values in values
"""
if values.get("webhook_url") and not values.get("webhook_secret"):
values["webhook_secret"] = secrets.token_urlsafe(32)
query = rooms.update().where(rooms.c.id == room.id).values(**values)
try:
await get_database().execute(query)

View File

@@ -8,14 +8,12 @@ from typing import Annotated, Any, Dict, Iterator
import sqlalchemy
import webvtt
from databases.interfaces import Record as DbRecord
from fastapi import HTTPException
from pydantic import (
BaseModel,
Field,
NonNegativeFloat,
NonNegativeInt,
TypeAdapter,
ValidationError,
constr,
field_serializer,
@@ -26,7 +24,6 @@ from reflector.db.rooms import rooms
from reflector.db.transcripts import SourceKind, transcripts
from reflector.db.utils import is_postgresql
from reflector.logger import logger
from reflector.utils.string import NonEmptyString, try_parse_non_empty_string
DEFAULT_SEARCH_LIMIT = 20
SNIPPET_CONTEXT_LENGTH = 50 # Characters before/after match to include
@@ -34,13 +31,12 @@ DEFAULT_SNIPPET_MAX_LENGTH = NonNegativeInt(150)
DEFAULT_MAX_SNIPPETS = NonNegativeInt(3)
LONG_SUMMARY_MAX_SNIPPETS = 2
SearchQueryBase = constr(min_length=1, strip_whitespace=True)
SearchQueryBase = constr(min_length=0, strip_whitespace=True)
SearchLimitBase = Annotated[int, Field(ge=1, le=100)]
SearchOffsetBase = Annotated[int, Field(ge=0)]
SearchTotalBase = Annotated[int, Field(ge=0)]
SearchQuery = Annotated[SearchQueryBase, Field(description="Search query text")]
search_query_adapter = TypeAdapter(SearchQuery)
SearchLimit = Annotated[SearchLimitBase, Field(description="Results per page")]
SearchOffset = Annotated[
SearchOffsetBase, Field(description="Number of results to skip")
@@ -92,7 +88,7 @@ class WebVTTProcessor:
@staticmethod
def generate_snippets(
webvtt_content: WebVTTContent,
query: SearchQuery,
query: str,
max_snippets: NonNegativeInt = DEFAULT_MAX_SNIPPETS,
) -> list[str]:
"""Generate snippets from WebVTT content."""
@@ -129,7 +125,7 @@ class SnippetCandidate:
class SearchParameters(BaseModel):
"""Validated search parameters for full-text search."""
query_text: SearchQuery | None = None
query_text: SearchQuery
limit: SearchLimit = DEFAULT_SEARCH_LIMIT
offset: SearchOffset = 0
user_id: str | None = None
@@ -203,13 +199,15 @@ class SnippetGenerator:
prev_start = start
@staticmethod
def count_matches(text: str, query: SearchQuery) -> NonNegativeInt:
def count_matches(text: str, query: str) -> NonNegativeInt:
"""Count total number of matches for a query in text."""
ZERO = NonNegativeInt(0)
if not text:
logger.warning("Empty text for search query in count_matches")
return ZERO
assert query is not None
if not query:
logger.warning("Empty query for search text in count_matches")
return ZERO
return NonNegativeInt(
sum(1 for _ in SnippetGenerator.find_all_matches(text, query))
)
@@ -245,14 +243,13 @@ class SnippetGenerator:
@staticmethod
def generate(
text: str,
query: SearchQuery,
query: str,
max_length: NonNegativeInt = DEFAULT_SNIPPET_MAX_LENGTH,
max_snippets: NonNegativeInt = DEFAULT_MAX_SNIPPETS,
) -> list[str]:
"""Generate snippets from text."""
assert query is not None
if not text:
logger.warning("Empty text for generate_snippets")
if not text or not query:
logger.warning("Empty text or query for generate_snippets")
return []
candidates = (
@@ -273,7 +270,7 @@ class SnippetGenerator:
@staticmethod
def from_summary(
summary: str,
query: SearchQuery,
query: str,
max_snippets: NonNegativeInt = LONG_SUMMARY_MAX_SNIPPETS,
) -> list[str]:
"""Generate snippets from summary text."""
@@ -281,9 +278,9 @@ class SnippetGenerator:
@staticmethod
def combine_sources(
summary: NonEmptyString | None,
summary: str | None,
webvtt: WebVTTContent | None,
query: SearchQuery,
query: str,
max_total: NonNegativeInt = DEFAULT_MAX_SNIPPETS,
) -> tuple[list[str], NonNegativeInt]:
"""Combine snippets from multiple sources and return total match count.
@@ -292,11 +289,6 @@ class SnippetGenerator:
snippets can be empty for real in case of e.g. title match
"""
assert (
summary is not None or webvtt is not None
), "At least one source must be present"
webvtt_matches = 0
summary_matches = 0
@@ -363,8 +355,8 @@ class SearchController:
else_=rooms.c.name,
).label("room_name"),
]
search_query = None
if params.query_text is not None:
if params.query_text:
search_query = sqlalchemy.func.websearch_to_tsquery(
"english", params.query_text
)
@@ -381,9 +373,7 @@ class SearchController:
transcripts.join(rooms, transcripts.c.room_id == rooms.c.id, isouter=True)
)
if params.query_text is not None:
# because already initialized based on params.query_text presence above
assert search_query is not None
if params.query_text:
base_query = base_query.where(
transcripts.c.search_vector_en.op("@@")(search_query)
)
@@ -403,7 +393,7 @@ class SearchController:
transcripts.c.source_kind == params.source_kind
)
if params.query_text is not None:
if params.query_text:
order_by = sqlalchemy.desc(sqlalchemy.text("rank"))
else:
order_by = sqlalchemy.desc(transcripts.c.created_at)
@@ -417,29 +407,19 @@ class SearchController:
)
total = await get_database().fetch_val(count_query)
def _process_result(r: DbRecord) -> SearchResult:
def _process_result(r) -> SearchResult:
r_dict: Dict[str, Any] = dict(r)
webvtt_raw: str | None = r_dict.pop("webvtt", None)
webvtt: WebVTTContent | None
if webvtt_raw:
webvtt = WebVTTProcessor.parse(webvtt_raw)
else:
webvtt = None
long_summary_r: str | None = r_dict.pop("long_summary", None)
long_summary: NonEmptyString = try_parse_non_empty_string(long_summary_r)
long_summary: str | None = r_dict.pop("long_summary", None)
room_name: str | None = r_dict.pop("room_name", None)
db_result = SearchResultDB.model_validate(r_dict)
at_least_one_source = webvtt is not None or long_summary is not None
has_query = params.query_text is not None
snippets, total_match_count = (
SnippetGenerator.combine_sources(
long_summary, webvtt, params.query_text, DEFAULT_MAX_SNIPPETS
)
if has_query and at_least_one_source
else ([], 0)
snippets, total_match_count = SnippetGenerator.combine_sources(
long_summary, webvtt, params.query_text, DEFAULT_MAX_SNIPPETS
)
return SearchResult(

View File

@@ -122,15 +122,6 @@ def generate_transcript_name() -> str:
return f"Transcript {now.strftime('%Y-%m-%d %H:%M:%S')}"
TranscriptStatus = Literal[
"idle", "uploaded", "recording", "processing", "error", "ended"
]
class StrValue(BaseModel):
value: str
class AudioWaveform(BaseModel):
data: list[float]
@@ -194,7 +185,7 @@ class Transcript(BaseModel):
id: str = Field(default_factory=generate_uuid4)
user_id: str | None = None
name: str = Field(default_factory=generate_transcript_name)
status: TranscriptStatus = "idle"
status: str = "idle"
duration: float = 0
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
title: str | None = None
@@ -741,27 +732,5 @@ class TranscriptController:
transcript.delete_participant(participant_id)
await self.update(transcript, {"participants": transcript.participants_dump()})
async def set_status(
self, transcript_id: str, status: TranscriptStatus
) -> TranscriptEvent | None:
"""
Update the status of a transcript
Will add an event STATUS + update the status field of transcript
"""
async with self.transaction():
transcript = await self.get_by_id(transcript_id)
if not transcript:
raise Exception(f"Transcript {transcript_id} not found")
if transcript.status == status:
return
resp = await self.append_event(
transcript=transcript,
event="STATUS",
data=StrValue(value=status),
)
await self.update(transcript, {"status": status})
return resp
transcripts_controller = TranscriptController()

View File

@@ -7,26 +7,18 @@ Uses parallel processing for transcription, diarization, and waveform generation
"""
import asyncio
import uuid
from pathlib import Path
import av
import structlog
from celery import shared_task
from reflector.asynctask import asynctask
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import (
SourceKind,
Transcript,
TranscriptStatus,
transcripts_controller,
)
from reflector.logger import logger
from reflector.pipelines.main_live_pipeline import (
PipelineMainBase,
broadcast_to_sockets,
)
from reflector.pipelines.main_live_pipeline import PipelineMainBase, asynctask
from reflector.processors import (
AudioFileWriterProcessor,
TranscriptFinalSummaryProcessor,
@@ -51,7 +43,6 @@ from reflector.processors.types import (
)
from reflector.settings import settings
from reflector.storage import get_transcripts_storage
from reflector.worker.webhook import send_transcript_webhook
class EmptyPipeline:
@@ -92,27 +83,12 @@ class PipelineMainFile(PipelineMainBase):
exc_info=result,
)
@broadcast_to_sockets
async def set_status(self, transcript_id: str, status: TranscriptStatus):
async with self.lock_transaction():
return await transcripts_controller.set_status(transcript_id, status)
async def process(self, file_path: Path):
"""Main entry point for file processing"""
self.logger.info(f"Starting file pipeline for {file_path}")
transcript = await self.get_transcript()
# Clear transcript as we're going to regenerate everything
async with self.transaction():
await transcripts_controller.update(
transcript,
{
"events": [],
"topics": [],
},
)
# Extract audio and write to transcript location
audio_path = await self.extract_and_write_audio(file_path, transcript)
@@ -129,8 +105,6 @@ class PipelineMainFile(PipelineMainBase):
self.logger.info("File pipeline complete")
await transcripts_controller.set_status(transcript.id, "ended")
async def extract_and_write_audio(
self, file_path: Path, transcript: Transcript
) -> Path:
@@ -388,34 +362,14 @@ async def task_pipeline_file_process(*, transcript_id: str):
if not transcript:
raise Exception(f"Transcript {transcript_id} not found")
# Find the file to process
audio_file = next(transcript.data_path.glob("upload.*"), None)
if not audio_file:
audio_file = next(transcript.data_path.glob("audio.*"), None)
if not audio_file:
raise Exception("No audio file found to process")
# Run file pipeline
pipeline = PipelineMainFile(transcript_id=transcript_id)
try:
await pipeline.set_status(transcript_id, "processing")
# Find the file to process
audio_file = next(transcript.data_path.glob("upload.*"), None)
if not audio_file:
audio_file = next(transcript.data_path.glob("audio.*"), None)
if not audio_file:
raise Exception("No audio file found to process")
await pipeline.process(audio_file)
except Exception:
await pipeline.set_status(transcript_id, "error")
raise
# Trigger webhook if this is a room recording with webhook configured
if transcript.source_kind == SourceKind.ROOM and transcript.room_id:
room = await rooms_controller.get_by_id(transcript.room_id)
if room and room.webhook_url:
logger.info(
"Dispatching webhook task",
transcript_id=transcript_id,
room_id=room.id,
webhook_url=room.webhook_url,
)
send_transcript_webhook.delay(
transcript_id, room.id, event_id=uuid.uuid4().hex
)
await pipeline.process(audio_file)

View File

@@ -22,7 +22,7 @@ from celery import chord, current_task, group, shared_task
from pydantic import BaseModel
from structlog import BoundLogger as Logger
from reflector.asynctask import asynctask
from reflector.db import get_database
from reflector.db.meetings import meeting_consent_controller, meetings_controller
from reflector.db.recordings import recordings_controller
from reflector.db.rooms import rooms_controller
@@ -32,7 +32,6 @@ from reflector.db.transcripts import (
TranscriptFinalLongSummary,
TranscriptFinalShortSummary,
TranscriptFinalTitle,
TranscriptStatus,
TranscriptText,
TranscriptTopic,
TranscriptWaveform,
@@ -41,9 +40,8 @@ from reflector.db.transcripts import (
from reflector.logger import logger
from reflector.pipelines.runner import PipelineMessage, PipelineRunner
from reflector.processors import (
AudioChunkerAutoProcessor,
AudioChunkerProcessor,
AudioDiarizationAutoProcessor,
AudioDownscaleProcessor,
AudioFileWriterProcessor,
AudioMergeProcessor,
AudioTranscriptAutoProcessor,
@@ -70,6 +68,29 @@ from reflector.zulip import (
)
def asynctask(f):
@functools.wraps(f)
def wrapper(*args, **kwargs):
async def run_with_db():
database = get_database()
await database.connect()
try:
return await f(*args, **kwargs)
finally:
await database.disconnect()
coro = run_with_db()
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
if loop and loop.is_running():
return loop.run_until_complete(coro)
return asyncio.run(coro)
return wrapper
def broadcast_to_sockets(func):
"""
Decorator to broadcast transcript event to websockets
@@ -165,16 +186,9 @@ class PipelineMainBase(PipelineRunner[PipelineMessage], Generic[PipelineMessage]
for topic in topics
]
@asynccontextmanager
async def lock_transaction(self):
# This lock is to prevent multiple processor starting adding
# into event array at the same time
async with self._lock:
yield
@asynccontextmanager
async def transaction(self):
async with self.lock_transaction():
async with self._lock:
async with transcripts_controller.transaction():
yield
@@ -183,14 +197,14 @@ class PipelineMainBase(PipelineRunner[PipelineMessage], Generic[PipelineMessage]
# if it's the first part, update the status of the transcript
# but do not set the ended status yet.
if isinstance(self, PipelineMainLive):
status_mapping: dict[str, TranscriptStatus] = {
status_mapping = {
"started": "recording",
"push": "recording",
"flush": "processing",
"error": "error",
}
elif isinstance(self, PipelineMainFinalSummaries):
status_mapping: dict[str, TranscriptStatus] = {
status_mapping = {
"push": "processing",
"flush": "processing",
"error": "error",
@@ -206,8 +220,22 @@ class PipelineMainBase(PipelineRunner[PipelineMessage], Generic[PipelineMessage]
return
# when the status of the pipeline changes, update the transcript
async with self._lock:
return await transcripts_controller.set_status(self.transcript_id, status)
async with self.transaction():
transcript = await self.get_transcript()
if status == transcript.status:
return
resp = await transcripts_controller.append_event(
transcript=transcript,
event="STATUS",
data=StrValue(value=status),
)
await transcripts_controller.update(
transcript,
{
"status": status,
},
)
return resp
@broadcast_to_sockets
async def on_transcript(self, data):
@@ -337,8 +365,7 @@ class PipelineMainLive(PipelineMainBase):
path=transcript.audio_wav_filename,
on_duration=self.on_duration,
),
AudioDownscaleProcessor(),
AudioChunkerAutoProcessor(),
AudioChunkerProcessor(),
AudioMergeProcessor(),
AudioTranscriptAutoProcessor.as_threaded(),
TranscriptLinerProcessor(),
@@ -765,7 +792,7 @@ def pipeline_post(*, transcript_id: str):
chain_final_summaries,
) | task_pipeline_post_to_zulip.si(transcript_id=transcript_id)
return chain.delay()
chain.delay()
@get_transcript

View File

@@ -1,7 +1,5 @@
from .audio_chunker import AudioChunkerProcessor # noqa: F401
from .audio_chunker_auto import AudioChunkerAutoProcessor # noqa: F401
from .audio_diarization_auto import AudioDiarizationAutoProcessor # noqa: F401
from .audio_downscale import AudioDownscaleProcessor # noqa: F401
from .audio_file_writer import AudioFileWriterProcessor # noqa: F401
from .audio_merge import AudioMergeProcessor # noqa: F401
from .audio_transcript import AudioTranscriptProcessor # noqa: F401

View File

@@ -1,78 +1,340 @@
from typing import Optional
import av
from prometheus_client import Counter, Histogram
import numpy as np
import torch
from silero_vad import VADIterator, load_silero_vad
from reflector.processors.base import Processor
class AudioChunkerProcessor(Processor):
"""
Base class for assembling audio frames into chunks
Assemble audio frames into chunks with VAD-based speech detection
"""
INPUT_TYPE = av.AudioFrame
OUTPUT_TYPE = list[av.AudioFrame]
m_chunk = Histogram(
"audio_chunker",
"Time spent in AudioChunker.chunk",
["backend"],
)
m_chunk_call = Counter(
"audio_chunker_call",
"Number of calls to AudioChunker.chunk",
["backend"],
)
m_chunk_success = Counter(
"audio_chunker_success",
"Number of successful calls to AudioChunker.chunk",
["backend"],
)
m_chunk_failure = Counter(
"audio_chunker_failure",
"Number of failed calls to AudioChunker.chunk",
["backend"],
)
def __init__(self, *args, **kwargs):
name = self.__class__.__name__
self.m_chunk = self.m_chunk.labels(name)
self.m_chunk_call = self.m_chunk_call.labels(name)
self.m_chunk_success = self.m_chunk_success.labels(name)
self.m_chunk_failure = self.m_chunk_failure.labels(name)
super().__init__(*args, **kwargs)
def __init__(
self,
block_frames=256,
max_frames=1024,
vad_threshold=0.5,
use_onnx=False,
min_frames=2,
):
super().__init__()
self.frames: list[av.AudioFrame] = []
self.block_frames = block_frames
self.max_frames = max_frames
self.vad_threshold = vad_threshold
self.min_frames = min_frames
# Initialize Silero VAD
self._init_vad(use_onnx)
def _init_vad(self, use_onnx=False):
"""Initialize Silero VAD model"""
try:
torch.set_num_threads(1)
self.vad_model = load_silero_vad(onnx=use_onnx)
self.vad_iterator = VADIterator(self.vad_model, sampling_rate=16000)
self.logger.info("Silero VAD initialized successfully")
except Exception as e:
self.logger.error(f"Failed to initialize Silero VAD: {e}")
self.vad_model = None
self.vad_iterator = None
async def _push(self, data: av.AudioFrame):
"""Process incoming audio frame"""
# Validate audio format on first frame
if len(self.frames) == 0:
if data.sample_rate != 16000 or len(data.layout.channels) != 1:
raise ValueError(
f"AudioChunkerProcessor expects 16kHz mono audio, got {data.sample_rate}Hz "
f"with {len(data.layout.channels)} channel(s). "
f"Use AudioDownscaleProcessor before this processor."
self.frames.append(data)
# print("timestamp", data.pts * data.time_base * 1000)
# Check for speech segments every 32 frames (~1 second)
if len(self.frames) >= 32 and len(self.frames) % 32 == 0:
await self._process_block()
# Safety fallback - emit if we hit max frames
elif len(self.frames) >= self.max_frames:
self.logger.warning(
f"AudioChunkerProcessor: Reached max frames ({self.max_frames}), "
f"emitting first {self.max_frames // 2} frames"
)
frames_to_emit = self.frames[: self.max_frames // 2]
self.frames = self.frames[self.max_frames // 2 :]
if len(frames_to_emit) >= self.min_frames:
await self.emit(frames_to_emit)
else:
self.logger.debug(
f"Ignoring fallback segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
try:
self.m_chunk_call.inc()
with self.m_chunk.time():
result = await self._chunk(data)
self.m_chunk_success.inc()
if result:
await self.emit(result)
except Exception:
self.m_chunk_failure.inc()
raise
async def _process_block(self):
# Need at least 32 frames for VAD detection (~1 second)
if len(self.frames) < 32 or self.vad_iterator is None:
return
async def _chunk(self, data: av.AudioFrame) -> Optional[list[av.AudioFrame]]:
"""
Process audio frame and return chunk when ready.
Subclasses should implement their chunking logic here.
"""
raise NotImplementedError
# Processing block with current buffer size
# print(f"Processing block: {len(self.frames)} frames in buffer")
try:
# Convert frames to numpy array for VAD
audio_array = self._frames_to_numpy(self.frames)
if audio_array is None:
# Fallback: emit all frames if conversion failed
frames_to_emit = self.frames[:]
self.frames = []
if len(frames_to_emit) >= self.min_frames:
await self.emit(frames_to_emit)
else:
self.logger.debug(
f"Ignoring conversion-failed segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
return
# Find complete speech segments in the buffer
speech_end_frame = self._find_speech_segment_end(audio_array)
if speech_end_frame is None or speech_end_frame <= 0:
# No speech found but buffer is getting large
if len(self.frames) > 512:
# Check if it's all silence and can be discarded
# No speech segment found, buffer at {len(self.frames)} frames
# Could emit silence or discard old frames here
# For now, keep first 256 frames and discard older silence
if len(self.frames) > 768:
self.logger.debug(
f"Discarding {len(self.frames) - 256} old frames (likely silence)"
)
self.frames = self.frames[-256:]
return
# Calculate segment timing information
frames_to_emit = self.frames[:speech_end_frame]
# Get timing from av.AudioFrame
if frames_to_emit:
first_frame = frames_to_emit[0]
last_frame = frames_to_emit[-1]
sample_rate = first_frame.sample_rate
# Calculate duration
total_samples = sum(f.samples for f in frames_to_emit)
duration_seconds = total_samples / sample_rate if sample_rate > 0 else 0
# Get timestamps if available
start_time = (
first_frame.pts * first_frame.time_base if first_frame.pts else 0
)
end_time = (
last_frame.pts * last_frame.time_base if last_frame.pts else 0
)
# Convert to HH:MM:SS format for logging
def format_time(seconds):
if not seconds:
return "00:00:00"
total_seconds = int(float(seconds))
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
secs = total_seconds % 60
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
start_formatted = format_time(start_time)
end_formatted = format_time(end_time)
# Keep remaining frames for next processing
remaining_after = len(self.frames) - speech_end_frame
# Single structured log line
self.logger.info(
"Speech segment found",
start=start_formatted,
end=end_formatted,
frames=speech_end_frame,
duration=round(duration_seconds, 2),
buffer_before=len(self.frames),
remaining=remaining_after,
)
# Keep remaining frames for next processing
self.frames = self.frames[speech_end_frame:]
# Filter out segments with too few frames
if len(frames_to_emit) >= self.min_frames:
await self.emit(frames_to_emit)
else:
self.logger.debug(
f"Ignoring segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
except Exception as e:
self.logger.error(f"Error in VAD processing: {e}")
# Fallback to simple chunking
if len(self.frames) >= self.block_frames:
frames_to_emit = self.frames[: self.block_frames]
self.frames = self.frames[self.block_frames :]
if len(frames_to_emit) >= self.min_frames:
await self.emit(frames_to_emit)
else:
self.logger.debug(
f"Ignoring exception-fallback segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
def _frames_to_numpy(self, frames: list[av.AudioFrame]) -> Optional[np.ndarray]:
"""Convert av.AudioFrame list to numpy array for VAD processing"""
if not frames:
return None
try:
first_frame = frames[0]
original_sample_rate = first_frame.sample_rate
audio_data = []
for frame in frames:
frame_array = frame.to_ndarray()
# Handle stereo -> mono conversion
if len(frame_array.shape) == 2 and frame_array.shape[0] > 1:
frame_array = np.mean(frame_array, axis=0)
elif len(frame_array.shape) == 2:
frame_array = frame_array.flatten()
audio_data.append(frame_array)
if not audio_data:
return None
combined_audio = np.concatenate(audio_data)
# Resample from 48kHz to 16kHz if needed
if original_sample_rate != 16000:
combined_audio = self._resample_audio(
combined_audio, original_sample_rate, 16000
)
# Ensure float32 format
if combined_audio.dtype == np.int16:
# Normalize int16 audio to float32 in range [-1.0, 1.0]
combined_audio = combined_audio.astype(np.float32) / 32768.0
elif combined_audio.dtype != np.float32:
combined_audio = combined_audio.astype(np.float32)
return combined_audio
except Exception as e:
self.logger.error(f"Error converting frames to numpy: {e}")
return None
def _resample_audio(
self, audio: np.ndarray, from_sr: int, to_sr: int
) -> np.ndarray:
"""Simple linear resampling from from_sr to to_sr"""
if from_sr == to_sr:
return audio
try:
# Simple linear interpolation resampling
ratio = to_sr / from_sr
new_length = int(len(audio) * ratio)
# Create indices for interpolation
old_indices = np.linspace(0, len(audio) - 1, new_length)
resampled = np.interp(old_indices, np.arange(len(audio)), audio)
return resampled.astype(np.float32)
except Exception as e:
self.logger.error("Resampling error", exc_info=e)
# Fallback: simple decimation/repetition
if from_sr > to_sr:
# Downsample by taking every nth sample
step = from_sr // to_sr
return audio[::step]
else:
# Upsample by repeating samples
repeat = to_sr // from_sr
return np.repeat(audio, repeat)
def _find_speech_segment_end(self, audio_array: np.ndarray) -> Optional[int]:
"""Find complete speech segments and return frame index at segment end"""
if self.vad_iterator is None or len(audio_array) == 0:
return None
try:
# Process audio in 512-sample windows for VAD
window_size = 512
min_silence_windows = 3 # Require 3 windows of silence after speech
# Track speech state
in_speech = False
speech_start = None
speech_end = None
silence_count = 0
for i in range(0, len(audio_array), window_size):
chunk = audio_array[i : i + window_size]
if len(chunk) < window_size:
chunk = np.pad(chunk, (0, window_size - len(chunk)))
# Detect if this window has speech
speech_dict = self.vad_iterator(chunk, return_seconds=True)
# VADIterator returns dict with 'start' and 'end' when speech segments are detected
if speech_dict:
if not in_speech:
# Speech started
speech_start = i
in_speech = True
# Debug: print(f"Speech START at sample {i}, VAD: {speech_dict}")
silence_count = 0 # Reset silence counter
continue
if not in_speech:
continue
# We're in speech but found silence
silence_count += 1
if silence_count < min_silence_windows:
continue
# Found end of speech segment
speech_end = i - (min_silence_windows - 1) * window_size
# Debug: print(f"Speech END at sample {speech_end}")
# Convert sample position to frame index
samples_per_frame = self.frames[0].samples if self.frames else 1024
# Account for resampling: we process at 16kHz but frames might be 48kHz
resample_ratio = 48000 / 16000 # 3x
actual_sample_pos = int(speech_end * resample_ratio)
frame_index = actual_sample_pos // samples_per_frame
# Ensure we don't exceed buffer
frame_index = min(frame_index, len(self.frames))
return frame_index
return None
except Exception as e:
self.logger.error(f"Error finding speech segment: {e}")
return None
async def _flush(self):
"""Flush any remaining frames when processing ends"""
raise NotImplementedError
frames = self.frames[:]
self.frames = []
if frames:
if len(frames) >= self.min_frames:
await self.emit(frames)
else:
self.logger.debug(
f"Ignoring flush segment with {len(frames)} frames "
f"(< {self.min_frames} minimum)"
)

View File

@@ -1,32 +0,0 @@
import importlib
from reflector.processors.audio_chunker import AudioChunkerProcessor
from reflector.settings import settings
class AudioChunkerAutoProcessor(AudioChunkerProcessor):
_registry = {}
@classmethod
def register(cls, name, kclass):
cls._registry[name] = kclass
def __new__(cls, name: str | None = None, **kwargs):
if name is None:
name = settings.AUDIO_CHUNKER_BACKEND
if name not in cls._registry:
module_name = f"reflector.processors.audio_chunker_{name}"
importlib.import_module(module_name)
# gather specific configuration for the processor
# search `AUDIO_CHUNKER_BACKEND_XXX_YYY`, push to constructor as `backend_xxx_yyy`
config = {}
name_upper = name.upper()
settings_prefix = "AUDIO_CHUNKER_"
config_prefix = f"{settings_prefix}{name_upper}_"
for key, value in settings:
if key.startswith(config_prefix):
config_name = key[len(settings_prefix) :].lower()
config[config_name] = value
return cls._registry[name](**config | kwargs)

View File

@@ -1,34 +0,0 @@
from typing import Optional
import av
from reflector.processors.audio_chunker import AudioChunkerProcessor
from reflector.processors.audio_chunker_auto import AudioChunkerAutoProcessor
class AudioChunkerFramesProcessor(AudioChunkerProcessor):
"""
Simple frame-based audio chunker that emits chunks after a fixed number of frames
"""
def __init__(self, max_frames=256, **kwargs):
super().__init__(**kwargs)
self.max_frames = max_frames
async def _chunk(self, data: av.AudioFrame) -> Optional[list[av.AudioFrame]]:
self.frames.append(data)
if len(self.frames) >= self.max_frames:
frames_to_emit = self.frames[:]
self.frames = []
return frames_to_emit
return None
async def _flush(self):
frames = self.frames[:]
self.frames = []
if frames:
await self.emit(frames)
AudioChunkerAutoProcessor.register("frames", AudioChunkerFramesProcessor)

View File

@@ -1,298 +0,0 @@
from typing import Optional
import av
import numpy as np
import torch
from silero_vad import VADIterator, load_silero_vad
from reflector.processors.audio_chunker import AudioChunkerProcessor
from reflector.processors.audio_chunker_auto import AudioChunkerAutoProcessor
class AudioChunkerSileroProcessor(AudioChunkerProcessor):
"""
Assemble audio frames into chunks with VAD-based speech detection using Silero VAD
"""
def __init__(
self,
block_frames=256,
max_frames=1024,
use_onnx=True,
min_frames=2,
**kwargs,
):
super().__init__(**kwargs)
self.block_frames = block_frames
self.max_frames = max_frames
self.min_frames = min_frames
# Initialize Silero VAD
self._init_vad(use_onnx)
def _init_vad(self, use_onnx=False):
"""Initialize Silero VAD model"""
try:
torch.set_num_threads(1)
self.vad_model = load_silero_vad(onnx=use_onnx)
self.vad_iterator = VADIterator(self.vad_model, sampling_rate=16000)
self.logger.info("Silero VAD initialized successfully")
except Exception as e:
self.logger.error(f"Failed to initialize Silero VAD: {e}")
self.vad_model = None
self.vad_iterator = None
async def _chunk(self, data: av.AudioFrame) -> Optional[list[av.AudioFrame]]:
"""Process audio frame and return chunk when ready"""
self.frames.append(data)
# Check for speech segments every 32 frames (~1 second)
if len(self.frames) >= 32 and len(self.frames) % 32 == 0:
return await self._process_block()
# Safety fallback - emit if we hit max frames
elif len(self.frames) >= self.max_frames:
self.logger.warning(
f"AudioChunkerSileroProcessor: Reached max frames ({self.max_frames}), "
f"emitting first {self.max_frames // 2} frames"
)
frames_to_emit = self.frames[: self.max_frames // 2]
self.frames = self.frames[self.max_frames // 2 :]
if len(frames_to_emit) >= self.min_frames:
return frames_to_emit
else:
self.logger.debug(
f"Ignoring fallback segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
return None
async def _process_block(self) -> Optional[list[av.AudioFrame]]:
# Need at least 32 frames for VAD detection (~1 second)
if len(self.frames) < 32 or self.vad_iterator is None:
return None
# Processing block with current buffer size
print(f"Processing block: {len(self.frames)} frames in buffer")
try:
# Convert frames to numpy array for VAD
audio_array = self._frames_to_numpy(self.frames)
if audio_array is None:
# Fallback: emit all frames if conversion failed
frames_to_emit = self.frames[:]
self.frames = []
if len(frames_to_emit) >= self.min_frames:
return frames_to_emit
else:
self.logger.debug(
f"Ignoring conversion-failed segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
return None
# Find complete speech segments in the buffer
speech_end_frame = self._find_speech_segment_end(audio_array)
if speech_end_frame is None or speech_end_frame <= 0:
# No speech found but buffer is getting large
if len(self.frames) > 512:
# Check if it's all silence and can be discarded
# No speech segment found, buffer at {len(self.frames)} frames
# Could emit silence or discard old frames here
# For now, keep first 256 frames and discard older silence
if len(self.frames) > 768:
self.logger.debug(
f"Discarding {len(self.frames) - 256} old frames (likely silence)"
)
self.frames = self.frames[-256:]
return None
# Calculate segment timing information
frames_to_emit = self.frames[:speech_end_frame]
# Get timing from av.AudioFrame
if frames_to_emit:
first_frame = frames_to_emit[0]
last_frame = frames_to_emit[-1]
sample_rate = first_frame.sample_rate
# Calculate duration
total_samples = sum(f.samples for f in frames_to_emit)
duration_seconds = total_samples / sample_rate if sample_rate > 0 else 0
# Get timestamps if available
start_time = (
first_frame.pts * first_frame.time_base if first_frame.pts else 0
)
end_time = (
last_frame.pts * last_frame.time_base if last_frame.pts else 0
)
# Convert to HH:MM:SS format for logging
def format_time(seconds):
if not seconds:
return "00:00:00"
total_seconds = int(float(seconds))
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
secs = total_seconds % 60
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
start_formatted = format_time(start_time)
end_formatted = format_time(end_time)
# Keep remaining frames for next processing
remaining_after = len(self.frames) - speech_end_frame
# Single structured log line
self.logger.info(
"Speech segment found",
start=start_formatted,
end=end_formatted,
frames=speech_end_frame,
duration=round(duration_seconds, 2),
buffer_before=len(self.frames),
remaining=remaining_after,
)
# Keep remaining frames for next processing
self.frames = self.frames[speech_end_frame:]
# Filter out segments with too few frames
if len(frames_to_emit) >= self.min_frames:
return frames_to_emit
else:
self.logger.debug(
f"Ignoring segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
except Exception as e:
self.logger.error(f"Error in VAD processing: {e}")
# Fallback to simple chunking
if len(self.frames) >= self.block_frames:
frames_to_emit = self.frames[: self.block_frames]
self.frames = self.frames[self.block_frames :]
if len(frames_to_emit) >= self.min_frames:
return frames_to_emit
else:
self.logger.debug(
f"Ignoring exception-fallback segment with {len(frames_to_emit)} frames "
f"(< {self.min_frames} minimum)"
)
return None
def _frames_to_numpy(self, frames: list[av.AudioFrame]) -> Optional[np.ndarray]:
"""Convert av.AudioFrame list to numpy array for VAD processing"""
if not frames:
return None
try:
audio_data = []
for frame in frames:
frame_array = frame.to_ndarray()
if len(frame_array.shape) == 2:
frame_array = frame_array.flatten()
audio_data.append(frame_array)
if not audio_data:
return None
combined_audio = np.concatenate(audio_data)
# Ensure float32 format
if combined_audio.dtype == np.int16:
# Normalize int16 audio to float32 in range [-1.0, 1.0]
combined_audio = combined_audio.astype(np.float32) / 32768.0
elif combined_audio.dtype != np.float32:
combined_audio = combined_audio.astype(np.float32)
return combined_audio
except Exception as e:
self.logger.error(f"Error converting frames to numpy: {e}")
return None
def _find_speech_segment_end(self, audio_array: np.ndarray) -> Optional[int]:
"""Find complete speech segments and return frame index at segment end"""
if self.vad_iterator is None or len(audio_array) == 0:
return None
try:
# Process audio in 512-sample windows for VAD
window_size = 512
min_silence_windows = 3 # Require 3 windows of silence after speech
# Track speech state
in_speech = False
speech_start = None
speech_end = None
silence_count = 0
for i in range(0, len(audio_array), window_size):
chunk = audio_array[i : i + window_size]
if len(chunk) < window_size:
chunk = np.pad(chunk, (0, window_size - len(chunk)))
# Detect if this window has speech
speech_dict = self.vad_iterator(chunk, return_seconds=True)
# VADIterator returns dict with 'start' and 'end' when speech segments are detected
if speech_dict:
if not in_speech:
# Speech started
speech_start = i
in_speech = True
# Debug: print(f"Speech START at sample {i}, VAD: {speech_dict}")
silence_count = 0 # Reset silence counter
continue
if not in_speech:
continue
# We're in speech but found silence
silence_count += 1
if silence_count < min_silence_windows:
continue
# Found end of speech segment
speech_end = i - (min_silence_windows - 1) * window_size
# Debug: print(f"Speech END at sample {speech_end}")
# Convert sample position to frame index
samples_per_frame = self.frames[0].samples if self.frames else 1024
frame_index = speech_end // samples_per_frame
# Ensure we don't exceed buffer
frame_index = min(frame_index, len(self.frames))
return frame_index
return None
except Exception as e:
self.logger.error(f"Error finding speech segment: {e}")
return None
async def _flush(self):
frames = self.frames[:]
self.frames = []
if frames:
if len(frames) >= self.min_frames:
await self.emit(frames)
else:
self.logger.debug(
f"Ignoring flush segment with {len(frames)} frames "
f"(< {self.min_frames} minimum)"
)
AudioChunkerAutoProcessor.register("silero", AudioChunkerSileroProcessor)

View File

@@ -1,60 +0,0 @@
from typing import Optional
import av
from av.audio.resampler import AudioResampler
from reflector.processors.base import Processor
def copy_frame(frame: av.AudioFrame) -> av.AudioFrame:
frame_copy = frame.from_ndarray(
frame.to_ndarray(),
format=frame.format.name,
layout=frame.layout.name,
)
frame_copy.sample_rate = frame.sample_rate
frame_copy.pts = frame.pts
frame_copy.time_base = frame.time_base
return frame_copy
class AudioDownscaleProcessor(Processor):
"""
Downscale audio frames to 16kHz mono format
"""
INPUT_TYPE = av.AudioFrame
OUTPUT_TYPE = av.AudioFrame
def __init__(self, target_rate: int = 16000, target_layout: str = "mono", **kwargs):
super().__init__(**kwargs)
self.target_rate = target_rate
self.target_layout = target_layout
self.resampler: Optional[AudioResampler] = None
self.needs_resampling: Optional[bool] = None
async def _push(self, data: av.AudioFrame):
if self.needs_resampling is None:
self.needs_resampling = (
data.sample_rate != self.target_rate
or data.layout.name != self.target_layout
)
if self.needs_resampling:
self.resampler = AudioResampler(
format="s16", layout=self.target_layout, rate=self.target_rate
)
if not self.needs_resampling or not self.resampler:
await self.emit(data)
return
resampled_frames = self.resampler.resample(copy_frame(data))
for resampled_frame in resampled_frames:
await self.emit(resampled_frame)
async def _flush(self):
if self.needs_resampling and self.resampler:
final_frames = self.resampler.resample(None)
for frame in final_frames:
await self.emit(frame)

View File

@@ -3,11 +3,24 @@ from time import monotonic_ns
from uuid import uuid4
import av
from av.audio.resampler import AudioResampler
from reflector.processors.base import Processor
from reflector.processors.types import AudioFile
def copy_frame(frame: av.AudioFrame) -> av.AudioFrame:
frame_copy = frame.from_ndarray(
frame.to_ndarray(),
format=frame.format.name,
layout=frame.layout.name,
)
frame_copy.sample_rate = frame.sample_rate
frame_copy.pts = frame.pts
frame_copy.time_base = frame.time_base
return frame_copy
class AudioMergeProcessor(Processor):
"""
Merge audio frame into a single file
@@ -16,8 +29,9 @@ class AudioMergeProcessor(Processor):
INPUT_TYPE = list[av.AudioFrame]
OUTPUT_TYPE = AudioFile
def __init__(self, **kwargs):
def __init__(self, downsample_to_16k_mono: bool = True, **kwargs):
super().__init__(**kwargs)
self.downsample_to_16k_mono = downsample_to_16k_mono
async def _push(self, data: list[av.AudioFrame]):
if not data:
@@ -25,27 +39,72 @@ class AudioMergeProcessor(Processor):
# get audio information from first frame
frame = data[0]
output_channels = len(frame.layout.channels)
output_sample_rate = frame.sample_rate
output_sample_width = frame.format.bytes
original_channels = len(frame.layout.channels)
original_sample_rate = frame.sample_rate
original_sample_width = frame.format.bytes
# determine if we need processing
needs_processing = self.downsample_to_16k_mono and (
original_sample_rate != 16000 or original_channels != 1
)
# determine output parameters
if self.downsample_to_16k_mono:
output_sample_rate = 16000
output_channels = 1
output_sample_width = 2 # 16-bit = 2 bytes
else:
output_sample_rate = original_sample_rate
output_channels = original_channels
output_sample_width = original_sample_width
# create audio file
uu = uuid4().hex
fd = io.BytesIO()
# Use PyAV to write frames
out_container = av.open(fd, "w", format="wav")
out_stream = out_container.add_stream("pcm_s16le", rate=output_sample_rate)
out_stream.layout = frame.layout.name
if needs_processing:
# Process with PyAV resampler
out_container = av.open(fd, "w", format="wav")
out_stream = out_container.add_stream("pcm_s16le", rate=16000)
out_stream.layout = "mono"
for frame in data:
for packet in out_stream.encode(frame):
# Create resampler if needed
resampler = None
if original_sample_rate != 16000 or original_channels != 1:
resampler = AudioResampler(format="s16", layout="mono", rate=16000)
for frame in data:
if resampler:
# Resample and convert to mono
# XXX for an unknown reason, if we don't use a copy of the frame, we get
# Invalid Argumment from resample. Debugging indicate that when a previous processor
# already used the frame (like AudioFileWriter), it make it invalid argument here.
resampled_frames = resampler.resample(copy_frame(frame))
for resampled_frame in resampled_frames:
for packet in out_stream.encode(resampled_frame):
out_container.mux(packet)
else:
# Direct encoding without resampling
for packet in out_stream.encode(frame):
out_container.mux(packet)
# Flush the encoder
for packet in out_stream.encode(None):
out_container.mux(packet)
out_container.close()
else:
# Use PyAV for original frames (no processing needed)
out_container = av.open(fd, "w", format="wav")
out_stream = out_container.add_stream("pcm_s16le", rate=output_sample_rate)
out_stream.layout = "mono" if output_channels == 1 else frame.layout
# Flush the encoder
for packet in out_stream.encode(None):
out_container.mux(packet)
out_container.close()
for frame in data:
for packet in out_stream.encode(frame):
out_container.mux(packet)
for packet in out_stream.encode(None):
out_container.mux(packet)
out_container.close()
fd.seek(0)

View File

@@ -12,6 +12,9 @@ API will be a POST request to TRANSCRIPT_URL:
"""
from typing import List
import aiohttp
from openai import AsyncOpenAI
from reflector.processors.audio_transcript import AudioTranscriptProcessor
@@ -22,9 +25,7 @@ from reflector.settings import settings
class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
def __init__(
self,
modal_api_key: str | None = None,
**kwargs,
self, modal_api_key: str | None = None, batch_enabled: bool = True, **kwargs
):
super().__init__()
if not settings.TRANSCRIPT_URL:
@@ -34,6 +35,126 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
self.transcript_url = settings.TRANSCRIPT_URL + "/v1"
self.timeout = settings.TRANSCRIPT_TIMEOUT
self.modal_api_key = modal_api_key
self.max_batch_duration = 10.0
self.max_batch_files = 15
self.batch_enabled = batch_enabled
self.pending_files: List[AudioFile] = [] # Files waiting to be processed
@classmethod
def _calculate_duration(cls, audio_file: AudioFile) -> float:
"""Calculate audio duration in seconds from AudioFile metadata"""
# Duration = total_samples / sample_rate
# We need to estimate total samples from the file data
import wave
try:
# Try to read as WAV file to get duration
audio_file.fd.seek(0)
with wave.open(audio_file.fd, "rb") as wav_file:
frames = wav_file.getnframes()
sample_rate = wav_file.getframerate()
duration = frames / sample_rate
return duration
except Exception:
# Fallback: estimate from file size and audio parameters
audio_file.fd.seek(0, 2) # Seek to end
file_size = audio_file.fd.tell()
audio_file.fd.seek(0) # Reset to beginning
# Estimate: file_size / (sample_rate * channels * sample_width)
bytes_per_second = (
audio_file.sample_rate
* audio_file.channels
* (audio_file.sample_width // 8)
)
estimated_duration = (
file_size / bytes_per_second if bytes_per_second > 0 else 0
)
return max(0, estimated_duration)
def _create_batches(self, audio_files: List[AudioFile]) -> List[List[AudioFile]]:
"""Group audio files into batches with maximum 30s total duration"""
batches = []
current_batch = []
current_duration = 0.0
for audio_file in audio_files:
duration = self._calculate_duration(audio_file)
# If adding this file exceeds max duration, start a new batch
if current_duration + duration > self.max_batch_duration and current_batch:
batches.append(current_batch)
current_batch = [audio_file]
current_duration = duration
else:
current_batch.append(audio_file)
current_duration += duration
# Add the last batch if not empty
if current_batch:
batches.append(current_batch)
return batches
async def _transcript_batch(self, audio_files: List[AudioFile]) -> List[Transcript]:
"""Transcribe a batch of audio files using the parakeet backend"""
if not audio_files:
return []
self.logger.debug(f"Batch transcribing {len(audio_files)} files")
# Prepare form data for batch request
data = aiohttp.FormData()
data.add_field("language", self.get_pref("audio:source_language", "en"))
data.add_field("batch", "true")
for i, audio_file in enumerate(audio_files):
audio_file.fd.seek(0)
data.add_field(
"files",
audio_file.fd,
filename=f"{audio_file.name}",
content_type="audio/wav",
)
# Make batch request
headers = {"Authorization": f"Bearer {self.modal_api_key}"}
async with aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=self.timeout)
) as session:
async with session.post(
f"{self.transcript_url}/audio/transcriptions",
data=data,
headers=headers,
) as response:
if response.status != 200:
error_text = await response.text()
raise Exception(
f"Batch transcription failed: {response.status} {error_text}"
)
result = await response.json()
# Process batch results
transcripts = []
results = result.get("results", [])
for i, (audio_file, file_result) in enumerate(zip(audio_files, results)):
transcript = Transcript(
words=[
Word(
text=word_info["word"],
start=word_info["start"],
end=word_info["end"],
)
for word_info in file_result.get("words", [])
]
)
transcript.add_offset(audio_file.timestamp)
transcripts.append(transcript)
return transcripts
async def _transcript(self, data: AudioFile):
async with AsyncOpenAI(
@@ -66,5 +187,96 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
return transcript
async def transcript_multiple(
self, audio_files: List[AudioFile]
) -> List[Transcript]:
"""Transcribe multiple audio files using batching"""
if len(audio_files) == 1:
# Single file, use existing method
return [await self._transcript(audio_files[0])]
# Create batches with max 30s duration each
batches = self._create_batches(audio_files)
self.logger.debug(
f"Processing {len(audio_files)} files in {len(batches)} batches"
)
# Process all batches concurrently
all_transcripts = []
for batch in batches:
batch_transcripts = await self._transcript_batch(batch)
all_transcripts.extend(batch_transcripts)
return all_transcripts
async def _push(self, data: AudioFile):
"""Override _push to support batching"""
if not self.batch_enabled:
# Use parent implementation for single file processing
return await super()._push(data)
# Add file to pending batch
self.pending_files.append(data)
self.logger.debug(
f"Added file to batch: {data.name}, batch size: {len(self.pending_files)}"
)
# Calculate total duration of pending files
total_duration = sum(self._calculate_duration(f) for f in self.pending_files)
# Process batch if it reaches max duration or has multiple files ready for optimization
should_process_batch = (
total_duration >= self.max_batch_duration
or len(self.pending_files) >= self.max_batch_files
)
if should_process_batch:
await self._process_pending_batch()
async def _process_pending_batch(self):
"""Process all pending files as batches"""
if not self.pending_files:
return
self.logger.debug(f"Processing batch of {len(self.pending_files)} files")
try:
# Create batches respecting duration limit
batches = self._create_batches(self.pending_files)
# Process each batch
for batch in batches:
self.m_transcript_call.inc()
try:
with self.m_transcript.time():
# Use batch transcription
transcripts = await self._transcript_batch(batch)
self.m_transcript_success.inc()
# Emit each transcript
for transcript in transcripts:
if transcript:
await self.emit(transcript)
except Exception:
self.m_transcript_failure.inc()
raise
finally:
# Release audio files
for audio_file in batch:
audio_file.release()
finally:
# Clear pending files
self.pending_files.clear()
async def _flush(self):
"""Process any remaining files when flushing"""
await self._process_pending_batch()
await super()._flush()
AudioTranscriptAutoProcessor.register("modal", AudioTranscriptModalProcessor)

View File

@@ -67,9 +67,6 @@ class FileTranscriptModalProcessor(FileTranscriptProcessor):
for word_info in result.get("words", [])
]
# words come not in order
words.sort(key=lambda w: w.start)
return Transcript(words=words)

View File

@@ -1,4 +1,3 @@
from pydantic.types import PositiveInt
from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -22,10 +21,6 @@ class Settings(BaseSettings):
# local data directory
DATA_DIR: str = "./data"
# Audio Chunking
# backends: silero, frames
AUDIO_CHUNKER_BACKEND: str = "frames"
# Audio Transcription
# backends: whisper, modal
TRANSCRIPT_BACKEND: str = "whisper"
@@ -91,8 +86,9 @@ class Settings(BaseSettings):
AUTH_JWT_PUBLIC_KEY: str | None = "authentik.monadical.com_public.pem"
AUTH_JWT_AUDIENCE: str | None = None
# API public mode
# if set, all anonymous record will be public
PUBLIC_MODE: bool = False
PUBLIC_DATA_RETENTION_DAYS: PositiveInt = 7
# Min transcript length to generate topic + summary
MIN_TRANSCRIPT_LENGTH: int = 750

View File

@@ -1,72 +0,0 @@
#!/usr/bin/env python
"""
Manual cleanup tool for old public data.
Uses the same implementation as the Celery worker task.
"""
import argparse
import asyncio
import sys
import structlog
from reflector.settings import settings
from reflector.worker.cleanup import _cleanup_old_public_data
logger = structlog.get_logger(__name__)
async def cleanup_old_data(days: int = 7):
logger.info(
"Starting manual cleanup",
retention_days=days,
public_mode=settings.PUBLIC_MODE,
)
if not settings.PUBLIC_MODE:
logger.critical(
"WARNING: PUBLIC_MODE is False. "
"This tool is intended for public instances only."
)
raise Exception("Tool intended for public instances only")
result = await _cleanup_old_public_data(days=days)
if result:
logger.info(
"Cleanup completed",
transcripts_deleted=result.get("transcripts_deleted", 0),
meetings_deleted=result.get("meetings_deleted", 0),
recordings_deleted=result.get("recordings_deleted", 0),
errors_count=len(result.get("errors", [])),
)
if result.get("errors"):
logger.warning(
"Errors encountered during cleanup:", errors=result["errors"][:10]
)
else:
logger.info("Cleanup skipped or completed without results")
def main():
parser = argparse.ArgumentParser(
description="Clean up old transcripts and meetings"
)
parser.add_argument(
"--days",
type=int,
default=7,
help="Number of days to keep data (default: 7)",
)
args = parser.parse_args()
if args.days < 1:
logger.error("Days must be at least 1")
sys.exit(1)
asyncio.run(cleanup_old_data(days=args.days))
if __name__ == "__main__":
main()

View File

@@ -1,204 +1,292 @@
"""
Process audio file with diarization support
===========================================
Extended version of process.py that includes speaker diarization.
This tool processes audio files locally without requiring the full server infrastructure.
"""
import argparse
import asyncio
import json
import shutil
import sys
import time
import tempfile
import uuid
from pathlib import Path
from typing import Any, Dict, List, Literal
from typing import List
import av
from reflector.db.transcripts import SourceKind, TranscriptTopic, transcripts_controller
from reflector.logger import logger
from reflector.pipelines.main_file_pipeline import (
task_pipeline_file_process as task_pipeline_file_process,
from reflector.processors import (
AudioChunkerProcessor,
AudioFileWriterProcessor,
AudioMergeProcessor,
AudioTranscriptAutoProcessor,
Pipeline,
PipelineEvent,
TranscriptFinalSummaryProcessor,
TranscriptFinalTitleProcessor,
TranscriptLinerProcessor,
TranscriptTopicDetectorProcessor,
TranscriptTranslatorAutoProcessor,
)
from reflector.pipelines.main_live_pipeline import pipeline_post as live_pipeline_post
from reflector.pipelines.main_live_pipeline import (
pipeline_process as live_pipeline_process,
from reflector.processors.base import BroadcastProcessor, Processor
from reflector.processors.types import (
AudioDiarizationInput,
TitleSummary,
TitleSummaryWithId,
)
def serialize_topics(topics: List[TranscriptTopic]) -> List[Dict[str, Any]]:
"""Convert TranscriptTopic objects to JSON-serializable dicts"""
serialized = []
for topic in topics:
topic_dict = topic.model_dump()
serialized.append(topic_dict)
return serialized
class TopicCollectorProcessor(Processor):
"""Collect topics for diarization"""
INPUT_TYPE = TitleSummary
OUTPUT_TYPE = TitleSummary
def debug_print_speakers(serialized_topics: List[Dict[str, Any]]) -> None:
"""Print debug info about speakers found in topics"""
all_speakers = set()
for topic_dict in serialized_topics:
for word in topic_dict.get("words", []):
all_speakers.add(word.get("speaker", 0))
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.topics: List[TitleSummaryWithId] = []
self._topic_id = 0
print(
f"Found {len(serialized_topics)} topics with speakers: {all_speakers}",
file=sys.stderr,
)
TranscriptId = str
# common interface for every flow: it needs an Entry in db with specific ceremony (file path + status + actual file in file system)
# ideally we want to get rid of it at some point
async def prepare_entry(
source_path: str,
source_language: str,
target_language: str,
) -> TranscriptId:
file_path = Path(source_path)
transcript = await transcripts_controller.add(
file_path.name,
# note that the real file upload has SourceKind: LIVE for the reason of it's an error
source_kind=SourceKind.FILE,
source_language=source_language,
target_language=target_language,
user_id=None,
)
logger.info(
f"Created empty transcript {transcript.id} for file {file_path.name} because technically we need an empty transcript before we start transcript"
)
# pipelines expect files as upload.*
extension = file_path.suffix
upload_path = transcript.data_path / f"upload{extension}"
upload_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(source_path, upload_path)
logger.info(f"Copied {source_path} to {upload_path}")
# pipelines expect entity status "uploaded"
await transcripts_controller.update(transcript, {"status": "uploaded"})
return transcript.id
# same reason as prepare_entry
async def extract_result_from_entry(
transcript_id: TranscriptId, output_path: str
) -> None:
post_final_transcript = await transcripts_controller.get_by_id(transcript_id)
# assert post_final_transcript.status == "ended"
# File pipeline doesn't set status to "ended", only live pipeline does https://github.com/Monadical-SAS/reflector/issues/582
topics = post_final_transcript.topics
if not topics:
raise RuntimeError(
f"No topics found for transcript {transcript_id} after processing"
async def _push(self, data: TitleSummary):
# Convert to TitleSummaryWithId and collect
self._topic_id += 1
topic_with_id = TitleSummaryWithId(
id=str(self._topic_id),
title=data.title,
summary=data.summary,
timestamp=data.timestamp,
duration=data.duration,
transcript=data.transcript,
)
self.topics.append(topic_with_id)
serialized_topics = serialize_topics(topics)
# Pass through the original topic
await self.emit(data)
if output_path:
# Write to JSON file
with open(output_path, "w") as f:
for topic_dict in serialized_topics:
json.dump(topic_dict, f)
f.write("\n")
print(f"Results written to {output_path}", file=sys.stderr)
else:
# Write to stdout as JSONL
for topic_dict in serialized_topics:
print(json.dumps(topic_dict))
debug_print_speakers(serialized_topics)
def get_topics(self) -> List[TitleSummaryWithId]:
return self.topics
async def process_live_pipeline(
transcript_id: TranscriptId,
async def process_audio_file(
filename,
event_callback,
only_transcript=False,
source_language="en",
target_language="en",
enable_diarization=True,
diarization_backend="pyannote",
):
"""Process transcript_id with transcription and diarization"""
# Create temp file for audio if diarization is enabled
audio_temp_path = None
if enable_diarization:
audio_temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
audio_temp_path = audio_temp_file.name
audio_temp_file.close()
print(f"Processing transcript_id {transcript_id}...", file=sys.stderr)
await live_pipeline_process(transcript_id=transcript_id)
print(f"Processing complete for transcript {transcript_id}", file=sys.stderr)
# Create processor for collecting topics
topic_collector = TopicCollectorProcessor()
pre_final_transcript = await transcripts_controller.get_by_id(transcript_id)
# Build pipeline for audio processing
processors = []
# assert documented behaviour: after process, the pipeline isn't ended. this is the reason of calling pipeline_post
assert pre_final_transcript.status != "ended"
# Add audio file writer at the beginning if diarization is enabled
if enable_diarization:
processors.append(AudioFileWriterProcessor(audio_temp_path))
# at this point, diarization is running but we have no access to it. run diarization in parallel - one will hopefully win after polling
result = live_pipeline_post(transcript_id=transcript_id)
# Add the rest of the processors
processors += [
AudioChunkerProcessor(),
AudioMergeProcessor(),
AudioTranscriptAutoProcessor.as_threaded(),
TranscriptLinerProcessor(),
TranscriptTranslatorAutoProcessor.as_threaded(),
]
# result.ready() blocks even without await; it mutates result also
while not result.ready():
print(f"Status: {result.state}")
time.sleep(2)
if not only_transcript:
processors += [
TranscriptTopicDetectorProcessor.as_threaded(),
# Collect topics for diarization
topic_collector,
BroadcastProcessor(
processors=[
TranscriptFinalTitleProcessor.as_threaded(),
TranscriptFinalSummaryProcessor.as_threaded(),
],
),
]
# Create main pipeline
pipeline = Pipeline(*processors)
pipeline.set_pref("audio:source_language", source_language)
pipeline.set_pref("audio:target_language", target_language)
pipeline.describe()
pipeline.on(event_callback)
# Start processing audio
logger.info(f"Opening {filename}")
container = av.open(filename)
try:
logger.info("Start pushing audio into the pipeline")
for frame in container.decode(audio=0):
await pipeline.push(frame)
finally:
logger.info("Flushing the pipeline")
await pipeline.flush()
# Run diarization if enabled and we have topics
if enable_diarization and not only_transcript and audio_temp_path:
topics = topic_collector.get_topics()
if topics:
logger.info(f"Starting diarization with {len(topics)} topics")
try:
from reflector.processors import AudioDiarizationAutoProcessor
diarization_processor = AudioDiarizationAutoProcessor(
name=diarization_backend
)
diarization_processor.set_pipeline(pipeline)
# For Modal backend, we need to upload the file to S3 first
if diarization_backend == "modal":
from datetime import datetime
from reflector.storage import get_transcripts_storage
from reflector.utils.s3_temp_file import S3TemporaryFile
storage = get_transcripts_storage()
# Generate a unique filename in evaluation folder
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
audio_filename = f"evaluation/diarization_temp/{timestamp}_{uuid.uuid4().hex}.wav"
# Use context manager for automatic cleanup
async with S3TemporaryFile(storage, audio_filename) as s3_file:
# Read and upload the audio file
with open(audio_temp_path, "rb") as f:
audio_data = f.read()
audio_url = await s3_file.upload(audio_data)
logger.info(f"Uploaded audio to S3: {audio_filename}")
# Create diarization input with S3 URL
diarization_input = AudioDiarizationInput(
audio_url=audio_url, topics=topics
)
# Run diarization
await diarization_processor.push(diarization_input)
await diarization_processor.flush()
logger.info("Diarization complete")
# File will be automatically cleaned up when exiting the context
else:
# For local backend, use local file path
audio_url = audio_temp_path
# Create diarization input
diarization_input = AudioDiarizationInput(
audio_url=audio_url, topics=topics
)
# Run diarization
await diarization_processor.push(diarization_input)
await diarization_processor.flush()
logger.info("Diarization complete")
except ImportError as e:
logger.error(f"Failed to import diarization dependencies: {e}")
logger.error(
"Install with: uv pip install pyannote.audio torch torchaudio"
)
logger.error(
"And set HF_TOKEN environment variable for pyannote models"
)
raise SystemExit(1)
except Exception as e:
logger.error(f"Diarization failed: {e}")
raise SystemExit(1)
else:
logger.warning("Skipping diarization: no topics available")
# Clean up temp file
if audio_temp_path:
try:
Path(audio_temp_path).unlink()
except Exception as e:
logger.warning(f"Failed to clean up temp file {audio_temp_path}: {e}")
logger.info("All done!")
async def process_file_pipeline(
transcript_id: TranscriptId,
filename: str,
event_callback,
source_language="en",
target_language="en",
enable_diarization=True,
diarization_backend="modal",
):
"""Process audio/video file using the optimized file pipeline"""
# task_pipeline_file_process is a Celery task, need to use .delay() for async execution
result = task_pipeline_file_process.delay(transcript_id=transcript_id)
# Wait for the Celery task to complete
while not result.ready():
print(f"File pipeline status: {result.state}", file=sys.stderr)
time.sleep(2)
logger.info("File pipeline processing complete")
async def process(
source_path: str,
source_language: str,
target_language: str,
pipeline: Literal["live", "file"],
output_path: str = None,
):
from reflector.db import get_database
database = get_database()
# db connect is a part of ceremony
await database.connect()
try:
transcript_id = await prepare_entry(
source_path,
source_language,
target_language,
from reflector.db import database
from reflector.db.transcripts import SourceKind, transcripts_controller
from reflector.pipelines.main_file_pipeline import PipelineMainFile
await database.connect()
try:
# Create a temporary transcript for processing
transcript = await transcripts_controller.add(
"",
source_kind=SourceKind.FILE,
source_language=source_language,
target_language=target_language,
)
# Process the file
pipeline = PipelineMainFile(transcript_id=transcript.id)
await pipeline.process(Path(filename))
logger.info("File pipeline processing complete")
finally:
await database.disconnect()
except ImportError as e:
logger.error(f"File pipeline not available: {e}")
logger.info("Falling back to stream pipeline")
# Fall back to stream pipeline
await process_audio_file(
filename,
event_callback,
only_transcript=False,
source_language=source_language,
target_language=target_language,
enable_diarization=enable_diarization,
diarization_backend=diarization_backend,
)
pipeline_handlers = {
"live": process_live_pipeline,
"file": process_file_pipeline,
}
handler = pipeline_handlers.get(pipeline)
if not handler:
raise ValueError(f"Unknown pipeline type: {pipeline}")
await handler(transcript_id)
await extract_result_from_entry(transcript_id, output_path)
finally:
await database.disconnect()
if __name__ == "__main__":
import argparse
import os
parser = argparse.ArgumentParser(
description="Process audio files with speaker diarization"
description="Process audio files with optional speaker diarization"
)
parser.add_argument("source", help="Source file (mp3, wav, mp4...)")
parser.add_argument(
"--pipeline",
required=True,
choices=["live", "file"],
help="Pipeline type to use for processing (live: streaming/incremental, file: batch/parallel)",
"--stream",
action="store_true",
help="Use streaming pipeline (original frame-based processing)",
)
parser.add_argument(
"--only-transcript",
"-t",
action="store_true",
help="Only generate transcript without topics/summaries",
)
parser.add_argument(
"--source-language", default="en", help="Source language code (default: en)"
@@ -207,14 +295,81 @@ if __name__ == "__main__":
"--target-language", default="en", help="Target language code (default: en)"
)
parser.add_argument("--output", "-o", help="Output file (output.jsonl)")
parser.add_argument(
"--enable-diarization",
"-d",
action="store_true",
help="Enable speaker diarization",
)
parser.add_argument(
"--diarization-backend",
default="pyannote",
choices=["pyannote", "modal"],
help="Diarization backend to use (default: pyannote)",
)
args = parser.parse_args()
asyncio.run(
process(
args.source,
args.source_language,
args.target_language,
args.pipeline,
args.output,
if "REDIS_HOST" not in os.environ:
os.environ["REDIS_HOST"] = "localhost"
output_fd = None
if args.output:
output_fd = open(args.output, "w")
async def event_callback(event: PipelineEvent):
processor = event.processor
data = event.data
# Ignore internal processors
if processor in (
"AudioChunkerProcessor",
"AudioMergeProcessor",
"AudioFileWriterProcessor",
"TopicCollectorProcessor",
"BroadcastProcessor",
):
return
# If diarization is enabled, skip the original topic events from the pipeline
# The diarization processor will emit the same topics but with speaker info
if processor == "TranscriptTopicDetectorProcessor" and args.enable_diarization:
return
# Log all events
logger.info(f"Event: {processor} - {type(data).__name__}")
# Write to output
if output_fd:
output_fd.write(event.model_dump_json())
output_fd.write("\n")
output_fd.flush()
if args.stream:
# Use original streaming pipeline
asyncio.run(
process_audio_file(
args.source,
event_callback,
only_transcript=args.only_transcript,
source_language=args.source_language,
target_language=args.target_language,
enable_diarization=args.enable_diarization,
diarization_backend=args.diarization_backend,
)
)
)
else:
# Use optimized file pipeline (default)
asyncio.run(
process_file_pipeline(
args.source,
event_callback,
source_language=args.source_language,
target_language=args.target_language,
enable_diarization=args.enable_diarization,
diarization_backend=args.diarization_backend,
)
)
if output_fd:
output_fd.close()
logger.info(f"Output written to {args.output}")

View File

@@ -0,0 +1,315 @@
"""
@vibe-generated
Process audio file with diarization support
===========================================
Extended version of process.py that includes speaker diarization.
This tool processes audio files locally without requiring the full server infrastructure.
"""
import asyncio
import tempfile
import uuid
from pathlib import Path
from typing import List
import av
from reflector.logger import logger
from reflector.processors import (
AudioChunkerProcessor,
AudioFileWriterProcessor,
AudioMergeProcessor,
AudioTranscriptAutoProcessor,
Pipeline,
PipelineEvent,
TranscriptFinalSummaryProcessor,
TranscriptFinalTitleProcessor,
TranscriptLinerProcessor,
TranscriptTopicDetectorProcessor,
TranscriptTranslatorAutoProcessor,
)
from reflector.processors.base import BroadcastProcessor, Processor
from reflector.processors.types import (
AudioDiarizationInput,
TitleSummary,
TitleSummaryWithId,
)
class TopicCollectorProcessor(Processor):
"""Collect topics for diarization"""
INPUT_TYPE = TitleSummary
OUTPUT_TYPE = TitleSummary
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.topics: List[TitleSummaryWithId] = []
self._topic_id = 0
async def _push(self, data: TitleSummary):
# Convert to TitleSummaryWithId and collect
self._topic_id += 1
topic_with_id = TitleSummaryWithId(
id=str(self._topic_id),
title=data.title,
summary=data.summary,
timestamp=data.timestamp,
duration=data.duration,
transcript=data.transcript,
)
self.topics.append(topic_with_id)
# Pass through the original topic
await self.emit(data)
def get_topics(self) -> List[TitleSummaryWithId]:
return self.topics
async def process_audio_file_with_diarization(
filename,
event_callback,
only_transcript=False,
source_language="en",
target_language="en",
enable_diarization=True,
diarization_backend="modal",
):
# Create temp file for audio if diarization is enabled
audio_temp_path = None
if enable_diarization:
audio_temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
audio_temp_path = audio_temp_file.name
audio_temp_file.close()
# Create processor for collecting topics
topic_collector = TopicCollectorProcessor()
# Build pipeline for audio processing
processors = []
# Add audio file writer at the beginning if diarization is enabled
if enable_diarization:
processors.append(AudioFileWriterProcessor(audio_temp_path))
# Add the rest of the processors
processors += [
AudioChunkerProcessor(),
AudioMergeProcessor(),
AudioTranscriptAutoProcessor.as_threaded(),
]
processors += [
TranscriptLinerProcessor(),
TranscriptTranslatorAutoProcessor.as_threaded(),
]
if not only_transcript:
processors += [
TranscriptTopicDetectorProcessor.as_threaded(),
# Collect topics for diarization
topic_collector,
BroadcastProcessor(
processors=[
TranscriptFinalTitleProcessor.as_threaded(),
TranscriptFinalSummaryProcessor.as_threaded(),
],
),
]
# Create main pipeline
pipeline = Pipeline(*processors)
pipeline.set_pref("audio:source_language", source_language)
pipeline.set_pref("audio:target_language", target_language)
pipeline.describe()
pipeline.on(event_callback)
# Start processing audio
logger.info(f"Opening {filename}")
container = av.open(filename)
try:
logger.info("Start pushing audio into the pipeline")
for frame in container.decode(audio=0):
await pipeline.push(frame)
finally:
logger.info("Flushing the pipeline")
await pipeline.flush()
# Run diarization if enabled and we have topics
if enable_diarization and not only_transcript and audio_temp_path:
topics = topic_collector.get_topics()
if topics:
logger.info(f"Starting diarization with {len(topics)} topics")
try:
from reflector.processors import AudioDiarizationAutoProcessor
diarization_processor = AudioDiarizationAutoProcessor(
name=diarization_backend
)
diarization_processor.set_pipeline(pipeline)
# For Modal backend, we need to upload the file to S3 first
if diarization_backend == "modal":
from datetime import datetime, timezone
from reflector.storage import get_transcripts_storage
from reflector.utils.s3_temp_file import S3TemporaryFile
storage = get_transcripts_storage()
# Generate a unique filename in evaluation folder
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
audio_filename = f"evaluation/diarization_temp/{timestamp}_{uuid.uuid4().hex}.wav"
# Use context manager for automatic cleanup
async with S3TemporaryFile(storage, audio_filename) as s3_file:
# Read and upload the audio file
with open(audio_temp_path, "rb") as f:
audio_data = f.read()
audio_url = await s3_file.upload(audio_data)
logger.info(f"Uploaded audio to S3: {audio_filename}")
# Create diarization input with S3 URL
diarization_input = AudioDiarizationInput(
audio_url=audio_url, topics=topics
)
# Run diarization
await diarization_processor.push(diarization_input)
await diarization_processor.flush()
logger.info("Diarization complete")
# File will be automatically cleaned up when exiting the context
else:
# For local backend, use local file path
audio_url = audio_temp_path
# Create diarization input
diarization_input = AudioDiarizationInput(
audio_url=audio_url, topics=topics
)
# Run diarization
await diarization_processor.push(diarization_input)
await diarization_processor.flush()
logger.info("Diarization complete")
except ImportError as e:
logger.error(f"Failed to import diarization dependencies: {e}")
logger.error(
"Install with: uv pip install pyannote.audio torch torchaudio"
)
logger.error(
"And set HF_TOKEN environment variable for pyannote models"
)
raise SystemExit(1)
except Exception as e:
logger.error(f"Diarization failed: {e}")
raise SystemExit(1)
else:
logger.warning("Skipping diarization: no topics available")
# Clean up temp file
if audio_temp_path:
try:
Path(audio_temp_path).unlink()
except Exception as e:
logger.warning(f"Failed to clean up temp file {audio_temp_path}: {e}")
logger.info("All done!")
if __name__ == "__main__":
import argparse
import os
parser = argparse.ArgumentParser(
description="Process audio files with optional speaker diarization"
)
parser.add_argument("source", help="Source file (mp3, wav, mp4...)")
parser.add_argument(
"--only-transcript",
"-t",
action="store_true",
help="Only generate transcript without topics/summaries",
)
parser.add_argument(
"--source-language", default="en", help="Source language code (default: en)"
)
parser.add_argument(
"--target-language", default="en", help="Target language code (default: en)"
)
parser.add_argument("--output", "-o", help="Output file (output.jsonl)")
parser.add_argument(
"--enable-diarization",
"-d",
action="store_true",
help="Enable speaker diarization",
)
parser.add_argument(
"--diarization-backend",
default="modal",
choices=["modal"],
help="Diarization backend to use (default: modal)",
)
args = parser.parse_args()
# Set REDIS_HOST to localhost if not provided
if "REDIS_HOST" not in os.environ:
os.environ["REDIS_HOST"] = "localhost"
logger.info("REDIS_HOST not set, defaulting to localhost")
output_fd = None
if args.output:
output_fd = open(args.output, "w")
async def event_callback(event: PipelineEvent):
processor = event.processor
data = event.data
# Ignore internal processors
if processor in (
"AudioChunkerProcessor",
"AudioMergeProcessor",
"AudioFileWriterProcessor",
"TopicCollectorProcessor",
"BroadcastProcessor",
):
return
# If diarization is enabled, skip the original topic events from the pipeline
# The diarization processor will emit the same topics but with speaker info
if processor == "TranscriptTopicDetectorProcessor" and args.enable_diarization:
return
# Log all events
logger.info(f"Event: {processor} - {type(data).__name__}")
# Write to output
if output_fd:
output_fd.write(event.model_dump_json())
output_fd.write("\n")
output_fd.flush()
asyncio.run(
process_audio_file_with_diarization(
args.source,
event_callback,
only_transcript=args.only_transcript,
source_language=args.source_language,
target_language=args.target_language,
enable_diarization=args.enable_diarization,
diarization_backend=args.diarization_backend,
)
)
if output_fd:
output_fd.close()
logger.info(f"Output written to {args.output}")

View File

@@ -53,7 +53,7 @@ async def run_single_processor(args):
async def event_callback(event: PipelineEvent):
processor = event.processor
# ignore some processor
if processor in ("AudioChunkerAutoProcessor", "AudioMergeProcessor"):
if processor in ("AudioChunkerProcessor", "AudioMergeProcessor"):
return
print(f"Event: {event}")
if output_fd:

View File

@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""
@vibe-generated
Test script for the diarization CLI tool
=========================================
This script helps test the diarization functionality with sample audio files.
"""
import asyncio
import sys
from pathlib import Path
from reflector.logger import logger
async def test_diarization(audio_file: str):
"""Test the diarization functionality"""
# Import the processing function
from process_with_diarization import process_audio_file_with_diarization
# Collect events
events = []
async def event_callback(event):
events.append({"processor": event.processor, "data": event.data})
logger.info(f"Event from {event.processor}")
# Process the audio file
logger.info(f"Processing audio file: {audio_file}")
try:
await process_audio_file_with_diarization(
audio_file,
event_callback,
only_transcript=False,
source_language="en",
target_language="en",
enable_diarization=True,
diarization_backend="modal",
)
# Analyze results
logger.info(f"Processing complete. Received {len(events)} events")
# Look for diarization results
diarized_topics = []
for event in events:
if "TitleSummary" in event["processor"]:
# Check if words have speaker information
if hasattr(event["data"], "transcript") and event["data"].transcript:
words = event["data"].transcript.words
if words and hasattr(words[0], "speaker"):
speakers = set(
w.speaker for w in words if hasattr(w, "speaker")
)
logger.info(
f"Found {len(speakers)} speakers in topic: {event['data'].title}"
)
diarized_topics.append(event["data"])
if diarized_topics:
logger.info(f"Successfully diarized {len(diarized_topics)} topics")
# Print sample output
sample_topic = diarized_topics[0]
logger.info("Sample diarized output:")
for i, word in enumerate(sample_topic.transcript.words[:10]):
logger.info(f" Word {i}: '{word.text}' - Speaker {word.speaker}")
else:
logger.warning("No diarization results found in output")
return events
except Exception as e:
logger.error(f"Error during processing: {e}")
raise
def main():
if len(sys.argv) < 2:
print("Usage: python test_diarization.py <audio_file>")
sys.exit(1)
audio_file = sys.argv[1]
if not Path(audio_file).exists():
print(f"Error: Audio file '{audio_file}' not found")
sys.exit(1)
# Run the test
asyncio.run(test_diarization(audio_file))
if __name__ == "__main__":
main()

View File

@@ -1,20 +0,0 @@
from typing import Annotated
from pydantic import Field, TypeAdapter, constr
NonEmptyStringBase = constr(min_length=1, strip_whitespace=False)
NonEmptyString = Annotated[
NonEmptyStringBase,
Field(description="A non-empty string", min_length=1),
]
non_empty_string_adapter = TypeAdapter(NonEmptyString)
def parse_non_empty_string(s: str) -> NonEmptyString:
return non_empty_string_adapter.validate_python(s)
def try_parse_non_empty_string(s: str) -> NonEmptyString | None:
if not s:
return None
return parse_non_empty_string(s)

View File

@@ -15,7 +15,6 @@ from reflector.db.meetings import meetings_controller
from reflector.db.rooms import rooms_controller
from reflector.settings import settings
from reflector.whereby import create_meeting, upload_logo
from reflector.worker.webhook import test_webhook
logger = logging.getLogger(__name__)
@@ -45,11 +44,6 @@ class Room(BaseModel):
is_shared: bool
class RoomDetails(Room):
webhook_url: str | None
webhook_secret: str | None
class Meeting(BaseModel):
id: str
room_name: str
@@ -70,8 +64,6 @@ class CreateRoom(BaseModel):
recording_type: str
recording_trigger: str
is_shared: bool
webhook_url: str
webhook_secret: str
class UpdateRoom(BaseModel):
@@ -84,26 +76,16 @@ class UpdateRoom(BaseModel):
recording_type: str
recording_trigger: str
is_shared: bool
webhook_url: str
webhook_secret: str
class DeletionStatus(BaseModel):
status: str
class WebhookTestResult(BaseModel):
success: bool
message: str = ""
error: str = ""
status_code: int | None = None
response_preview: str | None = None
@router.get("/rooms", response_model=Page[RoomDetails])
@router.get("/rooms", response_model=Page[Room])
async def rooms_list(
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
) -> list[RoomDetails]:
) -> list[Room]:
if not user and not settings.PUBLIC_MODE:
raise HTTPException(status_code=401, detail="Not authenticated")
@@ -117,18 +99,6 @@ async def rooms_list(
)
@router.get("/rooms/{room_id}", response_model=RoomDetails)
async def rooms_get(
room_id: str,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
user_id = user["sub"] if user else None
room = await rooms_controller.get_by_id_for_http(room_id, user_id=user_id)
if not room:
raise HTTPException(status_code=404, detail="Room not found")
return room
@router.post("/rooms", response_model=Room)
async def rooms_create(
room: CreateRoom,
@@ -147,12 +117,10 @@ async def rooms_create(
recording_type=room.recording_type,
recording_trigger=room.recording_trigger,
is_shared=room.is_shared,
webhook_url=room.webhook_url,
webhook_secret=room.webhook_secret,
)
@router.patch("/rooms/{room_id}", response_model=RoomDetails)
@router.patch("/rooms/{room_id}", response_model=Room)
async def rooms_update(
room_id: str,
info: UpdateRoom,
@@ -197,7 +165,6 @@ async def rooms_create_meeting(
end_date = current_time + timedelta(hours=8)
whereby_meeting = await create_meeting("", end_date=end_date, room=room)
await upload_logo(whereby_meeting["roomName"], "./images/logo.png")
# Now try to save to database
@@ -242,24 +209,3 @@ async def rooms_create_meeting(
meeting.host_room_url = ""
return meeting
@router.post("/rooms/{room_id}/webhook/test", response_model=WebhookTestResult)
async def rooms_test_webhook(
room_id: str,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
"""Test webhook configuration by sending a sample payload."""
user_id = user["sub"] if user else None
room = await rooms_controller.get_by_id(room_id)
if not room:
raise HTTPException(status_code=404, detail="Room not found")
if user_id and room.user_id != user_id:
raise HTTPException(
status_code=403, detail="Not authorized to test this room's webhook"
)
result = await test_webhook(room_id)
return WebhookTestResult(**result)

View File

@@ -5,7 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi_pagination import Page
from fastapi_pagination.ext.databases import apaginate
from jose import jwt
from pydantic import BaseModel, Field, constr, field_serializer
from pydantic import BaseModel, Field, field_serializer
import reflector.auth as auth
from reflector.db import get_database
@@ -19,15 +19,14 @@ from reflector.db.search import (
SearchOffsetBase,
SearchParameters,
SearchQuery,
SearchQueryBase,
SearchResult,
SearchTotal,
search_controller,
search_query_adapter,
)
from reflector.db.transcripts import (
SourceKind,
TranscriptParticipant,
TranscriptStatus,
TranscriptTopic,
transcripts_controller,
)
@@ -64,7 +63,7 @@ class GetTranscriptMinimal(BaseModel):
id: str
user_id: str | None
name: str
status: TranscriptStatus
status: str
locked: bool
duration: float
title: str | None
@@ -97,7 +96,6 @@ class CreateTranscript(BaseModel):
name: str
source_language: str = Field("en")
target_language: str = Field("en")
source_kind: SourceKind | None = None
class UpdateTranscript(BaseModel):
@@ -116,19 +114,7 @@ class DeletionStatus(BaseModel):
status: str
SearchQueryParamBase = constr(min_length=0, strip_whitespace=True)
SearchQueryParam = Annotated[
SearchQueryParamBase, Query(description="Search query text")
]
# http and api standards accept "q="; we would like to handle it as the absence of query, not as "empty string query"
def parse_search_query_param(q: SearchQueryParam) -> SearchQuery | None:
if q == "":
return None
return search_query_adapter.validate_python(q)
SearchQueryParam = Annotated[SearchQueryBase, Query(description="Search query text")]
SearchLimitParam = Annotated[SearchLimitBase, Query(description="Results per page")]
SearchOffsetParam = Annotated[
SearchOffsetBase, Query(description="Number of results to skip")
@@ -138,7 +124,7 @@ SearchOffsetParam = Annotated[
class SearchResponse(BaseModel):
results: list[SearchResult]
total: SearchTotal
query: SearchQuery | None = None
query: SearchQuery
limit: SearchLimit
offset: SearchOffset
@@ -188,7 +174,7 @@ async def transcripts_search(
user_id = user["sub"] if user else None
search_params = SearchParameters(
query_text=parse_search_query_param(q),
query_text=q,
limit=limit,
offset=offset,
user_id=user_id,
@@ -215,7 +201,7 @@ async def transcripts_create(
user_id = user["sub"] if user else None
return await transcripts_controller.add(
info.name,
source_kind=info.source_kind or SourceKind.LIVE,
source_kind=SourceKind.FILE,
source_language=info.source_language,
target_language=info.target_language,
user_id=user_id,

View File

@@ -6,7 +6,7 @@ from pydantic import BaseModel
import reflector.auth as auth
from reflector.db.transcripts import transcripts_controller
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
from reflector.pipelines.main_live_pipeline import task_pipeline_process
router = APIRouter()
@@ -34,13 +34,13 @@ async def transcript_process(
)
if task_is_scheduled_or_active(
"reflector.pipelines.main_file_pipeline.task_pipeline_file_process",
"reflector.pipelines.main_live_pipeline.task_pipeline_process",
transcript_id=transcript_id,
):
return ProcessStatus(status="already running")
# schedule a background task process the file
task_pipeline_file_process.delay(transcript_id=transcript_id)
task_pipeline_process.delay(transcript_id=transcript_id)
return ProcessStatus(status="ok")

View File

@@ -6,7 +6,7 @@ from pydantic import BaseModel
import reflector.auth as auth
from reflector.db.transcripts import transcripts_controller
from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
from reflector.pipelines.main_live_pipeline import task_pipeline_process
router = APIRouter()
@@ -92,6 +92,6 @@ async def transcript_record_upload(
await transcripts_controller.update(transcript, {"status": "uploaded"})
# launch a background task to process the file
task_pipeline_file_process.delay(transcript_id=transcript_id)
task_pipeline_process.delay(transcript_id=transcript_id)
return UploadStatus(status="ok")

View File

@@ -19,7 +19,6 @@ else:
"reflector.pipelines.main_live_pipeline",
"reflector.worker.healthcheck",
"reflector.worker.process",
"reflector.worker.cleanup",
]
)
@@ -39,16 +38,6 @@ else:
},
}
if settings.PUBLIC_MODE:
app.conf.beat_schedule["cleanup_old_public_data"] = {
"task": "reflector.worker.cleanup.cleanup_old_public_data_task",
"schedule": crontab(hour=3, minute=0),
}
logger.info(
"Public mode cleanup enabled",
retention_days=settings.PUBLIC_DATA_RETENTION_DAYS,
)
if settings.HEALTHCHECK_URL:
app.conf.beat_schedule["healthcheck_ping"] = {
"task": "reflector.worker.healthcheck.healthcheck_ping",

View File

@@ -1,156 +0,0 @@
"""
Main task for cleanup old public data.
Deletes old anonymous transcripts and their associated meetings/recordings.
Transcripts are the main entry point - any associated data is also removed.
"""
import asyncio
from datetime import datetime, timedelta, timezone
from typing import TypedDict
import structlog
from celery import shared_task
from databases import Database
from pydantic.types import PositiveInt
from reflector.asynctask import asynctask
from reflector.db import get_database
from reflector.db.meetings import meetings
from reflector.db.recordings import recordings
from reflector.db.transcripts import transcripts, transcripts_controller
from reflector.settings import settings
from reflector.storage import get_recordings_storage
logger = structlog.get_logger(__name__)
class CleanupStats(TypedDict):
"""Statistics for cleanup operation."""
transcripts_deleted: int
meetings_deleted: int
recordings_deleted: int
errors: list[str]
async def delete_single_transcript(
db: Database, transcript_data: dict, stats: CleanupStats
):
transcript_id = transcript_data["id"]
meeting_id = transcript_data["meeting_id"]
recording_id = transcript_data["recording_id"]
try:
async with db.transaction(isolation="serializable"):
if meeting_id:
await db.execute(meetings.delete().where(meetings.c.id == meeting_id))
stats["meetings_deleted"] += 1
logger.info("Deleted associated meeting", meeting_id=meeting_id)
if recording_id:
recording = await db.fetch_one(
recordings.select().where(recordings.c.id == recording_id)
)
if recording:
try:
await get_recordings_storage().delete_file(
recording["object_key"]
)
except Exception as storage_error:
logger.warning(
"Failed to delete recording from storage",
recording_id=recording_id,
object_key=recording["object_key"],
error=str(storage_error),
)
await db.execute(
recordings.delete().where(recordings.c.id == recording_id)
)
stats["recordings_deleted"] += 1
logger.info(
"Deleted associated recording", recording_id=recording_id
)
await transcripts_controller.remove_by_id(transcript_id)
stats["transcripts_deleted"] += 1
logger.info(
"Deleted transcript",
transcript_id=transcript_id,
created_at=transcript_data["created_at"].isoformat(),
)
except Exception as e:
error_msg = f"Failed to delete transcript {transcript_id}: {str(e)}"
logger.error(error_msg, exc_info=e)
stats["errors"].append(error_msg)
async def cleanup_old_transcripts(
db: Database, cutoff_date: datetime, stats: CleanupStats
):
"""Delete old anonymous transcripts and their associated recordings/meetings."""
query = transcripts.select().where(
(transcripts.c.created_at < cutoff_date) & (transcripts.c.user_id.is_(None))
)
old_transcripts = await db.fetch_all(query)
logger.info(f"Found {len(old_transcripts)} old transcripts to delete")
for transcript_data in old_transcripts:
await delete_single_transcript(db, transcript_data, stats)
def log_cleanup_results(stats: CleanupStats):
logger.info(
"Cleanup completed",
transcripts_deleted=stats["transcripts_deleted"],
meetings_deleted=stats["meetings_deleted"],
recordings_deleted=stats["recordings_deleted"],
errors_count=len(stats["errors"]),
)
if stats["errors"]:
logger.warning(
"Cleanup completed with errors",
errors=stats["errors"][:10],
)
async def cleanup_old_public_data(
days: PositiveInt | None = None,
) -> CleanupStats | None:
if days is None:
days = settings.PUBLIC_DATA_RETENTION_DAYS
if not settings.PUBLIC_MODE:
logger.info("Skipping cleanup - not a public instance")
return None
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days)
logger.info(
"Starting cleanup of old public data",
cutoff_date=cutoff_date.isoformat(),
)
stats: CleanupStats = {
"transcripts_deleted": 0,
"meetings_deleted": 0,
"recordings_deleted": 0,
"errors": [],
}
db = get_database()
await cleanup_old_transcripts(db, cutoff_date, stats)
log_cleanup_results(stats)
return stats
@shared_task(
autoretry_for=(Exception,),
retry_kwargs={"max_retries": 3, "countdown": 300},
)
@asynctask
def cleanup_old_public_data_task(days: int | None = None):
asyncio.run(cleanup_old_public_data(days=days))

View File

@@ -1,258 +0,0 @@
"""Webhook task for sending transcript notifications."""
import hashlib
import hmac
import json
import uuid
from datetime import datetime, timezone
import httpx
import structlog
from celery import shared_task
from celery.utils.log import get_task_logger
from reflector.db.rooms import rooms_controller
from reflector.db.transcripts import transcripts_controller
from reflector.pipelines.main_live_pipeline import asynctask
from reflector.settings import settings
from reflector.utils.webvtt import topics_to_webvtt
logger = structlog.wrap_logger(get_task_logger(__name__))
def generate_webhook_signature(payload: bytes, secret: str, timestamp: str) -> str:
"""Generate HMAC signature for webhook payload."""
signed_payload = f"{timestamp}.{payload.decode('utf-8')}"
hmac_obj = hmac.new(
secret.encode("utf-8"),
signed_payload.encode("utf-8"),
hashlib.sha256,
)
return hmac_obj.hexdigest()
@shared_task(
bind=True,
max_retries=30,
default_retry_delay=60,
retry_backoff=True,
retry_backoff_max=3600, # Max 1 hour between retries
)
@asynctask
async def send_transcript_webhook(
self,
transcript_id: str,
room_id: str,
event_id: str,
):
log = logger.bind(
transcript_id=transcript_id,
room_id=room_id,
retry_count=self.request.retries,
)
try:
# Fetch transcript and room
transcript = await transcripts_controller.get_by_id(transcript_id)
if not transcript:
log.error("Transcript not found, skipping webhook")
return
room = await rooms_controller.get_by_id(room_id)
if not room:
log.error("Room not found, skipping webhook")
return
if not room.webhook_url:
log.info("No webhook URL configured for room, skipping")
return
# Generate WebVTT content from topics
topics_data = []
if transcript.topics:
# Build topics data with diarized content per topic
for topic in transcript.topics:
topic_webvtt = topics_to_webvtt([topic]) if topic.words else ""
topics_data.append(
{
"title": topic.title,
"summary": topic.summary,
"timestamp": topic.timestamp,
"duration": topic.duration,
"webvtt": topic_webvtt,
}
)
# Build webhook payload
frontend_url = f"{settings.UI_BASE_URL}/transcripts/{transcript.id}"
participants = [
{"id": p.id, "name": p.name, "speaker": p.speaker}
for p in (transcript.participants or [])
]
payload_data = {
"event": "transcript.completed",
"event_id": event_id,
"timestamp": datetime.now(timezone.utc).isoformat(),
"transcript": {
"id": transcript.id,
"room_id": transcript.room_id,
"created_at": transcript.created_at.isoformat(),
"duration": transcript.duration,
"title": transcript.title,
"short_summary": transcript.short_summary,
"long_summary": transcript.long_summary,
"webvtt": transcript.webvtt,
"topics": topics_data,
"participants": participants,
"source_language": transcript.source_language,
"target_language": transcript.target_language,
"status": transcript.status,
"frontend_url": frontend_url,
},
"room": {
"id": room.id,
"name": room.name,
},
}
# Convert to JSON
payload_json = json.dumps(payload_data, separators=(",", ":"))
payload_bytes = payload_json.encode("utf-8")
# Generate signature if secret is configured
headers = {
"Content-Type": "application/json",
"User-Agent": "Reflector-Webhook/1.0",
"X-Webhook-Event": "transcript.completed",
"X-Webhook-Retry": str(self.request.retries),
}
if room.webhook_secret:
timestamp = str(int(datetime.now(timezone.utc).timestamp()))
signature = generate_webhook_signature(
payload_bytes, room.webhook_secret, timestamp
)
headers["X-Webhook-Signature"] = f"t={timestamp},v1={signature}"
# Send webhook with timeout
async with httpx.AsyncClient(timeout=30.0) as client:
log.info(
"Sending webhook",
url=room.webhook_url,
payload_size=len(payload_bytes),
)
response = await client.post(
room.webhook_url,
content=payload_bytes,
headers=headers,
)
response.raise_for_status()
log.info(
"Webhook sent successfully",
status_code=response.status_code,
response_size=len(response.content),
)
except httpx.HTTPStatusError as e:
log.error(
"Webhook failed with HTTP error",
status_code=e.response.status_code,
response_text=e.response.text[:500], # First 500 chars
)
# Don't retry on client errors (4xx)
if 400 <= e.response.status_code < 500:
log.error("Client error, not retrying")
return
# Retry on server errors (5xx)
raise self.retry(exc=e)
except (httpx.ConnectError, httpx.TimeoutException) as e:
# Retry on network errors
log.error("Webhook failed with connection error", error=str(e))
raise self.retry(exc=e)
except Exception as e:
# Retry on unexpected errors
log.exception("Unexpected error in webhook task", error=str(e))
raise self.retry(exc=e)
async def test_webhook(room_id: str) -> dict:
"""
Test webhook configuration by sending a sample payload.
Returns immediately with success/failure status.
This is the shared implementation used by both the API endpoint and Celery task.
"""
try:
room = await rooms_controller.get_by_id(room_id)
if not room:
return {"success": False, "error": "Room not found"}
if not room.webhook_url:
return {"success": False, "error": "No webhook URL configured"}
now = (datetime.now(timezone.utc).isoformat(),)
payload_data = {
"event": "test",
"event_id": uuid.uuid4().hex,
"timestamp": now,
"message": "This is a test webhook from Reflector",
"room": {
"id": room.id,
"name": room.name,
},
}
payload_json = json.dumps(payload_data, separators=(",", ":"))
payload_bytes = payload_json.encode("utf-8")
# Generate headers with signature
headers = {
"Content-Type": "application/json",
"User-Agent": "Reflector-Webhook/1.0",
"X-Webhook-Event": "test",
}
if room.webhook_secret:
timestamp = str(int(datetime.now(timezone.utc).timestamp()))
signature = generate_webhook_signature(
payload_bytes, room.webhook_secret, timestamp
)
headers["X-Webhook-Signature"] = f"t={timestamp},v1={signature}"
# Send test webhook with short timeout
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.post(
room.webhook_url,
content=payload_bytes,
headers=headers,
)
return {
"success": response.is_success,
"status_code": response.status_code,
"message": f"Webhook test {'successful' if response.is_success else 'failed'}",
"response_preview": response.text if response.text else None,
}
except httpx.TimeoutException:
return {
"success": False,
"error": "Webhook request timed out (10 seconds)",
}
except httpx.ConnectError as e:
return {
"success": False,
"error": f"Could not connect to webhook URL: {str(e)}",
}
except Exception as e:
return {
"success": False,
"error": f"Unexpected error: {str(e)}",
}

View File

@@ -2,7 +2,7 @@
if [ "${ENTRYPOINT}" = "server" ]; then
uv run alembic upgrade head
uv run uvicorn reflector.app:app --host 0.0.0.0 --port 1250
uv run -m reflector.app
elif [ "${ENTRYPOINT}" = "worker" ]; then
uv run celery -A reflector.worker.app worker --loglevel=info
elif [ "${ENTRYPOINT}" = "beat" ]; then

View File

@@ -178,63 +178,6 @@ async def dummy_diarization():
yield
@pytest.fixture
async def dummy_file_transcript():
from reflector.processors.file_transcript import FileTranscriptProcessor
from reflector.processors.types import Transcript, Word
class TestFileTranscriptProcessor(FileTranscriptProcessor):
async def _transcript(self, data):
return Transcript(
text="Hello world. How are you today?",
words=[
Word(start=0.0, end=0.5, text="Hello", speaker=0),
Word(start=0.5, end=0.6, text=" ", speaker=0),
Word(start=0.6, end=1.0, text="world", speaker=0),
Word(start=1.0, end=1.1, text=".", speaker=0),
Word(start=1.1, end=1.2, text=" ", speaker=0),
Word(start=1.2, end=1.5, text="How", speaker=0),
Word(start=1.5, end=1.6, text=" ", speaker=0),
Word(start=1.6, end=1.8, text="are", speaker=0),
Word(start=1.8, end=1.9, text=" ", speaker=0),
Word(start=1.9, end=2.1, text="you", speaker=0),
Word(start=2.1, end=2.2, text=" ", speaker=0),
Word(start=2.2, end=2.5, text="today", speaker=0),
Word(start=2.5, end=2.6, text="?", speaker=0),
],
)
with patch(
"reflector.processors.file_transcript_auto.FileTranscriptAutoProcessor.__new__"
) as mock_auto:
mock_auto.return_value = TestFileTranscriptProcessor()
yield
@pytest.fixture
async def dummy_file_diarization():
from reflector.processors.file_diarization import (
FileDiarizationOutput,
FileDiarizationProcessor,
)
from reflector.processors.types import DiarizationSegment
class TestFileDiarizationProcessor(FileDiarizationProcessor):
async def _diarize(self, data):
return FileDiarizationOutput(
diarization=[
DiarizationSegment(start=0.0, end=1.1, speaker=0),
DiarizationSegment(start=1.2, end=2.6, speaker=1),
]
)
with patch(
"reflector.processors.file_diarization_auto.FileDiarizationAutoProcessor.__new__"
) as mock_auto:
mock_auto.return_value = TestFileDiarizationProcessor()
yield
@pytest.fixture
async def dummy_transcript_translator():
from reflector.processors.transcript_translator import TranscriptTranslatorProcessor
@@ -295,13 +238,9 @@ async def dummy_storage():
with (
patch("reflector.storage.base.Storage.get_instance") as mock_storage,
patch("reflector.storage.get_transcripts_storage") as mock_get_transcripts,
patch(
"reflector.pipelines.main_file_pipeline.get_transcripts_storage"
) as mock_get_transcripts2,
):
mock_storage.return_value = dummy
mock_get_transcripts.return_value = dummy
mock_get_transcripts2.return_value = dummy
yield
@@ -321,10 +260,7 @@ def celery_config():
@pytest.fixture(scope="session")
def celery_includes():
return [
"reflector.pipelines.main_live_pipeline",
"reflector.pipelines.main_file_pipeline",
]
return ["reflector.pipelines.main_live_pipeline"]
@pytest.fixture
@@ -366,7 +302,7 @@ async def fake_transcript_with_topics(tmpdir, client):
transcript = await transcripts_controller.get_by_id(tid)
assert transcript is not None
await transcripts_controller.update(transcript, {"status": "ended"})
await transcripts_controller.update(transcript, {"status": "finished"})
# manually copy a file at the expected location
audio_filename = transcript.audio_mp3_filename

View File

@@ -1,287 +0,0 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, patch
import pytest
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.transcripts import SourceKind, transcripts_controller
from reflector.worker.cleanup import cleanup_old_public_data
@pytest.mark.asyncio
async def test_cleanup_old_public_data_skips_when_not_public():
"""Test that cleanup is skipped when PUBLIC_MODE is False."""
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = False
result = await cleanup_old_public_data()
# Should return early without doing anything
assert result is None
@pytest.mark.asyncio
async def test_cleanup_old_public_data_deletes_old_anonymous_transcripts():
"""Test that old anonymous transcripts are deleted."""
# Create old and new anonymous transcripts
old_date = datetime.now(timezone.utc) - timedelta(days=8)
new_date = datetime.now(timezone.utc) - timedelta(days=2)
# Create old anonymous transcript (should be deleted)
old_transcript = await transcripts_controller.add(
name="Old Anonymous Transcript",
source_kind=SourceKind.FILE,
user_id=None, # Anonymous
)
# Manually update created_at to be old
from reflector.db import get_database
from reflector.db.transcripts import transcripts
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == old_transcript.id)
.values(created_at=old_date)
)
# Create new anonymous transcript (should NOT be deleted)
new_transcript = await transcripts_controller.add(
name="New Anonymous Transcript",
source_kind=SourceKind.FILE,
user_id=None, # Anonymous
)
# Create old transcript with user (should NOT be deleted)
old_user_transcript = await transcripts_controller.add(
name="Old User Transcript",
source_kind=SourceKind.FILE,
user_id="user123",
)
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == old_user_transcript.id)
.values(created_at=old_date)
)
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock the storage deletion
with patch("reflector.db.transcripts.get_transcripts_storage") as mock_storage:
mock_storage.return_value.delete_file = AsyncMock()
result = await cleanup_old_public_data()
# Check results
assert result["transcripts_deleted"] == 1
assert result["errors"] == []
# Verify old anonymous transcript was deleted
assert await transcripts_controller.get_by_id(old_transcript.id) is None
# Verify new anonymous transcript still exists
assert await transcripts_controller.get_by_id(new_transcript.id) is not None
# Verify user transcript still exists
assert await transcripts_controller.get_by_id(old_user_transcript.id) is not None
@pytest.mark.asyncio
async def test_cleanup_deletes_associated_meeting_and_recording():
"""Test that meetings and recordings associated with old transcripts are deleted."""
from reflector.db import get_database
from reflector.db.meetings import meetings
from reflector.db.transcripts import transcripts
old_date = datetime.now(timezone.utc) - timedelta(days=8)
# Create a meeting
meeting_id = "test-meeting-for-transcript"
await get_database().execute(
meetings.insert().values(
id=meeting_id,
room_name="Meeting with Transcript",
room_url="https://example.com/meeting",
host_room_url="https://example.com/meeting-host",
start_date=old_date,
end_date=old_date + timedelta(hours=1),
user_id=None,
room_id=None,
)
)
# Create a recording
recording = await recordings_controller.create(
Recording(
bucket_name="test-bucket",
object_key="test-recording.mp4",
recorded_at=old_date,
)
)
# Create an old transcript with both meeting and recording
old_transcript = await transcripts_controller.add(
name="Old Transcript with Meeting and Recording",
source_kind=SourceKind.ROOM,
user_id=None,
meeting_id=meeting_id,
recording_id=recording.id,
)
# Update created_at to be old
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == old_transcript.id)
.values(created_at=old_date)
)
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock storage deletion
with patch("reflector.db.transcripts.get_transcripts_storage") as mock_storage:
mock_storage.return_value.delete_file = AsyncMock()
with patch(
"reflector.worker.cleanup.get_recordings_storage"
) as mock_rec_storage:
mock_rec_storage.return_value.delete_file = AsyncMock()
result = await cleanup_old_public_data()
# Check results
assert result["transcripts_deleted"] == 1
assert result["meetings_deleted"] == 1
assert result["recordings_deleted"] == 1
assert result["errors"] == []
# Verify transcript was deleted
assert await transcripts_controller.get_by_id(old_transcript.id) is None
# Verify meeting was deleted
query = meetings.select().where(meetings.c.id == meeting_id)
meeting_result = await get_database().fetch_one(query)
assert meeting_result is None
# Verify recording was deleted
assert await recordings_controller.get_by_id(recording.id) is None
@pytest.mark.asyncio
async def test_cleanup_handles_errors_gracefully():
"""Test that cleanup continues even when individual deletions fail."""
old_date = datetime.now(timezone.utc) - timedelta(days=8)
# Create multiple old transcripts
transcript1 = await transcripts_controller.add(
name="Transcript 1",
source_kind=SourceKind.FILE,
user_id=None,
)
transcript2 = await transcripts_controller.add(
name="Transcript 2",
source_kind=SourceKind.FILE,
user_id=None,
)
# Update created_at to be old
from reflector.db import get_database
from reflector.db.transcripts import transcripts
for t_id in [transcript1.id, transcript2.id]:
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == t_id)
.values(created_at=old_date)
)
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock remove_by_id to fail for the first transcript
original_remove = transcripts_controller.remove_by_id
call_count = 0
async def mock_remove_by_id(transcript_id, user_id=None):
nonlocal call_count
call_count += 1
if call_count == 1:
raise Exception("Simulated deletion error")
return await original_remove(transcript_id, user_id)
with patch.object(
transcripts_controller, "remove_by_id", side_effect=mock_remove_by_id
):
result = await cleanup_old_public_data()
# Should have one successful deletion and one error
assert result["transcripts_deleted"] == 1
assert len(result["errors"]) == 1
assert "Failed to delete transcript" in result["errors"][0]
@pytest.mark.asyncio
async def test_meeting_consent_cascade_delete():
"""Test that meeting_consent records are automatically deleted when meeting is deleted."""
from reflector.db import get_database
from reflector.db.meetings import (
meeting_consent,
meeting_consent_controller,
meetings,
)
# Create a meeting
meeting_id = "test-cascade-meeting"
await get_database().execute(
meetings.insert().values(
id=meeting_id,
room_name="Test Meeting for CASCADE",
room_url="https://example.com/cascade-test",
host_room_url="https://example.com/cascade-test-host",
start_date=datetime.now(timezone.utc),
end_date=datetime.now(timezone.utc) + timedelta(hours=1),
user_id="test-user",
room_id=None,
)
)
# Create consent records for this meeting
consent1_id = "consent-1"
consent2_id = "consent-2"
await get_database().execute(
meeting_consent.insert().values(
id=consent1_id,
meeting_id=meeting_id,
user_id="user1",
consent_given=True,
consent_timestamp=datetime.now(timezone.utc),
)
)
await get_database().execute(
meeting_consent.insert().values(
id=consent2_id,
meeting_id=meeting_id,
user_id="user2",
consent_given=False,
consent_timestamp=datetime.now(timezone.utc),
)
)
# Verify consent records exist
consents = await meeting_consent_controller.get_by_meeting_id(meeting_id)
assert len(consents) == 2
# Delete the meeting
await get_database().execute(meetings.delete().where(meetings.c.id == meeting_id))
# Verify meeting is deleted
query = meetings.select().where(meetings.c.id == meeting_id)
result = await get_database().fetch_one(query)
assert result is None
# Verify consent records are automatically deleted (CASCADE DELETE)
consents_after = await meeting_consent_controller.get_by_meeting_id(meeting_id)
assert len(consents_after) == 0

View File

@@ -272,9 +272,6 @@ class TestGPUModalTranscript:
for f in temp_files:
Path(f).unlink(missing_ok=True)
@pytest.mark.skipif(
not "parakeet" in get_model_name(), reason="Parakeet only supports English"
)
def test_transcriptions_error_handling(self):
"""Test error handling for invalid requests."""
url = get_modal_transcript_url()

View File

@@ -0,0 +1,61 @@
import pytest
@pytest.mark.asyncio
@pytest.mark.parametrize("enable_diarization", [False, True])
async def test_basic_process(
dummy_transcript,
dummy_llm,
dummy_processors,
enable_diarization,
dummy_diarization,
):
# goal is to start the server, and send rtc audio to it
# validate the events received
from pathlib import Path
from reflector.settings import settings
from reflector.tools.process import process_audio_file
# LLM_BACKEND no longer exists in settings
# settings.LLM_BACKEND = "test"
settings.TRANSCRIPT_BACKEND = "whisper"
# event callback
marks = {}
async def event_callback(event):
if event.processor not in marks:
marks[event.processor] = 0
marks[event.processor] += 1
# invoke the process and capture events
path = Path(__file__).parent / "records" / "test_mathieu_hello.wav"
if enable_diarization:
# Test with diarization - may fail if pyannote.audio is not installed
try:
await process_audio_file(
path.as_posix(), event_callback, enable_diarization=True
)
except SystemExit:
pytest.skip("pyannote.audio not installed - skipping diarization test")
else:
# Test without diarization - should always work
await process_audio_file(
path.as_posix(), event_callback, enable_diarization=False
)
print(f"Diarization: {enable_diarization}, Marks: {marks}")
# validate the events
# Each processor should be called for each audio segment processed
# The final processors (Topic, Title, Summary) should be called once at the end
assert marks["TranscriptLinerProcessor"] > 0
assert marks["TranscriptTranslatorPassthroughProcessor"] > 0
assert marks["TranscriptTopicDetectorProcessor"] == 1
assert marks["TranscriptFinalSummaryProcessor"] == 1
assert marks["TranscriptFinalTitleProcessor"] == 1
if enable_diarization:
assert marks["TestAudioDiarizationProcessor"] == 1

View File

@@ -23,7 +23,7 @@ async def test_search_postgresql_only():
assert results == []
assert total == 0
params_empty = SearchParameters(query_text=None)
params_empty = SearchParameters(query_text="")
results_empty, total_empty = await search_controller.search_transcripts(
params_empty
)
@@ -34,7 +34,7 @@ async def test_search_postgresql_only():
@pytest.mark.asyncio
async def test_search_with_empty_query():
"""Test that empty query returns all transcripts."""
params = SearchParameters(query_text=None)
params = SearchParameters(query_text="")
results, total = await search_controller.search_transcripts(params)
assert isinstance(results, list)

View File

@@ -1,7 +1,5 @@
"""Unit tests for search snippet generation."""
import pytest
from reflector.db.search import (
SnippetCandidate,
SnippetGenerator,
@@ -514,9 +512,11 @@ data visualization and data storage"""
)
assert webvtt_count == 3
# combine_sources requires at least one source to be present
with pytest.raises(AssertionError, match="At least one source must be present"):
SnippetGenerator.combine_sources(None, None, "data", max_total=3)
snippets_empty, count_empty = SnippetGenerator.combine_sources(
None, None, "data", max_total=3
)
assert snippets_empty == []
assert count_empty == 0
def test_edge_cases(self):
"""Test edge cases for the pure functions."""

View File

@@ -19,7 +19,7 @@ async def fake_transcript(tmpdir, client):
transcript = await transcripts_controller.get_by_id(tid)
assert transcript is not None
await transcripts_controller.update(transcript, {"status": "ended"})
await transcripts_controller.update(transcript, {"status": "finished"})
# manually copy a file at the expected location
audio_filename = transcript.audio_mp3_filename

View File

@@ -29,10 +29,10 @@ async def client(app_lifespan):
@pytest.mark.asyncio
async def test_transcript_process(
tmpdir,
whisper_transcript,
dummy_llm,
dummy_processors,
dummy_file_transcript,
dummy_file_diarization,
dummy_diarization,
dummy_storage,
client,
):
@@ -56,8 +56,8 @@ async def test_transcript_process(
assert response.status_code == 200
assert response.json()["status"] == "ok"
# wait for processing to finish (max 1 minute)
timeout_seconds = 60
# wait for processing to finish (max 10 minutes)
timeout_seconds = 600 # 10 minutes
start_time = time.monotonic()
while (time.monotonic() - start_time) < timeout_seconds:
# fetch the transcript and check if it is ended
@@ -75,10 +75,9 @@ async def test_transcript_process(
)
assert response.status_code == 200
assert response.json()["status"] == "ok"
await asyncio.sleep(2)
# wait for processing to finish (max 1 minute)
timeout_seconds = 60
# wait for processing to finish (max 10 minutes)
timeout_seconds = 600 # 10 minutes
start_time = time.monotonic()
while (time.monotonic() - start_time) < timeout_seconds:
# fetch the transcript and check if it is ended
@@ -100,4 +99,4 @@ async def test_transcript_process(
response = await client.get(f"/transcripts/{tid}/topics")
assert response.status_code == 200
assert len(response.json()) == 1
assert "Hello world. How are you today?" in response.json()[0]["transcript"]
assert "want to share" in response.json()[0]["transcript"]

View File

@@ -12,8 +12,7 @@ async def test_transcript_upload_file(
tmpdir,
dummy_llm,
dummy_processors,
dummy_file_transcript,
dummy_file_diarization,
dummy_diarization,
dummy_storage,
client,
):
@@ -37,8 +36,8 @@ async def test_transcript_upload_file(
assert response.status_code == 200
assert response.json()["status"] == "ok"
# wait the processing to finish (max 1 minute)
timeout_seconds = 60
# wait the processing to finish (max 10 minutes)
timeout_seconds = 600 # 10 minutes
start_time = time.monotonic()
while (time.monotonic() - start_time) < timeout_seconds:
# fetch the transcript and check if it is ended
@@ -48,7 +47,7 @@ async def test_transcript_upload_file(
break
await asyncio.sleep(1)
else:
return pytest.fail(f"Processing timed out after {timeout_seconds} seconds")
pytest.fail(f"Processing timed out after {timeout_seconds} seconds")
# check the transcript is ended
transcript = resp.json()
@@ -60,4 +59,4 @@ async def test_transcript_upload_file(
response = await client.get(f"/transcripts/{tid}/topics")
assert response.status_code == 200
assert len(response.json()) == 1
assert "Hello world. How are you today?" in response.json()[0]["transcript"]
assert "want to share" in response.json()[0]["transcript"]

View File

@@ -1,30 +0,0 @@
"use client";
import { Flex, Spinner } from "@chakra-ui/react";
import { useAuth } from "../lib/AuthProvider";
import { useLoginRequiredPages } from "../lib/useLoginRequiredPages";
export default function AuthWrapper({
children,
}: {
children: React.ReactNode;
}) {
const auth = useAuth();
const redirectPath = useLoginRequiredPages();
const redirectHappens = !!redirectPath;
if (auth.status === "loading" || redirectHappens) {
return (
<Flex
flexDir="column"
alignItems="center"
justifyContent="center"
h="calc(100vh - 80px)" // Account for header height
>
<Spinner size="xl" color="blue.500" />
</Flex>
);
}
return <>{children}</>;
}

View File

@@ -1,10 +1,7 @@
import React from "react";
import { Box, Stack, Link, Heading } from "@chakra-ui/react";
import NextLink from "next/link";
import type { components } from "../../../reflector-api";
type Room = components["schemas"]["Room"];
type SourceKind = components["schemas"]["SourceKind"];
import { Room, SourceKind } from "../../../api";
interface FilterSidebarProps {
rooms: Room[];
@@ -75,7 +72,7 @@ export default function FilterSidebar({
key={room.id}
as={NextLink}
href="#"
onClick={() => onFilterChange("room" as SourceKind, room.id)}
onClick={() => onFilterChange("room", room.id)}
color={
selectedSourceKind === "room" && selectedRoomId === room.id
? "blue.500"

View File

@@ -18,10 +18,7 @@ import {
highlightMatches,
generateTextFragment,
} from "../../../lib/textHighlight";
import type { components } from "../../../reflector-api";
type SearchResult = components["schemas"]["SearchResult"];
type SourceKind = components["schemas"]["SourceKind"];
import { SearchResult } from "../../../api";
interface TranscriptCardsProps {
results: SearchResult[];
@@ -123,7 +120,7 @@ function TranscriptCard({
: "N/A";
const formattedDate = formatLocalDate(result.created_at);
const source =
result.source_kind === ("room" as SourceKind)
result.source_kind === "room"
? result.room_name || result.room_id
: result.source_kind;

View File

@@ -19,33 +19,37 @@ import {
parseAsStringLiteral,
} from "nuqs";
import { LuX } from "react-icons/lu";
import type { components } from "../../reflector-api";
type Room = components["schemas"]["Room"];
type SourceKind = components["schemas"]["SourceKind"];
type SearchResult = components["schemas"]["SearchResult"];
import {
useRoomsList,
useTranscriptsSearch,
useTranscriptDelete,
useTranscriptProcess,
} from "../../lib/apiHooks";
import { useSearchTranscripts } from "../transcripts/useSearchTranscripts";
import useSessionUser from "../../lib/useSessionUser";
import { Room, SourceKind, SearchResult, $SourceKind } from "../../api";
import useApi from "../../lib/useApi";
import { useError } from "../../(errors)/errorContext";
import FilterSidebar from "./_components/FilterSidebar";
import Pagination, {
FIRST_PAGE,
PaginationPage,
parsePaginationPage,
totalPages as getTotalPages,
paginationPageTo0Based,
} from "./_components/Pagination";
import TranscriptCards from "./_components/TranscriptCards";
import DeleteTranscriptDialog from "./_components/DeleteTranscriptDialog";
import { formatLocalDate } from "../../lib/time";
import { RECORD_A_MEETING_URL } from "../../api/urls";
import { useUserName } from "../../lib/useUserName";
const SEARCH_FORM_QUERY_INPUT_NAME = "query" as const;
const usePrefetchRooms = (setRooms: (rooms: Room[]) => void): void => {
const { setError } = useError();
const api = useApi();
useEffect(() => {
if (!api) return;
api
.v1RoomsList({ page: 1 })
.then((rooms) => setRooms(rooms.items))
.catch((err) => setError(err, "There was an error fetching the rooms"));
}, [api, setError]);
};
const SearchForm: React.FC<{
setPage: (page: PaginationPage) => void;
sourceKind: SourceKind | null;
@@ -65,6 +69,7 @@ const SearchForm: React.FC<{
searchQuery,
setSearchQuery,
}) => {
// to keep the search input controllable + more fine grained control (urlSearchQuery is updated on submits)
const [searchInputValue, setSearchInputValue] = useState(searchQuery || "");
const handleSearchQuerySubmit = async (d: FormData) => {
await setSearchQuery((d.get(SEARCH_FORM_QUERY_INPUT_NAME) as string) || "");
@@ -158,6 +163,7 @@ const UnderSearchFormFilterIndicators: React.FC<{
p="1px"
onClick={() => {
setSourceKind(null);
// TODO questionable
setRoomId(null);
}}
_hover={{ bg: "blue.200" }}
@@ -203,11 +209,7 @@ export default function TranscriptBrowser() {
const [urlSourceKind, setUrlSourceKind] = useQueryState(
"source",
parseAsStringLiteral([
"room",
"live",
"file",
] as const satisfies SourceKind[]).withOptions({
parseAsStringLiteral($SourceKind.enum).withOptions({
shallow: false,
}),
);
@@ -227,40 +229,46 @@ export default function TranscriptBrowser() {
useEffect(() => {
const maybePage = parsePaginationPage(urlPage);
if ("error" in maybePage) {
setPage(FIRST_PAGE).then(() => {});
setPage(FIRST_PAGE).then(() => {
/*may be called n times we dont care*/
});
return;
}
_setSafePage(maybePage.value);
}, [urlPage]);
const [rooms, setRooms] = useState<Room[]>([]);
const pageSize = 20;
const {
data: searchData,
isLoading: searchLoading,
refetch: reloadSearch,
} = useTranscriptsSearch(urlSearchQuery, {
limit: pageSize,
offset: paginationPageTo0Based(page) * pageSize,
room_id: urlRoomId || undefined,
source_kind: urlSourceKind || undefined,
});
const results = searchData?.results || [];
const totalResults = searchData?.total || 0;
// Fetch rooms
const { data: roomsData } = useRoomsList(1);
const rooms = roomsData?.items || [];
results,
totalCount: totalResults,
isLoading,
reload,
} = useSearchTranscripts(
urlSearchQuery,
{
roomIds: urlRoomId ? [urlRoomId] : null,
sourceKind: urlSourceKind,
},
{
pageSize,
page,
},
);
const totalPages = getTotalPages(totalResults, pageSize);
const userName = useUserName();
const userName = useSessionUser().name;
const [deletionLoading, setDeletionLoading] = useState(false);
const api = useApi();
const { setError } = useError();
const cancelRef = React.useRef(null);
const [transcriptToDeleteId, setTranscriptToDeleteId] =
React.useState<string>();
usePrefetchRooms(setRooms);
const handleFilterTranscripts = (
sourceKind: SourceKind | null,
roomId: string,
@@ -272,37 +280,44 @@ export default function TranscriptBrowser() {
const onCloseDeletion = () => setTranscriptToDeleteId(undefined);
const deleteTranscript = useTranscriptDelete();
const processTranscript = useTranscriptProcess();
const confirmDeleteTranscript = (transcriptId: string) => {
if (deletionLoading) return;
if (!api || deletionLoading) return;
setDeletionLoading(true);
deleteTranscript.mutate(
{
params: {
path: { transcript_id: transcriptId },
},
},
{
onSuccess: () => {
setDeletionLoading(false);
onCloseDeletion();
reloadSearch();
},
onError: () => {
setDeletionLoading(false);
},
},
);
api
.v1TranscriptDelete({ transcriptId })
.then(() => {
setDeletionLoading(false);
onCloseDeletion();
reload();
})
.catch((err) => {
setDeletionLoading(false);
setError(err, "There was an error deleting the transcript");
});
};
const handleProcessTranscript = (transcriptId: string) => {
processTranscript.mutate({
params: {
path: { transcript_id: transcriptId },
},
});
if (!api) {
console.error("API not available on handleProcessTranscript");
return;
}
api
.v1TranscriptProcess({ transcriptId })
.then((result) => {
const status =
result && typeof result === "object" && "status" in result
? (result as { status: string }).status
: undefined;
if (status === "already running") {
setError(
new Error("Processing is already running, please wait"),
"Processing is already running, please wait",
);
}
})
.catch((err) => {
setError(err, "There was an error processing the transcript");
});
};
const transcriptToDelete = results?.find(
@@ -317,7 +332,7 @@ export default function TranscriptBrowser() {
? transcriptToDelete.room_name || transcriptToDelete.room_id
: transcriptToDelete?.source_kind;
if (searchLoading && results.length === 0) {
if (isLoading && results.length === 0) {
return (
<Flex
flexDir="column"
@@ -345,7 +360,7 @@ export default function TranscriptBrowser() {
>
<Heading size="lg">
{userName ? `${userName}'s Transcriptions` : "Your Transcriptions"}{" "}
{(searchLoading || deletionLoading) && <Spinner size="sm" />}
{(isLoading || deletionLoading) && <Spinner size="sm" />}
</Heading>
</Flex>
@@ -388,12 +403,12 @@ export default function TranscriptBrowser() {
<TranscriptCards
results={results}
query={urlSearchQuery}
isLoading={searchLoading}
isLoading={isLoading}
onDelete={setTranscriptToDeleteId}
onReprocess={handleProcessTranscript}
/>
{!searchLoading && results.length === 0 && (
{!isLoading && results.length === 0 && (
<EmptyResult searchQuery={urlSearchQuery} />
)}
</Flex>

View File

@@ -2,8 +2,9 @@ import { Container, Flex, Link } from "@chakra-ui/react";
import { getConfig } from "../lib/edgeConfig";
import NextLink from "next/link";
import Image from "next/image";
import About from "../(aboutAndPrivacy)/about";
import Privacy from "../(aboutAndPrivacy)/privacy";
import UserInfo from "../(auth)/userInfo";
import AuthWrapper from "./AuthWrapper";
import { RECORD_A_MEETING_URL } from "../api/urls";
export default async function AppLayout({
@@ -89,7 +90,7 @@ export default async function AppLayout({
</div>
</Flex>
<AuthWrapper>{children}</AuthWrapper>
{children}
</Container>
);
}

View File

@@ -12,9 +12,7 @@ import {
HStack,
} from "@chakra-ui/react";
import { LuLink } from "react-icons/lu";
import type { components } from "../../../reflector-api";
type Room = components["schemas"]["Room"];
import { Room } from "../../../api";
import { RoomActionsMenu } from "./RoomActionsMenu";
interface RoomCardsProps {

View File

@@ -1,7 +1,5 @@
import { Box, Heading, Text, VStack } from "@chakra-ui/react";
import type { components } from "../../../reflector-api";
type Room = components["schemas"]["Room"];
import { Room } from "../../../api";
import { RoomTable } from "./RoomTable";
import { RoomCards } from "./RoomCards";

View File

@@ -9,9 +9,7 @@ import {
Spinner,
} from "@chakra-ui/react";
import { LuLink } from "react-icons/lu";
import type { components } from "../../../reflector-api";
type Room = components["schemas"]["Room"];
import { Room } from "../../../api";
import { RoomActionsMenu } from "./RoomActionsMenu";
interface RoomTableProps {

View File

@@ -11,28 +11,15 @@ import {
Input,
Select,
Spinner,
IconButton,
createListCollection,
useDisclosure,
} from "@chakra-ui/react";
import { useEffect, useMemo, useState } from "react";
import { LuEye, LuEyeOff } from "react-icons/lu";
import { useEffect, useState } from "react";
import useApi from "../../lib/useApi";
import useRoomList from "./useRoomList";
import type { components } from "../../reflector-api";
import {
useRoomCreate,
useRoomUpdate,
useRoomDelete,
useZulipStreams,
useZulipTopics,
useRoomGet,
useRoomTestWebhook,
} from "../../lib/apiHooks";
import { ApiError, Room } from "../../api";
import { RoomList } from "./_components/RoomList";
import { PaginationPage } from "../browse/_components/Pagination";
import { assertExists } from "../../lib/utils";
type Room = components["schemas"]["Room"];
interface SelectOption {
label: string;
@@ -68,8 +55,6 @@ const roomInitialState = {
recordingType: "cloud",
recordingTrigger: "automatic-2nd-participant",
isShared: false,
webhookUrl: "",
webhookSecret: "",
};
export default function RoomsList() {
@@ -87,77 +72,61 @@ export default function RoomsList() {
const recordingTypeCollection = createListCollection({
items: recordingTypeOptions,
});
const [roomInput, setRoomInput] = useState<null | typeof roomInitialState>(
null,
);
const [room, setRoom] = useState(roomInitialState);
const [isEditing, setIsEditing] = useState(false);
const [editRoomId, setEditRoomId] = useState<string | null>(null);
const {
loading,
response,
refetch,
error: roomListError,
} = useRoomList(PaginationPage(1));
const [editRoomId, setEditRoomId] = useState("");
const api = useApi();
// TODO seems to be no setPage calls
const [page, setPage] = useState<number>(1);
const { loading, response, refetch } = useRoomList(PaginationPage(page));
const [streams, setStreams] = useState<Stream[]>([]);
const [topics, setTopics] = useState<Topic[]>([]);
const [nameError, setNameError] = useState("");
const [linkCopied, setLinkCopied] = useState("");
const [selectedStreamId, setSelectedStreamId] = useState<number | null>(null);
const [testingWebhook, setTestingWebhook] = useState(false);
const [webhookTestResult, setWebhookTestResult] = useState<string | null>(
null,
);
const [showWebhookSecret, setShowWebhookSecret] = useState(false);
interface Stream {
stream_id: number;
name: string;
}
const createRoomMutation = useRoomCreate();
const updateRoomMutation = useRoomUpdate();
const deleteRoomMutation = useRoomDelete();
const { data: streams = [] } = useZulipStreams();
const { data: topics = [] } = useZulipTopics(selectedStreamId);
interface Topic {
name: string;
}
const {
data: detailedEditedRoom,
isLoading: isDetailedEditedRoomLoading,
error: detailedEditedRoomError,
} = useRoomGet(editRoomId);
const error = roomListError || detailedEditedRoomError;
// room being edited, as fetched from the server
const editedRoom: typeof roomInitialState | null = useMemo(
() =>
detailedEditedRoom
? {
name: detailedEditedRoom.name,
zulipAutoPost: detailedEditedRoom.zulip_auto_post,
zulipStream: detailedEditedRoom.zulip_stream,
zulipTopic: detailedEditedRoom.zulip_topic,
isLocked: detailedEditedRoom.is_locked,
roomMode: detailedEditedRoom.room_mode,
recordingType: detailedEditedRoom.recording_type,
recordingTrigger: detailedEditedRoom.recording_trigger,
isShared: detailedEditedRoom.is_shared,
webhookUrl: detailedEditedRoom.webhook_url || "",
webhookSecret: detailedEditedRoom.webhook_secret || "",
}
: null,
[detailedEditedRoom],
);
// a room input value or a last api room state
const room = roomInput || editedRoom || roomInitialState;
const roomTestWebhookMutation = useRoomTestWebhook();
// Update selected stream ID when zulip stream changes
useEffect(() => {
if (room.zulipStream && streams.length > 0) {
const selectedStream = streams.find((s) => s.name === room.zulipStream);
if (selectedStream !== undefined) {
setSelectedStreamId(selectedStream.stream_id);
const fetchZulipStreams = async () => {
if (!api) return;
try {
const response = await api.v1ZulipGetStreams();
setStreams(response);
} catch (error) {
console.error("Error fetching Zulip streams:", error);
}
} else {
setSelectedStreamId(null);
};
if (room.zulipAutoPost) {
fetchZulipStreams();
}
}, [room.zulipStream, streams]);
}, [room.zulipAutoPost, !api]);
useEffect(() => {
const fetchZulipTopics = async () => {
if (!api || !room.zulipStream) return;
try {
const selectedStream = streams.find((s) => s.name === room.zulipStream);
if (selectedStream) {
const response = await api.v1ZulipGetTopics({
streamId: selectedStream.stream_id,
});
setTopics(response);
}
} catch (error) {
console.error("Error fetching Zulip topics:", error);
}
};
fetchZulipTopics();
}, [room.zulipStream, streams, api]);
const streamOptions: SelectOption[] = streams.map((stream) => {
return { label: stream.name, value: stream.name };
@@ -186,76 +155,6 @@ export default function RoomsList() {
}, 2000);
};
const handleCloseDialog = () => {
setShowWebhookSecret(false);
setWebhookTestResult(null);
setEditRoomId(null);
onClose();
};
const handleTestWebhook = async () => {
if (!room.webhookUrl) {
setWebhookTestResult("Please enter a webhook URL first");
return;
}
if (!editRoomId) {
console.error("No room ID to test webhook");
return;
}
setTestingWebhook(true);
setWebhookTestResult(null);
try {
const response = await roomTestWebhookMutation.mutateAsync({
params: {
path: {
room_id: editRoomId,
},
},
});
if (response.success) {
setWebhookTestResult(
`✅ Webhook test successful! Status: ${response.status_code}`,
);
} else {
let errorMsg = `❌ Webhook test failed`;
errorMsg += ` (Status: ${response.status_code})`;
if (response.error) {
errorMsg += `: ${response.error}`;
} else if (response.response_preview) {
// Try to parse and extract meaningful error from response
// Specific to N8N at the moment, as there is no specification for that
// We could just display as is, but decided here to dig a little bit more.
try {
const preview = JSON.parse(response.response_preview);
if (preview.message) {
errorMsg += `: ${preview.message}`;
}
} catch {
// If not JSON, just show the preview text (truncated)
const previewText = response.response_preview.substring(0, 150);
errorMsg += `: ${previewText}`;
}
} else if (response?.message) {
errorMsg += `: ${response.message}`;
}
setWebhookTestResult(errorMsg);
}
} catch (error) {
console.error("Error testing webhook:", error);
setWebhookTestResult("❌ Failed to test webhook. Please check your URL.");
} finally {
setTestingWebhook(false);
}
// Clear result after 5 seconds
setTimeout(() => {
setWebhookTestResult(null);
}, 5000);
};
const handleSaveRoom = async () => {
try {
if (RESERVED_PATHS.includes(room.name)) {
@@ -273,34 +172,30 @@ export default function RoomsList() {
recording_type: room.recordingType,
recording_trigger: room.recordingTrigger,
is_shared: room.isShared,
webhook_url: room.webhookUrl,
webhook_secret: room.webhookSecret,
};
if (isEditing) {
await updateRoomMutation.mutateAsync({
params: {
path: { room_id: assertExists(editRoomId) },
},
body: roomData,
await api?.v1RoomsUpdate({
roomId: editRoomId,
requestBody: roomData,
});
} else {
await createRoomMutation.mutateAsync({
body: roomData,
await api?.v1RoomsCreate({
requestBody: roomData,
});
}
setRoomInput(null);
setRoom(roomInitialState);
setIsEditing(false);
setEditRoomId("");
setNameError("");
refetch();
onClose();
handleCloseDialog();
} catch (err: any) {
} catch (err) {
if (
err?.status === 400 &&
err?.body?.detail == "Room name is not unique"
err instanceof ApiError &&
err.status === 400 &&
(err.body as any).detail == "Room name is not unique"
) {
setNameError(
"This room name is already taken. Please choose a different name.",
@@ -311,11 +206,18 @@ export default function RoomsList() {
}
};
const handleEditRoom = async (roomId: string, roomData) => {
// Reset states
setShowWebhookSecret(false);
setWebhookTestResult(null);
const handleEditRoom = (roomId, roomData) => {
setRoom({
name: roomData.name,
zulipAutoPost: roomData.zulip_auto_post,
zulipStream: roomData.zulip_stream,
zulipTopic: roomData.zulip_topic,
isLocked: roomData.is_locked,
roomMode: roomData.room_mode,
recordingType: roomData.recording_type,
recordingTrigger: roomData.recording_trigger,
isShared: roomData.is_shared,
});
setEditRoomId(roomId);
setIsEditing(true);
setNameError("");
@@ -324,10 +226,8 @@ export default function RoomsList() {
const handleDeleteRoom = async (roomId: string) => {
try {
await deleteRoomMutation.mutateAsync({
params: {
path: { room_id: roomId },
},
await api?.v1RoomsDelete({
roomId,
});
refetch();
} catch (err) {
@@ -344,7 +244,7 @@ export default function RoomsList() {
.toLowerCase();
setNameError("");
}
setRoomInput({
setRoom({
...room,
[name]: type === "checkbox" ? checked : value,
});
@@ -367,9 +267,6 @@ export default function RoomsList() {
</Flex>
);
if (roomListError)
return <div>{`${roomListError.name}: ${roomListError.message}`}</div>;
return (
<Flex
flexDir="column"
@@ -388,10 +285,8 @@ export default function RoomsList() {
colorPalette="primary"
onClick={() => {
setIsEditing(false);
setRoomInput(null);
setRoom(roomInitialState);
setNameError("");
setShowWebhookSecret(false);
setWebhookTestResult(null);
onOpen();
}}
>
@@ -401,7 +296,7 @@ export default function RoomsList() {
<Dialog.Root
open={open}
onOpenChange={(e) => (e.open ? onOpen() : handleCloseDialog())}
onOpenChange={(e) => (e.open ? onOpen() : onClose())}
size="lg"
>
<Dialog.Backdrop />
@@ -457,7 +352,7 @@ export default function RoomsList() {
<Select.Root
value={[room.roomMode]}
onValueChange={(e) =>
setRoomInput({ ...room, roomMode: e.value[0] })
setRoom({ ...room, roomMode: e.value[0] })
}
collection={roomModeCollection}
>
@@ -487,7 +382,7 @@ export default function RoomsList() {
<Select.Root
value={[room.recordingType]}
onValueChange={(e) =>
setRoomInput({
setRoom({
...room,
recordingType: e.value[0],
recordingTrigger:
@@ -522,7 +417,7 @@ export default function RoomsList() {
<Select.Root
value={[room.recordingTrigger]}
onValueChange={(e) =>
setRoomInput({ ...room, recordingTrigger: e.value[0] })
setRoom({ ...room, recordingTrigger: e.value[0] })
}
collection={recordingTriggerCollection}
disabled={room.recordingType !== "cloud"}
@@ -577,7 +472,7 @@ export default function RoomsList() {
<Select.Root
value={room.zulipStream ? [room.zulipStream] : []}
onValueChange={(e) =>
setRoomInput({
setRoom({
...room,
zulipStream: e.value[0],
zulipTopic: "",
@@ -612,7 +507,7 @@ export default function RoomsList() {
<Select.Root
value={room.zulipTopic ? [room.zulipTopic] : []}
onValueChange={(e) =>
setRoomInput({ ...room, zulipTopic: e.value[0] })
setRoom({ ...room, zulipTopic: e.value[0] })
}
collection={topicCollection}
disabled={!room.zulipAutoPost}
@@ -638,109 +533,6 @@ export default function RoomsList() {
</Select.Positioner>
</Select.Root>
</Field.Root>
{/* Webhook Configuration Section */}
<Field.Root mt={8}>
<Field.Label>Webhook URL</Field.Label>
<Input
name="webhookUrl"
type="url"
placeholder="https://example.com/webhook"
value={room.webhookUrl}
onChange={handleRoomChange}
/>
<Field.HelperText>
Optional: URL to receive notifications when transcripts are
ready
</Field.HelperText>
</Field.Root>
{room.webhookUrl && (
<>
<Field.Root mt={4}>
<Field.Label>Webhook Secret</Field.Label>
<Flex gap={2}>
<Input
name="webhookSecret"
type={showWebhookSecret ? "text" : "password"}
value={room.webhookSecret}
onChange={handleRoomChange}
placeholder={
isEditing && room.webhookSecret
? "••••••••"
: "Leave empty to auto-generate"
}
flex="1"
/>
{isEditing && room.webhookSecret && (
<IconButton
size="sm"
variant="ghost"
aria-label={
showWebhookSecret ? "Hide secret" : "Show secret"
}
onClick={() =>
setShowWebhookSecret(!showWebhookSecret)
}
>
{showWebhookSecret ? <LuEyeOff /> : <LuEye />}
</IconButton>
)}
</Flex>
<Field.HelperText>
Used for HMAC signature verification (auto-generated if
left empty)
</Field.HelperText>
</Field.Root>
{isEditing && (
<>
<Flex
mt={2}
gap={2}
alignItems="flex-start"
direction="column"
>
<Button
size="sm"
variant="outline"
onClick={handleTestWebhook}
disabled={testingWebhook || !room.webhookUrl}
>
{testingWebhook ? (
<>
<Spinner size="xs" mr={2} />
Testing...
</>
) : (
"Test Webhook"
)}
</Button>
{webhookTestResult && (
<div
style={{
fontSize: "14px",
wordBreak: "break-word",
maxWidth: "100%",
padding: "8px",
borderRadius: "4px",
backgroundColor: webhookTestResult.startsWith(
"✅",
)
? "#f0fdf4"
: "#fef2f2",
border: `1px solid ${webhookTestResult.startsWith("✅") ? "#86efac" : "#fca5a5"}`,
}}
>
{webhookTestResult}
</div>
)}
</Flex>
</>
)}
</>
)}
<Field.Root mt={4}>
<Checkbox.Root
name="isShared"
@@ -765,7 +557,7 @@ export default function RoomsList() {
</Field.Root>
</Dialog.Body>
<Dialog.Footer>
<Button variant="ghost" onClick={handleCloseDialog}>
<Button variant="ghost" onClick={onClose}>
Cancel
</Button>
<Button

View File

@@ -1,7 +1,7 @@
import { useRoomsList } from "../../lib/apiHooks";
import type { components } from "../../reflector-api";
type Page_Room_ = components["schemas"]["Page_RoomDetails_"];
import { useEffect, useState } from "react";
import { useError } from "../../(errors)/errorContext";
import useApi from "../../lib/useApi";
import { Page_Room_ } from "../../api";
import { PaginationPage } from "../browse/_components/Pagination";
type RoomList = {
@@ -11,17 +11,38 @@ type RoomList = {
refetch: () => void;
};
// Wrapper to maintain backward compatibility
//always protected
const useRoomList = (page: PaginationPage): RoomList => {
const { data, isLoading, error, refetch } = useRoomsList(page);
return {
response: data || null,
loading: isLoading,
error: error
? new Error(error.detail ? JSON.stringify(error.detail) : undefined)
: null,
refetch,
const [response, setResponse] = useState<Page_Room_ | null>(null);
const [loading, setLoading] = useState<boolean>(true);
const [error, setErrorState] = useState<Error | null>(null);
const { setError } = useError();
const api = useApi();
const [refetchCount, setRefetchCount] = useState(0);
const refetch = () => {
setLoading(true);
setRefetchCount(refetchCount + 1);
};
useEffect(() => {
if (!api) return;
setLoading(true);
api
.v1RoomsList({ page })
.then((response) => {
setResponse(response);
setLoading(false);
})
.catch((err) => {
setResponse(null);
setLoading(false);
setError(err);
setErrorState(err);
});
}, [!api, page, refetchCount]);
return { response, loading, error, refetch };
};
export default useRoomList;

View File

@@ -6,10 +6,9 @@ import TopicPlayer from "./topicPlayer";
import useParticipants from "../../useParticipants";
import useTopicWithWords from "../../useTopicWithWords";
import ParticipantList from "./participantList";
import type { components } from "../../../../reflector-api";
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
import { GetTranscriptTopic } from "../../../../api";
import { SelectedText, selectedTextIsTimeSlice } from "./types";
import { useTranscriptUpdate } from "../../../../lib/apiHooks";
import useApi from "../../../../lib/useApi";
import useTranscript from "../../useTranscript";
import { useError } from "../../../../(errors)/errorContext";
import { useRouter } from "next/navigation";
@@ -24,7 +23,7 @@ export type TranscriptCorrect = {
export default function TranscriptCorrect({
params: { transcriptId },
}: TranscriptCorrect) {
const updateTranscriptMutation = useTranscriptUpdate();
const api = useApi();
const transcript = useTranscript(transcriptId);
const stateCurrentTopic = useState<GetTranscriptTopic>();
const [currentTopic, _sct] = stateCurrentTopic;
@@ -35,21 +34,16 @@ export default function TranscriptCorrect({
const { setError } = useError();
const router = useRouter();
const markAsDone = async () => {
const markAsDone = () => {
if (transcript.response && !transcript.response.reviewed) {
try {
await updateTranscriptMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: { reviewed: true },
api
?.v1TranscriptUpdate({ transcriptId, requestBody: { reviewed: true } })
.then(() => {
router.push(`/transcripts/${transcriptId}`);
})
.catch((e) => {
setError(e, "Error marking as done");
});
router.push(`/transcripts/${transcriptId}`);
} catch (e) {
setError(e as Error, "Error marking as done");
}
}
};

View File

@@ -1,15 +1,8 @@
import { faArrowTurnDown } from "@fortawesome/free-solid-svg-icons";
import { FontAwesomeIcon } from "@fortawesome/react-fontawesome";
import { ChangeEvent, useEffect, useRef, useState } from "react";
import type { components } from "../../../../reflector-api";
type Participant = components["schemas"]["Participant"];
import {
useTranscriptSpeakerAssign,
useTranscriptSpeakerMerge,
useTranscriptParticipantUpdate,
useTranscriptParticipantCreate,
useTranscriptParticipantDelete,
} from "../../../../lib/apiHooks";
import { Participant } from "../../../../api";
import useApi from "../../../../lib/useApi";
import { UseParticipants } from "../../useParticipants";
import { selectedTextIsSpeaker, selectedTextIsTimeSlice } from "./types";
import { useError } from "../../../../(errors)/errorContext";
@@ -37,19 +30,9 @@ const ParticipantList = ({
topicWithWords,
stateSelectedText,
}: ParticipantList) => {
const api = useApi();
const { setError } = useError();
const speakerAssignMutation = useTranscriptSpeakerAssign();
const speakerMergeMutation = useTranscriptSpeakerMerge();
const participantUpdateMutation = useTranscriptParticipantUpdate();
const participantCreateMutation = useTranscriptParticipantCreate();
const participantDeleteMutation = useTranscriptParticipantDelete();
const loading =
speakerAssignMutation.isPending ||
speakerMergeMutation.isPending ||
participantUpdateMutation.isPending ||
participantCreateMutation.isPending ||
participantDeleteMutation.isPending;
const [loading, setLoading] = useState(false);
const [participantInput, setParticipantInput] = useState("");
const inputRef = useRef<HTMLInputElement>(null);
const [selectedText, setSelectedText] = stateSelectedText;
@@ -120,6 +103,7 @@ const ParticipantList = ({
const onSuccess = () => {
topicWithWords.refetch();
participants.refetch();
setLoading(false);
setAction(null);
setSelectedText(undefined);
setSelectedParticipant(undefined);
@@ -136,14 +120,11 @@ const ParticipantList = ({
if (loading || participants.loading || topicWithWords.loading) return;
if (!selectedTextIsTimeSlice(selectedText)) return;
setLoading(true);
try {
await speakerAssignMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: {
await api?.v1TranscriptAssignSpeaker({
transcriptId,
requestBody: {
participant: participant.id,
timestamp_from: selectedText.start,
timestamp_to: selectedText.end,
@@ -151,7 +132,8 @@ const ParticipantList = ({
});
onSuccess();
} catch (error) {
setError(error as Error, "There was an error assigning");
setError(error, "There was an error assigning");
setLoading(false);
throw error;
}
};
@@ -159,38 +141,32 @@ const ParticipantList = ({
const mergeSpeaker =
(speakerFrom, participantTo: Participant) => async () => {
if (loading || participants.loading || topicWithWords.loading) return;
setLoading(true);
if (participantTo.speaker) {
try {
await speakerMergeMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: {
await api?.v1TranscriptMergeSpeaker({
transcriptId,
requestBody: {
speaker_from: speakerFrom,
speaker_to: participantTo.speaker,
},
});
onSuccess();
} catch (error) {
setError(error as Error, "There was an error merging");
setError(error, "There was an error merging");
setLoading(false);
}
} else {
try {
await participantUpdateMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
participant_id: participantTo.id,
},
},
body: { speaker: speakerFrom },
await api?.v1TranscriptUpdateParticipant({
transcriptId,
participantId: participantTo.id,
requestBody: { speaker: speakerFrom },
});
onSuccess();
} catch (error) {
setError(error as Error, "There was an error merging (update)");
setError(error, "There was an error merging (update)");
setLoading(false);
}
}
};
@@ -210,106 +186,105 @@ const ParticipantList = ({
(p) => p.speaker == selectedText,
);
if (participant && participant.name !== participantInput) {
try {
await participantUpdateMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
participant_id: participant.id,
},
},
body: {
setLoading(true);
api
?.v1TranscriptUpdateParticipant({
transcriptId,
participantId: participant.id,
requestBody: {
name: participantInput,
},
})
.then(() => {
participants.refetch();
setLoading(false);
setAction(null);
})
.catch((e) => {
setError(e, "There was an error renaming");
setLoading(false);
});
participants.refetch();
setAction(null);
} catch (e) {
setError(e as Error, "There was an error renaming");
}
}
} else if (
action == "Create to rename" &&
selectedTextIsSpeaker(selectedText)
) {
try {
await participantCreateMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: {
setLoading(true);
api
?.v1TranscriptAddParticipant({
transcriptId,
requestBody: {
name: participantInput,
speaker: selectedText,
},
})
.then(() => {
participants.refetch();
setParticipantInput("");
setOneMatch(undefined);
setLoading(false);
})
.catch((e) => {
setError(e, "There was an error creating");
setLoading(false);
});
participants.refetch();
setParticipantInput("");
setOneMatch(undefined);
} catch (e) {
setError(e as Error, "There was an error creating");
}
} else if (
action == "Create and assign" &&
selectedTextIsTimeSlice(selectedText)
) {
setLoading(true);
try {
const participant = await participantCreateMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: {
const participant = await api?.v1TranscriptAddParticipant({
transcriptId,
requestBody: {
name: participantInput,
},
});
setLoading(false);
assignTo(participant)().catch(() => {
// error and loading are handled by assignTo catch
participants.refetch();
});
} catch (error) {
setError(error as Error, "There was an error creating");
setError(e, "There was an error creating");
setLoading(false);
}
} else if (action == "Create") {
try {
await participantCreateMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: {
setLoading(true);
api
?.v1TranscriptAddParticipant({
transcriptId,
requestBody: {
name: participantInput,
},
})
.then(() => {
participants.refetch();
setParticipantInput("");
setLoading(false);
inputRef.current?.focus();
})
.catch((e) => {
setError(e, "There was an error creating");
setLoading(false);
});
participants.refetch();
setParticipantInput("");
inputRef.current?.focus();
} catch (e) {
setError(e as Error, "There was an error creating");
}
}
};
const deleteParticipant = (participantId) => async (e) => {
const deleteParticipant = (participantId) => (e) => {
e.stopPropagation();
if (loading || participants.loading || topicWithWords.loading) return;
try {
await participantDeleteMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
participant_id: participantId,
},
},
setLoading(true);
api
?.v1TranscriptDeleteParticipant({ transcriptId, participantId })
.then(() => {
participants.refetch();
setLoading(false);
})
.catch((e) => {
setError(e, "There was an error deleting");
setLoading(false);
});
participants.refetch();
} catch (e) {
setError(e as Error, "There was an error deleting");
}
};
const selectParticipant = (participant) => (e) => {

View File

@@ -1,7 +1,6 @@
import useTopics from "../../useTopics";
import { Dispatch, SetStateAction, useEffect } from "react";
import type { components } from "../../../../reflector-api";
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
import { GetTranscriptTopic } from "../../../../api";
import {
BoxProps,
Box,

View File

@@ -2,10 +2,12 @@ import { useEffect, useRef, useState } from "react";
import React from "react";
import Markdown from "react-markdown";
import "../../../styles/markdown.css";
import type { components } from "../../../reflector-api";
type GetTranscript = components["schemas"]["GetTranscript"];
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
import { useTranscriptUpdate } from "../../../lib/apiHooks";
import {
GetTranscript,
GetTranscriptTopic,
UpdateTranscript,
} from "../../../api";
import useApi from "../../../lib/useApi";
import {
Flex,
Heading,
@@ -31,8 +33,9 @@ export default function FinalSummary(props: FinalSummaryProps) {
const [preEditSummary, setPreEditSummary] = useState("");
const [editedSummary, setEditedSummary] = useState("");
const api = useApi();
const { setError } = useError();
const updateTranscriptMutation = useTranscriptUpdate();
useEffect(() => {
setEditedSummary(props.transcriptResponse?.long_summary || "");
@@ -44,15 +47,12 @@ export default function FinalSummary(props: FinalSummaryProps) {
const updateSummary = async (newSummary: string, transcriptId: string) => {
try {
const updatedTranscript = await updateTranscriptMutation.mutateAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: {
long_summary: newSummary,
},
const requestBody: UpdateTranscript = {
long_summary: newSummary,
};
const updatedTranscript = await api?.v1TranscriptUpdate({
transcriptId,
requestBody,
});
if (props.onUpdate) {
props.onUpdate(newSummary);
@@ -60,7 +60,7 @@ export default function FinalSummary(props: FinalSummaryProps) {
console.log("Updated long summary:", updatedTranscript);
} catch (err) {
console.error("Failed to update long summary:", err);
setError(err as Error, "Failed to update long summary.");
setError(err, "Failed to update long summary.");
}
};
@@ -114,12 +114,7 @@ export default function FinalSummary(props: FinalSummaryProps) {
<Button onClick={onDiscardClick} variant="ghost">
Cancel
</Button>
<Button
onClick={onSaveClick}
disabled={updateTranscriptMutation.isPending}
>
Save
</Button>
<Button onClick={onSaveClick}>Save</Button>
</Flex>
)}
{!isEditMode && (

View File

@@ -86,7 +86,7 @@ export default function TranscriptDetails(details: TranscriptDetails) {
useActiveTopic={useActiveTopic}
waveform={waveform.waveform}
media={mp3.media}
mediaDuration={transcript.response?.duration || null}
mediaDuration={transcript.response.duration}
/>
) : !mp3.loading && (waveform.error || mp3.error) ? (
<Box p={4} bg="red.100" borderRadius="md">
@@ -116,7 +116,7 @@ export default function TranscriptDetails(details: TranscriptDetails) {
<Flex direction="column" gap={0}>
<Flex alignItems="center" gap={2}>
<TranscriptTitle
title={transcript.response?.title || "Unnamed Transcript"}
title={transcript.response.title || "Unnamed Transcript"}
transcriptId={transcriptId}
onUpdate={(newTitle) => {
transcript.reload();

View File

@@ -24,16 +24,10 @@ const TranscriptUpload = (details: TranscriptUpload) => {
const router = useRouter();
const [status_, setStatus] = useState(
const [status, setStatus] = useState(
webSockets.status.value || transcript.response?.status || "idle",
);
// status is obviously done if we have transcript
const status =
!transcript.loading && transcript.response?.status === "ended"
? transcript.response?.status
: status_;
useEffect(() => {
if (!transcriptStarted && webSockets.transcriptTextLive.length !== 0)
setTranscriptStarted(true);
@@ -41,11 +35,8 @@ const TranscriptUpload = (details: TranscriptUpload) => {
useEffect(() => {
//TODO HANDLE ERROR STATUS BETTER
// TODO deprecate webSockets.status.value / depend on transcript.response?.status from query lib
const newStatus =
transcript.response?.status === "ended"
? "ended"
: webSockets.status.value || transcript.response?.status || "idle";
webSockets.status.value || transcript.response?.status || "idle";
setStatus(newStatus);
if (newStatus && (newStatus == "ended" || newStatus == "error")) {
console.log(newStatus, "redirecting");

View File

@@ -1,33 +1,45 @@
import type { components } from "../../reflector-api";
import { useTranscriptCreate } from "../../lib/apiHooks";
import { useEffect, useState } from "react";
type CreateTranscript = components["schemas"]["CreateTranscript"];
type GetTranscript = components["schemas"]["GetTranscript"];
import { useError } from "../../(errors)/errorContext";
import { CreateTranscript, GetTranscript } from "../../api";
import useApi from "../../lib/useApi";
type UseCreateTranscript = {
transcript: GetTranscript | null;
loading: boolean;
error: Error | null;
create: (transcriptCreationDetails: CreateTranscript) => Promise<void>;
create: (transcriptCreationDetails: CreateTranscript) => void;
};
const useCreateTranscript = (): UseCreateTranscript => {
const createMutation = useTranscriptCreate();
const [transcript, setTranscript] = useState<GetTranscript | null>(null);
const [loading, setLoading] = useState<boolean>(false);
const [error, setErrorState] = useState<Error | null>(null);
const { setError } = useError();
const api = useApi();
const create = async (transcriptCreationDetails: CreateTranscript) => {
if (createMutation.isPending) return;
const create = (transcriptCreationDetails: CreateTranscript) => {
if (loading || !api) return;
await createMutation.mutateAsync({
body: transcriptCreationDetails,
});
setLoading(true);
api
.v1TranscriptsCreate({ requestBody: transcriptCreationDetails })
.then((transcript) => {
setTranscript(transcript);
setLoading(false);
})
.catch((err) => {
setError(
err,
"There was an issue creating a transcript, please try again.",
);
setErrorState(err);
setLoading(false);
});
};
return {
transcript: createMutation.data || null,
loading: createMutation.isPending,
error: createMutation.error as Error | null,
create,
};
return { transcript, loading, error, create };
};
export default useCreateTranscript;

View File

@@ -1,7 +1,6 @@
import React, { useState } from "react";
import { useTranscriptUploadAudio } from "../../lib/apiHooks";
import useApi from "../../lib/useApi";
import { Button, Spinner } from "@chakra-ui/react";
import { useError } from "../../(errors)/errorContext";
type FileUploadButton = {
transcriptId: string;
@@ -9,16 +8,13 @@ type FileUploadButton = {
export default function FileUploadButton(props: FileUploadButton) {
const fileInputRef = React.useRef<HTMLInputElement>(null);
const uploadMutation = useTranscriptUploadAudio();
const { setError } = useError();
const api = useApi();
const [progress, setProgress] = useState(0);
const triggerFileUpload = () => {
fileInputRef.current?.click();
};
const handleFileUpload = async (
event: React.ChangeEvent<HTMLInputElement>,
) => {
const handleFileUpload = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0];
if (file) {
@@ -28,45 +24,37 @@ export default function FileUploadButton(props: FileUploadButton) {
let start = 0;
let uploadedSize = 0;
api?.httpRequest.config.interceptors.request.use((request) => {
request.onUploadProgress = (progressEvent) => {
const currentProgress = Math.floor(
((uploadedSize + progressEvent.loaded) / file.size) * 100,
);
setProgress(currentProgress);
};
return request;
});
const uploadNextChunk = async () => {
if (chunkNumber == totalChunks) {
setProgress(0);
return;
}
if (chunkNumber == totalChunks) return;
const chunkSize = Math.min(maxChunkSize, file.size - start);
const end = start + chunkSize;
const chunk = file.slice(start, end);
try {
const formData = new FormData();
formData.append("chunk", chunk);
await api?.v1TranscriptRecordUpload({
transcriptId: props.transcriptId,
formData: {
chunk,
},
chunkNumber,
totalChunks,
});
await uploadMutation.mutateAsync({
params: {
path: {
transcript_id: props.transcriptId,
},
query: {
chunk_number: chunkNumber,
total_chunks: totalChunks,
},
},
body: formData as any,
});
uploadedSize += chunkSize;
chunkNumber++;
start = end;
uploadedSize += chunkSize;
const currentProgress = Math.floor((uploadedSize / file.size) * 100);
setProgress(currentProgress);
chunkNumber++;
start = end;
await uploadNextChunk();
} catch (error) {
setError(error as Error, "Failed to upload file");
setProgress(0);
}
uploadNextChunk();
};
uploadNextChunk();

View File

@@ -9,27 +9,33 @@ import { useRouter } from "next/navigation";
import useCreateTranscript from "../createTranscript";
import SelectSearch from "react-select-search";
import { supportedLanguages } from "../../../supportedLanguages";
import useSessionStatus from "../../../lib/useSessionStatus";
import { featureEnabled } from "../../../domainContext";
import { signIn } from "next-auth/react";
import {
Flex,
Box,
Spinner,
Heading,
Button,
Card,
Center,
Link,
CardBody,
Stack,
Text,
Icon,
Grid,
IconButton,
Spacer,
Menu,
Tooltip,
Input,
} from "@chakra-ui/react";
import { useAuth } from "../../../lib/AuthProvider";
import type { components } from "../../../reflector-api";
const TranscriptCreate = () => {
const isClient = typeof window !== "undefined";
const router = useRouter();
const auth = useAuth();
const isAuthenticated = auth.status === "authenticated";
const isAuthRefreshing = auth.status === "refreshing";
const isLoading = auth.status === "loading";
const { isLoading, isAuthenticated } = useSessionStatus();
const requireLogin = featureEnabled("requireLogin");
const [name, setName] = useState<string>("");
@@ -48,32 +54,20 @@ const TranscriptCreate = () => {
const [loadingUpload, setLoadingUpload] = useState(false);
const getTargetLanguage = () => {
if (targetLanguage === "NOTRANSLATION") return undefined;
if (targetLanguage === "NOTRANSLATION") return;
return targetLanguage;
};
const send = () => {
if (loadingRecord || createTranscript.loading || permissionDenied) return;
setLoadingRecord(true);
const targetLang = getTargetLanguage();
createTranscript.create({
name,
source_language: "en",
target_language: targetLang || "en",
source_kind: "live",
});
createTranscript.create({ name, target_language: getTargetLanguage() });
};
const uploadFile = () => {
if (loadingUpload || createTranscript.loading || permissionDenied) return;
setLoadingUpload(true);
const targetLang = getTargetLanguage();
createTranscript.create({
name,
source_language: "en",
target_language: targetLang || "en",
source_kind: "file",
});
createTranscript.create({ name, target_language: getTargetLanguage() });
};
useEffect(() => {
@@ -138,8 +132,8 @@ const TranscriptCreate = () => {
<Center>
{isLoading ? (
<Spinner />
) : requireLogin && !isAuthenticated && !isAuthRefreshing ? (
<Button onClick={() => auth.signIn("authentik")}>Log in</Button>
) : requireLogin && !isAuthenticated ? (
<Button onClick={() => signIn("authentik")}>Log in</Button>
) : (
<Flex
rounded="xl"

View File

@@ -5,9 +5,7 @@ import RegionsPlugin from "wavesurfer.js/dist/plugins/regions.esm.js";
import { formatTime, formatTimeMs } from "../../lib/time";
import { Topic } from "./webSocketTypes";
import type { components } from "../../reflector-api";
type AudioWaveform = components["schemas"]["AudioWaveform"];
import { AudioWaveform } from "../../api";
import { waveSurferStyles } from "../../styles/recorder";
import { Box, Flex, IconButton } from "@chakra-ui/react";
import { LuPause, LuPlay } from "react-icons/lu";
@@ -20,7 +18,7 @@ type PlayerProps = {
];
waveform: AudioWaveform;
media: HTMLMediaElement;
mediaDuration: number | null;
mediaDuration: number;
};
export default function Player(props: PlayerProps) {
@@ -52,9 +50,7 @@ export default function Player(props: PlayerProps) {
container: waveformRef.current,
peaks: [props.waveform.data],
height: "auto",
duration: props.mediaDuration
? Math.floor(props.mediaDuration / 1000)
: undefined,
duration: Math.floor(props.mediaDuration / 1000),
media: props.media,
...waveSurferStyles.playerSettings,

View File

@@ -6,6 +6,7 @@ import RecordPlugin from "../../lib/custom-plugins/record";
import { formatTime, formatTimeMs } from "../../lib/time";
import { waveSurferStyles } from "../../styles/recorder";
import { useError } from "../../(errors)/errorContext";
import FileUploadButton from "./fileUploadButton";
import useWebRTC from "./useWebRTC";
import useAudioDevice from "./useAudioDevice";
import { Box, Flex, IconButton, Menu, RadioGroup } from "@chakra-ui/react";

View File

@@ -2,10 +2,7 @@ import { useEffect, useState } from "react";
import { featureEnabled } from "../../domainContext";
import { ShareMode, toShareMode } from "../../lib/shareMode";
import type { components } from "../../reflector-api";
type GetTranscript = components["schemas"]["GetTranscript"];
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
type UpdateTranscript = components["schemas"]["UpdateTranscript"];
import { GetTranscript, GetTranscriptTopic, UpdateTranscript } from "../../api";
import {
Box,
Flex,
@@ -18,11 +15,12 @@ import {
createListCollection,
} from "@chakra-ui/react";
import { LuShare2 } from "react-icons/lu";
import { useTranscriptUpdate } from "../../lib/apiHooks";
import useApi from "../../lib/useApi";
import useSessionUser from "../../lib/useSessionUser";
import { CustomSession } from "../../lib/types";
import ShareLink from "./shareLink";
import ShareCopy from "./shareCopy";
import ShareZulip from "./shareZulip";
import { useAuth } from "../../lib/AuthProvider";
type ShareAndPrivacyProps = {
finalSummaryRef: any;
@@ -52,9 +50,12 @@ export default function ShareAndPrivacy(props: ShareAndPrivacyProps) {
);
const [shareLoading, setShareLoading] = useState(false);
const requireLogin = featureEnabled("requireLogin");
const updateTranscriptMutation = useTranscriptUpdate();
const api = useApi();
const updateShareMode = async (selectedValue: string) => {
if (!api)
throw new Error("ShareLink's API should always be ready at this point");
const selectedOption = shareOptionsData.find(
(option) => option.value === selectedValue,
);
@@ -66,27 +67,19 @@ export default function ShareAndPrivacy(props: ShareAndPrivacyProps) {
share_mode: selectedValue as "public" | "semi-private" | "private",
};
try {
const updatedTranscript = await updateTranscriptMutation.mutateAsync({
params: {
path: { transcript_id: props.transcriptResponse.id },
},
body: requestBody,
});
setShareMode(
shareOptionsData.find(
(option) => option.value === updatedTranscript.share_mode,
) || shareOptionsData[0],
);
} catch (err) {
console.error("Failed to update share mode:", err);
} finally {
setShareLoading(false);
}
const updatedTranscript = await api.v1TranscriptUpdate({
transcriptId: props.transcriptResponse.id,
requestBody,
});
setShareMode(
shareOptionsData.find(
(option) => option.value === updatedTranscript.share_mode,
) || shareOptionsData[0],
);
setShareLoading(false);
};
const auth = useAuth();
const userId = auth.status === "authenticated" ? auth.user?.id : null;
const userId = useSessionUser().id;
useEffect(() => {
setIsOwner(!!(requireLogin && userId === props.transcriptResponse.user_id));
@@ -131,7 +124,7 @@ export default function ShareAndPrivacy(props: ShareAndPrivacyProps) {
"This transcript is public. Everyone can access it."}
</Text>
{isOwner && (
{isOwner && api && (
<Select.Root
key={shareMode.value}
value={[shareMode.value || ""]}

View File

@@ -1,7 +1,5 @@
import { useState } from "react";
import type { components } from "../../reflector-api";
type GetTranscript = components["schemas"]["GetTranscript"];
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
import { GetTranscript, GetTranscriptTopic } from "../../api";
import { Button, BoxProps, Box } from "@chakra-ui/react";
type ShareCopyProps = {

View File

@@ -1,9 +1,6 @@
import { useState, useEffect, useMemo } from "react";
import { featureEnabled } from "../../domainContext";
import type { components } from "../../reflector-api";
type GetTranscript = components["schemas"]["GetTranscript"];
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
import { GetTranscript, GetTranscriptTopic } from "../../api";
import {
BoxProps,
Button,
@@ -15,15 +12,12 @@ import {
Checkbox,
Combobox,
Spinner,
Portal,
useFilter,
useListCollection,
} from "@chakra-ui/react";
import { TbBrandZulip } from "react-icons/tb";
import {
useZulipStreams,
useZulipTopics,
useTranscriptPostToZulip,
} from "../../lib/apiHooks";
import useApi from "../../lib/useApi";
type ShareZulipProps = {
transcriptResponse: GetTranscript;
@@ -36,75 +30,104 @@ interface Stream {
name: string;
}
interface Topic {
name: string;
}
export default function ShareZulip(props: ShareZulipProps & BoxProps) {
const [showModal, setShowModal] = useState(false);
const [stream, setStream] = useState<string | undefined>(undefined);
const [selectedStreamId, setSelectedStreamId] = useState<number | null>(null);
const [topic, setTopic] = useState<string | undefined>(undefined);
const [includeTopics, setIncludeTopics] = useState(false);
const { data: streams = [], isLoading: isLoadingStreams } = useZulipStreams();
const { data: topics = [] } = useZulipTopics(selectedStreamId);
const postToZulipMutation = useTranscriptPostToZulip();
const [isLoading, setIsLoading] = useState(true);
const [streams, setStreams] = useState<Stream[]>([]);
const [topics, setTopics] = useState<Topic[]>([]);
const api = useApi();
const { contains } = useFilter({ sensitivity: "base" });
const streamItems = useMemo(() => {
return streams.map((stream: Stream) => ({
label: stream.name,
value: stream.name,
}));
}, [streams]);
const {
collection: streamItemsCollection,
filter: streamItemsFilter,
set: streamItemsSet,
} = useListCollection({
initialItems: [] as { label: string; value: string }[],
filter: contains,
});
const topicItems = useMemo(() => {
return topics.map(({ name }) => ({
label: name,
value: name,
}));
}, [topics]);
const {
collection: topicItemsCollection,
filter: topicItemsFilter,
set: topicItemsSet,
} = useListCollection({
initialItems: [] as { label: string; value: string }[],
filter: contains,
});
const { collection: streamItemsCollection, filter: streamItemsFilter } =
useListCollection({
initialItems: streamItems,
filter: contains,
});
const { collection: topicItemsCollection, filter: topicItemsFilter } =
useListCollection({
initialItems: topicItems,
filter: contains,
});
// Update selected stream ID when stream changes
useEffect(() => {
if (stream && streams) {
const selectedStream = streams.find((s: Stream) => s.name === stream);
setSelectedStreamId(selectedStream ? selectedStream.stream_id : null);
} else {
setSelectedStreamId(null);
}
}, [stream, streams]);
const fetchZulipStreams = async () => {
if (!api) return;
try {
const response = await api.v1ZulipGetStreams();
setStreams(response);
streamItemsSet(
response.map((stream) => ({
label: stream.name,
value: stream.name,
})),
);
setIsLoading(false);
} catch (error) {
console.error("Error fetching Zulip streams:", error);
}
};
fetchZulipStreams();
}, [!api]);
useEffect(() => {
const fetchZulipTopics = async () => {
if (!api || !stream) return;
try {
const selectedStream = streams.find((s) => s.name === stream);
if (selectedStream) {
const response = await api.v1ZulipGetTopics({
streamId: selectedStream.stream_id,
});
setTopics(response);
topicItemsSet(
response.map((topic) => ({
label: topic.name,
value: topic.name,
})),
);
} else {
topicItemsSet([]);
}
} catch (error) {
console.error("Error fetching Zulip topics:", error);
}
};
fetchZulipTopics();
}, [stream, streams, api]);
const handleSendToZulip = async () => {
if (!props.transcriptResponse) return;
if (!api || !props.transcriptResponse) return;
if (stream && topic) {
try {
await postToZulipMutation.mutateAsync({
params: {
path: {
transcript_id: props.transcriptResponse.id,
},
query: {
stream,
topic,
include_topics: includeTopics,
},
},
await api.v1TranscriptPostToZulip({
transcriptId: props.transcriptResponse.id,
stream,
topic,
includeTopics,
});
setShowModal(false);
} catch (error) {
console.error("Error posting to Zulip:", error);
console.log(error);
}
}
};
@@ -132,7 +155,7 @@ export default function ShareZulip(props: ShareZulipProps & BoxProps) {
</Dialog.CloseTrigger>
</Dialog.Header>
<Dialog.Body>
{isLoadingStreams ? (
{isLoading ? (
<Flex justify="center" py={8}>
<Spinner />
</Flex>

View File

@@ -1,8 +1,6 @@
import { useState } from "react";
import type { components } from "../../reflector-api";
type UpdateTranscript = components["schemas"]["UpdateTranscript"];
import { useTranscriptUpdate } from "../../lib/apiHooks";
import { UpdateTranscript } from "../../api";
import useApi from "../../lib/useApi";
import { Heading, IconButton, Input, Flex, Spacer } from "@chakra-ui/react";
import { LuPen } from "react-icons/lu";
@@ -16,27 +14,24 @@ const TranscriptTitle = (props: TranscriptTitle) => {
const [displayedTitle, setDisplayedTitle] = useState(props.title);
const [preEditTitle, setPreEditTitle] = useState(props.title);
const [isEditing, setIsEditing] = useState(false);
const updateTranscriptMutation = useTranscriptUpdate();
const api = useApi();
const updateTitle = async (newTitle: string, transcriptId: string) => {
if (!api) return;
try {
const requestBody: UpdateTranscript = {
title: newTitle,
};
await updateTranscriptMutation.mutateAsync({
params: {
path: { transcript_id: transcriptId },
},
body: requestBody,
const updatedTranscript = await api?.v1TranscriptUpdate({
transcriptId,
requestBody,
});
if (props.onUpdate) {
props.onUpdate(newTitle);
}
console.log("Updated transcript title:", newTitle);
console.log("Updated transcript:", updatedTranscript);
} catch (err) {
console.error("Failed to update transcript:", err);
// Revert title on error
setDisplayedTitle(preEditTitle);
}
};

View File

@@ -1,7 +1,6 @@
import { useContext, useEffect, useState } from "react";
import { DomainContext } from "../../domainContext";
import { useTranscriptGet } from "../../lib/apiHooks";
import { useAuth } from "../../lib/AuthProvider";
import getApi from "../../lib/useApi";
export type Mp3Response = {
media: HTMLMediaElement | null;
@@ -18,17 +17,14 @@ const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => {
const [audioLoadingError, setAudioLoadingError] = useState<null | string>(
null,
);
const [transcriptMetadataLoading, setTranscriptMetadataLoading] =
useState<boolean>(true);
const [transcriptMetadataLoadingError, setTranscriptMetadataLoadingError] =
useState<string | null>(null);
const [audioDeleted, setAudioDeleted] = useState<boolean | null>(null);
const api = getApi();
const { api_url } = useContext(DomainContext);
const auth = useAuth();
const accessTokenInfo =
auth.status === "authenticated" ? auth.accessToken : null;
const {
data: transcript,
isLoading: transcriptMetadataLoading,
error: transcriptError,
} = useTranscriptGet(later ? null : transcriptId);
const accessTokenInfo = api?.httpRequest?.config?.TOKEN;
const [serviceWorker, setServiceWorker] =
useState<ServiceWorkerRegistration | null>(null);
@@ -56,50 +52,72 @@ const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => {
}, [navigator.serviceWorker, !serviceWorker, accessTokenInfo]);
useEffect(() => {
if (!transcriptId || later || !transcript) return;
if (!transcriptId || !api || later) return;
let stopped = false;
let audioElement: HTMLAudioElement | null = null;
let handleCanPlay: (() => void) | null = null;
let handleError: (() => void) | null = null;
setTranscriptMetadataLoading(true);
setAudioLoading(true);
const deleted = transcript.audio_deleted || false;
setAudioDeleted(deleted);
// First fetch transcript info to check if audio is deleted
api
.v1TranscriptGet({ transcriptId })
.then((transcript) => {
if (stopped) {
return;
}
if (deleted) {
// Audio is deleted, don't attempt to load it
setMedia(null);
setAudioLoadingError(null);
setAudioLoading(false);
return;
}
const deleted = transcript.audio_deleted || false;
setAudioDeleted(deleted);
setTranscriptMetadataLoadingError(null);
// Audio is not deleted, proceed to load it
audioElement = document.createElement("audio");
audioElement.src = `${api_url}/v1/transcripts/${transcriptId}/audio/mp3`;
audioElement.crossOrigin = "anonymous";
audioElement.preload = "auto";
if (deleted) {
// Audio is deleted, don't attempt to load it
setMedia(null);
setAudioLoadingError(null);
setAudioLoading(false);
return;
}
handleCanPlay = () => {
if (stopped) return;
setAudioLoading(false);
setAudioLoadingError(null);
};
// Audio is not deleted, proceed to load it
audioElement = document.createElement("audio");
audioElement.src = `${api_url}/v1/transcripts/${transcriptId}/audio/mp3`;
audioElement.crossOrigin = "anonymous";
audioElement.preload = "auto";
handleError = () => {
if (stopped) return;
setAudioLoading(false);
setAudioLoadingError("Failed to load audio");
};
handleCanPlay = () => {
if (stopped) return;
setAudioLoading(false);
setAudioLoadingError(null);
};
audioElement.addEventListener("canplay", handleCanPlay);
audioElement.addEventListener("error", handleError);
handleError = () => {
if (stopped) return;
setAudioLoading(false);
setAudioLoadingError("Failed to load audio");
};
if (!stopped) {
setMedia(audioElement);
}
audioElement.addEventListener("canplay", handleCanPlay);
audioElement.addEventListener("error", handleError);
if (!stopped) {
setMedia(audioElement);
}
})
.catch((error) => {
if (stopped) return;
console.error("Failed to fetch transcript:", error);
setAudioDeleted(null);
setTranscriptMetadataLoadingError(error.message);
setAudioLoading(false);
})
.finally(() => {
if (stopped) return;
setTranscriptMetadataLoading(false);
});
return () => {
stopped = true;
@@ -110,18 +128,14 @@ const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => {
if (handleError) audioElement.removeEventListener("error", handleError);
}
};
}, [transcriptId, transcript, later, api_url]);
}, [transcriptId, api, later, api_url]);
const getNow = () => {
setLater(false);
};
const loading = audioLoading || transcriptMetadataLoading;
const error =
audioLoadingError ||
(transcriptError
? (transcriptError as any).message || String(transcriptError)
: null);
const error = audioLoadingError || transcriptMetadataLoadingError;
return { media, loading, error, getNow, audioDeleted };
};

View File

@@ -1,6 +1,8 @@
import type { components } from "../../reflector-api";
type Participant = components["schemas"]["Participant"];
import { useTranscriptParticipants } from "../../lib/apiHooks";
import { useEffect, useState } from "react";
import { Participant } from "../../api";
import { useError } from "../../(errors)/errorContext";
import useApi from "../../lib/useApi";
import { shouldShowError } from "../../lib/errorUtils";
type ErrorParticipants = {
error: Error;
@@ -27,38 +29,46 @@ export type UseParticipants = (
) & { refetch: () => void };
const useParticipants = (transcriptId: string): UseParticipants => {
const {
data: response,
isLoading: loading,
error,
refetch,
} = useTranscriptParticipants(transcriptId || null);
const [response, setResponse] = useState<Participant[] | null>(null);
const [loading, setLoading] = useState<boolean>(true);
const [error, setErrorState] = useState<Error | null>(null);
const { setError } = useError();
const api = useApi();
const [count, setCount] = useState(0);
// Type-safe return based on state
if (error) {
return {
error: error as Error,
loading: false,
response: null,
refetch,
} satisfies ErrorParticipants & { refetch: () => void };
}
const refetch = () => {
if (!loading) {
setCount(count + 1);
setLoading(true);
setErrorState(null);
}
};
if (loading || !response) {
return {
response: response || null,
loading: true,
error: null,
refetch,
} satisfies LoadingParticipants & { refetch: () => void };
}
useEffect(() => {
if (!transcriptId || !api) return;
return {
response,
loading: false,
error: null,
refetch,
} satisfies SuccessParticipants & { refetch: () => void };
setLoading(true);
api
.v1TranscriptGetParticipants({ transcriptId })
.then((result) => {
setResponse(result);
setLoading(false);
console.debug("Participants Loaded:", result);
})
.catch((error) => {
const shouldShowHuman = shouldShowError(error);
if (shouldShowHuman) {
setError(error, "There was an error loading the participants");
} else {
setError(error);
}
setErrorState(error);
setResponse(null);
setLoading(false);
});
}, [transcriptId, !api, count]);
return { response, loading, error, refetch } as UseParticipants;
};
export default useParticipants;

View File

@@ -0,0 +1,123 @@
// this hook is not great, we want to substitute it with a proper state management solution that is also not re-invention
import { useEffect, useRef, useState } from "react";
import { SearchResult, SourceKind } from "../../api";
import useApi from "../../lib/useApi";
import {
PaginationPage,
paginationPageTo0Based,
} from "../browse/_components/Pagination";
interface SearchFilters {
roomIds: readonly string[] | null;
sourceKind: SourceKind | null;
}
const EMPTY_SEARCH_FILTERS: SearchFilters = {
roomIds: null,
sourceKind: null,
};
type UseSearchTranscriptsOptions = {
pageSize: number;
page: PaginationPage;
};
interface UseSearchTranscriptsReturn {
results: SearchResult[];
totalCount: number;
isLoading: boolean;
error: unknown;
reload: () => void;
}
function hashEffectFilters(filters: SearchFilters): string {
return JSON.stringify(filters);
}
export function useSearchTranscripts(
query: string = "",
filters: SearchFilters = EMPTY_SEARCH_FILTERS,
options: UseSearchTranscriptsOptions = {
pageSize: 20,
page: PaginationPage(1),
},
): UseSearchTranscriptsReturn {
const { pageSize, page } = options;
const [reloadCount, setReloadCount] = useState(0);
const api = useApi();
const abortControllerRef = useRef<AbortController>();
const [data, setData] = useState<{ results: SearchResult[]; total: number }>({
results: [],
total: 0,
});
const [error, setError] = useState<any>();
const [isLoading, setIsLoading] = useState(false);
const filterHash = hashEffectFilters(filters);
useEffect(() => {
if (!api) {
setData({ results: [], total: 0 });
setError(undefined);
setIsLoading(false);
return;
}
if (abortControllerRef.current) {
abortControllerRef.current.abort();
}
const abortController = new AbortController();
abortControllerRef.current = abortController;
const performSearch = async () => {
setIsLoading(true);
try {
const response = await api.v1TranscriptsSearch({
q: query || "",
limit: pageSize,
offset: paginationPageTo0Based(page) * pageSize,
roomId: filters.roomIds?.[0],
sourceKind: filters.sourceKind || undefined,
});
if (abortController.signal.aborted) return;
setData(response);
setError(undefined);
} catch (err: unknown) {
if ((err as Error).name === "AbortError") {
return;
}
if (abortController.signal.aborted) {
console.error("Aborted search but error", err);
return;
}
setError(err);
} finally {
if (!abortController.signal.aborted) {
setIsLoading(false);
}
}
};
performSearch().then(() => {});
return () => {
abortController.abort();
};
}, [api, query, page, filterHash, pageSize, reloadCount]);
return {
results: data.results,
totalCount: data.total,
isLoading,
error,
reload: () => setReloadCount(reloadCount + 1),
};
}

View File

@@ -1,8 +1,9 @@
import type { components } from "../../reflector-api";
import { useTranscriptTopicsWithWordsPerSpeaker } from "../../lib/apiHooks";
import { useEffect, useState } from "react";
type GetTranscriptTopicWithWordsPerSpeaker =
components["schemas"]["GetTranscriptTopicWithWordsPerSpeaker"];
import { GetTranscriptTopicWithWordsPerSpeaker } from "../../api";
import { useError } from "../../(errors)/errorContext";
import useApi from "../../lib/useApi";
import { shouldShowError } from "../../lib/errorUtils";
type ErrorTopicWithWords = {
error: Error;
@@ -32,40 +33,47 @@ const useTopicWithWords = (
topicId: string | undefined,
transcriptId: string,
): UseTopicWithWords => {
const {
data: response,
isLoading: loading,
error,
refetch,
} = useTranscriptTopicsWithWordsPerSpeaker(
transcriptId || null,
topicId || null,
);
const [response, setResponse] =
useState<GetTranscriptTopicWithWordsPerSpeaker | null>(null);
const [loading, setLoading] = useState<boolean>(false);
const [error, setErrorState] = useState<Error | null>(null);
const { setError } = useError();
const api = useApi();
if (error) {
return {
error: error as Error,
loading: false,
response: null,
refetch,
} satisfies ErrorTopicWithWords & { refetch: () => void };
}
const [count, setCount] = useState(0);
if (loading || !response) {
return {
response: response || null,
loading: true,
error: false,
refetch,
} satisfies LoadingTopicWithWords & { refetch: () => void };
}
const refetch = () => {
if (!loading) {
setCount(count + 1);
setLoading(true);
setErrorState(null);
}
};
return {
response,
loading: false,
error: null,
refetch,
} satisfies SuccessTopicWithWords & { refetch: () => void };
useEffect(() => {
if (!transcriptId || !topicId || !api) return;
setLoading(true);
api
.v1TranscriptGetTopicsWithWordsPerSpeaker({ transcriptId, topicId })
.then((result) => {
setResponse(result);
setLoading(false);
console.debug("Topics with words Loaded:", result);
})
.catch((error) => {
const shouldShowHuman = shouldShowError(error);
if (shouldShowHuman) {
setError(error, "There was an error loading the topics with words");
} else {
setError(error);
}
setErrorState(error);
});
}, [transcriptId, !api, topicId, count]);
return { response, loading, error, refetch } as UseTopicWithWords;
};
export default useTopicWithWords;

View File

@@ -1,7 +1,10 @@
import { useTranscriptTopics } from "../../lib/apiHooks";
import type { components } from "../../reflector-api";
import { useEffect, useState } from "react";
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
import { useError } from "../../(errors)/errorContext";
import { Topic } from "./webSocketTypes";
import useApi from "../../lib/useApi";
import { shouldShowError } from "../../lib/errorUtils";
import { GetTranscriptTopic } from "../../api";
type TranscriptTopics = {
topics: GetTranscriptTopic[] | null;
@@ -10,13 +13,34 @@ type TranscriptTopics = {
};
const useTopics = (id: string): TranscriptTopics => {
const { data: topics, isLoading: loading, error } = useTranscriptTopics(id);
const [topics, setTopics] = useState<Topic[] | null>(null);
const [loading, setLoading] = useState<boolean>(true);
const [error, setErrorState] = useState<Error | null>(null);
const { setError } = useError();
const api = useApi();
useEffect(() => {
if (!id || !api) return;
return {
topics: topics || null,
loading,
error: error as Error | null,
};
setLoading(true);
api
.v1TranscriptGetTopics({ transcriptId: id })
.then((result) => {
setTopics(result);
setLoading(false);
console.debug("Transcript topics loaded:", result);
})
.catch((err) => {
setErrorState(err);
const shouldShowHuman = shouldShowError(err);
if (shouldShowHuman) {
setError(err, "There was an error loading the topics");
} else {
setError(err);
}
});
}, [id, !api]);
return { topics, loading, error };
};
export default useTopics;

View File

@@ -1,7 +1,8 @@
import type { components } from "../../reflector-api";
import { useTranscriptGet } from "../../lib/apiHooks";
type GetTranscript = components["schemas"]["GetTranscript"];
import { useEffect, useState } from "react";
import { GetTranscript } from "../../api";
import { useError } from "../../(errors)/errorContext";
import { shouldShowError } from "../../lib/errorUtils";
import useApi from "../../lib/useApi";
type ErrorTranscript = {
error: Error;
@@ -27,43 +28,43 @@ type SuccessTranscript = {
const useTranscript = (
id: string | null,
): ErrorTranscript | LoadingTranscript | SuccessTranscript => {
const { data, isLoading, error, refetch } = useTranscriptGet(id);
const [response, setResponse] = useState<GetTranscript | null>(null);
const [loading, setLoading] = useState<boolean>(true);
const [error, setErrorState] = useState<Error | null>(null);
const [reload, setReload] = useState(0);
const { setError } = useError();
const api = useApi();
const reloadHandler = () => setReload((prev) => prev + 1);
// Map to the expected return format
if (isLoading) {
return {
response: null,
loading: true,
error: false,
reload: refetch,
};
}
useEffect(() => {
if (!id || !api) return;
if (error) {
return {
error: error as Error,
loading: false,
response: null,
reload: refetch,
};
}
if (!response) {
setLoading(true);
}
// Check if data is undefined or null
if (!data) {
return {
response: null,
loading: true,
error: false,
reload: refetch,
};
}
api
.v1TranscriptGet({ transcriptId: id })
.then((result) => {
setResponse(result);
setLoading(false);
console.debug("Transcript Loaded:", result);
})
.catch((error) => {
const shouldShowHuman = shouldShowError(error);
if (shouldShowHuman) {
setError(error, "There was an error loading the transcript");
} else {
setError(error);
}
setErrorState(error);
});
}, [id, !api, reload]);
return {
response: data,
loading: false,
error: null,
reload: refetch,
};
return { response, loading, error, reload: reloadHandler } as
| ErrorTranscript
| LoadingTranscript
| SuccessTranscript;
};
export default useTranscript;

View File

@@ -1,7 +1,8 @@
import type { components } from "../../reflector-api";
import { useTranscriptWaveform } from "../../lib/apiHooks";
type AudioWaveform = components["schemas"]["AudioWaveform"];
import { useEffect, useState } from "react";
import { AudioWaveform } from "../../api";
import { useError } from "../../(errors)/errorContext";
import useApi from "../../lib/useApi";
import { shouldShowError } from "../../lib/errorUtils";
type AudioWaveFormResponse = {
waveform: AudioWaveform | null;
@@ -10,17 +11,35 @@ type AudioWaveFormResponse = {
};
const useWaveform = (id: string, skip: boolean): AudioWaveFormResponse => {
const {
data: waveform,
isLoading: loading,
error,
} = useTranscriptWaveform(skip ? null : id);
const [waveform, setWaveform] = useState<AudioWaveform | null>(null);
const [loading, setLoading] = useState<boolean>(false);
const [error, setErrorState] = useState<Error | null>(null);
const { setError } = useError();
const api = useApi();
return {
waveform: waveform || null,
loading,
error: error as Error | null,
};
useEffect(() => {
if (!id || !api || skip) {
setLoading(false);
setErrorState(null);
setWaveform(null);
return;
}
setLoading(true);
setErrorState(null);
api
.v1TranscriptGetAudioWaveform({ transcriptId: id })
.then((result) => {
setWaveform(result);
setLoading(false);
console.debug("Transcript waveform loaded:", result);
})
.catch((err) => {
setErrorState(err);
setLoading(false);
});
}, [id, api, skip]);
return { waveform, loading, error };
};
export default useWaveform;

View File

@@ -1,9 +1,8 @@
import { useEffect, useState } from "react";
import Peer from "simple-peer";
import { useError } from "../../(errors)/errorContext";
import { useTranscriptWebRTC } from "../../lib/apiHooks";
import type { components } from "../../reflector-api";
type RtcOffer = components["schemas"]["RtcOffer"];
import useApi from "../../lib/useApi";
import { RtcOffer } from "../../api";
const useWebRTC = (
stream: MediaStream | null,
@@ -11,10 +10,10 @@ const useWebRTC = (
): Peer => {
const [peer, setPeer] = useState<Peer | null>(null);
const { setError } = useError();
const { mutateAsync: mutateWebRtcTranscriptAsync } = useTranscriptWebRTC();
const api = useApi();
useEffect(() => {
if (!stream || !transcriptId) {
if (!stream || !transcriptId || !api) {
return;
}
@@ -25,7 +24,7 @@ const useWebRTC = (
try {
p = new Peer({ initiator: true, stream: stream });
} catch (error) {
setError(error as Error, "Error creating WebRTC");
setError(error, "Error creating WebRTC");
return;
}
@@ -33,31 +32,26 @@ const useWebRTC = (
setError(new Error(`WebRTC error: ${err}`));
});
p.on("signal", async (data: any) => {
p.on("signal", (data: any) => {
if (!api) return;
if ("sdp" in data) {
const rtcOffer: RtcOffer = {
sdp: data.sdp,
type: data.type,
};
try {
const answer = await mutateWebRtcTranscriptAsync({
params: {
path: {
transcript_id: transcriptId,
},
},
body: rtcOffer,
api
.v1TranscriptRecordWebrtc({ transcriptId, requestBody: rtcOffer })
.then((answer) => {
try {
p.signal(answer);
} catch (error) {
setError(error);
}
})
.catch((error) => {
setError(error, "Error loading WebRTCOffer");
});
try {
p.signal(answer);
} catch (error) {
setError(error as Error);
}
} catch (error) {
setError(error as Error, "Error loading WebRTCOffer");
}
}
});
@@ -69,7 +63,7 @@ const useWebRTC = (
return () => {
p.destroy();
};
}, [stream, transcriptId, mutateWebRtcTranscriptAsync]);
}, [stream, transcriptId, !api]);
return peer;
};

View File

@@ -2,12 +2,8 @@ import { useContext, useEffect, useState } from "react";
import { Topic, FinalSummary, Status } from "./webSocketTypes";
import { useError } from "../../(errors)/errorContext";
import { DomainContext } from "../../domainContext";
import type { components } from "../../reflector-api";
type AudioWaveform = components["schemas"]["AudioWaveform"];
type GetTranscriptSegmentTopic =
components["schemas"]["GetTranscriptSegmentTopic"];
import { useQueryClient } from "@tanstack/react-query";
import { $api } from "../../lib/apiClient";
import { AudioWaveform, GetTranscriptSegmentTopic } from "../../api";
import useApi from "../../lib/useApi";
export type UseWebSockets = {
transcriptTextLive: string;
@@ -37,8 +33,8 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
const [status, setStatus] = useState<Status>({ value: "" });
const { setError } = useError();
const { websocket_url: websocketUrl } = useContext(DomainContext);
const queryClient = useQueryClient();
const { websocket_url } = useContext(DomainContext);
const api = useApi();
const [accumulatedText, setAccumulatedText] = useState<string>("");
@@ -109,13 +105,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
title: "Topic 1: Introduction to Quantum Mechanics",
transcript:
"A brief overview of quantum mechanics and its principles.",
segments: [
{
speaker: 1,
start: 0,
text: "This is the transcription of an example title",
},
],
},
{
id: "2",
@@ -326,9 +315,11 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
}
};
if (!transcriptId) return;
if (!transcriptId || !api) return;
const url = `${websocketUrl}/v1/transcripts/${transcriptId}/events`;
api?.v1TranscriptGetWebsocketEvents({ transcriptId }).then((result) => {});
const url = `${websocket_url}/v1/transcripts/${transcriptId}/events`;
let ws = new WebSocket(url);
ws.onopen = () => {
@@ -370,16 +361,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
return [...prevTopics, topic];
});
console.debug("TOPIC event:", message.data);
// Invalidate topics query to sync with WebSocket data
queryClient.invalidateQueries({
queryKey: $api.queryOptions(
"get",
"/v1/transcripts/{transcript_id}/topics",
{
params: { path: { transcript_id: transcriptId } },
},
).queryKey,
});
break;
case "FINAL_SHORT_SUMMARY":
@@ -389,16 +370,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
case "FINAL_LONG_SUMMARY":
if (message.data) {
setFinalSummary(message.data);
// Invalidate transcript query to sync summary
queryClient.invalidateQueries({
queryKey: $api.queryOptions(
"get",
"/v1/transcripts/{transcript_id}",
{
params: { path: { transcript_id: transcriptId } },
},
).queryKey,
});
}
break;
@@ -406,16 +377,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
console.debug("FINAL_TITLE event:", message.data);
if (message.data) {
setTitle(message.data.title);
// Invalidate transcript query to sync title
queryClient.invalidateQueries({
queryKey: $api.queryOptions(
"get",
"/v1/transcripts/{transcript_id}",
{
params: { path: { transcript_id: transcriptId } },
},
).queryKey,
});
}
break;
@@ -473,11 +434,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
break;
case 1001: // Navigate away
break;
case 1006: // Closed by client Chrome
console.warn(
"WebSocket closed by client, likely duplicated connection in react dev mode",
);
break;
default:
setError(
new Error(`WebSocket closed unexpectedly with code: ${event.code}`),
@@ -494,7 +450,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
return () => {
ws.close();
};
}, [transcriptId, websocketUrl]);
}, [transcriptId, !api]);
return {
transcriptTextLive,

View File

@@ -1,6 +1,4 @@
import type { components } from "../../reflector-api";
type GetTranscriptTopic = components["schemas"]["GetTranscriptTopic"];
import { GetTranscriptTopic } from "../../api";
export type Topic = GetTranscriptTopic;

View File

@@ -1,21 +1,18 @@
"use client";
import { signOut, signIn } from "next-auth/react";
import useSessionStatus from "../lib/useSessionStatus";
import { Spinner, Link } from "@chakra-ui/react";
import { useAuth } from "../lib/AuthProvider";
export default function UserInfo() {
const auth = useAuth();
const status = auth.status;
const isLoading = status === "loading";
const isAuthenticated = status === "authenticated";
const isRefreshing = status === "refreshing";
const { isLoading, isAuthenticated } = useSessionStatus();
return isLoading ? (
<Spinner size="xs" className="mx-3" />
) : !isAuthenticated && !isRefreshing ? (
) : !isAuthenticated ? (
<Link
href="/"
className="font-light px-2"
onClick={() => auth.signIn("authentik")}
onClick={() => signIn("authentik")}
>
Log in
</Link>
@@ -23,7 +20,7 @@ export default function UserInfo() {
<Link
href="#"
className="font-light px-2"
onClick={() => auth.signOut({ callbackUrl: "/" })}
onClick={() => signOut({ callbackUrl: "/" })}
>
Log out
</Link>

View File

@@ -21,13 +21,11 @@ import { toaster } from "../components/ui/toaster";
import useRoomMeeting from "./useRoomMeeting";
import { useRouter } from "next/navigation";
import { notFound } from "next/navigation";
import useSessionStatus from "../lib/useSessionStatus";
import { useRecordingConsent } from "../recordingConsentContext";
import { useMeetingAudioConsent } from "../lib/apiHooks";
import type { components } from "../reflector-api";
type Meeting = components["schemas"]["Meeting"];
import useApi from "../lib/useApi";
import { Meeting } from "../api";
import { FaBars } from "react-icons/fa6";
import { useAuth } from "../lib/AuthProvider";
export type RoomDetails = {
params: {
@@ -78,30 +76,31 @@ const useConsentDialog = (
wherebyRef: RefObject<HTMLElement> /*accessibility*/,
) => {
const { state: consentState, touch, hasConsent } = useRecordingConsent();
const [consentLoading, setConsentLoading] = useState(false);
// toast would open duplicates, even with using "id=" prop
const [modalOpen, setModalOpen] = useState(false);
const audioConsentMutation = useMeetingAudioConsent();
const api = useApi();
const handleConsent = useCallback(
async (meetingId: string, given: boolean) => {
if (!api) return;
setConsentLoading(true);
try {
await audioConsentMutation.mutateAsync({
params: {
path: {
meeting_id: meetingId,
},
},
body: {
consent_given: given,
},
await api.v1MeetingAudioConsent({
meetingId,
requestBody: { consent_given: given },
});
touch(meetingId);
} catch (error) {
console.error("Error submitting consent:", error);
} finally {
setConsentLoading(false);
}
},
[audioConsentMutation, touch],
[api, touch],
);
const showConsentModal = useCallback(() => {
@@ -195,12 +194,7 @@ const useConsentDialog = (
return cleanup;
}, [meetingId, handleConsent, wherebyRef, modalOpen]);
return {
showConsentModal,
consentState,
hasConsent,
consentLoading: audioConsentMutation.isPending,
};
return { showConsentModal, consentState, hasConsent, consentLoading };
};
function ConsentDialogButton({
@@ -260,9 +254,7 @@ export default function Room(details: RoomDetails) {
const roomName = details.params.roomName;
const meeting = useRoomMeeting(roomName);
const router = useRouter();
const status = useAuth().status;
const isAuthenticated = status === "authenticated";
const isLoading = status === "loading" || meeting.loading;
const { isLoading, isAuthenticated } = useSessionStatus();
const roomUrl = meeting?.response?.host_room_url
? meeting?.response?.host_room_url

View File

@@ -1,10 +1,8 @@
import { useEffect, useState } from "react";
import { useError } from "../(errors)/errorContext";
import type { components } from "../reflector-api";
import { Meeting } from "../api";
import { shouldShowError } from "../lib/errorUtils";
type Meeting = components["schemas"]["Meeting"];
import { useRoomsCreateMeeting } from "../lib/apiHooks";
import useApi from "../lib/useApi";
import { notFound } from "next/navigation";
type ErrorMeeting = {
@@ -32,25 +30,27 @@ const useRoomMeeting = (
roomName: string | null | undefined,
): ErrorMeeting | LoadingMeeting | SuccessMeeting => {
const [response, setResponse] = useState<Meeting | null>(null);
const [loading, setLoading] = useState<boolean>(true);
const [error, setErrorState] = useState<Error | null>(null);
const [reload, setReload] = useState(0);
const { setError } = useError();
const createMeetingMutation = useRoomsCreateMeeting();
const api = useApi();
const reloadHandler = () => setReload((prev) => prev + 1);
useEffect(() => {
if (!roomName) return;
if (!roomName || !api) return;
const createMeeting = async () => {
try {
const result = await createMeetingMutation.mutateAsync({
params: {
path: {
room_name: roomName,
},
},
});
if (!response) {
setLoading(true);
}
api
.v1RoomsCreateMeeting({ roomName })
.then((result) => {
setResponse(result);
} catch (error: any) {
setLoading(false);
})
.catch((error) => {
const shouldShowHuman = shouldShowError(error);
if (shouldShowHuman && error.status !== 404) {
setError(
@@ -60,14 +60,9 @@ const useRoomMeeting = (
} else {
setError(error);
}
}
};
createMeeting();
}, [roomName, reload]);
const loading = createMeetingMutation.isPending && !response;
const error = createMeetingMutation.error as Error | null;
setErrorState(error);
});
}, [roomName, !api, reload]);
return { response, loading, error, reload: reloadHandler } as
| ErrorMeeting

37
www/app/api/OpenApi.ts Normal file
View File

@@ -0,0 +1,37 @@
import type { BaseHttpRequest } from "./core/BaseHttpRequest";
import type { OpenAPIConfig } from "./core/OpenAPI";
import { Interceptors } from "./core/OpenAPI";
import { AxiosHttpRequest } from "./core/AxiosHttpRequest";
import { DefaultService } from "./services.gen";
type HttpRequestConstructor = new (config: OpenAPIConfig) => BaseHttpRequest;
export class OpenApi {
public readonly default: DefaultService;
public readonly request: BaseHttpRequest;
constructor(
config?: Partial<OpenAPIConfig>,
HttpRequest: HttpRequestConstructor = AxiosHttpRequest,
) {
this.request = new HttpRequest({
BASE: config?.BASE ?? "",
VERSION: config?.VERSION ?? "0.1.0",
WITH_CREDENTIALS: config?.WITH_CREDENTIALS ?? false,
CREDENTIALS: config?.CREDENTIALS ?? "include",
TOKEN: config?.TOKEN,
USERNAME: config?.USERNAME,
PASSWORD: config?.PASSWORD,
HEADERS: config?.HEADERS,
ENCODE_PATH: config?.ENCODE_PATH,
interceptors: {
request: config?.interceptors?.request ?? new Interceptors(),
response: config?.interceptors?.response ?? new Interceptors(),
},
});
this.default = new DefaultService(this.request);
}
}

View File

@@ -1,5 +1,8 @@
// NextAuth route handler for Authentik
// Refresh rotation has been taken from https://next-auth.js.org/v3/tutorials/refresh-token-rotation even if we are using 4.x
import NextAuth from "next-auth";
import { authOptions } from "../../../lib/authBackend";
import { authOptions } from "../../../lib/auth";
const handler = NextAuth(authOptions);

View File

@@ -0,0 +1,25 @@
import type { ApiRequestOptions } from "./ApiRequestOptions";
import type { ApiResult } from "./ApiResult";
export class ApiError extends Error {
public readonly url: string;
public readonly status: number;
public readonly statusText: string;
public readonly body: unknown;
public readonly request: ApiRequestOptions;
constructor(
request: ApiRequestOptions,
response: ApiResult,
message: string,
) {
super(message);
this.name = "ApiError";
this.url = response.url;
this.status = response.status;
this.statusText = response.statusText;
this.body = response.body;
this.request = request;
}
}

View File

@@ -0,0 +1,21 @@
export type ApiRequestOptions<T = unknown> = {
readonly method:
| "GET"
| "PUT"
| "POST"
| "DELETE"
| "OPTIONS"
| "HEAD"
| "PATCH";
readonly url: string;
readonly path?: Record<string, unknown>;
readonly cookies?: Record<string, unknown>;
readonly headers?: Record<string, unknown>;
readonly query?: Record<string, unknown>;
readonly formData?: Record<string, unknown>;
readonly body?: any;
readonly mediaType?: string;
readonly responseHeader?: string;
readonly responseTransformer?: (data: unknown) => Promise<T>;
readonly errors?: Record<number | string, string>;
};

View File

@@ -0,0 +1,7 @@
export type ApiResult<TData = any> = {
readonly body: TData;
readonly ok: boolean;
readonly status: number;
readonly statusText: string;
readonly url: string;
};

Some files were not shown because too many files have changed in this diff Show More