mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-30 17:05:19 +00:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
72dca7cacc | ||
|
|
4ae56b730a | ||
|
|
cf6e867cf1 | ||
|
|
183601a121 | ||
|
|
b53c8da398 | ||
|
|
22a50bb94d | ||
|
|
504ca74184 | ||
|
|
a455b8090a | ||
|
|
6b0292d5f0 | ||
|
|
304315daaf | ||
|
|
7845f679c3 | ||
|
|
c155f66982 | ||
|
|
a682846645 |
27
CHANGELOG.md
27
CHANGELOG.md
@@ -1,5 +1,32 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## [0.38.2](https://github.com/GreyhavenHQ/reflector/compare/v0.38.1...v0.38.2) (2026-03-12)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* add auth guards to prevent anonymous access to write endpoints in non-public mode ([#907](https://github.com/GreyhavenHQ/reflector/issues/907)) ([cf6e867](https://github.com/GreyhavenHQ/reflector/commit/cf6e867cf12c42411e5a7412f6ec44eee8351665))
|
||||||
|
* add tests that check some of the issues are already fixed ([#905](https://github.com/GreyhavenHQ/reflector/issues/905)) ([b53c8da](https://github.com/GreyhavenHQ/reflector/commit/b53c8da3981c394bdab08504b45d25f62c35495a))
|
||||||
|
|
||||||
|
## [0.38.1](https://github.com/GreyhavenHQ/reflector/compare/v0.38.0...v0.38.1) (2026-03-06)
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* pin hatchet sdk version ([#903](https://github.com/GreyhavenHQ/reflector/issues/903)) ([504ca74](https://github.com/GreyhavenHQ/reflector/commit/504ca74184211eda9020d0b38ba7bd2b55d09991))
|
||||||
|
|
||||||
|
## [0.38.0](https://github.com/GreyhavenHQ/reflector/compare/v0.37.0...v0.38.0) (2026-03-06)
|
||||||
|
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
* 3-mode selfhosted refactoring (--gpu, --cpu, --hosted) + audio token auth fallback ([#896](https://github.com/GreyhavenHQ/reflector/issues/896)) ([a682846](https://github.com/GreyhavenHQ/reflector/commit/a6828466456407c808302e9eb8dc4b4f0614dd6f))
|
||||||
|
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
* improve hatchet workflow reliability ([#900](https://github.com/GreyhavenHQ/reflector/issues/900)) ([c155f66](https://github.com/GreyhavenHQ/reflector/commit/c155f669825e8e2a6e929821a1ef0bd94237dc11))
|
||||||
|
|
||||||
## [0.37.0](https://github.com/GreyhavenHQ/reflector/compare/v0.36.0...v0.37.0) (2026-03-03)
|
## [0.37.0](https://github.com/GreyhavenHQ/reflector/compare/v0.36.0...v0.37.0) (2026-03-03)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
# Self-hosted production Docker Compose — single file for everything.
|
# Self-hosted production Docker Compose — single file for everything.
|
||||||
#
|
#
|
||||||
# Usage: ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
|
# Usage: ./scripts/setup-selfhosted.sh <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--garage] [--caddy]
|
||||||
# or: docker compose -f docker-compose.selfhosted.yml --profile gpu [--profile ollama-gpu] [--profile garage] [--profile caddy] up -d
|
# or: docker compose -f docker-compose.selfhosted.yml [--profile gpu] [--profile ollama-gpu] [--profile garage] [--profile caddy] up -d
|
||||||
#
|
#
|
||||||
# Specialized models (pick ONE — required):
|
# ML processing modes (pick ONE — required):
|
||||||
# --profile gpu NVIDIA GPU for transcription/diarization/translation
|
# --gpu NVIDIA GPU container for transcription/diarization/translation (profile: gpu)
|
||||||
# --profile cpu CPU-only for transcription/diarization/translation
|
# --cpu In-process CPU processing on server/worker (no ML container needed)
|
||||||
|
# --hosted Remote GPU service URL (no ML container needed)
|
||||||
#
|
#
|
||||||
# Local LLM (optional — for summarization/topics):
|
# Local LLM (optional — for summarization/topics):
|
||||||
# --profile ollama-gpu Local Ollama with NVIDIA GPU
|
# --profile ollama-gpu Local Ollama with NVIDIA GPU
|
||||||
@@ -45,16 +46,9 @@ services:
|
|||||||
REDIS_HOST: redis
|
REDIS_HOST: redis
|
||||||
CELERY_BROKER_URL: redis://redis:6379/1
|
CELERY_BROKER_URL: redis://redis:6379/1
|
||||||
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
||||||
# Specialized models via gpu/cpu container (aliased as "transcription")
|
# ML backend config comes from env_file (server/.env), set per-mode by setup script
|
||||||
TRANSCRIPT_BACKEND: modal
|
# HF_TOKEN needed for in-process pyannote diarization (--cpu mode)
|
||||||
TRANSCRIPT_URL: http://transcription:8000
|
HF_TOKEN: ${HF_TOKEN:-}
|
||||||
TRANSCRIPT_MODAL_API_KEY: selfhosted
|
|
||||||
DIARIZATION_BACKEND: modal
|
|
||||||
DIARIZATION_URL: http://transcription:8000
|
|
||||||
TRANSLATION_BACKEND: modal
|
|
||||||
TRANSLATE_URL: http://transcription:8000
|
|
||||||
PADDING_BACKEND: modal
|
|
||||||
PADDING_URL: http://transcription:8000
|
|
||||||
# WebRTC: fixed UDP port range for ICE candidates (mapped above)
|
# WebRTC: fixed UDP port range for ICE candidates (mapped above)
|
||||||
WEBRTC_PORT_RANGE: "51000-51100"
|
WEBRTC_PORT_RANGE: "51000-51100"
|
||||||
depends_on:
|
depends_on:
|
||||||
@@ -79,15 +73,8 @@ services:
|
|||||||
REDIS_HOST: redis
|
REDIS_HOST: redis
|
||||||
CELERY_BROKER_URL: redis://redis:6379/1
|
CELERY_BROKER_URL: redis://redis:6379/1
|
||||||
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
CELERY_RESULT_BACKEND: redis://redis:6379/1
|
||||||
TRANSCRIPT_BACKEND: modal
|
# ML backend config comes from env_file (server/.env), set per-mode by setup script
|
||||||
TRANSCRIPT_URL: http://transcription:8000
|
HF_TOKEN: ${HF_TOKEN:-}
|
||||||
TRANSCRIPT_MODAL_API_KEY: selfhosted
|
|
||||||
DIARIZATION_BACKEND: modal
|
|
||||||
DIARIZATION_URL: http://transcription:8000
|
|
||||||
TRANSLATION_BACKEND: modal
|
|
||||||
TRANSLATE_URL: http://transcription:8000
|
|
||||||
PADDING_BACKEND: modal
|
|
||||||
PADDING_URL: http://transcription:8000
|
|
||||||
depends_on:
|
depends_on:
|
||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
@@ -150,6 +137,7 @@ services:
|
|||||||
postgres:
|
postgres:
|
||||||
image: postgres:17-alpine
|
image: postgres:17-alpine
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
command: ["postgres", "-c", "max_connections=200"]
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_USER: reflector
|
POSTGRES_USER: reflector
|
||||||
POSTGRES_PASSWORD: reflector
|
POSTGRES_PASSWORD: reflector
|
||||||
@@ -165,7 +153,10 @@ services:
|
|||||||
|
|
||||||
# ===========================================================
|
# ===========================================================
|
||||||
# Specialized model containers (transcription, diarization, translation)
|
# Specialized model containers (transcription, diarization, translation)
|
||||||
# Both gpu and cpu get alias "transcription" so server config never changes.
|
# Only the gpu profile is activated by the setup script (--gpu mode).
|
||||||
|
# The cpu service definition is kept for manual/standalone use but is
|
||||||
|
# NOT activated by --cpu mode (which uses in-process local backends).
|
||||||
|
# Both services get alias "transcription" so server config never changes.
|
||||||
# ===========================================================
|
# ===========================================================
|
||||||
|
|
||||||
gpu:
|
gpu:
|
||||||
|
|||||||
@@ -254,15 +254,15 @@ Reflector can run completely offline:
|
|||||||
Control where each step happens:
|
Control where each step happens:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# All local processing
|
# All in-process processing
|
||||||
TRANSCRIPT_BACKEND=local
|
TRANSCRIPT_BACKEND=whisper
|
||||||
DIARIZATION_BACKEND=local
|
DIARIZATION_BACKEND=pyannote
|
||||||
TRANSLATION_BACKEND=local
|
TRANSLATION_BACKEND=marian
|
||||||
|
|
||||||
# Hybrid approach
|
# Hybrid approach
|
||||||
TRANSCRIPT_BACKEND=modal # Fast GPU processing
|
TRANSCRIPT_BACKEND=modal # Fast GPU processing
|
||||||
DIARIZATION_BACKEND=local # Sensitive speaker data
|
DIARIZATION_BACKEND=pyannote # Sensitive speaker data
|
||||||
TRANSLATION_BACKEND=modal # Non-sensitive translation
|
TRANSLATION_BACKEND=modal # Non-sensitive translation
|
||||||
```
|
```
|
||||||
|
|
||||||
### Storage Options
|
### Storage Options
|
||||||
|
|||||||
@@ -57,7 +57,8 @@
|
|||||||
"minimatch@>=9.0.0 <9.0.7": "9.0.7",
|
"minimatch@>=9.0.0 <9.0.7": "9.0.7",
|
||||||
"lodash@<4.17.23": "4.17.23",
|
"lodash@<4.17.23": "4.17.23",
|
||||||
"js-yaml@<4.1.1": "4.1.1",
|
"js-yaml@<4.1.1": "4.1.1",
|
||||||
"gray-matter": "github:jonschlinkert/gray-matter#234163e"
|
"gray-matter": "github:jonschlinkert/gray-matter#234163e",
|
||||||
|
"serialize-javascript": "7.0.4"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
78
docs/pnpm-lock.yaml
generated
78
docs/pnpm-lock.yaml
generated
@@ -11,6 +11,7 @@ overrides:
|
|||||||
lodash@<4.17.23: 4.17.23
|
lodash@<4.17.23: 4.17.23
|
||||||
js-yaml@<4.1.1: 4.1.1
|
js-yaml@<4.1.1: 4.1.1
|
||||||
gray-matter: github:jonschlinkert/gray-matter#234163e
|
gray-matter: github:jonschlinkert/gray-matter#234163e
|
||||||
|
serialize-javascript: 7.0.4
|
||||||
|
|
||||||
importers:
|
importers:
|
||||||
|
|
||||||
@@ -1489,36 +1490,42 @@ packages:
|
|||||||
engines: {node: '>= 10.0.0'}
|
engines: {node: '>= 10.0.0'}
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
'@parcel/watcher-linux-arm-musl@2.5.6':
|
'@parcel/watcher-linux-arm-musl@2.5.6':
|
||||||
resolution: {integrity: sha512-Ve3gUCG57nuUUSyjBq/MAM0CzArtuIOxsBdQ+ftz6ho8n7s1i9E1Nmk/xmP323r2YL0SONs1EuwqBp2u1k5fxg==}
|
resolution: {integrity: sha512-Ve3gUCG57nuUUSyjBq/MAM0CzArtuIOxsBdQ+ftz6ho8n7s1i9E1Nmk/xmP323r2YL0SONs1EuwqBp2u1k5fxg==}
|
||||||
engines: {node: '>= 10.0.0'}
|
engines: {node: '>= 10.0.0'}
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
'@parcel/watcher-linux-arm64-glibc@2.5.6':
|
'@parcel/watcher-linux-arm64-glibc@2.5.6':
|
||||||
resolution: {integrity: sha512-f2g/DT3NhGPdBmMWYoxixqYr3v/UXcmLOYy16Bx0TM20Tchduwr4EaCbmxh1321TABqPGDpS8D/ggOTaljijOA==}
|
resolution: {integrity: sha512-f2g/DT3NhGPdBmMWYoxixqYr3v/UXcmLOYy16Bx0TM20Tchduwr4EaCbmxh1321TABqPGDpS8D/ggOTaljijOA==}
|
||||||
engines: {node: '>= 10.0.0'}
|
engines: {node: '>= 10.0.0'}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
'@parcel/watcher-linux-arm64-musl@2.5.6':
|
'@parcel/watcher-linux-arm64-musl@2.5.6':
|
||||||
resolution: {integrity: sha512-qb6naMDGlbCwdhLj6hgoVKJl2odL34z2sqkC7Z6kzir8b5W65WYDpLB6R06KabvZdgoHI/zxke4b3zR0wAbDTA==}
|
resolution: {integrity: sha512-qb6naMDGlbCwdhLj6hgoVKJl2odL34z2sqkC7Z6kzir8b5W65WYDpLB6R06KabvZdgoHI/zxke4b3zR0wAbDTA==}
|
||||||
engines: {node: '>= 10.0.0'}
|
engines: {node: '>= 10.0.0'}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
'@parcel/watcher-linux-x64-glibc@2.5.6':
|
'@parcel/watcher-linux-x64-glibc@2.5.6':
|
||||||
resolution: {integrity: sha512-kbT5wvNQlx7NaGjzPFu8nVIW1rWqV780O7ZtkjuWaPUgpv2NMFpjYERVi0UYj1msZNyCzGlaCWEtzc+exjMGbQ==}
|
resolution: {integrity: sha512-kbT5wvNQlx7NaGjzPFu8nVIW1rWqV780O7ZtkjuWaPUgpv2NMFpjYERVi0UYj1msZNyCzGlaCWEtzc+exjMGbQ==}
|
||||||
engines: {node: '>= 10.0.0'}
|
engines: {node: '>= 10.0.0'}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
'@parcel/watcher-linux-x64-musl@2.5.6':
|
'@parcel/watcher-linux-x64-musl@2.5.6':
|
||||||
resolution: {integrity: sha512-1JRFeC+h7RdXwldHzTsmdtYR/Ku8SylLgTU/reMuqdVD7CtLwf0VR1FqeprZ0eHQkO0vqsbvFLXUmYm/uNKJBg==}
|
resolution: {integrity: sha512-1JRFeC+h7RdXwldHzTsmdtYR/Ku8SylLgTU/reMuqdVD7CtLwf0VR1FqeprZ0eHQkO0vqsbvFLXUmYm/uNKJBg==}
|
||||||
engines: {node: '>= 10.0.0'}
|
engines: {node: '>= 10.0.0'}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
'@parcel/watcher-win32-arm64@2.5.6':
|
'@parcel/watcher-win32-arm64@2.5.6':
|
||||||
resolution: {integrity: sha512-3ukyebjc6eGlw9yRt678DxVF7rjXatWiHvTXqphZLvo7aC5NdEgFufVwjFfY51ijYEWpXbqF5jtrK275z52D4Q==}
|
resolution: {integrity: sha512-3ukyebjc6eGlw9yRt678DxVF7rjXatWiHvTXqphZLvo7aC5NdEgFufVwjFfY51ijYEWpXbqF5jtrK275z52D4Q==}
|
||||||
@@ -1729,10 +1736,6 @@ packages:
|
|||||||
resolution: {integrity: sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==}
|
resolution: {integrity: sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==}
|
||||||
engines: {node: '>=14.16'}
|
engines: {node: '>=14.16'}
|
||||||
|
|
||||||
'@trysound/sax@0.2.0':
|
|
||||||
resolution: {integrity: sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==}
|
|
||||||
engines: {node: '>=10.13.0'}
|
|
||||||
|
|
||||||
'@types/body-parser@1.19.6':
|
'@types/body-parser@1.19.6':
|
||||||
resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==}
|
resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==}
|
||||||
|
|
||||||
@@ -3010,8 +3013,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
|
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
|
||||||
engines: {node: '>= 4'}
|
engines: {node: '>= 4'}
|
||||||
|
|
||||||
dompurify@3.3.1:
|
dompurify@3.3.2:
|
||||||
resolution: {integrity: sha512-qkdCKzLNtrgPFP1Vo+98FRzJnBRGe4ffyCea9IwHB1fyxPOeNTHpLKYGd4Uk9xvNoH0ZoOjwZxNptyMwqrId1Q==}
|
resolution: {integrity: sha512-6obghkliLdmKa56xdbLOpUZ43pAR6xFy1uOrxBaIDjT+yaRuuybLjGS9eVBoSR/UPU5fq3OXClEHLJNGvbxKpQ==}
|
||||||
|
engines: {node: '>=20'}
|
||||||
|
|
||||||
domutils@2.8.0:
|
domutils@2.8.0:
|
||||||
resolution: {integrity: sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==}
|
resolution: {integrity: sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==}
|
||||||
@@ -3406,8 +3410,8 @@ packages:
|
|||||||
graphlib@2.1.8:
|
graphlib@2.1.8:
|
||||||
resolution: {integrity: sha512-jcLLfkpoVGmH7/InMC/1hIvOPSUh38oJtGhvrOFGzioE1DZ+0YW16RgmOJhHiuWTvGiJQ9Z1Ik43JvkRPRvE+A==}
|
resolution: {integrity: sha512-jcLLfkpoVGmH7/InMC/1hIvOPSUh38oJtGhvrOFGzioE1DZ+0YW16RgmOJhHiuWTvGiJQ9Z1Ik43JvkRPRvE+A==}
|
||||||
|
|
||||||
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e:
|
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec:
|
||||||
resolution: {tarball: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e}
|
resolution: {tarball: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec}
|
||||||
version: 4.0.3
|
version: 4.0.3
|
||||||
engines: {node: '>=6.0'}
|
engines: {node: '>=6.0'}
|
||||||
|
|
||||||
@@ -3599,8 +3603,8 @@ packages:
|
|||||||
immer@11.1.4:
|
immer@11.1.4:
|
||||||
resolution: {integrity: sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw==}
|
resolution: {integrity: sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw==}
|
||||||
|
|
||||||
immutable@5.1.4:
|
immutable@5.1.5:
|
||||||
resolution: {integrity: sha512-p6u1bG3YSnINT5RQmx/yRZBpenIl30kVxkTLDyHLIMk0gict704Q9n+thfDI7lTRm9vXdDYutVzXhzcThxTnXA==}
|
resolution: {integrity: sha512-t7xcm2siw+hlUM68I+UEOK+z84RzmN59as9DZ7P1l0994DKUWV7UXBMQZVxaoMSRQ+PBZbHCOoBt7a2wxOMt+A==}
|
||||||
|
|
||||||
import-fresh@3.3.1:
|
import-fresh@3.3.1:
|
||||||
resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==}
|
resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==}
|
||||||
@@ -5060,9 +5064,6 @@ packages:
|
|||||||
resolution: {integrity: sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==}
|
resolution: {integrity: sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==}
|
||||||
engines: {node: '>=10'}
|
engines: {node: '>=10'}
|
||||||
|
|
||||||
randombytes@2.1.0:
|
|
||||||
resolution: {integrity: sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==}
|
|
||||||
|
|
||||||
range-parser@1.2.0:
|
range-parser@1.2.0:
|
||||||
resolution: {integrity: sha512-kA5WQoNVo4t9lNx2kQNFCxKeBl5IbbSNBl1M/tLkw9WCn+hxNBAW5Qh8gdhs63CJnhjJ2zQWFoqPJP2sK1AV5A==}
|
resolution: {integrity: sha512-kA5WQoNVo4t9lNx2kQNFCxKeBl5IbbSNBl1M/tLkw9WCn+hxNBAW5Qh8gdhs63CJnhjJ2zQWFoqPJP2sK1AV5A==}
|
||||||
engines: {node: '>= 0.6'}
|
engines: {node: '>= 0.6'}
|
||||||
@@ -5385,6 +5386,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==}
|
resolution: {integrity: sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==}
|
||||||
engines: {node: '>=11.0.0'}
|
engines: {node: '>=11.0.0'}
|
||||||
|
|
||||||
|
sax@1.5.0:
|
||||||
|
resolution: {integrity: sha512-21IYA3Q5cQf089Z6tgaUTr7lDAyzoTPx5HRtbhsME8Udispad8dC/+sziTNugOEx54ilvatQ9YCzl4KQLPcRHA==}
|
||||||
|
engines: {node: '>=11.0.0'}
|
||||||
|
|
||||||
scheduler@0.27.0:
|
scheduler@0.27.0:
|
||||||
resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==}
|
resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==}
|
||||||
|
|
||||||
@@ -5435,8 +5440,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==}
|
resolution: {integrity: sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==}
|
||||||
engines: {node: '>= 0.8.0'}
|
engines: {node: '>= 0.8.0'}
|
||||||
|
|
||||||
serialize-javascript@6.0.2:
|
serialize-javascript@7.0.4:
|
||||||
resolution: {integrity: sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==}
|
resolution: {integrity: sha512-DuGdB+Po43Q5Jxwpzt1lhyFSYKryqoNjQSA9M92tyw0lyHIOur+XCalOUe0KTJpyqzT8+fQ5A0Jf7vCx/NKmIg==}
|
||||||
|
engines: {node: '>=20.0.0'}
|
||||||
|
|
||||||
serve-handler@6.1.6:
|
serve-handler@6.1.6:
|
||||||
resolution: {integrity: sha512-x5RL9Y2p5+Sh3D38Fh9i/iQ5ZK+e4xuXRd/pGbM4D13tgo/MGwbttUk8emytcr1YYzBYs+apnUngBDFYfpjPuQ==}
|
resolution: {integrity: sha512-x5RL9Y2p5+Sh3D38Fh9i/iQ5ZK+e4xuXRd/pGbM4D13tgo/MGwbttUk8emytcr1YYzBYs+apnUngBDFYfpjPuQ==}
|
||||||
@@ -5675,8 +5681,8 @@ packages:
|
|||||||
svg-parser@2.0.4:
|
svg-parser@2.0.4:
|
||||||
resolution: {integrity: sha512-e4hG1hRwoOdRb37cIMSgzNsxyzKfayW6VOflrwvR+/bzrkyxY/31WkbgnQpgtrNp1SdpJvpUAGTa/ZoiPNDuRQ==}
|
resolution: {integrity: sha512-e4hG1hRwoOdRb37cIMSgzNsxyzKfayW6VOflrwvR+/bzrkyxY/31WkbgnQpgtrNp1SdpJvpUAGTa/ZoiPNDuRQ==}
|
||||||
|
|
||||||
svgo@3.3.2:
|
svgo@3.3.3:
|
||||||
resolution: {integrity: sha512-OoohrmuUlBs8B8o6MB2Aevn+pRIH9zDALSR+6hhqVfa6fRwG/Qw9VUMSMW9VNg2CFc/MTIfabtdOVl9ODIJjpw==}
|
resolution: {integrity: sha512-+wn7I4p7YgJhHs38k2TNjy1vCfPIfLIJWR5MnCStsN8WuuTcBnRKcMHQLMM2ijxGZmDoZwNv8ipl5aTTen62ng==}
|
||||||
engines: {node: '>=14.0.0'}
|
engines: {node: '>=14.0.0'}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
@@ -8156,7 +8162,7 @@ snapshots:
|
|||||||
fs-extra: 11.3.3
|
fs-extra: 11.3.3
|
||||||
github-slugger: 1.5.0
|
github-slugger: 1.5.0
|
||||||
globby: 11.1.0
|
globby: 11.1.0
|
||||||
gray-matter: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e
|
gray-matter: https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec
|
||||||
jiti: 1.21.7
|
jiti: 1.21.7
|
||||||
js-yaml: 4.1.1
|
js-yaml: 4.1.1
|
||||||
lodash: 4.17.23
|
lodash: 4.17.23
|
||||||
@@ -8732,7 +8738,7 @@ snapshots:
|
|||||||
'@svgr/core': 8.1.0(typescript@5.9.3)
|
'@svgr/core': 8.1.0(typescript@5.9.3)
|
||||||
cosmiconfig: 8.3.6(typescript@5.9.3)
|
cosmiconfig: 8.3.6(typescript@5.9.3)
|
||||||
deepmerge: 4.3.1
|
deepmerge: 4.3.1
|
||||||
svgo: 3.3.2
|
svgo: 3.3.3
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- typescript
|
- typescript
|
||||||
|
|
||||||
@@ -8754,8 +8760,6 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
defer-to-connect: 2.0.1
|
defer-to-connect: 2.0.1
|
||||||
|
|
||||||
'@trysound/sax@0.2.0': {}
|
|
||||||
|
|
||||||
'@types/body-parser@1.19.6':
|
'@types/body-parser@1.19.6':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@types/connect': 3.4.38
|
'@types/connect': 3.4.38
|
||||||
@@ -9670,7 +9674,7 @@ snapshots:
|
|||||||
globby: 13.2.2
|
globby: 13.2.2
|
||||||
normalize-path: 3.0.0
|
normalize-path: 3.0.0
|
||||||
schema-utils: 4.3.3
|
schema-utils: 4.3.3
|
||||||
serialize-javascript: 6.0.2
|
serialize-javascript: 7.0.4
|
||||||
webpack: 5.105.3
|
webpack: 5.105.3
|
||||||
|
|
||||||
core-js-compat@3.48.0:
|
core-js-compat@3.48.0:
|
||||||
@@ -9748,7 +9752,7 @@ snapshots:
|
|||||||
jest-worker: 29.7.0
|
jest-worker: 29.7.0
|
||||||
postcss: 8.5.6
|
postcss: 8.5.6
|
||||||
schema-utils: 4.3.3
|
schema-utils: 4.3.3
|
||||||
serialize-javascript: 6.0.2
|
serialize-javascript: 7.0.4
|
||||||
webpack: 5.105.3
|
webpack: 5.105.3
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
clean-css: 5.3.3
|
clean-css: 5.3.3
|
||||||
@@ -10228,7 +10232,7 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
domelementtype: 2.3.0
|
domelementtype: 2.3.0
|
||||||
|
|
||||||
dompurify@3.3.1:
|
dompurify@3.3.2:
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
'@types/trusted-types': 2.0.7
|
'@types/trusted-types': 2.0.7
|
||||||
|
|
||||||
@@ -10654,7 +10658,7 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
lodash: 4.17.23
|
lodash: 4.17.23
|
||||||
|
|
||||||
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e:
|
gray-matter@https://codeload.github.com/jonschlinkert/gray-matter/tar.gz/234163e317c87fe031e9368ffabde9c9149ce3ec:
|
||||||
dependencies:
|
dependencies:
|
||||||
js-yaml: 4.1.1
|
js-yaml: 4.1.1
|
||||||
kind-of: 6.0.3
|
kind-of: 6.0.3
|
||||||
@@ -10933,7 +10937,7 @@ snapshots:
|
|||||||
|
|
||||||
immer@11.1.4: {}
|
immer@11.1.4: {}
|
||||||
|
|
||||||
immutable@5.1.4: {}
|
immutable@5.1.5: {}
|
||||||
|
|
||||||
import-fresh@3.3.1:
|
import-fresh@3.3.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -11466,7 +11470,7 @@ snapshots:
|
|||||||
d3-sankey: 0.12.3
|
d3-sankey: 0.12.3
|
||||||
dagre-d3-es: 7.0.13
|
dagre-d3-es: 7.0.13
|
||||||
dayjs: 1.11.19
|
dayjs: 1.11.19
|
||||||
dompurify: 3.3.1
|
dompurify: 3.3.2
|
||||||
katex: 0.16.33
|
katex: 0.16.33
|
||||||
khroma: 2.1.0
|
khroma: 2.1.0
|
||||||
lodash-es: 4.17.23
|
lodash-es: 4.17.23
|
||||||
@@ -12598,7 +12602,7 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
postcss: 8.5.6
|
postcss: 8.5.6
|
||||||
postcss-value-parser: 4.2.0
|
postcss-value-parser: 4.2.0
|
||||||
svgo: 3.3.2
|
svgo: 3.3.3
|
||||||
|
|
||||||
postcss-unique-selectors@6.0.4(postcss@8.5.6):
|
postcss-unique-selectors@6.0.4(postcss@8.5.6):
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -12709,10 +12713,6 @@ snapshots:
|
|||||||
|
|
||||||
quick-lru@5.1.1: {}
|
quick-lru@5.1.1: {}
|
||||||
|
|
||||||
randombytes@2.1.0:
|
|
||||||
dependencies:
|
|
||||||
safe-buffer: 5.2.1
|
|
||||||
|
|
||||||
range-parser@1.2.0: {}
|
range-parser@1.2.0: {}
|
||||||
|
|
||||||
range-parser@1.2.1: {}
|
range-parser@1.2.1: {}
|
||||||
@@ -13091,13 +13091,15 @@ snapshots:
|
|||||||
sass@1.97.3:
|
sass@1.97.3:
|
||||||
dependencies:
|
dependencies:
|
||||||
chokidar: 4.0.3
|
chokidar: 4.0.3
|
||||||
immutable: 5.1.4
|
immutable: 5.1.5
|
||||||
source-map-js: 1.2.1
|
source-map-js: 1.2.1
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
'@parcel/watcher': 2.5.6
|
'@parcel/watcher': 2.5.6
|
||||||
|
|
||||||
sax@1.4.4: {}
|
sax@1.4.4: {}
|
||||||
|
|
||||||
|
sax@1.5.0: {}
|
||||||
|
|
||||||
scheduler@0.27.0: {}
|
scheduler@0.27.0: {}
|
||||||
|
|
||||||
schema-dts@1.1.5: {}
|
schema-dts@1.1.5: {}
|
||||||
@@ -13157,9 +13159,7 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
serialize-javascript@6.0.2:
|
serialize-javascript@7.0.4: {}
|
||||||
dependencies:
|
|
||||||
randombytes: 2.1.0
|
|
||||||
|
|
||||||
serve-handler@6.1.6:
|
serve-handler@6.1.6:
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -13444,15 +13444,15 @@ snapshots:
|
|||||||
|
|
||||||
svg-parser@2.0.4: {}
|
svg-parser@2.0.4: {}
|
||||||
|
|
||||||
svgo@3.3.2:
|
svgo@3.3.3:
|
||||||
dependencies:
|
dependencies:
|
||||||
'@trysound/sax': 0.2.0
|
|
||||||
commander: 7.2.0
|
commander: 7.2.0
|
||||||
css-select: 5.2.2
|
css-select: 5.2.2
|
||||||
css-tree: 2.3.1
|
css-tree: 2.3.1
|
||||||
css-what: 6.2.2
|
css-what: 6.2.2
|
||||||
csso: 5.0.5
|
csso: 5.0.5
|
||||||
picocolors: 1.1.1
|
picocolors: 1.1.1
|
||||||
|
sax: 1.5.0
|
||||||
|
|
||||||
swagger2openapi@7.0.8:
|
swagger2openapi@7.0.8:
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -13477,7 +13477,7 @@ snapshots:
|
|||||||
'@jridgewell/trace-mapping': 0.3.31
|
'@jridgewell/trace-mapping': 0.3.31
|
||||||
jest-worker: 27.5.1
|
jest-worker: 27.5.1
|
||||||
schema-utils: 4.3.3
|
schema-utils: 4.3.3
|
||||||
serialize-javascript: 6.0.2
|
serialize-javascript: 7.0.4
|
||||||
terser: 5.46.0
|
terser: 5.46.0
|
||||||
webpack: 5.105.3
|
webpack: 5.105.3
|
||||||
|
|
||||||
|
|||||||
@@ -53,9 +53,12 @@ cd reflector
|
|||||||
# Same but without a domain (self-signed cert, access via IP):
|
# Same but without a domain (self-signed cert, access via IP):
|
||||||
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
|
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
|
||||||
|
|
||||||
# CPU-only (same, but slower):
|
# CPU-only (in-process ML, no GPU container):
|
||||||
./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
|
./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
|
||||||
|
|
||||||
|
# Remote GPU service (your own hosted GPU, no local ML container):
|
||||||
|
./scripts/setup-selfhosted.sh --hosted --garage --caddy
|
||||||
|
|
||||||
# With password authentication (single admin user):
|
# With password authentication (single admin user):
|
||||||
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --password mysecretpass
|
./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --password mysecretpass
|
||||||
|
|
||||||
@@ -65,14 +68,15 @@ cd reflector
|
|||||||
|
|
||||||
That's it. The script generates env files, secrets, starts all containers, waits for health checks, and prints the URL.
|
That's it. The script generates env files, secrets, starts all containers, waits for health checks, and prints the URL.
|
||||||
|
|
||||||
## Specialized Models (Required)
|
## ML Processing Modes (Required)
|
||||||
|
|
||||||
Pick `--gpu` or `--cpu`. This determines how **transcription, diarization, translation, and audio padding** run:
|
Pick `--gpu`, `--cpu`, or `--hosted`. This determines how **transcription, diarization, translation, and audio padding** run:
|
||||||
|
|
||||||
| Flag | What it does | Requires |
|
| Flag | What it does | Requires |
|
||||||
|------|-------------|----------|
|
|------|-------------|----------|
|
||||||
| `--gpu` | NVIDIA GPU acceleration for ML models | NVIDIA GPU + drivers + `nvidia-container-toolkit` |
|
| `--gpu` | NVIDIA GPU container for ML models | NVIDIA GPU + drivers + `nvidia-container-toolkit` |
|
||||||
| `--cpu` | CPU-only (slower but works without GPU) | 8+ cores, 32GB+ RAM recommended |
|
| `--cpu` | In-process CPU processing on server/worker (no ML container) | 8+ cores, 16GB+ RAM (32GB recommended for large files) |
|
||||||
|
| `--hosted` | Remote GPU service URL (no local ML container) | A running GPU service instance (e.g. `gpu/self_hosted/`) |
|
||||||
|
|
||||||
## Local LLM (Optional)
|
## Local LLM (Optional)
|
||||||
|
|
||||||
@@ -130,9 +134,11 @@ Browse all available models at https://ollama.com/library.
|
|||||||
|
|
||||||
- **`--gpu --ollama-gpu`**: Best for servers with NVIDIA GPU. Fully self-contained, no external API keys needed.
|
- **`--gpu --ollama-gpu`**: Best for servers with NVIDIA GPU. Fully self-contained, no external API keys needed.
|
||||||
- **`--cpu --ollama-cpu`**: No GPU available but want everything self-contained. Slower but works.
|
- **`--cpu --ollama-cpu`**: No GPU available but want everything self-contained. Slower but works.
|
||||||
|
- **`--hosted --ollama-cpu`**: Remote GPU for ML, local CPU for LLM. Great when you have a separate GPU server.
|
||||||
- **`--gpu --ollama-cpu`**: GPU for transcription, CPU for LLM. Saves GPU VRAM for ML models.
|
- **`--gpu --ollama-cpu`**: GPU for transcription, CPU for LLM. Saves GPU VRAM for ML models.
|
||||||
- **`--gpu`**: Have NVIDIA GPU but prefer a cloud LLM (faster/better summaries with GPT-4, Claude, etc.).
|
- **`--gpu`**: Have NVIDIA GPU but prefer a cloud LLM (faster/better summaries with GPT-4, Claude, etc.).
|
||||||
- **`--cpu`**: No GPU, prefer cloud LLM. Slowest transcription but best summary quality.
|
- **`--cpu`**: No GPU, prefer cloud LLM. Slowest transcription but best summary quality.
|
||||||
|
- **`--hosted`**: Remote GPU, cloud LLM. No local ML at all.
|
||||||
|
|
||||||
## Other Optional Flags
|
## Other Optional Flags
|
||||||
|
|
||||||
@@ -160,7 +166,7 @@ Without `--caddy` or `--domain`, no ports are exposed. Point your own reverse pr
|
|||||||
4. **Generate `www/.env`** — Auto-detects server IP, sets URLs
|
4. **Generate `www/.env`** — Auto-detects server IP, sets URLs
|
||||||
5. **Storage setup** — Either initializes Garage (bucket, keys, permissions) or prompts for external S3 credentials
|
5. **Storage setup** — Either initializes Garage (bucket, keys, permissions) or prompts for external S3 credentials
|
||||||
6. **Caddyfile** — Generates domain-specific (Let's Encrypt) or IP-specific (self-signed) configuration
|
6. **Caddyfile** — Generates domain-specific (Let's Encrypt) or IP-specific (self-signed) configuration
|
||||||
7. **Build & start** — Always builds GPU/CPU model image from source. With `--build`, also builds backend and frontend from source; otherwise pulls prebuilt images from the registry
|
7. **Build & start** — For `--gpu`, builds the GPU model image from source. For `--cpu` and `--hosted`, no ML container is built. With `--build`, also builds backend and frontend from source; otherwise pulls prebuilt images from the registry
|
||||||
8. **Auto-detects video platforms** — If `DAILY_API_KEY` is found in `server/.env`, generates `.env.hatchet` (dashboard URL/cookie config), starts Hatchet workflow engine, and generates an API token. If any video platform is configured, enables the Rooms feature
|
8. **Auto-detects video platforms** — If `DAILY_API_KEY` is found in `server/.env`, generates `.env.hatchet` (dashboard URL/cookie config), starts Hatchet workflow engine, and generates an API token. If any video platform is configured, enables the Rooms feature
|
||||||
9. **Health checks** — Waits for each service, pulls Ollama model if needed, warns about missing LLM config
|
9. **Health checks** — Waits for each service, pulls Ollama model if needed, warns about missing LLM config
|
||||||
|
|
||||||
@@ -181,7 +187,7 @@ Without `--caddy` or `--domain`, no ports are exposed. Point your own reverse pr
|
|||||||
| `ADMIN_PASSWORD_HASH` | PBKDF2 hash for password auth | *(unset)* |
|
| `ADMIN_PASSWORD_HASH` | PBKDF2 hash for password auth | *(unset)* |
|
||||||
| `WEBRTC_HOST` | IP advertised in WebRTC ICE candidates | Auto-detected (server IP) |
|
| `WEBRTC_HOST` | IP advertised in WebRTC ICE candidates | Auto-detected (server IP) |
|
||||||
| `TRANSCRIPT_URL` | Specialized model endpoint | `http://transcription:8000` |
|
| `TRANSCRIPT_URL` | Specialized model endpoint | `http://transcription:8000` |
|
||||||
| `PADDING_BACKEND` | Audio padding backend (`local` or `modal`) | `modal` (selfhosted), `local` (default) |
|
| `PADDING_BACKEND` | Audio padding backend (`pyav` or `modal`) | `modal` (selfhosted), `pyav` (default) |
|
||||||
| `PADDING_URL` | Audio padding endpoint (when `PADDING_BACKEND=modal`) | `http://transcription:8000` |
|
| `PADDING_URL` | Audio padding endpoint (when `PADDING_BACKEND=modal`) | `http://transcription:8000` |
|
||||||
| `LLM_URL` | OpenAI-compatible LLM endpoint | Auto-set for Ollama modes |
|
| `LLM_URL` | OpenAI-compatible LLM endpoint | Auto-set for Ollama modes |
|
||||||
| `LLM_API_KEY` | LLM API key | `not-needed` for Ollama |
|
| `LLM_API_KEY` | LLM API key | `not-needed` for Ollama |
|
||||||
@@ -604,10 +610,9 @@ The setup script is idempotent — it won't overwrite existing secrets or env va
|
|||||||
│ │ │
|
│ │ │
|
||||||
v v v
|
v v v
|
||||||
┌───────────┐ ┌─────────┐ ┌─────────┐
|
┌───────────┐ ┌─────────┐ ┌─────────┐
|
||||||
│transcription│ │postgres │ │ redis │
|
│ ML models │ │postgres │ │ redis │
|
||||||
│(gpu/cpu) │ │ :5432 │ │ :6379 │
|
│ (varies) │ │ :5432 │ │ :6379 │
|
||||||
│ :8000 │ └─────────┘ └─────────┘
|
└───────────┘ └─────────┘ └─────────┘
|
||||||
└───────────┘
|
|
||||||
│
|
│
|
||||||
┌─────┴─────┐ ┌─────────┐
|
┌─────┴─────┐ ┌─────────┐
|
||||||
│ ollama │ │ garage │
|
│ ollama │ │ garage │
|
||||||
@@ -622,6 +627,11 @@ The setup script is idempotent — it won't overwrite existing secrets or env va
|
|||||||
│ │ :8888 │──│ -cpu / -llm │ │
|
│ │ :8888 │──│ -cpu / -llm │ │
|
||||||
│ └─────────┘ └───────────────┘ │
|
│ └─────────┘ └───────────────┘ │
|
||||||
└───────────────────────────────────┘
|
└───────────────────────────────────┘
|
||||||
|
|
||||||
|
ML models box varies by mode:
|
||||||
|
--gpu: Local GPU container (transcription:8000)
|
||||||
|
--cpu: In-process on server/worker (no container)
|
||||||
|
--hosted: Remote GPU service (user URL)
|
||||||
```
|
```
|
||||||
|
|
||||||
All services communicate over Docker's internal network. Only Caddy (if enabled) exposes ports to the internet. Hatchet services are only started when `DAILY_API_KEY` is configured.
|
All services communicate over Docker's internal network. Only Caddy (if enabled) exposes ports to the internet. Hatchet services are only started when `DAILY_API_KEY` is configured.
|
||||||
|
|||||||
@@ -11,10 +11,11 @@ dependencies = [
|
|||||||
"faster-whisper>=1.1.0",
|
"faster-whisper>=1.1.0",
|
||||||
"librosa==0.10.1",
|
"librosa==0.10.1",
|
||||||
"numpy<2",
|
"numpy<2",
|
||||||
"silero-vad==5.1.0",
|
"silero-vad==5.1.2",
|
||||||
"transformers>=4.35.0",
|
"transformers>=4.35.0",
|
||||||
"sentencepiece",
|
"sentencepiece",
|
||||||
"pyannote.audio==3.1.0",
|
"pyannote.audio==3.4.0",
|
||||||
|
"pytorch-lightning<2.6",
|
||||||
"torchaudio>=2.3.0",
|
"torchaudio>=2.3.0",
|
||||||
"av>=13.1.0",
|
"av>=13.1.0",
|
||||||
]
|
]
|
||||||
|
|||||||
18
gpu/self_hosted/uv.lock
generated
18
gpu/self_hosted/uv.lock
generated
@@ -1742,7 +1742,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyannote-audio"
|
name = "pyannote-audio"
|
||||||
version = "3.1.0"
|
version = "3.4.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "asteroid-filterbanks" },
|
{ name = "asteroid-filterbanks" },
|
||||||
@@ -1765,9 +1765,9 @@ dependencies = [
|
|||||||
{ name = "torchaudio" },
|
{ name = "torchaudio" },
|
||||||
{ name = "torchmetrics" },
|
{ name = "torchmetrics" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/ad/55/7253267c35e2aa9188b1d86cba121eb5bdd91ed12d3194488625a008cae7/pyannote.audio-3.1.0.tar.gz", hash = "sha256:da04705443d3b74607e034d3ca88f8b572c7e9672dd9a4199cab65a0dbc33fad", size = 14812058, upload-time = "2023-11-16T12:26:38.939Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/ec/1e/efe9619c38f1281ddf21640654d8ea9e3f67c459b76f78657b26d8557bbe/pyannote_audio-3.4.0.tar.gz", hash = "sha256:d523d883cb8d37cb6daf99f3ba83f9138bb193646ad71e6eae7deb89d8ddd642", size = 804850, upload-time = "2025-09-09T07:04:51.17Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/a1/37/158859ce4c45b5ba2dca40b53b0c10d36f935b7f6d4e737298397167c8b1/pyannote.audio-3.1.0-py2.py3-none-any.whl", hash = "sha256:66ab485728c6e141760e80555cb7a083e7be824cd528cc79b9e6f7d6421a91ae", size = 208592, upload-time = "2023-11-16T12:26:36.726Z" },
|
{ url = "https://files.pythonhosted.org/packages/79/13/620c6f711b723653092fd063bfee82a6af5ea3a4d3c42efc53ce623a7f4d/pyannote_audio-3.4.0-py2.py3-none-any.whl", hash = "sha256:36e38f058059f46da3478dda581cda53d9d85a21173a3e70bbdbc3ba93b5e1b7", size = 897789, upload-time = "2025-09-09T07:04:49.464Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2075,6 +2075,7 @@ dependencies = [
|
|||||||
{ name = "librosa" },
|
{ name = "librosa" },
|
||||||
{ name = "numpy" },
|
{ name = "numpy" },
|
||||||
{ name = "pyannote-audio" },
|
{ name = "pyannote-audio" },
|
||||||
|
{ name = "pytorch-lightning" },
|
||||||
{ name = "sentencepiece" },
|
{ name = "sentencepiece" },
|
||||||
{ name = "silero-vad" },
|
{ name = "silero-vad" },
|
||||||
{ name = "torch" },
|
{ name = "torch" },
|
||||||
@@ -2090,9 +2091,10 @@ requires-dist = [
|
|||||||
{ name = "faster-whisper", specifier = ">=1.1.0" },
|
{ name = "faster-whisper", specifier = ">=1.1.0" },
|
||||||
{ name = "librosa", specifier = "==0.10.1" },
|
{ name = "librosa", specifier = "==0.10.1" },
|
||||||
{ name = "numpy", specifier = "<2" },
|
{ name = "numpy", specifier = "<2" },
|
||||||
{ name = "pyannote-audio", specifier = "==3.1.0" },
|
{ name = "pyannote-audio", specifier = "==3.4.0" },
|
||||||
|
{ name = "pytorch-lightning", specifier = "<2.6" },
|
||||||
{ name = "sentencepiece" },
|
{ name = "sentencepiece" },
|
||||||
{ name = "silero-vad", specifier = "==5.1.0" },
|
{ name = "silero-vad", specifier = "==5.1.2" },
|
||||||
{ name = "torch", specifier = ">=2.3.0" },
|
{ name = "torch", specifier = ">=2.3.0" },
|
||||||
{ name = "torchaudio", specifier = ">=2.3.0" },
|
{ name = "torchaudio", specifier = ">=2.3.0" },
|
||||||
{ name = "transformers", specifier = ">=4.35.0" },
|
{ name = "transformers", specifier = ">=4.35.0" },
|
||||||
@@ -2472,16 +2474,16 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "silero-vad"
|
name = "silero-vad"
|
||||||
version = "5.1"
|
version = "5.1.2"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "onnxruntime" },
|
{ name = "onnxruntime" },
|
||||||
{ name = "torch" },
|
{ name = "torch" },
|
||||||
{ name = "torchaudio" },
|
{ name = "torchaudio" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/7c/5d/b912e45d21b8b61859a552554893222d2cdebfd0f9afa7e8ba69c7a3441a/silero_vad-5.1.tar.gz", hash = "sha256:c644275ba5df06cee596cc050ba0bd1e0f5237d1abfa44d58dd4618f6e77434d", size = 3996829, upload-time = "2024-07-09T13:19:24.181Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/b1/b4/d0311b2e6220a11f8f4699f4a278cb088131573286cdfe804c87c7eb5123/silero_vad-5.1.2.tar.gz", hash = "sha256:c442971160026d2d7aa0ad83f0c7ee86c89797a65289fe625c8ea59fc6fb828d", size = 5098526, upload-time = "2024-10-09T09:50:47.019Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/0e/be/0fdbc72030b93d6f55107490d5d2185ddf0dbabdc921f589649d3e92ccd5/silero_vad-5.1-py3-none-any.whl", hash = "sha256:ecb50b484f538f7a962ce5cd3c07120d9db7b9d5a0c5861ccafe459856f22c8f", size = 3939986, upload-time = "2024-07-09T13:19:21.383Z" },
|
{ url = "https://files.pythonhosted.org/packages/98/f7/5ae11d13fbb733cd3bfd7ff1c3a3902e6f55437df4b72307c1f168146268/silero_vad-5.1.2-py3-none-any.whl", hash = "sha256:93b41953d7774b165407fda6b533c119c5803864e367d5034dc626c82cfdf661", size = 5026737, upload-time = "2024-10-09T09:50:44.355Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -4,11 +4,12 @@
|
|||||||
# Single script to configure and launch everything on one server.
|
# Single script to configure and launch everything on one server.
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# ./scripts/setup-selfhosted.sh <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASSWORD] [--build]
|
# ./scripts/setup-selfhosted.sh <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASSWORD] [--build]
|
||||||
#
|
#
|
||||||
# Specialized models (pick ONE — required):
|
# ML processing modes (pick ONE — required):
|
||||||
# --gpu NVIDIA GPU for transcription/diarization/translation
|
# --gpu NVIDIA GPU container for transcription/diarization/translation
|
||||||
# --cpu CPU-only for transcription/diarization/translation (slower)
|
# --cpu In-process CPU processing (no ML container, slower)
|
||||||
|
# --hosted Remote GPU service URL (no ML container)
|
||||||
#
|
#
|
||||||
# Local LLM (optional — for summarization & topic detection):
|
# Local LLM (optional — for summarization & topic detection):
|
||||||
# --ollama-gpu Local Ollama with NVIDIA GPU acceleration
|
# --ollama-gpu Local Ollama with NVIDIA GPU acceleration
|
||||||
@@ -29,6 +30,7 @@
|
|||||||
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
|
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy
|
||||||
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com
|
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com
|
||||||
# ./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
|
# ./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
|
||||||
|
# ./scripts/setup-selfhosted.sh --hosted --garage --caddy
|
||||||
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy
|
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy
|
||||||
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy --password mysecretpass
|
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy --password mysecretpass
|
||||||
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy
|
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy
|
||||||
@@ -183,11 +185,14 @@ for i in "${!ARGS[@]}"; do
|
|||||||
arg="${ARGS[$i]}"
|
arg="${ARGS[$i]}"
|
||||||
case "$arg" in
|
case "$arg" in
|
||||||
--gpu)
|
--gpu)
|
||||||
[[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; }
|
[[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu, --cpu, and --hosted. Pick one."; exit 1; }
|
||||||
MODEL_MODE="gpu" ;;
|
MODEL_MODE="gpu" ;;
|
||||||
--cpu)
|
--cpu)
|
||||||
[[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu and --cpu. Pick one."; exit 1; }
|
[[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu, --cpu, and --hosted. Pick one."; exit 1; }
|
||||||
MODEL_MODE="cpu" ;;
|
MODEL_MODE="cpu" ;;
|
||||||
|
--hosted)
|
||||||
|
[[ -n "$MODEL_MODE" ]] && { err "Cannot combine --gpu, --cpu, and --hosted. Pick one."; exit 1; }
|
||||||
|
MODEL_MODE="hosted" ;;
|
||||||
--ollama-gpu)
|
--ollama-gpu)
|
||||||
[[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. Pick one."; exit 1; }
|
[[ -n "$OLLAMA_MODE" ]] && { err "Cannot combine --ollama-gpu and --ollama-cpu. Pick one."; exit 1; }
|
||||||
OLLAMA_MODE="ollama-gpu" ;;
|
OLLAMA_MODE="ollama-gpu" ;;
|
||||||
@@ -224,20 +229,21 @@ for i in "${!ARGS[@]}"; do
|
|||||||
SKIP_NEXT=true ;;
|
SKIP_NEXT=true ;;
|
||||||
*)
|
*)
|
||||||
err "Unknown argument: $arg"
|
err "Unknown argument: $arg"
|
||||||
err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASS] [--build]"
|
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASS] [--build]"
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
if [[ -z "$MODEL_MODE" ]]; then
|
if [[ -z "$MODEL_MODE" ]]; then
|
||||||
err "No model mode specified. You must choose --gpu or --cpu."
|
err "No model mode specified. You must choose --gpu, --cpu, or --hosted."
|
||||||
err ""
|
err ""
|
||||||
err "Usage: $0 <--gpu|--cpu> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASS] [--build]"
|
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--password PASS] [--build]"
|
||||||
err ""
|
err ""
|
||||||
err "Specialized models (required):"
|
err "ML processing modes (required):"
|
||||||
err " --gpu NVIDIA GPU for transcription/diarization/translation"
|
err " --gpu NVIDIA GPU container for transcription/diarization/translation"
|
||||||
err " --cpu CPU-only (slower but works without GPU)"
|
err " --cpu In-process CPU processing (no ML container, slower)"
|
||||||
|
err " --hosted Remote GPU service URL (no ML container)"
|
||||||
err ""
|
err ""
|
||||||
err "Local LLM (optional):"
|
err "Local LLM (optional):"
|
||||||
err " --ollama-gpu Local Ollama with GPU (for summarization/topics)"
|
err " --ollama-gpu Local Ollama with GPU (for summarization/topics)"
|
||||||
@@ -255,7 +261,9 @@ if [[ -z "$MODEL_MODE" ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Build profiles list — one profile per feature
|
# Build profiles list — one profile per feature
|
||||||
COMPOSE_PROFILES=("$MODEL_MODE")
|
# Only --gpu needs a compose profile; --cpu and --hosted use in-process/remote backends
|
||||||
|
COMPOSE_PROFILES=()
|
||||||
|
[[ "$MODEL_MODE" == "gpu" ]] && COMPOSE_PROFILES+=("gpu")
|
||||||
[[ -n "$OLLAMA_MODE" ]] && COMPOSE_PROFILES+=("$OLLAMA_MODE")
|
[[ -n "$OLLAMA_MODE" ]] && COMPOSE_PROFILES+=("$OLLAMA_MODE")
|
||||||
[[ "$USE_GARAGE" == "true" ]] && COMPOSE_PROFILES+=("garage")
|
[[ "$USE_GARAGE" == "true" ]] && COMPOSE_PROFILES+=("garage")
|
||||||
[[ "$USE_CADDY" == "true" ]] && COMPOSE_PROFILES+=("caddy")
|
[[ "$USE_CADDY" == "true" ]] && COMPOSE_PROFILES+=("caddy")
|
||||||
@@ -422,43 +430,102 @@ step_server_env() {
|
|||||||
env_set "$SERVER_ENV" "WEBRTC_HOST" "$PRIMARY_IP"
|
env_set "$SERVER_ENV" "WEBRTC_HOST" "$PRIMARY_IP"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Specialized models (always via gpu/cpu container aliased as "transcription")
|
# Specialized models — backend configuration per mode
|
||||||
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
|
|
||||||
env_set "$SERVER_ENV" "TRANSCRIPT_URL" "http://transcription:8000"
|
|
||||||
env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "selfhosted"
|
|
||||||
env_set "$SERVER_ENV" "DIARIZATION_ENABLED" "true"
|
env_set "$SERVER_ENV" "DIARIZATION_ENABLED" "true"
|
||||||
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
|
case "$MODEL_MODE" in
|
||||||
env_set "$SERVER_ENV" "DIARIZATION_URL" "http://transcription:8000"
|
gpu)
|
||||||
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
|
# GPU container aliased as "transcription" on docker network
|
||||||
env_set "$SERVER_ENV" "TRANSLATE_URL" "http://transcription:8000"
|
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
|
||||||
env_set "$SERVER_ENV" "PADDING_BACKEND" "modal"
|
env_set "$SERVER_ENV" "TRANSCRIPT_URL" "http://transcription:8000"
|
||||||
env_set "$SERVER_ENV" "PADDING_URL" "http://transcription:8000"
|
env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "selfhosted"
|
||||||
|
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
|
||||||
|
env_set "$SERVER_ENV" "DIARIZATION_URL" "http://transcription:8000"
|
||||||
|
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
|
||||||
|
env_set "$SERVER_ENV" "TRANSLATE_URL" "http://transcription:8000"
|
||||||
|
env_set "$SERVER_ENV" "PADDING_BACKEND" "modal"
|
||||||
|
env_set "$SERVER_ENV" "PADDING_URL" "http://transcription:8000"
|
||||||
|
ok "ML backends: GPU container (modal)"
|
||||||
|
;;
|
||||||
|
cpu)
|
||||||
|
# In-process backends — no ML service container needed
|
||||||
|
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "whisper"
|
||||||
|
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "pyannote"
|
||||||
|
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "marian"
|
||||||
|
env_set "$SERVER_ENV" "PADDING_BACKEND" "pyav"
|
||||||
|
ok "ML backends: in-process CPU (whisper/pyannote/marian/pyav)"
|
||||||
|
;;
|
||||||
|
hosted)
|
||||||
|
# Remote GPU service — user provides URL
|
||||||
|
local gpu_url=""
|
||||||
|
if env_has_key "$SERVER_ENV" "TRANSCRIPT_URL"; then
|
||||||
|
gpu_url=$(env_get "$SERVER_ENV" "TRANSCRIPT_URL")
|
||||||
|
fi
|
||||||
|
if [[ -z "$gpu_url" ]] && [[ -t 0 ]]; then
|
||||||
|
echo ""
|
||||||
|
info "Enter the URL of your remote GPU service (e.g. https://gpu.example.com)"
|
||||||
|
read -rp " GPU service URL: " gpu_url
|
||||||
|
fi
|
||||||
|
if [[ -z "$gpu_url" ]]; then
|
||||||
|
err "GPU service URL required for --hosted mode."
|
||||||
|
err "Set TRANSCRIPT_URL in server/.env or provide it interactively."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
|
||||||
|
env_set "$SERVER_ENV" "TRANSCRIPT_URL" "$gpu_url"
|
||||||
|
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
|
||||||
|
env_set "$SERVER_ENV" "DIARIZATION_URL" "$gpu_url"
|
||||||
|
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
|
||||||
|
env_set "$SERVER_ENV" "TRANSLATE_URL" "$gpu_url"
|
||||||
|
env_set "$SERVER_ENV" "PADDING_BACKEND" "modal"
|
||||||
|
env_set "$SERVER_ENV" "PADDING_URL" "$gpu_url"
|
||||||
|
# API key for remote service
|
||||||
|
local gpu_api_key=""
|
||||||
|
if env_has_key "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY"; then
|
||||||
|
gpu_api_key=$(env_get "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY")
|
||||||
|
fi
|
||||||
|
if [[ -z "$gpu_api_key" ]] && [[ -t 0 ]]; then
|
||||||
|
read -rp " GPU service API key (or Enter to skip): " gpu_api_key
|
||||||
|
fi
|
||||||
|
if [[ -n "$gpu_api_key" ]]; then
|
||||||
|
env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "$gpu_api_key"
|
||||||
|
fi
|
||||||
|
ok "ML backends: remote hosted ($gpu_url)"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
# HuggingFace token for gated models (pyannote diarization)
|
# HuggingFace token for gated models (pyannote diarization)
|
||||||
# Written to root .env so docker compose picks it up for gpu/cpu containers
|
# --gpu: written to root .env (docker compose passes to GPU container)
|
||||||
local root_env="$ROOT_DIR/.env"
|
# --cpu: written to both root .env and server/.env (in-process pyannote needs it)
|
||||||
local current_hf_token="${HF_TOKEN:-}"
|
# --hosted: not needed (remote service handles its own auth)
|
||||||
if [[ -f "$root_env" ]] && env_has_key "$root_env" "HF_TOKEN"; then
|
if [[ "$MODEL_MODE" != "hosted" ]]; then
|
||||||
current_hf_token=$(env_get "$root_env" "HF_TOKEN")
|
local root_env="$ROOT_DIR/.env"
|
||||||
fi
|
local current_hf_token="${HF_TOKEN:-}"
|
||||||
if [[ -z "$current_hf_token" ]]; then
|
if [[ -f "$root_env" ]] && env_has_key "$root_env" "HF_TOKEN"; then
|
||||||
echo ""
|
current_hf_token=$(env_get "$root_env" "HF_TOKEN")
|
||||||
warn "HF_TOKEN not set. Diarization will use a public model fallback."
|
fi
|
||||||
warn "For best results, get a token at https://huggingface.co/settings/tokens"
|
if [[ -z "$current_hf_token" ]]; then
|
||||||
warn "and accept pyannote licenses at https://huggingface.co/pyannote/speaker-diarization-3.1"
|
echo ""
|
||||||
if [[ -t 0 ]]; then
|
warn "HF_TOKEN not set. Diarization will use a public model fallback."
|
||||||
read -rp " HuggingFace token (or press Enter to skip): " current_hf_token
|
warn "For best results, get a token at https://huggingface.co/settings/tokens"
|
||||||
|
warn "and accept pyannote licenses at https://huggingface.co/pyannote/speaker-diarization-3.1"
|
||||||
|
if [[ -t 0 ]]; then
|
||||||
|
read -rp " HuggingFace token (or press Enter to skip): " current_hf_token
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if [[ -n "$current_hf_token" ]]; then
|
||||||
|
touch "$root_env"
|
||||||
|
env_set "$root_env" "HF_TOKEN" "$current_hf_token"
|
||||||
|
export HF_TOKEN="$current_hf_token"
|
||||||
|
# In CPU mode, server process needs HF_TOKEN directly
|
||||||
|
if [[ "$MODEL_MODE" == "cpu" ]]; then
|
||||||
|
env_set "$SERVER_ENV" "HF_TOKEN" "$current_hf_token"
|
||||||
|
fi
|
||||||
|
ok "HF_TOKEN configured"
|
||||||
|
else
|
||||||
|
touch "$root_env"
|
||||||
|
env_set "$root_env" "HF_TOKEN" ""
|
||||||
|
ok "HF_TOKEN skipped (using public model fallback)"
|
||||||
fi
|
fi
|
||||||
fi
|
|
||||||
if [[ -n "$current_hf_token" ]]; then
|
|
||||||
touch "$root_env"
|
|
||||||
env_set "$root_env" "HF_TOKEN" "$current_hf_token"
|
|
||||||
export HF_TOKEN="$current_hf_token"
|
|
||||||
ok "HF_TOKEN configured"
|
|
||||||
else
|
|
||||||
touch "$root_env"
|
|
||||||
env_set "$root_env" "HF_TOKEN" ""
|
|
||||||
ok "HF_TOKEN skipped (using public model fallback)"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# LLM configuration
|
# LLM configuration
|
||||||
@@ -799,11 +866,12 @@ CADDYEOF
|
|||||||
step_services() {
|
step_services() {
|
||||||
info "Step 6: Starting Docker services"
|
info "Step 6: Starting Docker services"
|
||||||
|
|
||||||
# Build GPU/CPU image from source (always needed — no prebuilt image)
|
# Build GPU image from source (only for --gpu mode)
|
||||||
local build_svc="$MODEL_MODE"
|
if [[ "$MODEL_MODE" == "gpu" ]]; then
|
||||||
info "Building $build_svc image (first build downloads ML models, may take a while)..."
|
info "Building gpu image (first build downloads ML models, may take a while)..."
|
||||||
compose_cmd build "$build_svc"
|
compose_cmd build gpu
|
||||||
ok "$build_svc image built"
|
ok "gpu image built"
|
||||||
|
fi
|
||||||
|
|
||||||
# Build or pull backend and frontend images
|
# Build or pull backend and frontend images
|
||||||
if [[ "$BUILD_IMAGES" == "true" ]]; then
|
if [[ "$BUILD_IMAGES" == "true" ]]; then
|
||||||
@@ -871,25 +939,29 @@ step_services() {
|
|||||||
step_health() {
|
step_health() {
|
||||||
info "Step 7: Health checks"
|
info "Step 7: Health checks"
|
||||||
|
|
||||||
# Specialized model service (gpu or cpu)
|
# Specialized model service (only for --gpu mode)
|
||||||
local model_svc="$MODEL_MODE"
|
if [[ "$MODEL_MODE" == "gpu" ]]; then
|
||||||
|
info "Waiting for gpu service (first start downloads ~1GB of models)..."
|
||||||
info "Waiting for $model_svc service (first start downloads ~1GB of models)..."
|
local model_ok=false
|
||||||
local model_ok=false
|
for i in $(seq 1 120); do
|
||||||
for i in $(seq 1 120); do
|
if curl -sf http://localhost:8000/docs > /dev/null 2>&1; then
|
||||||
if curl -sf http://localhost:8000/docs > /dev/null 2>&1; then
|
model_ok=true
|
||||||
model_ok=true
|
break
|
||||||
break
|
fi
|
||||||
|
echo -ne "\r Waiting for gpu service... ($i/120)"
|
||||||
|
sleep 5
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
if [[ "$model_ok" == "true" ]]; then
|
||||||
|
ok "gpu service healthy (transcription + diarization)"
|
||||||
|
else
|
||||||
|
warn "gpu service not ready yet — it will keep loading in the background"
|
||||||
|
warn "Check with: docker compose -f docker-compose.selfhosted.yml logs gpu"
|
||||||
fi
|
fi
|
||||||
echo -ne "\r Waiting for $model_svc service... ($i/120)"
|
elif [[ "$MODEL_MODE" == "cpu" ]]; then
|
||||||
sleep 5
|
ok "CPU mode — ML processing runs in-process on server/worker (no separate service)"
|
||||||
done
|
elif [[ "$MODEL_MODE" == "hosted" ]]; then
|
||||||
echo ""
|
ok "Hosted mode — ML processing via remote GPU service (no local health check)"
|
||||||
if [[ "$model_ok" == "true" ]]; then
|
|
||||||
ok "$model_svc service healthy (transcription + diarization)"
|
|
||||||
else
|
|
||||||
warn "$model_svc service not ready yet — it will keep loading in the background"
|
|
||||||
warn "Check with: docker compose -f docker-compose.selfhosted.yml logs $model_svc"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Ollama (if applicable)
|
# Ollama (if applicable)
|
||||||
|
|||||||
@@ -89,11 +89,11 @@ LLM_CONTEXT_WINDOW=16000
|
|||||||
## =======================================================
|
## =======================================================
|
||||||
## Audio Padding
|
## Audio Padding
|
||||||
##
|
##
|
||||||
## backends: local (in-process PyAV), modal (HTTP API client)
|
## backends: pyav (in-process PyAV), modal (HTTP API client)
|
||||||
## Default is "local" — no external service needed.
|
## Default is "pyav" — no external service needed.
|
||||||
## Set to "modal" when using Modal.com or self-hosted gpu/self_hosted/ container.
|
## Set to "modal" when using Modal.com or self-hosted gpu/self_hosted/ container.
|
||||||
## =======================================================
|
## =======================================================
|
||||||
#PADDING_BACKEND=local
|
#PADDING_BACKEND=pyav
|
||||||
#PADDING_BACKEND=modal
|
#PADDING_BACKEND=modal
|
||||||
#PADDING_URL=https://xxxxx--reflector-padding-web.modal.run
|
#PADDING_URL=https://xxxxx--reflector-padding-web.modal.run
|
||||||
#PADDING_MODAL_API_KEY=xxxxx
|
#PADDING_MODAL_API_KEY=xxxxx
|
||||||
@@ -101,8 +101,8 @@ LLM_CONTEXT_WINDOW=16000
|
|||||||
## =======================================================
|
## =======================================================
|
||||||
## Diarization
|
## Diarization
|
||||||
##
|
##
|
||||||
## Only available on modal
|
## backends: modal (HTTP API), pyannote (in-process pyannote.audio)
|
||||||
## To allow diarization, you need to expose expose the files to be dowloded by the pipeline
|
## To allow diarization, you need to expose expose the files to be downloaded by the pipeline
|
||||||
## =======================================================
|
## =======================================================
|
||||||
DIARIZATION_ENABLED=false
|
DIARIZATION_ENABLED=false
|
||||||
DIARIZATION_BACKEND=modal
|
DIARIZATION_BACKEND=modal
|
||||||
|
|||||||
@@ -32,26 +32,46 @@ AUTH_BACKEND=none
|
|||||||
|
|
||||||
# =======================================================
|
# =======================================================
|
||||||
# Specialized Models (Transcription, Diarization, Translation)
|
# Specialized Models (Transcription, Diarization, Translation)
|
||||||
# These run in the gpu/cpu container — NOT an LLM.
|
# These do NOT use an LLM. Configured per mode by the setup script:
|
||||||
# The "modal" backend means "HTTP API client" — it talks to
|
#
|
||||||
# the self-hosted container, not Modal.com cloud.
|
# --gpu mode: modal backends → GPU container (http://transcription:8000)
|
||||||
|
# --cpu mode: whisper/pyannote/marian/pyav → in-process ML on server/worker
|
||||||
|
# --hosted mode: modal backends → user-provided remote GPU service URL
|
||||||
# =======================================================
|
# =======================================================
|
||||||
|
|
||||||
|
# --- --gpu mode (default) ---
|
||||||
TRANSCRIPT_BACKEND=modal
|
TRANSCRIPT_BACKEND=modal
|
||||||
TRANSCRIPT_URL=http://transcription:8000
|
TRANSCRIPT_URL=http://transcription:8000
|
||||||
TRANSCRIPT_MODAL_API_KEY=selfhosted
|
TRANSCRIPT_MODAL_API_KEY=selfhosted
|
||||||
|
|
||||||
DIARIZATION_ENABLED=true
|
DIARIZATION_ENABLED=true
|
||||||
DIARIZATION_BACKEND=modal
|
DIARIZATION_BACKEND=modal
|
||||||
DIARIZATION_URL=http://transcription:8000
|
DIARIZATION_URL=http://transcription:8000
|
||||||
|
|
||||||
TRANSLATION_BACKEND=modal
|
TRANSLATION_BACKEND=modal
|
||||||
TRANSLATE_URL=http://transcription:8000
|
TRANSLATE_URL=http://transcription:8000
|
||||||
|
|
||||||
PADDING_BACKEND=modal
|
PADDING_BACKEND=modal
|
||||||
PADDING_URL=http://transcription:8000
|
PADDING_URL=http://transcription:8000
|
||||||
|
|
||||||
# HuggingFace token — optional, for gated models (e.g. pyannote).
|
# --- --cpu mode (set by setup script) ---
|
||||||
# Falls back to public S3 model bundle if not set.
|
# TRANSCRIPT_BACKEND=whisper
|
||||||
|
# DIARIZATION_BACKEND=pyannote
|
||||||
|
# TRANSLATION_BACKEND=marian
|
||||||
|
# PADDING_BACKEND=pyav
|
||||||
|
|
||||||
|
# --- --hosted mode (set by setup script) ---
|
||||||
|
# TRANSCRIPT_BACKEND=modal
|
||||||
|
# TRANSCRIPT_URL=https://your-gpu-service.example.com
|
||||||
|
# DIARIZATION_BACKEND=modal
|
||||||
|
# DIARIZATION_URL=https://your-gpu-service.example.com
|
||||||
|
# ... (all URLs point to one remote service)
|
||||||
|
|
||||||
|
# Whisper model sizes for local transcription (--cpu mode)
|
||||||
|
# Options: "tiny", "base", "small", "medium", "large-v2"
|
||||||
|
# WHISPER_CHUNK_MODEL=tiny
|
||||||
|
# WHISPER_FILE_MODEL=tiny
|
||||||
|
|
||||||
|
# HuggingFace token — for gated models (e.g. pyannote diarization).
|
||||||
|
# Required for --gpu and --cpu modes; falls back to public S3 bundle if not set.
|
||||||
|
# Not needed for --hosted mode (remote service handles its own auth).
|
||||||
# HF_TOKEN=hf_xxxxx
|
# HF_TOKEN=hf_xxxxx
|
||||||
|
|
||||||
# =======================================================
|
# =======================================================
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ ENV PYTHONUNBUFFERED=1 \
|
|||||||
|
|
||||||
# builder install base dependencies
|
# builder install base dependencies
|
||||||
WORKDIR /tmp
|
WORKDIR /tmp
|
||||||
RUN apt-get update && apt-get install -y curl && apt-get clean
|
RUN apt-get update && apt-get install -y curl ffmpeg && apt-get clean
|
||||||
ADD https://astral.sh/uv/install.sh /uv-installer.sh
|
ADD https://astral.sh/uv/install.sh /uv-installer.sh
|
||||||
RUN sh /uv-installer.sh && rm /uv-installer.sh
|
RUN sh /uv-installer.sh && rm /uv-installer.sh
|
||||||
ENV PATH="/root/.local/bin/:$PATH"
|
ENV PATH="/root/.local/bin/:$PATH"
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ dependencies = [
|
|||||||
"pytest-env>=1.1.5",
|
"pytest-env>=1.1.5",
|
||||||
"webvtt-py>=0.5.0",
|
"webvtt-py>=0.5.0",
|
||||||
"icalendar>=6.0.0",
|
"icalendar>=6.0.0",
|
||||||
"hatchet-sdk>=0.47.0",
|
"hatchet-sdk==1.22.16",
|
||||||
"pydantic>=2.12.5",
|
"pydantic>=2.12.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -71,9 +71,12 @@ local = [
|
|||||||
"faster-whisper>=0.10.0",
|
"faster-whisper>=0.10.0",
|
||||||
]
|
]
|
||||||
silero-vad = [
|
silero-vad = [
|
||||||
"silero-vad>=5.1.2",
|
"silero-vad==5.1.2",
|
||||||
"torch>=2.8.0",
|
"torch>=2.8.0",
|
||||||
"torchaudio>=2.8.0",
|
"torchaudio>=2.8.0",
|
||||||
|
"pyannote.audio==3.4.0",
|
||||||
|
"pytorch-lightning<2.6",
|
||||||
|
"librosa==0.10.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.uv]
|
[tool.uv]
|
||||||
|
|||||||
@@ -12,8 +12,10 @@ AccessTokenInfo = auth_module.AccessTokenInfo
|
|||||||
authenticated = auth_module.authenticated
|
authenticated = auth_module.authenticated
|
||||||
current_user = auth_module.current_user
|
current_user = auth_module.current_user
|
||||||
current_user_optional = auth_module.current_user_optional
|
current_user_optional = auth_module.current_user_optional
|
||||||
|
current_user_optional_if_public_mode = auth_module.current_user_optional_if_public_mode
|
||||||
parse_ws_bearer_token = auth_module.parse_ws_bearer_token
|
parse_ws_bearer_token = auth_module.parse_ws_bearer_token
|
||||||
current_user_ws_optional = auth_module.current_user_ws_optional
|
current_user_ws_optional = auth_module.current_user_ws_optional
|
||||||
|
verify_raw_token = auth_module.verify_raw_token
|
||||||
|
|
||||||
# Optional router (e.g. for /auth/login in password backend)
|
# Optional router (e.g. for /auth/login in password backend)
|
||||||
router = getattr(auth_module, "router", None)
|
router = getattr(auth_module, "router", None)
|
||||||
|
|||||||
@@ -129,6 +129,17 @@ async def current_user_optional(
|
|||||||
return await _authenticate_user(jwt_token, api_key, jwtauth)
|
return await _authenticate_user(jwt_token, api_key, jwtauth)
|
||||||
|
|
||||||
|
|
||||||
|
async def current_user_optional_if_public_mode(
|
||||||
|
jwt_token: Annotated[Optional[str], Depends(oauth2_scheme)],
|
||||||
|
api_key: Annotated[Optional[str], Depends(api_key_header)],
|
||||||
|
jwtauth: JWTAuth = Depends(),
|
||||||
|
) -> Optional[UserInfo]:
|
||||||
|
user = await _authenticate_user(jwt_token, api_key, jwtauth)
|
||||||
|
if user is None and not settings.PUBLIC_MODE:
|
||||||
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||||
|
return user
|
||||||
|
|
||||||
|
|
||||||
def parse_ws_bearer_token(
|
def parse_ws_bearer_token(
|
||||||
websocket: "WebSocket",
|
websocket: "WebSocket",
|
||||||
) -> tuple[Optional[str], Optional[str]]:
|
) -> tuple[Optional[str], Optional[str]]:
|
||||||
@@ -144,3 +155,8 @@ async def current_user_ws_optional(websocket: "WebSocket") -> Optional[UserInfo]
|
|||||||
if not token:
|
if not token:
|
||||||
return None
|
return None
|
||||||
return await _authenticate_user(token, None, JWTAuth())
|
return await _authenticate_user(token, None, JWTAuth())
|
||||||
|
|
||||||
|
|
||||||
|
async def verify_raw_token(token: str) -> Optional[UserInfo]:
|
||||||
|
"""Verify a raw JWT token string (used for query-param auth fallback)."""
|
||||||
|
return await _authenticate_user(token, None, JWTAuth())
|
||||||
|
|||||||
@@ -21,9 +21,19 @@ def current_user_optional():
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def current_user_optional_if_public_mode():
|
||||||
|
# auth_none means no authentication at all — always public
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def parse_ws_bearer_token(websocket):
|
def parse_ws_bearer_token(websocket):
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
async def current_user_ws_optional(websocket):
|
async def current_user_ws_optional(websocket):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def verify_raw_token(token):
|
||||||
|
"""Verify a raw JWT token string (used for query-param auth fallback)."""
|
||||||
|
return None
|
||||||
|
|||||||
@@ -150,6 +150,16 @@ async def current_user_optional(
|
|||||||
return await _authenticate_user(jwt_token, api_key)
|
return await _authenticate_user(jwt_token, api_key)
|
||||||
|
|
||||||
|
|
||||||
|
async def current_user_optional_if_public_mode(
|
||||||
|
jwt_token: Annotated[Optional[str], Depends(oauth2_scheme)],
|
||||||
|
api_key: Annotated[Optional[str], Depends(api_key_header)],
|
||||||
|
) -> Optional[UserInfo]:
|
||||||
|
user = await _authenticate_user(jwt_token, api_key)
|
||||||
|
if user is None and not settings.PUBLIC_MODE:
|
||||||
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||||
|
return user
|
||||||
|
|
||||||
|
|
||||||
# --- WebSocket auth (same pattern as auth_jwt.py) ---
|
# --- WebSocket auth (same pattern as auth_jwt.py) ---
|
||||||
def parse_ws_bearer_token(
|
def parse_ws_bearer_token(
|
||||||
websocket: "WebSocket",
|
websocket: "WebSocket",
|
||||||
@@ -168,6 +178,11 @@ async def current_user_ws_optional(websocket: "WebSocket") -> Optional[UserInfo]
|
|||||||
return await _authenticate_user(token, None)
|
return await _authenticate_user(token, None)
|
||||||
|
|
||||||
|
|
||||||
|
async def verify_raw_token(token: str) -> Optional[UserInfo]:
|
||||||
|
"""Verify a raw JWT token string (used for query-param auth fallback)."""
|
||||||
|
return await _authenticate_user(token, None)
|
||||||
|
|
||||||
|
|
||||||
# --- Login router ---
|
# --- Login router ---
|
||||||
router = APIRouter(prefix="/auth", tags=["auth"])
|
router = APIRouter(prefix="/auth", tags=["auth"])
|
||||||
|
|
||||||
|
|||||||
@@ -697,6 +697,18 @@ class TranscriptController:
|
|||||||
return False
|
return False
|
||||||
return user_id and transcript.user_id == user_id
|
return user_id and transcript.user_id == user_id
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def check_can_mutate(transcript: Transcript, user_id: str | None) -> None:
|
||||||
|
"""
|
||||||
|
Raises HTTP 403 if the user cannot mutate the transcript.
|
||||||
|
|
||||||
|
Policy:
|
||||||
|
- Anonymous transcripts (user_id is None) are editable by anyone
|
||||||
|
- Owned transcripts can only be mutated by their owner
|
||||||
|
"""
|
||||||
|
if transcript.user_id is not None and transcript.user_id != user_id:
|
||||||
|
raise HTTPException(status_code=403, detail="Not authorized")
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def transaction(self):
|
async def transaction(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -39,5 +39,12 @@ TIMEOUT_MEDIUM = (
|
|||||||
300 # Single LLM calls, waveform generation (5m for slow LLM responses)
|
300 # Single LLM calls, waveform generation (5m for slow LLM responses)
|
||||||
)
|
)
|
||||||
TIMEOUT_LONG = 180 # Action items (larger context LLM)
|
TIMEOUT_LONG = 180 # Action items (larger context LLM)
|
||||||
TIMEOUT_AUDIO = 720 # Audio processing: padding, mixdown
|
TIMEOUT_TITLE = 300 # generate_title (single LLM call; doc: reduce from 600s)
|
||||||
TIMEOUT_HEAVY = 600 # Transcription, fan-out LLM tasks
|
TIMEOUT_AUDIO = 720 # Audio processing: padding, mixdown (Hatchet execution_timeout)
|
||||||
|
TIMEOUT_AUDIO_HTTP = (
|
||||||
|
660 # httpx timeout for pad_track — below 720 so Hatchet doesn't race
|
||||||
|
)
|
||||||
|
TIMEOUT_HEAVY = 600 # Transcription, fan-out LLM tasks (Hatchet execution_timeout)
|
||||||
|
TIMEOUT_HEAVY_HTTP = (
|
||||||
|
540 # httpx timeout for transcribe_track — below 600 so Hatchet doesn't race
|
||||||
|
)
|
||||||
|
|||||||
74
server/reflector/hatchet/error_classification.py
Normal file
74
server/reflector/hatchet/error_classification.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
"""Classify exceptions as non-retryable for Hatchet workflows.
|
||||||
|
|
||||||
|
When a task raises NonRetryableException (or an exception classified as
|
||||||
|
non-retryable and re-raised as such), Hatchet stops immediately — no further
|
||||||
|
retries. Used by with_error_handling to avoid wasting retries on config errors,
|
||||||
|
auth failures, corrupt data, etc.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Optional dependencies: only classify if the exception type is available.
|
||||||
|
# This avoids hard dependency on openai/av/botocore for code paths that don't use them.
|
||||||
|
try:
|
||||||
|
import openai
|
||||||
|
except ImportError:
|
||||||
|
openai = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
try:
|
||||||
|
import av
|
||||||
|
except ImportError:
|
||||||
|
av = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
try:
|
||||||
|
from botocore.exceptions import ClientError as BotoClientError
|
||||||
|
except ImportError:
|
||||||
|
BotoClientError = None # type: ignore[misc, assignment]
|
||||||
|
|
||||||
|
from hatchet_sdk import NonRetryableException
|
||||||
|
from httpx import HTTPStatusError
|
||||||
|
|
||||||
|
from reflector.llm import LLMParseError
|
||||||
|
|
||||||
|
# HTTP status codes that won't change on retry (auth, not found, payment, payload)
|
||||||
|
NON_RETRYABLE_HTTP_STATUSES = {401, 402, 403, 404, 413}
|
||||||
|
NON_RETRYABLE_S3_CODES = {"AccessDenied", "NoSuchBucket", "NoSuchKey"}
|
||||||
|
|
||||||
|
|
||||||
|
def is_non_retryable(e: BaseException) -> bool:
|
||||||
|
"""Return True if the exception should stop Hatchet retries immediately.
|
||||||
|
|
||||||
|
Hard failures (config, auth, missing resource, corrupt data) return True.
|
||||||
|
Transient errors (timeouts, 5xx, 429, connection) return False.
|
||||||
|
"""
|
||||||
|
if isinstance(e, NonRetryableException):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Config/input errors
|
||||||
|
if isinstance(e, (ValueError, TypeError)):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# HTTP status codes that won't change on retry
|
||||||
|
if isinstance(e, HTTPStatusError):
|
||||||
|
return e.response.status_code in NON_RETRYABLE_HTTP_STATUSES
|
||||||
|
|
||||||
|
# OpenAI auth errors
|
||||||
|
if openai is not None and isinstance(e, openai.AuthenticationError):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# LLM parse failures (already retried internally)
|
||||||
|
if isinstance(e, LLMParseError):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# S3 permission/existence errors
|
||||||
|
if BotoClientError is not None and isinstance(e, BotoClientError):
|
||||||
|
code = e.response.get("Error", {}).get("Code", "")
|
||||||
|
return code in NON_RETRYABLE_S3_CODES
|
||||||
|
|
||||||
|
# Corrupt audio (PyAV) — AVError in some versions; fallback to InvalidDataError
|
||||||
|
if av is not None:
|
||||||
|
av_error = getattr(av, "AVError", None) or getattr(
|
||||||
|
getattr(av, "error", None), "InvalidDataError", None
|
||||||
|
)
|
||||||
|
if av_error is not None and isinstance(e, av_error):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
@@ -27,6 +27,7 @@ from hatchet_sdk import (
|
|||||||
ConcurrencyExpression,
|
ConcurrencyExpression,
|
||||||
ConcurrencyLimitStrategy,
|
ConcurrencyLimitStrategy,
|
||||||
Context,
|
Context,
|
||||||
|
NonRetryableException,
|
||||||
)
|
)
|
||||||
from hatchet_sdk.labels import DesiredWorkerLabel
|
from hatchet_sdk.labels import DesiredWorkerLabel
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
@@ -43,8 +44,10 @@ from reflector.hatchet.constants import (
|
|||||||
TIMEOUT_LONG,
|
TIMEOUT_LONG,
|
||||||
TIMEOUT_MEDIUM,
|
TIMEOUT_MEDIUM,
|
||||||
TIMEOUT_SHORT,
|
TIMEOUT_SHORT,
|
||||||
|
TIMEOUT_TITLE,
|
||||||
TaskName,
|
TaskName,
|
||||||
)
|
)
|
||||||
|
from reflector.hatchet.error_classification import is_non_retryable
|
||||||
from reflector.hatchet.workflows.models import (
|
from reflector.hatchet.workflows.models import (
|
||||||
ActionItemsResult,
|
ActionItemsResult,
|
||||||
ConsentResult,
|
ConsentResult,
|
||||||
@@ -216,6 +219,13 @@ def make_audio_progress_logger(
|
|||||||
R = TypeVar("R")
|
R = TypeVar("R")
|
||||||
|
|
||||||
|
|
||||||
|
def _successful_run_results(
|
||||||
|
results: list[dict[str, Any] | BaseException],
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
"""Return only successful (non-exception) results from aio_run_many(return_exceptions=True)."""
|
||||||
|
return [r for r in results if not isinstance(r, BaseException)]
|
||||||
|
|
||||||
|
|
||||||
def with_error_handling(
|
def with_error_handling(
|
||||||
step_name: TaskName, set_error_status: bool = True
|
step_name: TaskName, set_error_status: bool = True
|
||||||
) -> Callable[
|
) -> Callable[
|
||||||
@@ -243,8 +253,12 @@ def with_error_handling(
|
|||||||
error=str(e),
|
error=str(e),
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
)
|
)
|
||||||
if set_error_status:
|
if is_non_retryable(e):
|
||||||
await set_workflow_error_status(input.transcript_id)
|
# Hard fail: stop retries, set error status, fail workflow
|
||||||
|
if set_error_status:
|
||||||
|
await set_workflow_error_status(input.transcript_id)
|
||||||
|
raise NonRetryableException(str(e)) from e
|
||||||
|
# Transient: do not set error status — Hatchet will retry
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return wrapper # type: ignore[return-value]
|
return wrapper # type: ignore[return-value]
|
||||||
@@ -253,7 +267,10 @@ def with_error_handling(
|
|||||||
|
|
||||||
|
|
||||||
@daily_multitrack_pipeline.task(
|
@daily_multitrack_pipeline.task(
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT), retries=3
|
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||||
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=10,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.GET_RECORDING)
|
@with_error_handling(TaskName.GET_RECORDING)
|
||||||
async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
|
async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
|
||||||
@@ -309,6 +326,8 @@ async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
|
|||||||
parents=[get_recording],
|
parents=[get_recording],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=10,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.GET_PARTICIPANTS)
|
@with_error_handling(TaskName.GET_PARTICIPANTS)
|
||||||
async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsResult:
|
async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsResult:
|
||||||
@@ -412,6 +431,8 @@ async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsRe
|
|||||||
parents=[get_participants],
|
parents=[get_participants],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=30,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.PROCESS_TRACKS)
|
@with_error_handling(TaskName.PROCESS_TRACKS)
|
||||||
async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksResult:
|
async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksResult:
|
||||||
@@ -435,7 +456,7 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
|
|||||||
for i, track in enumerate(input.tracks)
|
for i, track in enumerate(input.tracks)
|
||||||
]
|
]
|
||||||
|
|
||||||
results = await track_workflow.aio_run_many(bulk_runs)
|
results = await track_workflow.aio_run_many(bulk_runs, return_exceptions=True)
|
||||||
|
|
||||||
target_language = participants_result.target_language
|
target_language = participants_result.target_language
|
||||||
|
|
||||||
@@ -443,7 +464,18 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
|
|||||||
padded_tracks = []
|
padded_tracks = []
|
||||||
created_padded_files = set()
|
created_padded_files = set()
|
||||||
|
|
||||||
for result in results:
|
for i, result in enumerate(results):
|
||||||
|
if isinstance(result, BaseException):
|
||||||
|
logger.error(
|
||||||
|
"[Hatchet] process_tracks: track workflow failed, failing step",
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
track_index=i,
|
||||||
|
error=str(result),
|
||||||
|
)
|
||||||
|
ctx.log(f"process_tracks: track {i} failed ({result}), failing step")
|
||||||
|
raise ValueError(
|
||||||
|
f"Track {i} workflow failed after retries: {result!s}"
|
||||||
|
) from result
|
||||||
transcribe_result = TranscribeTrackResult(**result[TaskName.TRANSCRIBE_TRACK])
|
transcribe_result = TranscribeTrackResult(**result[TaskName.TRANSCRIBE_TRACK])
|
||||||
track_words.append(transcribe_result.words)
|
track_words.append(transcribe_result.words)
|
||||||
|
|
||||||
@@ -481,7 +513,9 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
|
|||||||
@daily_multitrack_pipeline.task(
|
@daily_multitrack_pipeline.task(
|
||||||
parents=[process_tracks],
|
parents=[process_tracks],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_AUDIO),
|
execution_timeout=timedelta(seconds=TIMEOUT_AUDIO),
|
||||||
retries=3,
|
retries=2,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=15,
|
||||||
desired_worker_labels={
|
desired_worker_labels={
|
||||||
"pool": DesiredWorkerLabel(
|
"pool": DesiredWorkerLabel(
|
||||||
value="cpu-heavy",
|
value="cpu-heavy",
|
||||||
@@ -593,6 +627,8 @@ async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
|
|||||||
parents=[mixdown_tracks],
|
parents=[mixdown_tracks],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=10,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.GENERATE_WAVEFORM)
|
@with_error_handling(TaskName.GENERATE_WAVEFORM)
|
||||||
async def generate_waveform(input: PipelineInput, ctx: Context) -> WaveformResult:
|
async def generate_waveform(input: PipelineInput, ctx: Context) -> WaveformResult:
|
||||||
@@ -661,6 +697,8 @@ async def generate_waveform(input: PipelineInput, ctx: Context) -> WaveformResul
|
|||||||
parents=[process_tracks],
|
parents=[process_tracks],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=30,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.DETECT_TOPICS)
|
@with_error_handling(TaskName.DETECT_TOPICS)
|
||||||
async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
|
async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
|
||||||
@@ -722,11 +760,22 @@ async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
|
|||||||
for chunk in chunks
|
for chunk in chunks
|
||||||
]
|
]
|
||||||
|
|
||||||
results = await topic_chunk_workflow.aio_run_many(bulk_runs)
|
results = await topic_chunk_workflow.aio_run_many(bulk_runs, return_exceptions=True)
|
||||||
|
|
||||||
topic_chunks = [
|
topic_chunks: list[TopicChunkResult] = []
|
||||||
TopicChunkResult(**result[TaskName.DETECT_CHUNK_TOPIC]) for result in results
|
for i, result in enumerate(results):
|
||||||
]
|
if isinstance(result, BaseException):
|
||||||
|
logger.error(
|
||||||
|
"[Hatchet] detect_topics: chunk workflow failed, failing step",
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
chunk_index=i,
|
||||||
|
error=str(result),
|
||||||
|
)
|
||||||
|
ctx.log(f"detect_topics: chunk {i} failed ({result}), failing step")
|
||||||
|
raise ValueError(
|
||||||
|
f"Topic chunk {i} workflow failed after retries: {result!s}"
|
||||||
|
) from result
|
||||||
|
topic_chunks.append(TopicChunkResult(**result[TaskName.DETECT_CHUNK_TOPIC]))
|
||||||
|
|
||||||
async with fresh_db_connection():
|
async with fresh_db_connection():
|
||||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||||
@@ -764,8 +813,10 @@ async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
|
|||||||
|
|
||||||
@daily_multitrack_pipeline.task(
|
@daily_multitrack_pipeline.task(
|
||||||
parents=[detect_topics],
|
parents=[detect_topics],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
execution_timeout=timedelta(seconds=TIMEOUT_TITLE),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=15,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.GENERATE_TITLE)
|
@with_error_handling(TaskName.GENERATE_TITLE)
|
||||||
async def generate_title(input: PipelineInput, ctx: Context) -> TitleResult:
|
async def generate_title(input: PipelineInput, ctx: Context) -> TitleResult:
|
||||||
@@ -830,7 +881,9 @@ async def generate_title(input: PipelineInput, ctx: Context) -> TitleResult:
|
|||||||
@daily_multitrack_pipeline.task(
|
@daily_multitrack_pipeline.task(
|
||||||
parents=[detect_topics],
|
parents=[detect_topics],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||||
retries=3,
|
retries=5,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=30,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.EXTRACT_SUBJECTS)
|
@with_error_handling(TaskName.EXTRACT_SUBJECTS)
|
||||||
async def extract_subjects(input: PipelineInput, ctx: Context) -> SubjectsResult:
|
async def extract_subjects(input: PipelineInput, ctx: Context) -> SubjectsResult:
|
||||||
@@ -909,6 +962,8 @@ async def extract_subjects(input: PipelineInput, ctx: Context) -> SubjectsResult
|
|||||||
parents=[extract_subjects],
|
parents=[extract_subjects],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=30,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.PROCESS_SUBJECTS)
|
@with_error_handling(TaskName.PROCESS_SUBJECTS)
|
||||||
async def process_subjects(input: PipelineInput, ctx: Context) -> ProcessSubjectsResult:
|
async def process_subjects(input: PipelineInput, ctx: Context) -> ProcessSubjectsResult:
|
||||||
@@ -935,12 +990,24 @@ async def process_subjects(input: PipelineInput, ctx: Context) -> ProcessSubject
|
|||||||
for i, subject in enumerate(subjects)
|
for i, subject in enumerate(subjects)
|
||||||
]
|
]
|
||||||
|
|
||||||
results = await subject_workflow.aio_run_many(bulk_runs)
|
results = await subject_workflow.aio_run_many(bulk_runs, return_exceptions=True)
|
||||||
|
|
||||||
subject_summaries = [
|
subject_summaries: list[SubjectSummaryResult] = []
|
||||||
SubjectSummaryResult(**result[TaskName.GENERATE_DETAILED_SUMMARY])
|
for i, result in enumerate(results):
|
||||||
for result in results
|
if isinstance(result, BaseException):
|
||||||
]
|
logger.error(
|
||||||
|
"[Hatchet] process_subjects: subject workflow failed, failing step",
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
subject_index=i,
|
||||||
|
error=str(result),
|
||||||
|
)
|
||||||
|
ctx.log(f"process_subjects: subject {i} failed ({result}), failing step")
|
||||||
|
raise ValueError(
|
||||||
|
f"Subject {i} workflow failed after retries: {result!s}"
|
||||||
|
) from result
|
||||||
|
subject_summaries.append(
|
||||||
|
SubjectSummaryResult(**result[TaskName.GENERATE_DETAILED_SUMMARY])
|
||||||
|
)
|
||||||
|
|
||||||
ctx.log(f"process_subjects complete: {len(subject_summaries)} summaries")
|
ctx.log(f"process_subjects complete: {len(subject_summaries)} summaries")
|
||||||
|
|
||||||
@@ -951,6 +1018,8 @@ async def process_subjects(input: PipelineInput, ctx: Context) -> ProcessSubject
|
|||||||
parents=[process_subjects],
|
parents=[process_subjects],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=15,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.GENERATE_RECAP)
|
@with_error_handling(TaskName.GENERATE_RECAP)
|
||||||
async def generate_recap(input: PipelineInput, ctx: Context) -> RecapResult:
|
async def generate_recap(input: PipelineInput, ctx: Context) -> RecapResult:
|
||||||
@@ -1040,6 +1109,8 @@ async def generate_recap(input: PipelineInput, ctx: Context) -> RecapResult:
|
|||||||
parents=[extract_subjects],
|
parents=[extract_subjects],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_LONG),
|
execution_timeout=timedelta(seconds=TIMEOUT_LONG),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=15,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.IDENTIFY_ACTION_ITEMS)
|
@with_error_handling(TaskName.IDENTIFY_ACTION_ITEMS)
|
||||||
async def identify_action_items(
|
async def identify_action_items(
|
||||||
@@ -1108,6 +1179,8 @@ async def identify_action_items(
|
|||||||
parents=[process_tracks, generate_title, generate_recap, identify_action_items],
|
parents=[process_tracks, generate_title, generate_recap, identify_action_items],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||||
retries=3,
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=5,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.FINALIZE)
|
@with_error_handling(TaskName.FINALIZE)
|
||||||
async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
|
async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
|
||||||
@@ -1177,7 +1250,11 @@ async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
|
|||||||
|
|
||||||
|
|
||||||
@daily_multitrack_pipeline.task(
|
@daily_multitrack_pipeline.task(
|
||||||
parents=[finalize], execution_timeout=timedelta(seconds=TIMEOUT_SHORT), retries=3
|
parents=[finalize],
|
||||||
|
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||||
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=10,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.CLEANUP_CONSENT, set_error_status=False)
|
@with_error_handling(TaskName.CLEANUP_CONSENT, set_error_status=False)
|
||||||
async def cleanup_consent(input: PipelineInput, ctx: Context) -> ConsentResult:
|
async def cleanup_consent(input: PipelineInput, ctx: Context) -> ConsentResult:
|
||||||
@@ -1283,6 +1360,8 @@ async def cleanup_consent(input: PipelineInput, ctx: Context) -> ConsentResult:
|
|||||||
parents=[cleanup_consent],
|
parents=[cleanup_consent],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
execution_timeout=timedelta(seconds=TIMEOUT_SHORT),
|
||||||
retries=5,
|
retries=5,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=15,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.POST_ZULIP, set_error_status=False)
|
@with_error_handling(TaskName.POST_ZULIP, set_error_status=False)
|
||||||
async def post_zulip(input: PipelineInput, ctx: Context) -> ZulipResult:
|
async def post_zulip(input: PipelineInput, ctx: Context) -> ZulipResult:
|
||||||
@@ -1310,6 +1389,8 @@ async def post_zulip(input: PipelineInput, ctx: Context) -> ZulipResult:
|
|||||||
parents=[cleanup_consent],
|
parents=[cleanup_consent],
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||||
retries=5,
|
retries=5,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=15,
|
||||||
)
|
)
|
||||||
@with_error_handling(TaskName.SEND_WEBHOOK, set_error_status=False)
|
@with_error_handling(TaskName.SEND_WEBHOOK, set_error_status=False)
|
||||||
async def send_webhook(input: PipelineInput, ctx: Context) -> WebhookResult:
|
async def send_webhook(input: PipelineInput, ctx: Context) -> WebhookResult:
|
||||||
@@ -1378,3 +1459,32 @@ async def send_webhook(input: PipelineInput, ctx: Context) -> WebhookResult:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
ctx.log(f"send_webhook unexpected error, continuing anyway: {e}")
|
ctx.log(f"send_webhook unexpected error, continuing anyway: {e}")
|
||||||
return WebhookResult(webhook_sent=False)
|
return WebhookResult(webhook_sent=False)
|
||||||
|
|
||||||
|
|
||||||
|
async def on_workflow_failure(input: PipelineInput, ctx: Context) -> None:
|
||||||
|
"""Run when the workflow is truly dead (all retries exhausted).
|
||||||
|
|
||||||
|
Sets transcript status to 'error' only if it is not already 'ended'.
|
||||||
|
Post-finalize tasks (cleanup_consent, post_zulip, send_webhook) use
|
||||||
|
set_error_status=False; if one of them fails, we must not overwrite
|
||||||
|
the 'ended' status that finalize already set.
|
||||||
|
"""
|
||||||
|
async with fresh_db_connection():
|
||||||
|
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||||
|
if transcript and transcript.status == "ended":
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] on_workflow_failure: transcript already ended, skipping error status (failure was post-finalize)",
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
)
|
||||||
|
ctx.log(
|
||||||
|
"on_workflow_failure: transcript already ended, skipping error status"
|
||||||
|
)
|
||||||
|
return
|
||||||
|
await set_workflow_error_status(input.transcript_id)
|
||||||
|
|
||||||
|
|
||||||
|
@daily_multitrack_pipeline.on_failure_task()
|
||||||
|
async def _register_on_workflow_failure(input: PipelineInput, ctx: Context) -> None:
|
||||||
|
await on_workflow_failure(input, ctx)
|
||||||
|
|||||||
@@ -34,7 +34,12 @@ padding_workflow = hatchet.workflow(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@padding_workflow.task(execution_timeout=timedelta(seconds=TIMEOUT_AUDIO), retries=3)
|
@padding_workflow.task(
|
||||||
|
execution_timeout=timedelta(seconds=TIMEOUT_AUDIO),
|
||||||
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=30,
|
||||||
|
)
|
||||||
async def pad_track(input: PaddingInput, ctx: Context) -> PadTrackResult:
|
async def pad_track(input: PaddingInput, ctx: Context) -> PadTrackResult:
|
||||||
"""Pad audio track with silence based on WebM container start_time."""
|
"""Pad audio track with silence based on WebM container start_time."""
|
||||||
ctx.log(f"pad_track: track {input.track_index}, s3_key={input.s3_key}")
|
ctx.log(f"pad_track: track {input.track_index}, s3_key={input.s3_key}")
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from hatchet_sdk.rate_limit import RateLimit
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from reflector.hatchet.client import HatchetClientManager
|
from reflector.hatchet.client import HatchetClientManager
|
||||||
from reflector.hatchet.constants import LLM_RATE_LIMIT_KEY, TIMEOUT_MEDIUM
|
from reflector.hatchet.constants import LLM_RATE_LIMIT_KEY, TIMEOUT_HEAVY
|
||||||
from reflector.hatchet.workflows.models import SubjectSummaryResult
|
from reflector.hatchet.workflows.models import SubjectSummaryResult
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
from reflector.processors.summary.prompts import (
|
from reflector.processors.summary.prompts import (
|
||||||
@@ -41,8 +41,10 @@ subject_workflow = hatchet.workflow(
|
|||||||
|
|
||||||
|
|
||||||
@subject_workflow.task(
|
@subject_workflow.task(
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||||
retries=3,
|
retries=5,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=60,
|
||||||
rate_limits=[RateLimit(static_key=LLM_RATE_LIMIT_KEY, units=2)],
|
rate_limits=[RateLimit(static_key=LLM_RATE_LIMIT_KEY, units=2)],
|
||||||
)
|
)
|
||||||
async def generate_detailed_summary(
|
async def generate_detailed_summary(
|
||||||
|
|||||||
@@ -50,7 +50,9 @@ topic_chunk_workflow = hatchet.workflow(
|
|||||||
|
|
||||||
@topic_chunk_workflow.task(
|
@topic_chunk_workflow.task(
|
||||||
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
execution_timeout=timedelta(seconds=TIMEOUT_MEDIUM),
|
||||||
retries=3,
|
retries=5,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=60,
|
||||||
rate_limits=[RateLimit(static_key=LLM_RATE_LIMIT_KEY, units=1)],
|
rate_limits=[RateLimit(static_key=LLM_RATE_LIMIT_KEY, units=1)],
|
||||||
)
|
)
|
||||||
async def detect_chunk_topic(input: TopicChunkInput, ctx: Context) -> TopicChunkResult:
|
async def detect_chunk_topic(input: TopicChunkInput, ctx: Context) -> TopicChunkResult:
|
||||||
|
|||||||
@@ -44,7 +44,12 @@ hatchet = HatchetClientManager.get_client()
|
|||||||
track_workflow = hatchet.workflow(name="TrackProcessing", input_validator=TrackInput)
|
track_workflow = hatchet.workflow(name="TrackProcessing", input_validator=TrackInput)
|
||||||
|
|
||||||
|
|
||||||
@track_workflow.task(execution_timeout=timedelta(seconds=TIMEOUT_AUDIO), retries=3)
|
@track_workflow.task(
|
||||||
|
execution_timeout=timedelta(seconds=TIMEOUT_AUDIO),
|
||||||
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=30,
|
||||||
|
)
|
||||||
async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
|
async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
|
||||||
"""Pad single audio track with silence for alignment.
|
"""Pad single audio track with silence for alignment.
|
||||||
|
|
||||||
@@ -137,7 +142,11 @@ async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
|
|||||||
|
|
||||||
|
|
||||||
@track_workflow.task(
|
@track_workflow.task(
|
||||||
parents=[pad_track], execution_timeout=timedelta(seconds=TIMEOUT_HEAVY), retries=3
|
parents=[pad_track],
|
||||||
|
execution_timeout=timedelta(seconds=TIMEOUT_HEAVY),
|
||||||
|
retries=3,
|
||||||
|
backoff_factor=2.0,
|
||||||
|
backoff_max_seconds=30,
|
||||||
)
|
)
|
||||||
async def transcribe_track(input: TrackInput, ctx: Context) -> TranscribeTrackResult:
|
async def transcribe_track(input: TrackInput, ctx: Context) -> TranscribeTrackResult:
|
||||||
"""Transcribe audio track using GPU (Modal.com) or local Whisper."""
|
"""Transcribe audio track using GPU (Modal.com) or local Whisper."""
|
||||||
|
|||||||
@@ -65,10 +65,25 @@ class LLM:
|
|||||||
async def get_response(
|
async def get_response(
|
||||||
self, prompt: str, texts: list[str], tone_name: str | None = None
|
self, prompt: str, texts: list[str], tone_name: str | None = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Get a text response using TreeSummarize for non-function-calling models"""
|
"""Get a text response using TreeSummarize for non-function-calling models.
|
||||||
summarizer = TreeSummarize(verbose=False)
|
|
||||||
response = await summarizer.aget_response(prompt, texts, tone_name=tone_name)
|
Uses the same retry() wrapper as get_structured_response for transient
|
||||||
return str(response).strip()
|
network errors (connection, timeout, OSError) with exponential backoff.
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def _call():
|
||||||
|
summarizer = TreeSummarize(verbose=False)
|
||||||
|
response = await summarizer.aget_response(
|
||||||
|
prompt, texts, tone_name=tone_name
|
||||||
|
)
|
||||||
|
return str(response).strip()
|
||||||
|
|
||||||
|
return await retry(_call)(
|
||||||
|
retry_attempts=3,
|
||||||
|
retry_backoff_interval=1.0,
|
||||||
|
retry_backoff_max=30.0,
|
||||||
|
retry_ignore_exc_types=(ConnectionError, TimeoutError, OSError),
|
||||||
|
)
|
||||||
|
|
||||||
async def get_structured_response(
|
async def get_structured_response(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ from .audio_diarization_auto import AudioDiarizationAutoProcessor # noqa: F401
|
|||||||
from .audio_downscale import AudioDownscaleProcessor # noqa: F401
|
from .audio_downscale import AudioDownscaleProcessor # noqa: F401
|
||||||
from .audio_file_writer import AudioFileWriterProcessor # noqa: F401
|
from .audio_file_writer import AudioFileWriterProcessor # noqa: F401
|
||||||
from .audio_merge import AudioMergeProcessor # noqa: F401
|
from .audio_merge import AudioMergeProcessor # noqa: F401
|
||||||
|
from .audio_padding import AudioPaddingProcessor # noqa: F401
|
||||||
|
from .audio_padding_auto import AudioPaddingAutoProcessor # noqa: F401
|
||||||
from .audio_transcript import AudioTranscriptProcessor # noqa: F401
|
from .audio_transcript import AudioTranscriptProcessor # noqa: F401
|
||||||
from .audio_transcript_auto import AudioTranscriptAutoProcessor # noqa: F401
|
from .audio_transcript_auto import AudioTranscriptAutoProcessor # noqa: F401
|
||||||
from .base import ( # noqa: F401
|
from .base import ( # noqa: F401
|
||||||
|
|||||||
86
server/reflector/processors/_audio_download.py
Normal file
86
server/reflector/processors/_audio_download.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
"""
|
||||||
|
Shared audio download utility for local processors.
|
||||||
|
|
||||||
|
Downloads audio from a URL to a temporary file for in-process ML inference.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from reflector.logger import logger
|
||||||
|
|
||||||
|
S3_TIMEOUT = 60
|
||||||
|
|
||||||
|
|
||||||
|
async def download_audio_to_temp(url: str) -> Path:
|
||||||
|
"""Download audio from URL to a temporary file.
|
||||||
|
|
||||||
|
The caller is responsible for deleting the temp file after use.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Presigned URL or public URL to download audio from.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to the downloaded temporary file.
|
||||||
|
"""
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
return await loop.run_in_executor(None, _download_blocking, url)
|
||||||
|
|
||||||
|
|
||||||
|
def _download_blocking(url: str) -> Path:
|
||||||
|
"""Blocking download implementation."""
|
||||||
|
log = logger.bind(url=url[:80])
|
||||||
|
log.info("Downloading audio to temp file")
|
||||||
|
|
||||||
|
response = requests.get(url, stream=True, timeout=S3_TIMEOUT)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
# Determine extension from content-type or URL
|
||||||
|
ext = _detect_extension(url, response.headers.get("content-type", ""))
|
||||||
|
|
||||||
|
fd, tmp_path = tempfile.mkstemp(suffix=ext)
|
||||||
|
try:
|
||||||
|
total_bytes = 0
|
||||||
|
with os.fdopen(fd, "wb") as f:
|
||||||
|
for chunk in response.iter_content(chunk_size=8192):
|
||||||
|
if chunk:
|
||||||
|
f.write(chunk)
|
||||||
|
total_bytes += len(chunk)
|
||||||
|
log.info("Audio downloaded", bytes=total_bytes, path=tmp_path)
|
||||||
|
return Path(tmp_path)
|
||||||
|
except Exception:
|
||||||
|
# Clean up on failure
|
||||||
|
try:
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_extension(url: str, content_type: str) -> str:
|
||||||
|
"""Detect audio file extension from URL or content-type."""
|
||||||
|
# Try URL path first
|
||||||
|
path = url.split("?")[0] # Strip query params
|
||||||
|
for ext in (".wav", ".mp3", ".mp4", ".m4a", ".webm", ".ogg", ".flac"):
|
||||||
|
if path.lower().endswith(ext):
|
||||||
|
return ext
|
||||||
|
|
||||||
|
# Try content-type
|
||||||
|
ct_map = {
|
||||||
|
"audio/wav": ".wav",
|
||||||
|
"audio/x-wav": ".wav",
|
||||||
|
"audio/mpeg": ".mp3",
|
||||||
|
"audio/mp4": ".m4a",
|
||||||
|
"audio/webm": ".webm",
|
||||||
|
"audio/ogg": ".ogg",
|
||||||
|
"audio/flac": ".flac",
|
||||||
|
}
|
||||||
|
for ct, ext in ct_map.items():
|
||||||
|
if ct in content_type.lower():
|
||||||
|
return ext
|
||||||
|
|
||||||
|
return ".audio"
|
||||||
76
server/reflector/processors/_marian_translator_service.py
Normal file
76
server/reflector/processors/_marian_translator_service.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
"""
|
||||||
|
MarianMT translation service.
|
||||||
|
|
||||||
|
Singleton service that loads HuggingFace MarianMT translation models
|
||||||
|
and reuses them across all MarianMT translator processor instances.
|
||||||
|
|
||||||
|
Ported from gpu/self_hosted/app/services/translator.py for in-process use.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from transformers import MarianMTModel, MarianTokenizer, pipeline
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class MarianTranslatorService:
|
||||||
|
"""MarianMT text translation service for in-process use."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._pipeline = None
|
||||||
|
self._current_pair = None
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
def load(self, source_language: str = "en", target_language: str = "fr"):
|
||||||
|
"""Load the translation model for a specific language pair."""
|
||||||
|
model_name = self._resolve_model_name(source_language, target_language)
|
||||||
|
logger.info(
|
||||||
|
"Loading MarianMT model: %s (%s -> %s)",
|
||||||
|
model_name,
|
||||||
|
source_language,
|
||||||
|
target_language,
|
||||||
|
)
|
||||||
|
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
||||||
|
model = MarianMTModel.from_pretrained(model_name)
|
||||||
|
self._pipeline = pipeline("translation", model=model, tokenizer=tokenizer)
|
||||||
|
self._current_pair = (source_language.lower(), target_language.lower())
|
||||||
|
|
||||||
|
def _resolve_model_name(self, src: str, tgt: str) -> str:
|
||||||
|
"""Resolve language pair to MarianMT model name."""
|
||||||
|
pair = (src.lower(), tgt.lower())
|
||||||
|
mapping = {
|
||||||
|
("en", "fr"): "Helsinki-NLP/opus-mt-en-fr",
|
||||||
|
("fr", "en"): "Helsinki-NLP/opus-mt-fr-en",
|
||||||
|
("en", "es"): "Helsinki-NLP/opus-mt-en-es",
|
||||||
|
("es", "en"): "Helsinki-NLP/opus-mt-es-en",
|
||||||
|
("en", "de"): "Helsinki-NLP/opus-mt-en-de",
|
||||||
|
("de", "en"): "Helsinki-NLP/opus-mt-de-en",
|
||||||
|
}
|
||||||
|
return mapping.get(pair, "Helsinki-NLP/opus-mt-en-fr")
|
||||||
|
|
||||||
|
def translate(self, text: str, source_language: str, target_language: str) -> dict:
|
||||||
|
"""Translate text between languages.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: Text to translate.
|
||||||
|
source_language: Source language code (e.g. "en").
|
||||||
|
target_language: Target language code (e.g. "fr").
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict with "text" key containing {source_language: original, target_language: translated}.
|
||||||
|
"""
|
||||||
|
pair = (source_language.lower(), target_language.lower())
|
||||||
|
if self._pipeline is None or self._current_pair != pair:
|
||||||
|
self.load(source_language, target_language)
|
||||||
|
with self._lock:
|
||||||
|
results = self._pipeline(
|
||||||
|
text, src_lang=source_language, tgt_lang=target_language
|
||||||
|
)
|
||||||
|
translated = results[0]["translation_text"] if results else ""
|
||||||
|
return {"text": {source_language: text, target_language: translated}}
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton — shared across all MarianMT translator processors
|
||||||
|
translator_service = MarianTranslatorService()
|
||||||
133
server/reflector/processors/_pyannote_diarization_service.py
Normal file
133
server/reflector/processors/_pyannote_diarization_service.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
"""
|
||||||
|
Pyannote diarization service using pyannote.audio.
|
||||||
|
|
||||||
|
Singleton service that loads the pyannote speaker diarization model once
|
||||||
|
and reuses it across all pyannote diarization processor instances.
|
||||||
|
|
||||||
|
Ported from gpu/self_hosted/app/services/diarizer.py for in-process use.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import tarfile
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torchaudio
|
||||||
|
import yaml
|
||||||
|
from pyannote.audio import Pipeline
|
||||||
|
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
S3_BUNDLE_URL = "https://reflector-public.s3.us-east-1.amazonaws.com/pyannote-speaker-diarization-3.1.tar.gz"
|
||||||
|
BUNDLE_CACHE_DIR = Path.home() / ".cache" / "pyannote-bundle"
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_model(cache_dir: Path) -> str:
|
||||||
|
"""Download and extract S3 model bundle if not cached."""
|
||||||
|
model_dir = cache_dir / "pyannote-speaker-diarization-3.1"
|
||||||
|
config_path = model_dir / "config.yaml"
|
||||||
|
|
||||||
|
if config_path.exists():
|
||||||
|
logger.info("Using cached model bundle at %s", model_dir)
|
||||||
|
return str(model_dir)
|
||||||
|
|
||||||
|
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
tarball_path = cache_dir / "model.tar.gz"
|
||||||
|
|
||||||
|
logger.info("Downloading model bundle from %s", S3_BUNDLE_URL)
|
||||||
|
with urlopen(S3_BUNDLE_URL) as response, open(tarball_path, "wb") as f:
|
||||||
|
while chunk := response.read(8192):
|
||||||
|
f.write(chunk)
|
||||||
|
|
||||||
|
logger.info("Extracting model bundle")
|
||||||
|
with tarfile.open(tarball_path, "r:gz") as tar:
|
||||||
|
tar.extractall(path=cache_dir, filter="data")
|
||||||
|
tarball_path.unlink()
|
||||||
|
|
||||||
|
_patch_config(model_dir, cache_dir)
|
||||||
|
return str(model_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def _patch_config(model_dir: Path, cache_dir: Path) -> None:
|
||||||
|
"""Rewrite config.yaml to reference local pytorch_model.bin paths."""
|
||||||
|
config_path = model_dir / "config.yaml"
|
||||||
|
with open(config_path) as f:
|
||||||
|
config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
config["pipeline"]["params"]["segmentation"] = str(
|
||||||
|
cache_dir / "pyannote-segmentation-3.0" / "pytorch_model.bin"
|
||||||
|
)
|
||||||
|
config["pipeline"]["params"]["embedding"] = str(
|
||||||
|
cache_dir / "pyannote-wespeaker-voxceleb-resnet34-LM" / "pytorch_model.bin"
|
||||||
|
)
|
||||||
|
|
||||||
|
with open(config_path, "w") as f:
|
||||||
|
yaml.dump(config, f)
|
||||||
|
|
||||||
|
logger.info("Patched config.yaml with local model paths")
|
||||||
|
|
||||||
|
|
||||||
|
class PyannoteDiarizationService:
|
||||||
|
"""Pyannote speaker diarization service for in-process use."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._pipeline = None
|
||||||
|
self._device = "cpu"
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
def load(self):
|
||||||
|
self._device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
hf_token = settings.HF_TOKEN
|
||||||
|
|
||||||
|
if hf_token:
|
||||||
|
logger.info("Loading pyannote model from HuggingFace (HF_TOKEN set)")
|
||||||
|
self._pipeline = Pipeline.from_pretrained(
|
||||||
|
"pyannote/speaker-diarization-3.1",
|
||||||
|
use_auth_token=hf_token,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info("HF_TOKEN not set — loading model from S3 bundle")
|
||||||
|
model_path = _ensure_model(BUNDLE_CACHE_DIR)
|
||||||
|
config_path = Path(model_path) / "config.yaml"
|
||||||
|
self._pipeline = Pipeline.from_pretrained(str(config_path))
|
||||||
|
|
||||||
|
self._pipeline.to(torch.device(self._device))
|
||||||
|
|
||||||
|
def diarize_file(self, file_path: str, timestamp: float = 0.0) -> dict:
|
||||||
|
"""Run speaker diarization on an audio file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
file_path: Path to the audio file.
|
||||||
|
timestamp: Offset to add to all segment timestamps.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict with "diarization" key containing list of
|
||||||
|
{"start": float, "end": float, "speaker": int} segments.
|
||||||
|
"""
|
||||||
|
if self._pipeline is None:
|
||||||
|
self.load()
|
||||||
|
waveform, sample_rate = torchaudio.load(file_path)
|
||||||
|
with self._lock:
|
||||||
|
diarization = self._pipeline(
|
||||||
|
{"waveform": waveform, "sample_rate": sample_rate}
|
||||||
|
)
|
||||||
|
segments = []
|
||||||
|
for diarization_segment, _, speaker in diarization.itertracks(yield_label=True):
|
||||||
|
segments.append(
|
||||||
|
{
|
||||||
|
"start": round(timestamp + diarization_segment.start, 3),
|
||||||
|
"end": round(timestamp + diarization_segment.end, 3),
|
||||||
|
"speaker": int(speaker[-2:])
|
||||||
|
if speaker and speaker[-2:].isdigit()
|
||||||
|
else 0,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return {"diarization": segments}
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton — shared across all pyannote diarization processors
|
||||||
|
diarization_service = PyannoteDiarizationService()
|
||||||
37
server/reflector/processors/audio_diarization_pyannote.py
Normal file
37
server/reflector/processors/audio_diarization_pyannote.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
"""
|
||||||
|
Pyannote audio diarization processor using pyannote.audio in-process.
|
||||||
|
|
||||||
|
Downloads audio from URL, runs pyannote diarization locally,
|
||||||
|
and returns speaker segments. No HTTP backend needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
from reflector.processors._audio_download import download_audio_to_temp
|
||||||
|
from reflector.processors._pyannote_diarization_service import diarization_service
|
||||||
|
from reflector.processors.audio_diarization import AudioDiarizationProcessor
|
||||||
|
from reflector.processors.audio_diarization_auto import AudioDiarizationAutoProcessor
|
||||||
|
from reflector.processors.types import AudioDiarizationInput
|
||||||
|
|
||||||
|
|
||||||
|
class AudioDiarizationPyannoteProcessor(AudioDiarizationProcessor):
|
||||||
|
INPUT_TYPE = AudioDiarizationInput
|
||||||
|
|
||||||
|
async def _diarize(self, data: AudioDiarizationInput):
|
||||||
|
"""Run pyannote diarization on audio from URL."""
|
||||||
|
tmp_path = await download_audio_to_temp(data.audio_url)
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
result = await loop.run_in_executor(
|
||||||
|
None, diarization_service.diarize_file, str(tmp_path)
|
||||||
|
)
|
||||||
|
return result["diarization"]
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
AudioDiarizationAutoProcessor.register("pyannote", AudioDiarizationPyannoteProcessor)
|
||||||
23
server/reflector/processors/audio_padding.py
Normal file
23
server/reflector/processors/audio_padding.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
"""
|
||||||
|
Base class for audio padding processors.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class PaddingResponse(BaseModel):
|
||||||
|
size: int
|
||||||
|
cancelled: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class AudioPaddingProcessor:
|
||||||
|
"""Base class for audio padding processors."""
|
||||||
|
|
||||||
|
async def pad_track(
|
||||||
|
self,
|
||||||
|
track_url: str,
|
||||||
|
output_url: str,
|
||||||
|
start_time_seconds: float,
|
||||||
|
track_index: int,
|
||||||
|
) -> PaddingResponse:
|
||||||
|
raise NotImplementedError
|
||||||
@@ -1,9 +1,10 @@
|
|||||||
import importlib
|
import importlib
|
||||||
|
|
||||||
|
from reflector.processors.audio_padding import AudioPaddingProcessor
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
|
||||||
class AudioPaddingAutoProcessor:
|
class AudioPaddingAutoProcessor(AudioPaddingProcessor):
|
||||||
_registry = {}
|
_registry = {}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -6,19 +6,14 @@ import asyncio
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from reflector.hatchet.constants import TIMEOUT_AUDIO
|
from reflector.hatchet.constants import TIMEOUT_AUDIO_HTTP
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
|
from reflector.processors.audio_padding import AudioPaddingProcessor, PaddingResponse
|
||||||
from reflector.processors.audio_padding_auto import AudioPaddingAutoProcessor
|
from reflector.processors.audio_padding_auto import AudioPaddingAutoProcessor
|
||||||
|
|
||||||
|
|
||||||
class PaddingResponse(BaseModel):
|
class AudioPaddingModalProcessor(AudioPaddingProcessor):
|
||||||
size: int
|
|
||||||
cancelled: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
class AudioPaddingModalProcessor:
|
|
||||||
"""Audio padding processor using Modal.com CPU backend via HTTP."""
|
"""Audio padding processor using Modal.com CPU backend via HTTP."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -65,7 +60,7 @@ class AudioPaddingModalProcessor:
|
|||||||
headers["Authorization"] = f"Bearer {self.modal_api_key}"
|
headers["Authorization"] = f"Bearer {self.modal_api_key}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=TIMEOUT_AUDIO) as client:
|
async with httpx.AsyncClient(timeout=TIMEOUT_AUDIO_HTTP) as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
url,
|
url,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Local audio padding processor using PyAV.
|
PyAV audio padding processor.
|
||||||
|
|
||||||
Pads audio tracks with silence directly in-process (no HTTP).
|
Pads audio tracks with silence directly in-process (no HTTP).
|
||||||
Reuses the shared PyAV utilities from reflector.utils.audio_padding.
|
Reuses the shared PyAV utilities from reflector.utils.audio_padding.
|
||||||
@@ -12,15 +12,15 @@ import tempfile
|
|||||||
import av
|
import av
|
||||||
|
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
|
from reflector.processors.audio_padding import AudioPaddingProcessor, PaddingResponse
|
||||||
from reflector.processors.audio_padding_auto import AudioPaddingAutoProcessor
|
from reflector.processors.audio_padding_auto import AudioPaddingAutoProcessor
|
||||||
from reflector.processors.audio_padding_modal import PaddingResponse
|
|
||||||
from reflector.utils.audio_padding import apply_audio_padding_to_file
|
from reflector.utils.audio_padding import apply_audio_padding_to_file
|
||||||
|
|
||||||
S3_TIMEOUT = 60
|
S3_TIMEOUT = 60
|
||||||
|
|
||||||
|
|
||||||
class AudioPaddingLocalProcessor:
|
class AudioPaddingPyavProcessor(AudioPaddingProcessor):
|
||||||
"""Audio padding processor using local PyAV (no HTTP backend)."""
|
"""Audio padding processor using PyAV (no HTTP backend)."""
|
||||||
|
|
||||||
async def pad_track(
|
async def pad_track(
|
||||||
self,
|
self,
|
||||||
@@ -29,7 +29,7 @@ class AudioPaddingLocalProcessor:
|
|||||||
start_time_seconds: float,
|
start_time_seconds: float,
|
||||||
track_index: int,
|
track_index: int,
|
||||||
) -> PaddingResponse:
|
) -> PaddingResponse:
|
||||||
"""Pad audio track with silence locally via PyAV.
|
"""Pad audio track with silence via PyAV.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
track_url: Presigned GET URL for source audio track
|
track_url: Presigned GET URL for source audio track
|
||||||
@@ -130,4 +130,4 @@ class AudioPaddingLocalProcessor:
|
|||||||
log.warning("Failed to cleanup temp directory", error=str(e))
|
log.warning("Failed to cleanup temp directory", error=str(e))
|
||||||
|
|
||||||
|
|
||||||
AudioPaddingAutoProcessor.register("local", AudioPaddingLocalProcessor)
|
AudioPaddingAutoProcessor.register("pyav", AudioPaddingPyavProcessor)
|
||||||
@@ -3,13 +3,17 @@ from faster_whisper import WhisperModel
|
|||||||
from reflector.processors.audio_transcript import AudioTranscriptProcessor
|
from reflector.processors.audio_transcript import AudioTranscriptProcessor
|
||||||
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
|
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
|
||||||
from reflector.processors.types import AudioFile, Transcript, Word
|
from reflector.processors.types import AudioFile, Transcript, Word
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
|
||||||
class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):
|
class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.model = WhisperModel(
|
self.model = WhisperModel(
|
||||||
"tiny", device="cpu", compute_type="float32", num_workers=12
|
settings.WHISPER_CHUNK_MODEL,
|
||||||
|
device="cpu",
|
||||||
|
compute_type="float32",
|
||||||
|
num_workers=12,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _transcript(self, data: AudioFile):
|
async def _transcript(self, data: AudioFile):
|
||||||
|
|||||||
39
server/reflector/processors/file_diarization_pyannote.py
Normal file
39
server/reflector/processors/file_diarization_pyannote.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""
|
||||||
|
Pyannote file diarization processor using pyannote.audio in-process.
|
||||||
|
|
||||||
|
Downloads audio from URL, runs pyannote diarization locally,
|
||||||
|
and returns speaker segments. No HTTP backend needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
from reflector.processors._audio_download import download_audio_to_temp
|
||||||
|
from reflector.processors._pyannote_diarization_service import diarization_service
|
||||||
|
from reflector.processors.file_diarization import (
|
||||||
|
FileDiarizationInput,
|
||||||
|
FileDiarizationOutput,
|
||||||
|
FileDiarizationProcessor,
|
||||||
|
)
|
||||||
|
from reflector.processors.file_diarization_auto import FileDiarizationAutoProcessor
|
||||||
|
|
||||||
|
|
||||||
|
class FileDiarizationPyannoteProcessor(FileDiarizationProcessor):
|
||||||
|
async def _diarize(self, data: FileDiarizationInput):
|
||||||
|
"""Run pyannote diarization on file from URL."""
|
||||||
|
self.logger.info(f"Starting pyannote diarization from {data.audio_url}")
|
||||||
|
tmp_path = await download_audio_to_temp(data.audio_url)
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
result = await loop.run_in_executor(
|
||||||
|
None, diarization_service.diarize_file, str(tmp_path)
|
||||||
|
)
|
||||||
|
return FileDiarizationOutput(diarization=result["diarization"])
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
FileDiarizationAutoProcessor.register("pyannote", FileDiarizationPyannoteProcessor)
|
||||||
275
server/reflector/processors/file_transcript_whisper.py
Normal file
275
server/reflector/processors/file_transcript_whisper.py
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
"""
|
||||||
|
Local file transcription processor using faster-whisper with Silero VAD pipeline.
|
||||||
|
|
||||||
|
Downloads audio from URL, segments it using Silero VAD, transcribes each
|
||||||
|
segment with faster-whisper, and merges results. No HTTP backend needed.
|
||||||
|
|
||||||
|
VAD pipeline ported from gpu/self_hosted/app/services/transcriber.py.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import threading
|
||||||
|
from typing import Generator
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from silero_vad import VADIterator, load_silero_vad
|
||||||
|
|
||||||
|
from reflector.processors._audio_download import download_audio_to_temp
|
||||||
|
from reflector.processors.file_transcript import (
|
||||||
|
FileTranscriptInput,
|
||||||
|
FileTranscriptProcessor,
|
||||||
|
)
|
||||||
|
from reflector.processors.file_transcript_auto import FileTranscriptAutoProcessor
|
||||||
|
from reflector.processors.types import Transcript, Word
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
SAMPLE_RATE = 16000
|
||||||
|
|
||||||
|
VAD_CONFIG = {
|
||||||
|
"batch_max_duration": 30.0,
|
||||||
|
"silence_padding": 0.5,
|
||||||
|
"window_size": 512,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class FileTranscriptWhisperProcessor(FileTranscriptProcessor):
|
||||||
|
"""Transcribe complete audio files using local faster-whisper with VAD."""
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
self._model = None
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
def _ensure_model(self):
|
||||||
|
"""Lazy-load the whisper model on first use."""
|
||||||
|
if self._model is not None:
|
||||||
|
return
|
||||||
|
|
||||||
|
import faster_whisper
|
||||||
|
import torch
|
||||||
|
|
||||||
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
compute_type = "float16" if device == "cuda" else "int8"
|
||||||
|
model_name = settings.WHISPER_FILE_MODEL
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
"Loading whisper model",
|
||||||
|
model=model_name,
|
||||||
|
device=device,
|
||||||
|
compute_type=compute_type,
|
||||||
|
)
|
||||||
|
self._model = faster_whisper.WhisperModel(
|
||||||
|
model_name,
|
||||||
|
device=device,
|
||||||
|
compute_type=compute_type,
|
||||||
|
num_workers=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _transcript(self, data: FileTranscriptInput):
|
||||||
|
"""Download file, run VAD segmentation, transcribe each segment."""
|
||||||
|
tmp_path = await download_audio_to_temp(data.audio_url)
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
result = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
self._transcribe_file_blocking,
|
||||||
|
str(tmp_path),
|
||||||
|
data.language,
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _transcribe_file_blocking(self, file_path: str, language: str) -> Transcript:
|
||||||
|
"""Blocking transcription with VAD pipeline."""
|
||||||
|
self._ensure_model()
|
||||||
|
|
||||||
|
audio_array = _load_audio_via_ffmpeg(file_path, SAMPLE_RATE)
|
||||||
|
|
||||||
|
# VAD segmentation → batch merging
|
||||||
|
merged_batches: list[tuple[float, float]] = []
|
||||||
|
batch_start = None
|
||||||
|
batch_end = None
|
||||||
|
max_duration = VAD_CONFIG["batch_max_duration"]
|
||||||
|
|
||||||
|
for seg_start, seg_end in _vad_segments(audio_array):
|
||||||
|
if batch_start is None:
|
||||||
|
batch_start, batch_end = seg_start, seg_end
|
||||||
|
continue
|
||||||
|
if seg_end - batch_start <= max_duration:
|
||||||
|
batch_end = seg_end
|
||||||
|
else:
|
||||||
|
merged_batches.append((batch_start, batch_end))
|
||||||
|
batch_start, batch_end = seg_start, seg_end
|
||||||
|
|
||||||
|
if batch_start is not None and batch_end is not None:
|
||||||
|
merged_batches.append((batch_start, batch_end))
|
||||||
|
|
||||||
|
# If no speech detected, try transcribing the whole file
|
||||||
|
if not merged_batches:
|
||||||
|
return self._transcribe_whole_file(file_path, language)
|
||||||
|
|
||||||
|
# Transcribe each batch
|
||||||
|
all_words = []
|
||||||
|
for start_time, end_time in merged_batches:
|
||||||
|
s_idx = int(start_time * SAMPLE_RATE)
|
||||||
|
e_idx = int(end_time * SAMPLE_RATE)
|
||||||
|
segment = audio_array[s_idx:e_idx]
|
||||||
|
segment = _pad_audio(segment, SAMPLE_RATE)
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
segments, _ = self._model.transcribe(
|
||||||
|
segment,
|
||||||
|
language=language,
|
||||||
|
beam_size=5,
|
||||||
|
word_timestamps=True,
|
||||||
|
vad_filter=True,
|
||||||
|
vad_parameters={"min_silence_duration_ms": 500},
|
||||||
|
)
|
||||||
|
segments = list(segments)
|
||||||
|
|
||||||
|
for seg in segments:
|
||||||
|
for w in seg.words:
|
||||||
|
all_words.append(
|
||||||
|
{
|
||||||
|
"word": w.word,
|
||||||
|
"start": round(float(w.start) + start_time, 2),
|
||||||
|
"end": round(float(w.end) + start_time, 2),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
all_words = _enforce_word_timing_constraints(all_words)
|
||||||
|
|
||||||
|
words = [
|
||||||
|
Word(text=w["word"], start=w["start"], end=w["end"]) for w in all_words
|
||||||
|
]
|
||||||
|
words.sort(key=lambda w: w.start)
|
||||||
|
return Transcript(words=words)
|
||||||
|
|
||||||
|
def _transcribe_whole_file(self, file_path: str, language: str) -> Transcript:
|
||||||
|
"""Fallback: transcribe entire file without VAD segmentation."""
|
||||||
|
with self._lock:
|
||||||
|
segments, _ = self._model.transcribe(
|
||||||
|
file_path,
|
||||||
|
language=language,
|
||||||
|
beam_size=5,
|
||||||
|
word_timestamps=True,
|
||||||
|
vad_filter=True,
|
||||||
|
vad_parameters={"min_silence_duration_ms": 500},
|
||||||
|
)
|
||||||
|
segments = list(segments)
|
||||||
|
|
||||||
|
words = []
|
||||||
|
for seg in segments:
|
||||||
|
for w in seg.words:
|
||||||
|
words.append(
|
||||||
|
Word(
|
||||||
|
text=w.word,
|
||||||
|
start=round(float(w.start), 2),
|
||||||
|
end=round(float(w.end), 2),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return Transcript(words=words)
|
||||||
|
|
||||||
|
|
||||||
|
# --- VAD helpers (ported from gpu/self_hosted/app/services/transcriber.py) ---
|
||||||
|
# IMPORTANT: This VAD segment logic is duplicated for deployment isolation.
|
||||||
|
# If you modify this, consider updating the GPU service copy as well:
|
||||||
|
# - gpu/self_hosted/app/services/transcriber.py
|
||||||
|
# - gpu/modal_deployments/reflector_transcriber.py
|
||||||
|
# - gpu/modal_deployments/reflector_transcriber_parakeet.py
|
||||||
|
|
||||||
|
|
||||||
|
def _load_audio_via_ffmpeg(
|
||||||
|
input_path: str, sample_rate: int = SAMPLE_RATE
|
||||||
|
) -> np.ndarray:
|
||||||
|
"""Load audio file via ffmpeg, converting to mono float32 at target sample rate."""
|
||||||
|
ffmpeg_bin = shutil.which("ffmpeg") or "ffmpeg"
|
||||||
|
cmd = [
|
||||||
|
ffmpeg_bin,
|
||||||
|
"-nostdin",
|
||||||
|
"-threads",
|
||||||
|
"1",
|
||||||
|
"-i",
|
||||||
|
input_path,
|
||||||
|
"-f",
|
||||||
|
"f32le",
|
||||||
|
"-acodec",
|
||||||
|
"pcm_f32le",
|
||||||
|
"-ac",
|
||||||
|
"1",
|
||||||
|
"-ar",
|
||||||
|
str(sample_rate),
|
||||||
|
"pipe:1",
|
||||||
|
]
|
||||||
|
proc = subprocess.run(
|
||||||
|
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
|
||||||
|
)
|
||||||
|
return np.frombuffer(proc.stdout, dtype=np.float32)
|
||||||
|
|
||||||
|
|
||||||
|
def _vad_segments(
|
||||||
|
audio_array: np.ndarray,
|
||||||
|
sample_rate: int = SAMPLE_RATE,
|
||||||
|
window_size: int = VAD_CONFIG["window_size"],
|
||||||
|
) -> Generator[tuple[float, float], None, None]:
|
||||||
|
"""Detect speech segments using Silero VAD."""
|
||||||
|
vad_model = load_silero_vad(onnx=False)
|
||||||
|
iterator = VADIterator(vad_model, sampling_rate=sample_rate)
|
||||||
|
start = None
|
||||||
|
|
||||||
|
for i in range(0, len(audio_array), window_size):
|
||||||
|
chunk = audio_array[i : i + window_size]
|
||||||
|
if len(chunk) < window_size:
|
||||||
|
chunk = np.pad(chunk, (0, window_size - len(chunk)), mode="constant")
|
||||||
|
speech = iterator(chunk)
|
||||||
|
if not speech:
|
||||||
|
continue
|
||||||
|
if "start" in speech:
|
||||||
|
start = speech["start"]
|
||||||
|
continue
|
||||||
|
if "end" in speech and start is not None:
|
||||||
|
end = speech["end"]
|
||||||
|
yield (start / float(SAMPLE_RATE), end / float(SAMPLE_RATE))
|
||||||
|
start = None
|
||||||
|
|
||||||
|
# Handle case where audio ends while speech is still active
|
||||||
|
if start is not None:
|
||||||
|
audio_duration = len(audio_array) / float(sample_rate)
|
||||||
|
yield (start / float(SAMPLE_RATE), audio_duration)
|
||||||
|
|
||||||
|
iterator.reset_states()
|
||||||
|
|
||||||
|
|
||||||
|
def _pad_audio(audio_array: np.ndarray, sample_rate: int = SAMPLE_RATE) -> np.ndarray:
|
||||||
|
"""Pad short audio with silence for VAD compatibility."""
|
||||||
|
audio_duration = len(audio_array) / sample_rate
|
||||||
|
if audio_duration < VAD_CONFIG["silence_padding"]:
|
||||||
|
silence_samples = int(sample_rate * VAD_CONFIG["silence_padding"])
|
||||||
|
silence = np.zeros(silence_samples, dtype=np.float32)
|
||||||
|
return np.concatenate([audio_array, silence])
|
||||||
|
return audio_array
|
||||||
|
|
||||||
|
|
||||||
|
def _enforce_word_timing_constraints(words: list[dict]) -> list[dict]:
|
||||||
|
"""Ensure no word end time exceeds the next word's start time."""
|
||||||
|
if len(words) <= 1:
|
||||||
|
return words
|
||||||
|
enforced: list[dict] = []
|
||||||
|
for i, word in enumerate(words):
|
||||||
|
current = dict(word)
|
||||||
|
if i < len(words) - 1:
|
||||||
|
next_start = words[i + 1]["start"]
|
||||||
|
if current["end"] > next_start:
|
||||||
|
current["end"] = next_start
|
||||||
|
enforced.append(current)
|
||||||
|
return enforced
|
||||||
|
|
||||||
|
|
||||||
|
FileTranscriptAutoProcessor.register("whisper", FileTranscriptWhisperProcessor)
|
||||||
50
server/reflector/processors/transcript_translator_marian.py
Normal file
50
server/reflector/processors/transcript_translator_marian.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
"""
|
||||||
|
MarianMT transcript translator processor using HuggingFace MarianMT in-process.
|
||||||
|
|
||||||
|
Translates transcript text using HuggingFace MarianMT models
|
||||||
|
locally. No HTTP backend needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from reflector.processors._marian_translator_service import translator_service
|
||||||
|
from reflector.processors.transcript_translator import TranscriptTranslatorProcessor
|
||||||
|
from reflector.processors.transcript_translator_auto import (
|
||||||
|
TranscriptTranslatorAutoProcessor,
|
||||||
|
)
|
||||||
|
from reflector.processors.types import TranslationLanguages
|
||||||
|
|
||||||
|
|
||||||
|
class TranscriptTranslatorMarianProcessor(TranscriptTranslatorProcessor):
|
||||||
|
"""Translate transcript text using MarianMT models."""
|
||||||
|
|
||||||
|
async def _translate(self, text: str) -> str | None:
|
||||||
|
source_language = self.get_pref("audio:source_language", "en")
|
||||||
|
target_language = self.get_pref("audio:target_language", "en")
|
||||||
|
|
||||||
|
languages = TranslationLanguages()
|
||||||
|
assert languages.is_supported(target_language)
|
||||||
|
|
||||||
|
self.logger.debug(f"MarianMT translate {text=}")
|
||||||
|
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
result = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
translator_service.translate,
|
||||||
|
text,
|
||||||
|
source_language,
|
||||||
|
target_language,
|
||||||
|
)
|
||||||
|
|
||||||
|
if target_language in result["text"]:
|
||||||
|
translation = result["text"][target_language]
|
||||||
|
else:
|
||||||
|
translation = None
|
||||||
|
|
||||||
|
self.logger.debug(f"Translation result: {text=}, {translation=}")
|
||||||
|
return translation
|
||||||
|
|
||||||
|
|
||||||
|
TranscriptTranslatorAutoProcessor.register(
|
||||||
|
"marian", TranscriptTranslatorMarianProcessor
|
||||||
|
)
|
||||||
@@ -40,14 +40,24 @@ class Settings(BaseSettings):
|
|||||||
# backends: silero, frames
|
# backends: silero, frames
|
||||||
AUDIO_CHUNKER_BACKEND: str = "frames"
|
AUDIO_CHUNKER_BACKEND: str = "frames"
|
||||||
|
|
||||||
|
# HuggingFace token for gated models (pyannote diarization in --cpu mode)
|
||||||
|
HF_TOKEN: str | None = None
|
||||||
|
|
||||||
# Audio Transcription
|
# Audio Transcription
|
||||||
# backends:
|
# backends:
|
||||||
# - whisper: in-process model loading (no HTTP, runs in same process)
|
# - whisper: in-process model loading (no HTTP, runs in same process)
|
||||||
# - modal: HTTP API client (works with Modal.com OR self-hosted gpu/self_hosted/)
|
# - modal: HTTP API client (works with Modal.com OR self-hosted gpu/self_hosted/)
|
||||||
TRANSCRIPT_BACKEND: str = "whisper"
|
TRANSCRIPT_BACKEND: str = "whisper"
|
||||||
|
|
||||||
|
# Whisper model sizes for local transcription
|
||||||
|
# Options: "tiny", "base", "small", "medium", "large-v2"
|
||||||
|
WHISPER_CHUNK_MODEL: str = "tiny"
|
||||||
|
WHISPER_FILE_MODEL: str = "tiny"
|
||||||
TRANSCRIPT_URL: str | None = None
|
TRANSCRIPT_URL: str | None = None
|
||||||
TRANSCRIPT_TIMEOUT: int = 90
|
TRANSCRIPT_TIMEOUT: int = 90
|
||||||
TRANSCRIPT_FILE_TIMEOUT: int = 600
|
TRANSCRIPT_FILE_TIMEOUT: int = (
|
||||||
|
540 # Below Hatchet TIMEOUT_HEAVY (600) to avoid timeout race
|
||||||
|
)
|
||||||
|
|
||||||
# Audio Transcription: modal backend
|
# Audio Transcription: modal backend
|
||||||
TRANSCRIPT_MODAL_API_KEY: str | None = None
|
TRANSCRIPT_MODAL_API_KEY: str | None = None
|
||||||
@@ -100,7 +110,7 @@ class Settings(BaseSettings):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Diarization
|
# Diarization
|
||||||
# backend: modal — HTTP API client (works with Modal.com OR self-hosted gpu/self_hosted/)
|
# backends: modal — HTTP API client, pyannote — in-process pyannote.audio
|
||||||
DIARIZATION_ENABLED: bool = True
|
DIARIZATION_ENABLED: bool = True
|
||||||
DIARIZATION_BACKEND: str = "modal"
|
DIARIZATION_BACKEND: str = "modal"
|
||||||
DIARIZATION_URL: str | None = None
|
DIARIZATION_URL: str | None = None
|
||||||
@@ -111,9 +121,9 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
# Audio Padding
|
# Audio Padding
|
||||||
# backends:
|
# backends:
|
||||||
# - local: in-process PyAV padding (no HTTP, runs in same process)
|
# - pyav: in-process PyAV padding (no HTTP, runs in same process)
|
||||||
# - modal: HTTP API client (works with Modal.com OR self-hosted gpu/self_hosted/)
|
# - modal: HTTP API client (works with Modal.com OR self-hosted gpu/self_hosted/)
|
||||||
PADDING_BACKEND: str = "local"
|
PADDING_BACKEND: str = "pyav"
|
||||||
PADDING_URL: str | None = None
|
PADDING_URL: str | None = None
|
||||||
PADDING_MODAL_API_KEY: str | None = None
|
PADDING_MODAL_API_KEY: str | None = None
|
||||||
|
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ def retry(fn):
|
|||||||
"retry_httpx_status_stop",
|
"retry_httpx_status_stop",
|
||||||
(
|
(
|
||||||
401, # auth issue
|
401, # auth issue
|
||||||
|
402, # payment required / no credits — needs human action
|
||||||
404, # not found
|
404, # not found
|
||||||
413, # payload too large
|
413, # payload too large
|
||||||
418, # teapot
|
418, # teapot
|
||||||
@@ -58,8 +59,9 @@ def retry(fn):
|
|||||||
result = await fn(*args, **kwargs)
|
result = await fn(*args, **kwargs)
|
||||||
if isinstance(result, Response):
|
if isinstance(result, Response):
|
||||||
result.raise_for_status()
|
result.raise_for_status()
|
||||||
if result:
|
# Return any result including falsy (e.g. "" from get_response);
|
||||||
return result
|
# only retry on exception, not on empty string.
|
||||||
|
return result
|
||||||
except HTTPStatusError as e:
|
except HTTPStatusError as e:
|
||||||
retry_logger.exception(e)
|
retry_logger.exception(e)
|
||||||
status_code = e.response.status_code
|
status_code = e.response.status_code
|
||||||
|
|||||||
@@ -89,14 +89,16 @@ class StartRecordingRequest(BaseModel):
|
|||||||
|
|
||||||
@router.post("/meetings/{meeting_id}/recordings/start")
|
@router.post("/meetings/{meeting_id}/recordings/start")
|
||||||
async def start_recording(
|
async def start_recording(
|
||||||
meeting_id: NonEmptyString, body: StartRecordingRequest
|
meeting_id: NonEmptyString,
|
||||||
|
body: StartRecordingRequest,
|
||||||
|
user: Annotated[
|
||||||
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
|
],
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Start cloud or raw-tracks recording via Daily.co REST API.
|
"""Start cloud or raw-tracks recording via Daily.co REST API.
|
||||||
|
|
||||||
Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation of allowing only 1 recording at a time.
|
Both cloud and raw-tracks are started via REST API to bypass enable_recording limitation of allowing only 1 recording at a time.
|
||||||
Uses different instanceIds for cloud vs raw-tracks (same won't work)
|
Uses different instanceIds for cloud vs raw-tracks (same won't work)
|
||||||
|
|
||||||
Note: No authentication required - anonymous users supported. TODO this is a DOS vector
|
|
||||||
"""
|
"""
|
||||||
meeting = await meetings_controller.get_by_id(meeting_id)
|
meeting = await meetings_controller.get_by_id(meeting_id)
|
||||||
if not meeting:
|
if not meeting:
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ from reflector.db.rooms import rooms_controller
|
|||||||
from reflector.redis_cache import RedisAsyncLock
|
from reflector.redis_cache import RedisAsyncLock
|
||||||
from reflector.schemas.platform import Platform
|
from reflector.schemas.platform import Platform
|
||||||
from reflector.services.ics_sync import ics_sync_service
|
from reflector.services.ics_sync import ics_sync_service
|
||||||
from reflector.settings import settings
|
|
||||||
from reflector.utils.url import add_query_param
|
from reflector.utils.url import add_query_param
|
||||||
from reflector.video_platforms.factory import create_platform_client
|
from reflector.video_platforms.factory import create_platform_client
|
||||||
from reflector.worker.webhook import test_webhook
|
from reflector.worker.webhook import test_webhook
|
||||||
@@ -178,11 +177,10 @@ router = APIRouter()
|
|||||||
|
|
||||||
@router.get("/rooms", response_model=Page[RoomDetails])
|
@router.get("/rooms", response_model=Page[RoomDetails])
|
||||||
async def rooms_list(
|
async def rooms_list(
|
||||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
user: Annotated[
|
||||||
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
|
],
|
||||||
) -> list[RoomDetails]:
|
) -> list[RoomDetails]:
|
||||||
if not user and not settings.PUBLIC_MODE:
|
|
||||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
|
||||||
|
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
|
|
||||||
paginated = await apaginate(
|
paginated = await apaginate(
|
||||||
|
|||||||
@@ -263,16 +263,15 @@ class SearchResponse(BaseModel):
|
|||||||
|
|
||||||
@router.get("/transcripts", response_model=Page[GetTranscriptMinimal])
|
@router.get("/transcripts", response_model=Page[GetTranscriptMinimal])
|
||||||
async def transcripts_list(
|
async def transcripts_list(
|
||||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
user: Annotated[
|
||||||
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
|
],
|
||||||
source_kind: SourceKind | None = None,
|
source_kind: SourceKind | None = None,
|
||||||
room_id: str | None = None,
|
room_id: str | None = None,
|
||||||
search_term: str | None = None,
|
search_term: str | None = None,
|
||||||
change_seq_from: int | None = None,
|
change_seq_from: int | None = None,
|
||||||
sort_by: Literal["created_at", "change_seq"] | None = None,
|
sort_by: Literal["created_at", "change_seq"] | None = None,
|
||||||
):
|
):
|
||||||
if not user and not settings.PUBLIC_MODE:
|
|
||||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
|
||||||
|
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
|
|
||||||
# Default behavior preserved: sort_by=None → "-created_at"
|
# Default behavior preserved: sort_by=None → "-created_at"
|
||||||
@@ -307,13 +306,10 @@ async def transcripts_search(
|
|||||||
from_datetime: SearchFromDatetimeParam = None,
|
from_datetime: SearchFromDatetimeParam = None,
|
||||||
to_datetime: SearchToDatetimeParam = None,
|
to_datetime: SearchToDatetimeParam = None,
|
||||||
user: Annotated[
|
user: Annotated[
|
||||||
Optional[auth.UserInfo], Depends(auth.current_user_optional)
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
] = None,
|
] = None,
|
||||||
):
|
):
|
||||||
"""Full-text search across transcript titles and content."""
|
"""Full-text search across transcript titles and content."""
|
||||||
if not user and not settings.PUBLIC_MODE:
|
|
||||||
raise HTTPException(status_code=401, detail="Not authenticated")
|
|
||||||
|
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
|
|
||||||
if from_datetime and to_datetime and from_datetime > to_datetime:
|
if from_datetime and to_datetime and from_datetime > to_datetime:
|
||||||
@@ -346,7 +342,9 @@ async def transcripts_search(
|
|||||||
@router.post("/transcripts", response_model=GetTranscriptWithParticipants)
|
@router.post("/transcripts", response_model=GetTranscriptWithParticipants)
|
||||||
async def transcripts_create(
|
async def transcripts_create(
|
||||||
info: CreateTranscript,
|
info: CreateTranscript,
|
||||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
user: Annotated[
|
||||||
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
|
],
|
||||||
):
|
):
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
transcript = await transcripts_controller.add(
|
transcript = await transcripts_controller.add(
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
|
|||||||
import reflector.auth as auth
|
import reflector.auth as auth
|
||||||
from reflector.db.transcripts import AudioWaveform, transcripts_controller
|
from reflector.db.transcripts import AudioWaveform, transcripts_controller
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from reflector.views.transcripts import ALGORITHM
|
|
||||||
|
|
||||||
from ._range_requests_response import range_requests_response
|
from ._range_requests_response import range_requests_response
|
||||||
|
|
||||||
@@ -36,16 +35,23 @@ async def transcript_get_audio_mp3(
|
|||||||
):
|
):
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
if not user_id and token:
|
if not user_id and token:
|
||||||
unauthorized_exception = HTTPException(
|
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
|
||||||
detail="Invalid or expired token",
|
|
||||||
headers={"WWW-Authenticate": "Bearer"},
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[ALGORITHM])
|
token_user = await auth.verify_raw_token(token)
|
||||||
user_id: str = payload.get("sub")
|
except Exception:
|
||||||
except jwt.PyJWTError:
|
token_user = None
|
||||||
raise unauthorized_exception
|
# Fallback: try as internal HS256 token (created by _generate_local_audio_link)
|
||||||
|
if not token_user:
|
||||||
|
try:
|
||||||
|
payload = jwt.decode(token, settings.SECRET_KEY, algorithms=["HS256"])
|
||||||
|
user_id = payload.get("sub")
|
||||||
|
except jwt.PyJWTError:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||||
|
detail="Invalid or expired token",
|
||||||
|
headers={"WWW-Authenticate": "Bearer"},
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
user_id = token_user["sub"]
|
||||||
|
|
||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
|
|||||||
@@ -62,8 +62,7 @@ async def transcript_add_participant(
|
|||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||||
raise HTTPException(status_code=403, detail="Not authorized")
|
|
||||||
|
|
||||||
# ensure the speaker is unique
|
# ensure the speaker is unique
|
||||||
if participant.speaker is not None and transcript.participants is not None:
|
if participant.speaker is not None and transcript.participants is not None:
|
||||||
@@ -109,8 +108,7 @@ async def transcript_update_participant(
|
|||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||||
raise HTTPException(status_code=403, detail="Not authorized")
|
|
||||||
|
|
||||||
# ensure the speaker is unique
|
# ensure the speaker is unique
|
||||||
for p in transcript.participants:
|
for p in transcript.participants:
|
||||||
@@ -148,7 +146,6 @@ async def transcript_delete_participant(
|
|||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||||
raise HTTPException(status_code=403, detail="Not authorized")
|
|
||||||
await transcripts_controller.delete_participant(transcript, participant_id)
|
await transcripts_controller.delete_participant(transcript, participant_id)
|
||||||
return DeletionStatus(status="ok")
|
return DeletionStatus(status="ok")
|
||||||
|
|||||||
@@ -26,7 +26,9 @@ class ProcessStatus(BaseModel):
|
|||||||
@router.post("/transcripts/{transcript_id}/process")
|
@router.post("/transcripts/{transcript_id}/process")
|
||||||
async def transcript_process(
|
async def transcript_process(
|
||||||
transcript_id: str,
|
transcript_id: str,
|
||||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
user: Annotated[
|
||||||
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
|
],
|
||||||
) -> ProcessStatus:
|
) -> ProcessStatus:
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
@@ -50,5 +52,8 @@ async def transcript_process(
|
|||||||
if isinstance(config, ProcessError):
|
if isinstance(config, ProcessError):
|
||||||
raise HTTPException(status_code=500, detail=config.detail)
|
raise HTTPException(status_code=500, detail=config.detail)
|
||||||
else:
|
else:
|
||||||
await dispatch_transcript_processing(config)
|
# When transcript is in error state, force a new workflow instead of replaying
|
||||||
|
# (replay would re-run from failure point with same conditions and likely fail again)
|
||||||
|
force = transcript.status == "error"
|
||||||
|
await dispatch_transcript_processing(config, force=force)
|
||||||
return ProcessStatus(status="ok")
|
return ProcessStatus(status="ok")
|
||||||
|
|||||||
@@ -41,8 +41,7 @@ async def transcript_assign_speaker(
|
|||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||||
raise HTTPException(status_code=403, detail="Not authorized")
|
|
||||||
|
|
||||||
if not transcript:
|
if not transcript:
|
||||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||||
@@ -121,8 +120,7 @@ async def transcript_merge_speaker(
|
|||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
transcript_id, user_id=user_id
|
transcript_id, user_id=user_id
|
||||||
)
|
)
|
||||||
if transcript.user_id is not None and transcript.user_id != user_id:
|
transcripts_controller.check_can_mutate(transcript, user_id)
|
||||||
raise HTTPException(status_code=403, detail="Not authorized")
|
|
||||||
|
|
||||||
if not transcript:
|
if not transcript:
|
||||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||||
|
|||||||
@@ -21,7 +21,9 @@ async def transcript_record_upload(
|
|||||||
chunk_number: int,
|
chunk_number: int,
|
||||||
total_chunks: int,
|
total_chunks: int,
|
||||||
chunk: UploadFile,
|
chunk: UploadFile,
|
||||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
user: Annotated[
|
||||||
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
|
],
|
||||||
):
|
):
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
|
|||||||
@@ -15,7 +15,9 @@ async def transcript_record_webrtc(
|
|||||||
transcript_id: str,
|
transcript_id: str,
|
||||||
params: RtcOffer,
|
params: RtcOffer,
|
||||||
request: Request,
|
request: Request,
|
||||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
user: Annotated[
|
||||||
|
Optional[auth.UserInfo], Depends(auth.current_user_optional_if_public_mode)
|
||||||
|
],
|
||||||
):
|
):
|
||||||
user_id = user["sub"] if user else None
|
user_id = user["sub"] if user else None
|
||||||
transcript = await transcripts_controller.get_by_id_for_http(
|
transcript = await transcripts_controller.get_by_id_for_http(
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ async def process_recording(bucket_name: str, object_key: str):
|
|||||||
target_language="en",
|
target_language="en",
|
||||||
user_id=room.user_id,
|
user_id=room.user_id,
|
||||||
recording_id=recording.id,
|
recording_id=recording.id,
|
||||||
share_mode="public",
|
share_mode="semi-private",
|
||||||
meeting_id=meeting.id,
|
meeting_id=meeting.id,
|
||||||
room_id=room.id,
|
room_id=room.id,
|
||||||
)
|
)
|
||||||
@@ -343,7 +343,7 @@ async def _process_multitrack_recording_inner(
|
|||||||
target_language="en",
|
target_language="en",
|
||||||
user_id=room.user_id,
|
user_id=room.user_id,
|
||||||
recording_id=recording.id,
|
recording_id=recording.id,
|
||||||
share_mode="public",
|
share_mode="semi-private",
|
||||||
meeting_id=meeting.id,
|
meeting_id=meeting.id,
|
||||||
room_id=room.id,
|
room_id=room.id,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -437,6 +437,8 @@ async def ws_manager_in_memory(monkeypatch):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
fastapi_app.dependency_overrides[auth.current_user_optional] = lambda: None
|
fastapi_app.dependency_overrides[auth.current_user_optional] = lambda: None
|
||||||
|
# current_user_optional_if_public_mode is NOT overridden here so the real
|
||||||
|
# implementation runs and enforces the PUBLIC_MODE check during tests.
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -491,37 +493,39 @@ async def authenticated_client2():
|
|||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def authenticated_client_ctx():
|
async def authenticated_client_ctx():
|
||||||
from reflector.app import app
|
from reflector.app import app
|
||||||
from reflector.auth import current_user, current_user_optional
|
from reflector.auth import (
|
||||||
|
current_user,
|
||||||
|
current_user_optional,
|
||||||
|
current_user_optional_if_public_mode,
|
||||||
|
)
|
||||||
|
|
||||||
app.dependency_overrides[current_user] = lambda: {
|
_user = lambda: {"sub": "randomuserid", "email": "test@mail.com"}
|
||||||
"sub": "randomuserid",
|
app.dependency_overrides[current_user] = _user
|
||||||
"email": "test@mail.com",
|
app.dependency_overrides[current_user_optional] = _user
|
||||||
}
|
app.dependency_overrides[current_user_optional_if_public_mode] = _user
|
||||||
app.dependency_overrides[current_user_optional] = lambda: {
|
|
||||||
"sub": "randomuserid",
|
|
||||||
"email": "test@mail.com",
|
|
||||||
}
|
|
||||||
yield
|
yield
|
||||||
del app.dependency_overrides[current_user]
|
del app.dependency_overrides[current_user]
|
||||||
del app.dependency_overrides[current_user_optional]
|
del app.dependency_overrides[current_user_optional]
|
||||||
|
del app.dependency_overrides[current_user_optional_if_public_mode]
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def authenticated_client2_ctx():
|
async def authenticated_client2_ctx():
|
||||||
from reflector.app import app
|
from reflector.app import app
|
||||||
from reflector.auth import current_user, current_user_optional
|
from reflector.auth import (
|
||||||
|
current_user,
|
||||||
|
current_user_optional,
|
||||||
|
current_user_optional_if_public_mode,
|
||||||
|
)
|
||||||
|
|
||||||
app.dependency_overrides[current_user] = lambda: {
|
_user = lambda: {"sub": "randomuserid2", "email": "test@mail.com"}
|
||||||
"sub": "randomuserid2",
|
app.dependency_overrides[current_user] = _user
|
||||||
"email": "test@mail.com",
|
app.dependency_overrides[current_user_optional] = _user
|
||||||
}
|
app.dependency_overrides[current_user_optional_if_public_mode] = _user
|
||||||
app.dependency_overrides[current_user_optional] = lambda: {
|
|
||||||
"sub": "randomuserid2",
|
|
||||||
"email": "test@mail.com",
|
|
||||||
}
|
|
||||||
yield
|
yield
|
||||||
del app.dependency_overrides[current_user]
|
del app.dependency_overrides[current_user]
|
||||||
del app.dependency_overrides[current_user_optional]
|
del app.dependency_overrides[current_user_optional]
|
||||||
|
del app.dependency_overrides[current_user_optional_if_public_mode]
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
@@ -550,7 +554,7 @@ def reset_hatchet_client():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
async def fake_transcript_with_topics(tmpdir, client):
|
async def fake_transcript_with_topics(tmpdir, client, monkeypatch):
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -559,6 +563,9 @@ async def fake_transcript_with_topics(tmpdir, client):
|
|||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from reflector.views.transcripts import transcripts_controller
|
from reflector.views.transcripts import transcripts_controller
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
settings.DATA_DIR = Path(tmpdir)
|
settings.DATA_DIR = Path(tmpdir)
|
||||||
|
|
||||||
# create a transcript
|
# create a transcript
|
||||||
|
|||||||
17
server/tests/test_app.py
Normal file
17
server/tests/test_app.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
"""Tests for app-level endpoints (root, not under /v1)."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_health_endpoint_returns_healthy():
|
||||||
|
"""GET /health returns 200 and {"status": "healthy"} for probes and CI."""
|
||||||
|
from httpx import AsyncClient
|
||||||
|
|
||||||
|
from reflector.app import app
|
||||||
|
|
||||||
|
# Health is at app root, not under /v1
|
||||||
|
async with AsyncClient(app=app, base_url="http://test") as root_client:
|
||||||
|
response = await root_client.get("/health")
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.json() == {"status": "healthy"}
|
||||||
303
server/tests/test_hatchet_error_handling.py
Normal file
303
server/tests/test_hatchet_error_handling.py
Normal file
@@ -0,0 +1,303 @@
|
|||||||
|
"""
|
||||||
|
Tests for Hatchet error handling: NonRetryable classification and error status.
|
||||||
|
|
||||||
|
These tests encode the desired behavior from the Hatchet Workflow Analysis doc:
|
||||||
|
- Transient exceptions: do NOT set error status (let Hatchet retry; user stays on "processing").
|
||||||
|
- Hard-fail exceptions: set error status and re-raise as NonRetryableException (stop retries).
|
||||||
|
- on_failure_task: sets error status when workflow is truly dead.
|
||||||
|
|
||||||
|
Run before the fix: some tests fail (reproducing the issues).
|
||||||
|
Run after the fix: all tests pass.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
from hatchet_sdk import NonRetryableException
|
||||||
|
|
||||||
|
from reflector.hatchet.error_classification import is_non_retryable
|
||||||
|
from reflector.llm import LLMParseError
|
||||||
|
|
||||||
|
# --- Tests for is_non_retryable() (pass once error_classification exists) ---
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_true_for_value_error():
|
||||||
|
"""ValueError (e.g. missing config) should stop retries."""
|
||||||
|
assert is_non_retryable(ValueError("DAILY_API_KEY must be set")) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_true_for_type_error():
|
||||||
|
"""TypeError (bad input) should stop retries."""
|
||||||
|
assert is_non_retryable(TypeError("expected str")) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_true_for_http_401():
|
||||||
|
"""HTTP 401 auth error should stop retries."""
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.status_code = 401
|
||||||
|
err = httpx.HTTPStatusError("Unauthorized", request=MagicMock(), response=resp)
|
||||||
|
assert is_non_retryable(err) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_true_for_http_402():
|
||||||
|
"""HTTP 402 (no credits) should stop retries."""
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.status_code = 402
|
||||||
|
err = httpx.HTTPStatusError("Payment Required", request=MagicMock(), response=resp)
|
||||||
|
assert is_non_retryable(err) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_true_for_http_404():
|
||||||
|
"""HTTP 404 should stop retries."""
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.status_code = 404
|
||||||
|
err = httpx.HTTPStatusError("Not Found", request=MagicMock(), response=resp)
|
||||||
|
assert is_non_retryable(err) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_false_for_http_503():
|
||||||
|
"""HTTP 503 is transient; retries are useful."""
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.status_code = 503
|
||||||
|
err = httpx.HTTPStatusError(
|
||||||
|
"Service Unavailable", request=MagicMock(), response=resp
|
||||||
|
)
|
||||||
|
assert is_non_retryable(err) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_false_for_timeout():
|
||||||
|
"""Timeout is transient."""
|
||||||
|
assert is_non_retryable(httpx.TimeoutException("timed out")) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_true_for_llm_parse_error():
|
||||||
|
"""LLMParseError after internal retries should stop."""
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
class _Dummy(BaseModel):
|
||||||
|
pass
|
||||||
|
|
||||||
|
assert is_non_retryable(LLMParseError(_Dummy, "Failed to parse", 3)) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_non_retryable_returns_true_for_non_retryable_exception():
|
||||||
|
"""Already-wrapped NonRetryableException should stay non-retryable."""
|
||||||
|
assert is_non_retryable(NonRetryableException("custom")) is True
|
||||||
|
|
||||||
|
|
||||||
|
# --- Tests for with_error_handling (need pipeline module with patch) ---
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def pipeline_module():
|
||||||
|
"""Import daily_multitrack_pipeline with Hatchet client mocked."""
|
||||||
|
with patch("reflector.hatchet.client.settings") as s:
|
||||||
|
s.HATCHET_CLIENT_TOKEN = "test-token"
|
||||||
|
s.HATCHET_DEBUG = False
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.workflow.return_value = MagicMock()
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.client.HatchetClientManager.get_client",
|
||||||
|
return_value=mock_client,
|
||||||
|
):
|
||||||
|
from reflector.hatchet.workflows import daily_multitrack_pipeline
|
||||||
|
|
||||||
|
return daily_multitrack_pipeline
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_input():
|
||||||
|
"""Minimal PipelineInput for decorator tests."""
|
||||||
|
from reflector.hatchet.workflows.daily_multitrack_pipeline import PipelineInput
|
||||||
|
|
||||||
|
return PipelineInput(
|
||||||
|
recording_id="rec-1",
|
||||||
|
tracks=[],
|
||||||
|
bucket_name="bucket",
|
||||||
|
transcript_id="ts-123",
|
||||||
|
room_id=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_ctx():
|
||||||
|
"""Minimal Context-like object."""
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.log = MagicMock()
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_with_error_handling_transient_does_not_set_error_status(
|
||||||
|
pipeline_module, mock_input, mock_ctx
|
||||||
|
):
|
||||||
|
"""Transient exception must NOT set error status (so user stays on 'processing' during retries).
|
||||||
|
|
||||||
|
Before fix: set_workflow_error_status is called on every exception → FAIL.
|
||||||
|
After fix: not called for transient → PASS.
|
||||||
|
"""
|
||||||
|
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||||
|
TaskName,
|
||||||
|
with_error_handling,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def failing_task(input, ctx):
|
||||||
|
raise httpx.TimeoutException("timed out")
|
||||||
|
|
||||||
|
wrapped = with_error_handling(TaskName.GET_RECORDING)(failing_task)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_set_error:
|
||||||
|
with pytest.raises(httpx.TimeoutException):
|
||||||
|
await wrapped(mock_input, mock_ctx)
|
||||||
|
|
||||||
|
# Desired: do NOT set error status for transient (Hatchet will retry)
|
||||||
|
mock_set_error.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_with_error_handling_hard_fail_raises_non_retryable_and_sets_status(
|
||||||
|
pipeline_module, mock_input, mock_ctx
|
||||||
|
):
|
||||||
|
"""Hard-fail (e.g. ValueError) must set error status and re-raise NonRetryableException.
|
||||||
|
|
||||||
|
Before fix: raises ValueError, set_workflow_error_status called → test would need to expect ValueError.
|
||||||
|
After fix: raises NonRetryableException, set_workflow_error_status called → PASS.
|
||||||
|
"""
|
||||||
|
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||||
|
TaskName,
|
||||||
|
with_error_handling,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def failing_task(input, ctx):
|
||||||
|
raise ValueError("PADDING_URL must be set")
|
||||||
|
|
||||||
|
wrapped = with_error_handling(TaskName.GET_RECORDING)(failing_task)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_set_error:
|
||||||
|
with pytest.raises(NonRetryableException) as exc_info:
|
||||||
|
await wrapped(mock_input, mock_ctx)
|
||||||
|
|
||||||
|
assert "PADDING_URL" in str(exc_info.value)
|
||||||
|
mock_set_error.assert_called_once_with("ts-123")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_with_error_handling_set_error_status_false_never_sets_status(
|
||||||
|
pipeline_module, mock_input, mock_ctx
|
||||||
|
):
|
||||||
|
"""When set_error_status=False, we must never set error status (e.g. cleanup_consent)."""
|
||||||
|
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||||
|
TaskName,
|
||||||
|
with_error_handling,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def failing_task(input, ctx):
|
||||||
|
raise ValueError("something went wrong")
|
||||||
|
|
||||||
|
wrapped = with_error_handling(TaskName.CLEANUP_CONSENT, set_error_status=False)(
|
||||||
|
failing_task
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_set_error:
|
||||||
|
with pytest.raises((ValueError, NonRetryableException)):
|
||||||
|
await wrapped(mock_input, mock_ctx)
|
||||||
|
|
||||||
|
mock_set_error.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def _noop_db_context():
|
||||||
|
"""Async context manager that yields without touching the DB (for unit tests)."""
|
||||||
|
yield None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_on_failure_task_sets_error_status(pipeline_module, mock_input, mock_ctx):
|
||||||
|
"""When workflow fails and transcript is not yet 'ended', on_failure sets status to 'error'."""
|
||||||
|
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||||
|
on_workflow_failure,
|
||||||
|
)
|
||||||
|
|
||||||
|
transcript_processing = MagicMock()
|
||||||
|
transcript_processing.status = "processing"
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
|
||||||
|
_noop_db_context,
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"reflector.db.transcripts.transcripts_controller.get_by_id",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
return_value=transcript_processing,
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_set_error:
|
||||||
|
await on_workflow_failure(mock_input, mock_ctx)
|
||||||
|
mock_set_error.assert_called_once_with(mock_input.transcript_id)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_on_failure_task_does_not_overwrite_ended(
|
||||||
|
pipeline_module, mock_input, mock_ctx
|
||||||
|
):
|
||||||
|
"""When workflow fails after finalize (e.g. post_zulip), do not overwrite 'ended' with 'error'.
|
||||||
|
|
||||||
|
cleanup_consent, post_zulip, send_webhook use set_error_status=False; if one fails,
|
||||||
|
on_workflow_failure must not set status to 'error' when transcript is already 'ended'.
|
||||||
|
"""
|
||||||
|
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||||
|
on_workflow_failure,
|
||||||
|
)
|
||||||
|
|
||||||
|
transcript_ended = MagicMock()
|
||||||
|
transcript_ended.status = "ended"
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.workflows.daily_multitrack_pipeline.fresh_db_connection",
|
||||||
|
_noop_db_context,
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"reflector.db.transcripts.transcripts_controller.get_by_id",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
return_value=transcript_ended,
|
||||||
|
):
|
||||||
|
with patch(
|
||||||
|
"reflector.hatchet.workflows.daily_multitrack_pipeline.set_workflow_error_status",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_set_error:
|
||||||
|
await on_workflow_failure(mock_input, mock_ctx)
|
||||||
|
mock_set_error.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
# --- Tests for fan-out helper (_successful_run_results) ---
|
||||||
|
|
||||||
|
|
||||||
|
def test_successful_run_results_filters_exceptions():
|
||||||
|
"""_successful_run_results returns only non-exception items from aio_run_many(return_exceptions=True)."""
|
||||||
|
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||||
|
_successful_run_results,
|
||||||
|
)
|
||||||
|
|
||||||
|
results = [
|
||||||
|
{"key": "ok1"},
|
||||||
|
ValueError("child failed"),
|
||||||
|
{"key": "ok2"},
|
||||||
|
RuntimeError("another"),
|
||||||
|
]
|
||||||
|
successful = _successful_run_results(results)
|
||||||
|
assert len(successful) == 2
|
||||||
|
assert successful[0] == {"key": "ok1"}
|
||||||
|
assert successful[1] == {"key": "ok2"}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Tests for LLM structured output with astructured_predict + reflection retry"""
|
"""Tests for LLM structured output with astructured_predict + reflection retry"""
|
||||||
|
|
||||||
from unittest.mock import AsyncMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from pydantic import BaseModel, Field, ValidationError
|
from pydantic import BaseModel, Field, ValidationError
|
||||||
@@ -252,6 +252,63 @@ class TestNetworkErrorRetries:
|
|||||||
assert mock_settings.llm.astructured_predict.call_count == 3
|
assert mock_settings.llm.astructured_predict.call_count == 3
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetResponseRetries:
|
||||||
|
"""Test that get_response() uses the same retry() wrapper for transient errors."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_response_retries_on_connection_error(self, test_settings):
|
||||||
|
"""Test that get_response retries on ConnectionError and returns on success."""
|
||||||
|
llm = LLM(settings=test_settings, temperature=0.4, max_tokens=100)
|
||||||
|
|
||||||
|
mock_instance = MagicMock()
|
||||||
|
mock_instance.aget_response = AsyncMock(
|
||||||
|
side_effect=[
|
||||||
|
ConnectionError("Connection refused"),
|
||||||
|
" Summary text ",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch("reflector.llm.TreeSummarize", return_value=mock_instance):
|
||||||
|
result = await llm.get_response("Prompt", ["text"])
|
||||||
|
|
||||||
|
assert result == "Summary text"
|
||||||
|
assert mock_instance.aget_response.call_count == 2
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_response_exhausts_retries(self, test_settings):
|
||||||
|
"""Test that get_response raises RetryException after retry attempts exceeded."""
|
||||||
|
llm = LLM(settings=test_settings, temperature=0.4, max_tokens=100)
|
||||||
|
|
||||||
|
mock_instance = MagicMock()
|
||||||
|
mock_instance.aget_response = AsyncMock(
|
||||||
|
side_effect=ConnectionError("Connection refused")
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch("reflector.llm.TreeSummarize", return_value=mock_instance):
|
||||||
|
with pytest.raises(RetryException, match="Retry attempts exceeded"):
|
||||||
|
await llm.get_response("Prompt", ["text"])
|
||||||
|
|
||||||
|
assert mock_instance.aget_response.call_count == 3
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_response_returns_empty_string_without_retry(self, test_settings):
|
||||||
|
"""Empty or whitespace-only LLM response must return '' and not raise RetryException.
|
||||||
|
|
||||||
|
retry() must return falsy results (e.g. '' from get_response) instead of
|
||||||
|
treating them as 'no result' and retrying until RetryException.
|
||||||
|
"""
|
||||||
|
llm = LLM(settings=test_settings, temperature=0.4, max_tokens=100)
|
||||||
|
|
||||||
|
mock_instance = MagicMock()
|
||||||
|
mock_instance.aget_response = AsyncMock(return_value=" \n ") # strip() -> ""
|
||||||
|
|
||||||
|
with patch("reflector.llm.TreeSummarize", return_value=mock_instance):
|
||||||
|
result = await llm.get_response("Prompt", ["text"])
|
||||||
|
|
||||||
|
assert result == ""
|
||||||
|
assert mock_instance.aget_response.call_count == 1
|
||||||
|
|
||||||
|
|
||||||
class TestTextsInclusion:
|
class TestTextsInclusion:
|
||||||
"""Test that texts parameter is included in the prompt sent to astructured_predict"""
|
"""Test that texts parameter is included in the prompt sent to astructured_predict"""
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ This test verifies the complete file processing pipeline without mocking much,
|
|||||||
ensuring all processors are correctly invoked and the happy path works correctly.
|
ensuring all processors are correctly invoked and the happy path works correctly.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
@@ -651,3 +652,43 @@ async def test_pipeline_file_process_no_audio_file(
|
|||||||
# This should fail when trying to open the file with av
|
# This should fail when trying to open the file with av
|
||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
await pipeline.process(non_existent_path)
|
await pipeline.process(non_existent_path)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_on_title_does_not_overwrite_user_set_title():
|
||||||
|
"""When transcript already has a title, on_title does not call update."""
|
||||||
|
from reflector.db.transcripts import Transcript, TranscriptFinalTitle
|
||||||
|
from reflector.pipelines.main_file_pipeline import PipelineMainFile
|
||||||
|
|
||||||
|
transcript_id = str(uuid4())
|
||||||
|
transcript_with_title = Transcript(
|
||||||
|
id=transcript_id,
|
||||||
|
name="test",
|
||||||
|
source_kind="file",
|
||||||
|
title="User set title",
|
||||||
|
)
|
||||||
|
|
||||||
|
controller = "reflector.pipelines.main_live_pipeline.transcripts_controller"
|
||||||
|
with patch(f"{controller}.get_by_id", new_callable=AsyncMock) as mock_get:
|
||||||
|
with patch(f"{controller}.update", new_callable=AsyncMock) as mock_update:
|
||||||
|
with patch(
|
||||||
|
f"{controller}.append_event", new_callable=AsyncMock
|
||||||
|
) as mock_append:
|
||||||
|
with patch(f"{controller}.transaction") as mock_txn:
|
||||||
|
mock_get.return_value = transcript_with_title
|
||||||
|
mock_append.return_value = None
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def noop_txn():
|
||||||
|
yield
|
||||||
|
|
||||||
|
mock_txn.return_value = noop_txn()
|
||||||
|
|
||||||
|
pipeline = PipelineMainFile(transcript_id=transcript_id)
|
||||||
|
await pipeline.on_title(
|
||||||
|
TranscriptFinalTitle(title="Generated title")
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_get.assert_called_once()
|
||||||
|
mock_update.assert_not_called()
|
||||||
|
mock_append.assert_called_once()
|
||||||
|
|||||||
450
server/tests/test_processors_cpu.py
Normal file
450
server/tests/test_processors_cpu.py
Normal file
@@ -0,0 +1,450 @@
|
|||||||
|
"""
|
||||||
|
Tests for in-process processor backends (--cpu mode).
|
||||||
|
|
||||||
|
All ML model calls are mocked — no actual model loading needed.
|
||||||
|
Tests verify processor registration, wiring, error handling, and data flow.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from reflector.processors.file_diarization import (
|
||||||
|
FileDiarizationInput,
|
||||||
|
FileDiarizationOutput,
|
||||||
|
)
|
||||||
|
from reflector.processors.types import (
|
||||||
|
AudioDiarizationInput,
|
||||||
|
TitleSummaryWithId,
|
||||||
|
Transcript,
|
||||||
|
Word,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── Registration Tests ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_audio_diarization_pyannote_registers():
|
||||||
|
"""Verify AudioDiarizationPyannoteProcessor registers with 'pyannote' backend."""
|
||||||
|
# Importing the module triggers registration
|
||||||
|
import reflector.processors.audio_diarization_pyannote # noqa: F401
|
||||||
|
from reflector.processors.audio_diarization_auto import (
|
||||||
|
AudioDiarizationAutoProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "pyannote" in AudioDiarizationAutoProcessor._registry
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_diarization_pyannote_registers():
|
||||||
|
"""Verify FileDiarizationPyannoteProcessor registers with 'pyannote' backend."""
|
||||||
|
import reflector.processors.file_diarization_pyannote # noqa: F401
|
||||||
|
from reflector.processors.file_diarization_auto import FileDiarizationAutoProcessor
|
||||||
|
|
||||||
|
assert "pyannote" in FileDiarizationAutoProcessor._registry
|
||||||
|
|
||||||
|
|
||||||
|
def test_transcript_translator_marian_registers():
|
||||||
|
"""Verify TranscriptTranslatorMarianProcessor registers with 'marian' backend."""
|
||||||
|
import reflector.processors.transcript_translator_marian # noqa: F401
|
||||||
|
from reflector.processors.transcript_translator_auto import (
|
||||||
|
TranscriptTranslatorAutoProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "marian" in TranscriptTranslatorAutoProcessor._registry
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_transcript_whisper_registers():
|
||||||
|
"""Verify FileTranscriptWhisperProcessor registers with 'whisper' backend."""
|
||||||
|
import reflector.processors.file_transcript_whisper # noqa: F401
|
||||||
|
from reflector.processors.file_transcript_auto import FileTranscriptAutoProcessor
|
||||||
|
|
||||||
|
assert "whisper" in FileTranscriptAutoProcessor._registry
|
||||||
|
|
||||||
|
|
||||||
|
# ── Audio Download Utility Tests ────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_download_audio_to_temp_success():
|
||||||
|
"""Verify download_audio_to_temp downloads to a temp file and returns path."""
|
||||||
|
from reflector.processors._audio_download import download_audio_to_temp
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
mock_response.headers = {"content-type": "audio/wav"}
|
||||||
|
mock_response.iter_content.return_value = [b"fake audio data"]
|
||||||
|
mock_response.raise_for_status = MagicMock()
|
||||||
|
|
||||||
|
with patch("reflector.processors._audio_download.requests.get") as mock_get:
|
||||||
|
mock_get.return_value = mock_response
|
||||||
|
|
||||||
|
result = await download_audio_to_temp("https://example.com/test.wav")
|
||||||
|
|
||||||
|
assert isinstance(result, Path)
|
||||||
|
assert result.exists()
|
||||||
|
assert result.read_bytes() == b"fake audio data"
|
||||||
|
assert result.suffix == ".wav"
|
||||||
|
|
||||||
|
# Cleanup
|
||||||
|
os.unlink(result)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_download_audio_to_temp_cleanup_on_error():
|
||||||
|
"""Verify temp file is cleaned up when download fails mid-write."""
|
||||||
|
from reflector.processors._audio_download import download_audio_to_temp
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.headers = {"content-type": "audio/wav"}
|
||||||
|
mock_response.raise_for_status = MagicMock()
|
||||||
|
|
||||||
|
def fail_iter(*args, **kwargs):
|
||||||
|
raise ConnectionError("Download interrupted")
|
||||||
|
|
||||||
|
mock_response.iter_content = fail_iter
|
||||||
|
|
||||||
|
with patch("reflector.processors._audio_download.requests.get") as mock_get:
|
||||||
|
mock_get.return_value = mock_response
|
||||||
|
|
||||||
|
with pytest.raises(ConnectionError, match="Download interrupted"):
|
||||||
|
await download_audio_to_temp("https://example.com/test.wav")
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_extension_from_url():
|
||||||
|
"""Verify extension detection from URL path."""
|
||||||
|
from reflector.processors._audio_download import _detect_extension
|
||||||
|
|
||||||
|
assert _detect_extension("https://example.com/test.wav", "") == ".wav"
|
||||||
|
assert _detect_extension("https://example.com/test.mp3?signed=1", "") == ".mp3"
|
||||||
|
assert _detect_extension("https://example.com/test.webm", "") == ".webm"
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_extension_from_content_type():
|
||||||
|
"""Verify extension detection from content-type header."""
|
||||||
|
from reflector.processors._audio_download import _detect_extension
|
||||||
|
|
||||||
|
assert _detect_extension("https://s3.aws/uuid", "audio/mpeg") == ".mp3"
|
||||||
|
assert _detect_extension("https://s3.aws/uuid", "audio/wav") == ".wav"
|
||||||
|
assert _detect_extension("https://s3.aws/uuid", "audio/webm") == ".webm"
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_extension_fallback():
|
||||||
|
"""Verify fallback extension when neither URL nor content-type is recognized."""
|
||||||
|
from reflector.processors._audio_download import _detect_extension
|
||||||
|
|
||||||
|
assert (
|
||||||
|
_detect_extension("https://s3.aws/uuid", "application/octet-stream") == ".audio"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Audio Diarization Pyannote Processor Tests ──────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_diarization_pyannote_diarize():
|
||||||
|
"""Verify pyannote audio diarization downloads, diarizes, and cleans up."""
|
||||||
|
from reflector.processors.audio_diarization_pyannote import (
|
||||||
|
AudioDiarizationPyannoteProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_diarization_result = {
|
||||||
|
"diarization": [
|
||||||
|
{"start": 0.0, "end": 2.5, "speaker": 0},
|
||||||
|
{"start": 2.5, "end": 5.0, "speaker": 1},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a temp file to simulate download
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||||
|
tmp.write(b"fake audio")
|
||||||
|
tmp.close()
|
||||||
|
tmp_path = Path(tmp.name)
|
||||||
|
|
||||||
|
processor = AudioDiarizationPyannoteProcessor()
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"reflector.processors.audio_diarization_pyannote.download_audio_to_temp",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
return_value=tmp_path,
|
||||||
|
),
|
||||||
|
patch(
|
||||||
|
"reflector.processors.audio_diarization_pyannote.diarization_service"
|
||||||
|
) as mock_svc,
|
||||||
|
):
|
||||||
|
mock_svc.diarize_file.return_value = mock_diarization_result
|
||||||
|
|
||||||
|
data = AudioDiarizationInput(
|
||||||
|
audio_url="https://example.com/test.wav",
|
||||||
|
topics=[
|
||||||
|
TitleSummaryWithId(
|
||||||
|
id="topic-1",
|
||||||
|
title="Test Topic",
|
||||||
|
summary="A test topic",
|
||||||
|
timestamp=0.0,
|
||||||
|
duration=5.0,
|
||||||
|
transcript=Transcript(
|
||||||
|
words=[Word(text="hello", start=0.0, end=1.0)]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
result = await processor._diarize(data)
|
||||||
|
|
||||||
|
assert result == mock_diarization_result["diarization"]
|
||||||
|
mock_svc.diarize_file.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
# ── File Diarization Pyannote Processor Tests ───────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_file_diarization_pyannote_diarize():
|
||||||
|
"""Verify pyannote file diarization returns FileDiarizationOutput."""
|
||||||
|
from reflector.processors.file_diarization_pyannote import (
|
||||||
|
FileDiarizationPyannoteProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_diarization_result = {
|
||||||
|
"diarization": [
|
||||||
|
{"start": 0.0, "end": 3.0, "speaker": 0},
|
||||||
|
{"start": 3.0, "end": 6.0, "speaker": 1},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||||
|
tmp.write(b"fake audio")
|
||||||
|
tmp.close()
|
||||||
|
tmp_path = Path(tmp.name)
|
||||||
|
|
||||||
|
processor = FileDiarizationPyannoteProcessor()
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"reflector.processors.file_diarization_pyannote.download_audio_to_temp",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
return_value=tmp_path,
|
||||||
|
),
|
||||||
|
patch(
|
||||||
|
"reflector.processors.file_diarization_pyannote.diarization_service"
|
||||||
|
) as mock_svc,
|
||||||
|
):
|
||||||
|
mock_svc.diarize_file.return_value = mock_diarization_result
|
||||||
|
|
||||||
|
data = FileDiarizationInput(audio_url="https://example.com/test.wav")
|
||||||
|
result = await processor._diarize(data)
|
||||||
|
|
||||||
|
assert isinstance(result, FileDiarizationOutput)
|
||||||
|
assert len(result.diarization) == 2
|
||||||
|
assert result.diarization[0]["start"] == 0.0
|
||||||
|
assert result.diarization[1]["speaker"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ── Transcript Translator Marian Processor Tests ───────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_transcript_translator_marian_translate():
|
||||||
|
"""Verify MarianMT translator calls service and extracts translation."""
|
||||||
|
from reflector.processors.transcript_translator_marian import (
|
||||||
|
TranscriptTranslatorMarianProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_result = {"text": {"en": "Hello world", "fr": "Bonjour le monde"}}
|
||||||
|
|
||||||
|
processor = TranscriptTranslatorMarianProcessor()
|
||||||
|
|
||||||
|
def fake_get_pref(key, default=None):
|
||||||
|
prefs = {"audio:source_language": "en", "audio:target_language": "fr"}
|
||||||
|
return prefs.get(key, default)
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(processor, "get_pref", side_effect=fake_get_pref),
|
||||||
|
patch(
|
||||||
|
"reflector.processors.transcript_translator_marian.translator_service"
|
||||||
|
) as mock_svc,
|
||||||
|
):
|
||||||
|
mock_svc.translate.return_value = mock_result
|
||||||
|
|
||||||
|
result = await processor._translate("Hello world")
|
||||||
|
|
||||||
|
assert result == "Bonjour le monde"
|
||||||
|
mock_svc.translate.assert_called_once_with("Hello world", "en", "fr")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_transcript_translator_marian_no_translation():
|
||||||
|
"""Verify translator returns None when target language not in result."""
|
||||||
|
from reflector.processors.transcript_translator_marian import (
|
||||||
|
TranscriptTranslatorMarianProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_result = {"text": {"en": "Hello world"}}
|
||||||
|
|
||||||
|
processor = TranscriptTranslatorMarianProcessor()
|
||||||
|
|
||||||
|
def fake_get_pref(key, default=None):
|
||||||
|
prefs = {"audio:source_language": "en", "audio:target_language": "fr"}
|
||||||
|
return prefs.get(key, default)
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch.object(processor, "get_pref", side_effect=fake_get_pref),
|
||||||
|
patch(
|
||||||
|
"reflector.processors.transcript_translator_marian.translator_service"
|
||||||
|
) as mock_svc,
|
||||||
|
):
|
||||||
|
mock_svc.translate.return_value = mock_result
|
||||||
|
|
||||||
|
result = await processor._translate("Hello world")
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
# ── File Transcript Whisper Processor Tests ─────────────────────────────


@pytest.mark.asyncio
async def test_file_transcript_whisper_transcript():
    """Verify whisper file processor downloads, transcribes, and returns Transcript.

    Both the download helper and the blocking transcription method are
    mocked, so only the processor's orchestration logic is exercised.
    """
    from reflector.processors.file_transcript import FileTranscriptInput
    from reflector.processors.file_transcript_whisper import (
        FileTranscriptWhisperProcessor,
    )

    # delete=False so the path stays valid after close(); removed in finally.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.write(b"fake audio")
    tmp.close()
    tmp_path = Path(tmp.name)

    try:
        processor = FileTranscriptWhisperProcessor()

        # Mock the blocking transcription method
        mock_transcript = Transcript(
            words=[
                Word(text="Hello", start=0.0, end=0.5),
                Word(text=" world", start=0.5, end=1.0),
            ]
        )

        with (
            patch(
                "reflector.processors.file_transcript_whisper.download_audio_to_temp",
                new_callable=AsyncMock,
                return_value=tmp_path,
            ),
            patch.object(
                processor,
                "_transcribe_file_blocking",
                return_value=mock_transcript,
            ),
        ):
            data = FileTranscriptInput(
                audio_url="https://example.com/test.wav", language="en"
            )
            result = await processor._transcript(data)

        assert isinstance(result, Transcript)
        assert len(result.words) == 2
        assert result.words[0].text == "Hello"
    finally:
        # Fix: the original test leaked the temp file (delete=False, no cleanup).
        tmp_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
# ── VAD Helper Tests ────────────────────────────────────────────────────


def test_enforce_word_timing_constraints():
    """Verify word timing enforcement prevents overlapping times.

    A word whose ``end`` exceeds the next word's ``start`` must be
    clamped so timings stay monotonically non-overlapping.
    """
    from reflector.processors.file_transcript_whisper import (
        _enforce_word_timing_constraints,
    )

    words = [
        {"word": "hello", "start": 0.0, "end": 1.5},
        {"word": "world", "start": 1.0, "end": 2.0},  # overlaps with previous
        {"word": "test", "start": 2.0, "end": 3.0},
    ]

    result = _enforce_word_timing_constraints(words)

    assert result[0]["end"] == 1.0  # Clamped to next word's start
    assert result[1]["end"] == 2.0  # Clamped to next word's start
    assert result[2]["end"] == 3.0  # Last word unchanged
|
||||||
|
|
||||||
|
|
||||||
|
def test_enforce_word_timing_constraints_empty():
    """Verify timing enforcement handles empty and single-word lists.

    Degenerate inputs must pass through unchanged (nothing to clamp).
    """
    from reflector.processors.file_transcript_whisper import (
        _enforce_word_timing_constraints,
    )

    assert _enforce_word_timing_constraints([]) == []
    assert _enforce_word_timing_constraints([{"word": "a", "start": 0, "end": 1}]) == [
        {"word": "a", "start": 0, "end": 1}
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def test_pad_audio_short():
    """Verify short audio gets padded with silence."""
    import numpy as np

    from reflector.processors.file_transcript_whisper import _pad_audio

    short_audio = np.zeros(100, dtype=np.float32)  # Very short
    result = _pad_audio(short_audio, sample_rate=16000)

    # Should be padded to at least silence_padding duration
    assert len(result) > len(short_audio)
|
||||||
|
|
||||||
|
|
||||||
|
def test_pad_audio_long():
    """Verify long audio is not padded."""
    import numpy as np

    from reflector.processors.file_transcript_whisper import _pad_audio

    long_audio = np.zeros(32000, dtype=np.float32)  # 2 seconds at 16 kHz
    result = _pad_audio(long_audio, sample_rate=16000)

    # Already longer than the silence-padding threshold: length unchanged.
    assert len(result) == len(long_audio)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Translator Service Tests ────────────────────────────────────────────


def test_translator_service_resolve_model():
    """Verify model resolution for known and unknown language pairs.

    Known pairs map to the corresponding Helsinki-NLP opus-mt checkpoint;
    an unknown pair falls back to the en->fr model.
    """
    from reflector.processors._marian_translator_service import MarianTranslatorService

    svc = MarianTranslatorService()

    assert svc._resolve_model_name("en", "fr") == "Helsinki-NLP/opus-mt-en-fr"
    assert svc._resolve_model_name("es", "en") == "Helsinki-NLP/opus-mt-es-en"
    assert svc._resolve_model_name("en", "de") == "Helsinki-NLP/opus-mt-en-de"
    # Unknown pair falls back to en->fr
    assert svc._resolve_model_name("ja", "ko") == "Helsinki-NLP/opus-mt-en-fr"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Diarization Service Tests ───────────────────────────────────────────


def test_diarization_service_singleton():
    """Verify diarization_service is a module-level singleton."""
    from reflector.processors._pyannote_diarization_service import (
        PyannoteDiarizationService,
        diarization_service,
    )

    assert isinstance(diarization_service, PyannoteDiarizationService)
    assert diarization_service._pipeline is None  # Not loaded until first use
|
||||||
|
|
||||||
|
|
||||||
|
def test_translator_service_singleton():
    """Verify translator_service is a module-level singleton."""
    from reflector.processors._marian_translator_service import (
        MarianTranslatorService,
        translator_service,
    )

    assert isinstance(translator_service, MarianTranslatorService)
    assert translator_service._pipeline is None  # Not loaded until first use
|
||||||
@@ -49,6 +49,15 @@ async def test_retry_httpx(httpx_mock):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_retry_402_stops_by_default(httpx_mock):
    """402 (payment required / no credits) is in default retry_httpx_status_stop — do not retry."""
    httpx_mock.add_response(status_code=402, json={"error": "insufficient_credits"})
    async with httpx.AsyncClient() as client:
        # A single 402 must abort immediately with RetryHTTPException,
        # well before the 5s retry window is consumed.
        with pytest.raises(RetryHTTPException):
            await retry(client.get)("https://test_url", retry_timeout=5)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_retry_normal():
|
async def test_retry_normal():
|
||||||
left = 3
|
left = 3
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import time
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from conftest import authenticated_client_ctx
|
||||||
from httpx_ws import aconnect_ws
|
from httpx_ws import aconnect_ws
|
||||||
from uvicorn import Config, Server
|
from uvicorn import Config, Server
|
||||||
|
|
||||||
@@ -382,3 +383,607 @@ async def test_audio_mp3_requires_token_for_owned_transcript(
|
|||||||
)
|
)
|
||||||
resp2 = await client.get(f"/transcripts/{t.id}/audio/mp3", params={"token": token})
|
resp2 = await client.get(f"/transcripts/{t.id}/audio/mp3", params={"token": token})
|
||||||
assert resp2.status_code == 200
|
assert resp2.status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
# ======================================================================
# Auth guards: anonymous blocked when PUBLIC_MODE=False
# ======================================================================


@pytest.mark.asyncio
async def test_anonymous_cannot_create_transcript_when_not_public(client, monkeypatch):
    """Anonymous POST /transcripts is rejected with 401 in non-public mode."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    resp = await client.post("/transcripts", json={"name": "anon-test"})
    assert resp.status_code == 401, resp.text


@pytest.mark.asyncio
async def test_anonymous_cannot_process_transcript_when_not_public(client, monkeypatch):
    """Anonymous processing trigger is rejected even for a public transcript."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="process-test",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="public",
    )

    resp = await client.post(f"/transcripts/{t.id}/process")
    assert resp.status_code == 401, resp.text


@pytest.mark.asyncio
async def test_anonymous_cannot_upload_when_not_public(client, monkeypatch):
    """Anonymous chunk upload is rejected with 401 in non-public mode."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="upload-test",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="public",
    )

    # Minimal multipart upload
    resp = await client.post(
        f"/transcripts/{t.id}/record/upload",
        params={"chunk_number": 0, "total_chunks": 1},
        files={"chunk": ("test.mp3", b"fake-audio", "audio/mpeg")},
    )
    assert resp.status_code == 401, resp.text


@pytest.mark.asyncio
async def test_anonymous_cannot_webrtc_record_when_not_public(client, monkeypatch):
    """Anonymous WebRTC recording offer is rejected with 401 in non-public mode."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="webrtc-test",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="public",
    )

    resp = await client.post(
        f"/transcripts/{t.id}/record/webrtc",
        json={"sdp": "v=0\r\n", "type": "offer"},
    )
    assert resp.status_code == 401, resp.text


@pytest.mark.asyncio
async def test_anonymous_cannot_start_meeting_recording_when_not_public(
    client, monkeypatch
):
    """Anonymous meeting-recording start is rejected with 401 in non-public mode."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    room = await rooms_controller.add(
        name="recording-auth-test",
        user_id="owner-rec",
        zulip_auto_post=False,
        zulip_stream="",
        zulip_topic="",
        is_locked=False,
        room_mode="normal",
        recording_type="cloud",
        recording_trigger="automatic-2nd-participant",
        is_shared=True,
        webhook_url="",
        webhook_secret="",
    )

    meeting = await meetings_controller.create(
        id="meeting-rec-test",
        room_name="recording-auth-test",
        room_url="room-url",
        host_room_url="host-url",
        # Reuse the Room model's created_at default factory to get "now".
        start_date=Room.model_fields["created_at"].default_factory(),
        end_date=Room.model_fields["created_at"].default_factory(),
        room=room,
    )

    resp = await client.post(
        f"/meetings/{meeting.id}/recordings/start",
        json={"type": "cloud", "instanceId": "00000000-0000-0000-0000-000000000001"},
    )
    assert resp.status_code == 401, resp.text
|
||||||
|
|
||||||
|
|
||||||
|
# ======================================================================
# Public mode: anonymous IS allowed when PUBLIC_MODE=True
# ======================================================================


@pytest.mark.asyncio
async def test_anonymous_can_create_transcript_when_public(client, monkeypatch):
    """Anonymous POST /transcripts succeeds when PUBLIC_MODE=True."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    resp = await client.post("/transcripts", json={"name": "anon-public-test"})
    assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_anonymous_can_list_transcripts_when_public(client, monkeypatch):
    """Anonymous GET /transcripts succeeds when PUBLIC_MODE=True."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    resp = await client.get("/transcripts")
    assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_anonymous_can_read_public_transcript(client, monkeypatch):
    """Anonymous read of a public transcript succeeds when PUBLIC_MODE=True."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    t = await transcripts_controller.add(
        name="readable-test",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="public",
    )

    resp = await client.get(f"/transcripts/{t.id}")
    assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_anonymous_can_upload_when_public(client, monkeypatch):
    """Anonymous chunk upload is allowed when PUBLIC_MODE=True."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    t = await transcripts_controller.add(
        name="upload-public-test",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="public",
    )

    resp = await client.post(
        f"/transcripts/{t.id}/record/upload",
        params={"chunk_number": 0, "total_chunks": 2},
        files={"chunk": ("test.mp3", b"fake-audio", "audio/mpeg")},
    )
    # Chunk 0 of 2 won't trigger av.open validation, so should succeed with "ok"
    # The key assertion: auth did NOT block us (no 401)
    assert resp.status_code != 401, f"Should not get 401 in public mode: {resp.text}"
    assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_anonymous_can_start_meeting_recording_when_public(client, monkeypatch):
    """Anonymous meeting-recording start passes auth when PUBLIC_MODE=True."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    room = await rooms_controller.add(
        name="recording-public-test",
        user_id="owner-pub",
        zulip_auto_post=False,
        zulip_stream="",
        zulip_topic="",
        is_locked=False,
        room_mode="normal",
        recording_type="cloud",
        recording_trigger="automatic-2nd-participant",
        is_shared=True,
        webhook_url="",
        webhook_secret="",
    )

    meeting = await meetings_controller.create(
        id="meeting-pub-test",
        room_name="recording-public-test",
        room_url="room-url",
        host_room_url="host-url",
        # Reuse the Room model's created_at default factory to get "now".
        start_date=Room.model_fields["created_at"].default_factory(),
        end_date=Room.model_fields["created_at"].default_factory(),
        room=room,
    )

    resp = await client.post(
        f"/meetings/{meeting.id}/recordings/start",
        json={"type": "cloud", "instanceId": "00000000-0000-0000-0000-000000000002"},
    )
    # Should not be 401 (may fail for other reasons like no Daily API, but auth passes)
    assert resp.status_code != 401, f"Should not get 401 in public mode: {resp.text}"
|
||||||
|
|
||||||
|
|
||||||
|
# ======================================================================
# Authenticated user vs private data (own transcripts)
# Authenticated owner should be able to create, read, and process
# their own private transcripts even when PUBLIC_MODE=False
# ======================================================================


@pytest.mark.asyncio
async def test_authenticated_can_create_transcript_private_mode(client, monkeypatch):
    """Authenticated user can create transcripts even when PUBLIC_MODE=False."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    async with authenticated_client_ctx():
        resp = await client.post("/transcripts", json={"name": "auth-private-create"})
        assert resp.status_code == 200, resp.text
        # The created transcript is owned by the authenticated test user.
        assert resp.json()["user_id"] == "randomuserid"


@pytest.mark.asyncio
async def test_authenticated_can_read_own_private_transcript(client, monkeypatch):
    """Authenticated owner can read their own private transcript."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    # Create transcript owned by "randomuserid"
    t = await transcripts_controller.add(
        name="auth-private-read",
        source_kind=SourceKind.LIVE,
        user_id="randomuserid",
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.get(f"/transcripts/{t.id}")
        assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_authenticated_cannot_read_others_private_transcript(client, monkeypatch):
    """Authenticated user cannot read another user's private transcript."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    # Create transcript owned by someone else
    t = await transcripts_controller.add(
        name="other-private",
        source_kind=SourceKind.LIVE,
        user_id="other-owner",
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.get(f"/transcripts/{t.id}")
        assert resp.status_code == 403, resp.text


@pytest.mark.asyncio
async def test_authenticated_can_process_own_private_transcript(client, monkeypatch):
    """Authenticated owner can trigger processing on their own private transcript."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="auth-private-process",
        source_kind=SourceKind.LIVE,
        user_id="randomuserid",
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.post(f"/transcripts/{t.id}/process")
        # Should pass auth (may fail for other reasons like validation, but not 401/403)
        assert resp.status_code not in (401, 403), resp.text


@pytest.mark.asyncio
async def test_authenticated_can_upload_to_own_private_transcript(client, monkeypatch):
    """Authenticated owner can upload audio to their own private transcript."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="auth-private-upload",
        source_kind=SourceKind.LIVE,
        user_id="randomuserid",
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.post(
            f"/transcripts/{t.id}/record/upload",
            params={"chunk_number": 0, "total_chunks": 2},
            files={"chunk": ("test.mp3", b"fake-audio", "audio/mpeg")},
        )
        # Auth passes, chunk accepted (not final chunk so no av validation)
        assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_authenticated_can_webrtc_own_private_transcript(client, monkeypatch):
    """Authenticated owner can start WebRTC recording on their own private transcript."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="auth-private-webrtc",
        source_kind=SourceKind.LIVE,
        user_id="randomuserid",
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.post(
            f"/transcripts/{t.id}/record/webrtc",
            json={"sdp": "v=0\r\n", "type": "offer"},
        )
        # Auth passes (may fail for other reasons like RTC setup, but not 401/403)
        assert resp.status_code not in (401, 403), resp.text
|
||||||
|
|
||||||
|
|
||||||
|
# ======================================================================
# Authenticated user vs semi-private data (other user's transcripts)
# Any authenticated user should be able to READ semi-private transcripts
# but NOT write to them (upload, process) since they don't own them
# ======================================================================


@pytest.mark.asyncio
async def test_authenticated_can_read_others_semi_private_transcript(
    client, monkeypatch
):
    """Any authenticated user can read a semi-private transcript (link sharing)."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    # Create transcript owned by someone else with semi-private share mode
    t = await transcripts_controller.add(
        name="semi-private-readable",
        source_kind=SourceKind.LIVE,
        user_id="other-owner",
        share_mode="semi-private",
    )

    async with authenticated_client_ctx():
        resp = await client.get(f"/transcripts/{t.id}")
        assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_anonymous_cannot_read_semi_private_transcript(client, monkeypatch):
    """Anonymous user cannot read a semi-private transcript."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="semi-private-blocked",
        source_kind=SourceKind.LIVE,
        user_id="some-owner",
        share_mode="semi-private",
    )

    resp = await client.get(f"/transcripts/{t.id}")
    assert resp.status_code == 403, resp.text


@pytest.mark.asyncio
async def test_authenticated_can_list_own_transcripts_private_mode(client, monkeypatch):
    """Authenticated user can list their own transcripts when PUBLIC_MODE=False."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    await transcripts_controller.add(
        name="my-transcript",
        source_kind=SourceKind.LIVE,
        user_id="randomuserid",
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.get("/transcripts")
        assert resp.status_code == 200, resp.text
        items = resp.json()["items"]
        assert len(items) >= 1
        # All returned transcripts should belong to the user or be in shared rooms
        for item in items:
            assert item["user_id"] == "randomuserid" or item.get("room_id") is not None


@pytest.mark.asyncio
async def test_authenticated_cannot_list_others_private_transcripts(
    client, monkeypatch
):
    """Authenticated user should NOT see another user's private transcripts in the list."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    await transcripts_controller.add(
        name="hidden-from-others",
        source_kind=SourceKind.LIVE,
        user_id="secret-owner",
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.get("/transcripts")
        assert resp.status_code == 200, resp.text
        items = resp.json()["items"]
        # Should not contain transcripts owned by "secret-owner"
        for item in items:
            assert (
                item.get("user_id") != "secret-owner"
            ), f"Leaked private transcript: {item['id']}"
|
||||||
|
|
||||||
|
|
||||||
|
# ======================================================================
# Anonymous-created transcripts (user_id=None)
# These transcripts bypass share_mode checks entirely in get_by_id_for_http.
# They should always be accessible to everyone regardless of PUBLIC_MODE
# or share_mode setting, because there is no owner to restrict access.
# ======================================================================


@pytest.mark.asyncio
async def test_anonymous_transcript_accessible_when_public_mode_true(
    client, monkeypatch
):
    """Anonymous transcript (user_id=None) is accessible even with default private share_mode
    when PUBLIC_MODE=True."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    t = await transcripts_controller.add(
        name="anon-transcript-public-mode",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="private",  # share_mode is irrelevant for user_id=None
    )

    resp = await client.get(f"/transcripts/{t.id}")
    assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_anonymous_transcript_accessible_when_public_mode_false(
    client, monkeypatch
):
    """Anonymous transcript (user_id=None) is accessible by authenticated users
    even when PUBLIC_MODE=False. The transcript has no owner, so share_mode is bypassed."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="anon-transcript-private-mode",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.get(f"/transcripts/{t.id}")
        assert resp.status_code == 200, resp.text


@pytest.mark.asyncio
async def test_anonymous_transcript_accessible_regardless_of_share_mode(
    client, monkeypatch
):
    """Anonymous transcripts (user_id=None) are accessible regardless of share_mode value.
    Tests all three share modes to confirm the user_id=None bypass works consistently."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    for mode in ("private", "semi-private", "public"):
        t = await transcripts_controller.add(
            name=f"anon-share-{mode}",
            source_kind=SourceKind.LIVE,
            user_id=None,
            share_mode=mode,
        )

        resp = await client.get(f"/transcripts/{t.id}")
        assert resp.status_code == 200, f"Failed for share_mode={mode}: {resp.text}"


@pytest.mark.asyncio
async def test_anonymous_transcript_readable_by_different_authenticated_user(
    client, monkeypatch
):
    """An authenticated user can read anonymous transcripts (user_id=None) even with
    private share_mode, because the no-owner bypass applies."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="anon-read-by-auth-user",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="private",
    )

    async with authenticated_client_ctx():
        resp = await client.get(f"/transcripts/{t.id}")
        assert resp.status_code == 200, resp.text
        assert resp.json()["user_id"] is None


@pytest.mark.asyncio
async def test_anonymous_transcript_in_list_when_public_mode(client, monkeypatch):
    """Anonymous transcripts appear in the transcript list when PUBLIC_MODE=True."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)

    t = await transcripts_controller.add(
        name="anon-in-list",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="private",
    )

    resp = await client.get("/transcripts")
    assert resp.status_code == 200, resp.text
    ids = [item["id"] for item in resp.json()["items"]]
    assert t.id in ids, "Anonymous transcript should appear in the public list"


@pytest.mark.asyncio
async def test_anonymous_transcript_audio_accessible(client, monkeypatch, tmpdir):
    """Anonymous transcript audio (mp3) is accessible without authentication
    because user_id=None bypasses share_mode checks."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", True)
    monkeypatch.setattr(settings, "DATA_DIR", Path(tmpdir).as_posix())

    t = await transcripts_controller.add(
        name="anon-audio-access",
        source_kind=SourceKind.LIVE,
        user_id=None,
        share_mode="private",
    )

    # Audio endpoint requires an ended transcript.
    tr = await transcripts_controller.get_by_id(t.id)
    await transcripts_controller.update(tr, {"status": "ended"})

    # Copy fixture audio to transcript path
    audio_path = Path(__file__).parent / "records" / "test_mathieu_hello.mp3"
    tr.audio_mp3_filename.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(audio_path, tr.audio_mp3_filename)

    resp = await client.get(f"/transcripts/{t.id}/audio/mp3")
    assert (
        resp.status_code == 200
    ), f"Anonymous transcript audio should be accessible: {resp.text}"


@pytest.mark.asyncio
async def test_owned_transcript_not_accessible_by_anon_when_not_public(
    client, monkeypatch
):
    """Contrast test: owned transcript with private share_mode is NOT accessible
    to anonymous users when PUBLIC_MODE=False. This confirms that the user_id=None
    bypass only applies to anonymous transcripts, not to all transcripts."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    t = await transcripts_controller.add(
        name="owned-private-contrast",
        source_kind=SourceKind.LIVE,
        user_id="some-owner",
        share_mode="private",
    )

    resp = await client.get(f"/transcripts/{t.id}")
    assert (
        resp.status_code == 403
    ), f"Owned private transcript should be denied to anonymous: {resp.text}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_authenticated_can_start_meeting_recording_private_mode(
    client, monkeypatch
):
    """Authenticated user can start recording in non-public mode."""
    monkeypatch.setattr(settings, "PUBLIC_MODE", False)

    room = await rooms_controller.add(
        name="auth-recording-test",
        user_id="randomuserid",
        zulip_auto_post=False,
        zulip_stream="",
        zulip_topic="",
        is_locked=False,
        room_mode="normal",
        recording_type="cloud",
        recording_trigger="automatic-2nd-participant",
        is_shared=True,
        webhook_url="",
        webhook_secret="",
    )

    meeting = await meetings_controller.create(
        id="meeting-auth-rec",
        room_name="auth-recording-test",
        room_url="room-url",
        host_room_url="host-url",
        # Reuse the Room model's created_at default factory to get "now".
        start_date=Room.model_fields["created_at"].default_factory(),
        end_date=Room.model_fields["created_at"].default_factory(),
        room=room,
    )

    async with authenticated_client_ctx():
        resp = await client.post(
            f"/meetings/{meeting.id}/recordings/start",
            json={
                "type": "cloud",
                "instanceId": "00000000-0000-0000-0000-000000000003",
            },
        )
        # Auth passes (may fail for Daily API reasons, but not 401)
        assert resp.status_code != 401, resp.text
|
||||||
|
|||||||
@@ -340,8 +340,13 @@ async def test_transcript_formats_with_overlapping_speakers_multitrack():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_transcript_format_text(client):
|
async def test_api_transcript_format_text(monkeypatch, client):
|
||||||
"""Test GET /transcripts/{id} with transcript_format=text."""
|
"""Test GET /transcripts/{id} with transcript_format=text."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
tid = response.json()["id"]
|
tid = response.json()["id"]
|
||||||
@@ -390,8 +395,13 @@ async def test_api_transcript_format_text(client):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_transcript_format_text_timestamped(client):
|
async def test_api_transcript_format_text_timestamped(monkeypatch, client):
|
||||||
"""Test GET /transcripts/{id} with transcript_format=text-timestamped."""
|
"""Test GET /transcripts/{id} with transcript_format=text-timestamped."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
tid = response.json()["id"]
|
tid = response.json()["id"]
|
||||||
@@ -441,8 +451,13 @@ async def test_api_transcript_format_text_timestamped(client):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_transcript_format_webvtt_named(client):
|
async def test_api_transcript_format_webvtt_named(monkeypatch, client):
|
||||||
"""Test GET /transcripts/{id} with transcript_format=webvtt-named."""
|
"""Test GET /transcripts/{id} with transcript_format=webvtt-named."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
tid = response.json()["id"]
|
tid = response.json()["id"]
|
||||||
@@ -491,8 +506,13 @@ async def test_api_transcript_format_webvtt_named(client):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_transcript_format_json(client):
|
async def test_api_transcript_format_json(monkeypatch, client):
|
||||||
"""Test GET /transcripts/{id} with transcript_format=json."""
|
"""Test GET /transcripts/{id} with transcript_format=json."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
tid = response.json()["id"]
|
tid = response.json()["id"]
|
||||||
@@ -544,8 +564,13 @@ async def test_api_transcript_format_json(client):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_transcript_format_default_is_text(client):
|
async def test_api_transcript_format_default_is_text(monkeypatch, client):
|
||||||
"""Test GET /transcripts/{id} defaults to text format."""
|
"""Test GET /transcripts/{id} defaults to text format."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
response = await client.post("/transcripts", json={"name": "Test transcript"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
tid = response.json()["id"]
|
tid = response.json()["id"]
|
||||||
@@ -654,12 +679,18 @@ async def test_api_topics_endpoint_multitrack_segmentation(client):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_api_topics_endpoint_non_multitrack_segmentation(client):
|
async def test_api_topics_endpoint_non_multitrack_segmentation(monkeypatch, client):
|
||||||
"""Test GET /transcripts/{id}/topics uses default segmentation for non-multitrack.
|
"""Test GET /transcripts/{id}/topics uses default segmentation for non-multitrack.
|
||||||
|
|
||||||
Ensures backward compatibility - transcripts without multitrack recordings
|
Ensures backward compatibility - transcripts without multitrack recordings
|
||||||
should continue using the default speaker-change-based segmentation.
|
should continue using the default speaker-change-based segmentation.
|
||||||
"""
|
"""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
|
|
||||||
from reflector.db.transcripts import (
|
from reflector.db.transcripts import (
|
||||||
TranscriptParticipant,
|
TranscriptParticipant,
|
||||||
TranscriptTopic,
|
TranscriptTopic,
|
||||||
|
|||||||
@@ -5,7 +5,12 @@ from reflector.db.transcripts import transcripts_controller
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_transcript_create(client):
|
async def test_transcript_create(monkeypatch, client):
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post("/transcripts", json={"name": "test"})
|
response = await client.post("/transcripts", json={"name": "test"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert response.json()["name"] == "test"
|
assert response.json()["name"] == "test"
|
||||||
@@ -110,6 +115,33 @@ async def test_transcript_get_update_title(authenticated_client, client):
|
|||||||
assert response.json()["title"] == "test_title"
|
assert response.json()["title"] == "test_title"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_set_status_emits_status_event_and_updates_transcript(
|
||||||
|
monkeypatch, client
|
||||||
|
):
|
||||||
|
"""set_status adds a STATUS event and updates the transcript status (broadcast for WebSocket)."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
|
response = await client.post("/transcripts", json={"name": "Status test"})
|
||||||
|
assert response.status_code == 200
|
||||||
|
transcript_id = response.json()["id"]
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.get_by_id(transcript_id)
|
||||||
|
assert transcript is not None
|
||||||
|
assert transcript.status == "idle"
|
||||||
|
|
||||||
|
event = await transcripts_controller.set_status(transcript_id, "processing")
|
||||||
|
assert event is not None
|
||||||
|
assert event.event == "STATUS"
|
||||||
|
assert event.data.get("value") == "processing"
|
||||||
|
|
||||||
|
updated = await transcripts_controller.get_by_id(transcript_id)
|
||||||
|
assert updated.status == "processing"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_transcripts_list_anonymous(client):
|
async def test_transcripts_list_anonymous(client):
|
||||||
# XXX this test is a bit fragile, as it depends on the storage which
|
# XXX this test is a bit fragile, as it depends on the storage which
|
||||||
@@ -233,3 +265,43 @@ async def test_transcript_get_returns_null_room_name_when_no_room(
|
|||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert response.json()["room_id"] is None
|
assert response.json()["room_id"] is None
|
||||||
assert response.json()["room_name"] is None
|
assert response.json()["room_name"] is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_transcripts_list_filtered_by_room_id(authenticated_client, client):
|
||||||
|
"""GET /transcripts?room_id=X returns only transcripts for that room."""
|
||||||
|
# Use same user as authenticated_client (conftest uses "randomuserid")
|
||||||
|
user_id = "randomuserid"
|
||||||
|
room = await rooms_controller.add(
|
||||||
|
name="room-for-list-filter",
|
||||||
|
user_id=user_id,
|
||||||
|
zulip_auto_post=False,
|
||||||
|
zulip_stream="",
|
||||||
|
zulip_topic="",
|
||||||
|
is_locked=False,
|
||||||
|
room_mode="normal",
|
||||||
|
recording_type="cloud",
|
||||||
|
recording_trigger="automatic-2nd-participant",
|
||||||
|
is_shared=False,
|
||||||
|
webhook_url="",
|
||||||
|
webhook_secret="",
|
||||||
|
)
|
||||||
|
in_room = await transcripts_controller.add(
|
||||||
|
name="in-room",
|
||||||
|
source_kind="file",
|
||||||
|
room_id=room.id,
|
||||||
|
user_id=user_id,
|
||||||
|
)
|
||||||
|
other = await transcripts_controller.add(
|
||||||
|
name="no-room",
|
||||||
|
source_kind="file",
|
||||||
|
room_id=None,
|
||||||
|
user_id=user_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
response = await client.get("/transcripts", params={"room_id": room.id})
|
||||||
|
assert response.status_code == 200
|
||||||
|
items = response.json()["items"]
|
||||||
|
ids = [t["id"] for t in items]
|
||||||
|
assert in_room.id in ids
|
||||||
|
assert other.id not in ids
|
||||||
|
|||||||
@@ -5,10 +5,13 @@ import pytest
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
async def fake_transcript(tmpdir, client):
|
async def fake_transcript(tmpdir, client, monkeypatch):
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from reflector.views.transcripts import transcripts_controller
|
from reflector.views.transcripts import transcripts_controller
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
settings.DATA_DIR = Path(tmpdir)
|
settings.DATA_DIR = Path(tmpdir)
|
||||||
|
|
||||||
# create a transcript
|
# create a transcript
|
||||||
|
|||||||
327
server/tests/test_transcripts_audio_token_auth.py
Normal file
327
server/tests/test_transcripts_audio_token_auth.py
Normal file
@@ -0,0 +1,327 @@
|
|||||||
|
"""Tests for audio mp3 endpoint token query-param authentication.
|
||||||
|
|
||||||
|
Covers both password (HS256) and JWT/Authentik (RS256) auth backends,
|
||||||
|
verifying that private transcripts can be accessed via ?token= query param.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import jwt
|
||||||
|
import pytest
|
||||||
|
from cryptography.hazmat.primitives import serialization
|
||||||
|
from cryptography.hazmat.primitives.asymmetric import rsa
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
OWNER_USER_ID = "test-owner-user-id"
|
||||||
|
|
||||||
|
|
||||||
|
def _create_hs256_token(user_id: str, secret: str, expired: bool = False) -> str:
|
||||||
|
"""Create an HS256 JWT like the password auth backend does."""
|
||||||
|
delta = timedelta(minutes=-5) if expired else timedelta(hours=24)
|
||||||
|
payload = {
|
||||||
|
"sub": user_id,
|
||||||
|
"email": "test@example.com",
|
||||||
|
"exp": datetime.now(timezone.utc) + delta,
|
||||||
|
}
|
||||||
|
return jwt.encode(payload, secret, algorithm="HS256")
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_rsa_keypair():
|
||||||
|
"""Generate a fresh RSA keypair for tests."""
|
||||||
|
private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
|
||||||
|
public_pem = private_key.public_key().public_bytes(
|
||||||
|
serialization.Encoding.PEM, serialization.PublicFormat.SubjectPublicKeyInfo
|
||||||
|
)
|
||||||
|
return private_key, public_pem.decode()
|
||||||
|
|
||||||
|
|
||||||
|
def _create_rs256_token(
|
||||||
|
authentik_uid: str,
|
||||||
|
private_key,
|
||||||
|
audience: str,
|
||||||
|
expired: bool = False,
|
||||||
|
) -> str:
|
||||||
|
"""Create an RS256 JWT like Authentik would issue."""
|
||||||
|
delta = timedelta(minutes=-5) if expired else timedelta(hours=1)
|
||||||
|
payload = {
|
||||||
|
"sub": authentik_uid,
|
||||||
|
"email": "authentik-user@example.com",
|
||||||
|
"aud": audience,
|
||||||
|
"exp": datetime.now(timezone.utc) + delta,
|
||||||
|
}
|
||||||
|
return jwt.encode(payload, private_key, algorithm="RS256")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
async def private_transcript(tmpdir):
|
||||||
|
"""Create a private transcript owned by OWNER_USER_ID with an mp3 file.
|
||||||
|
|
||||||
|
Created directly via the controller (not HTTP) so no auth override
|
||||||
|
leaks into the test scope.
|
||||||
|
"""
|
||||||
|
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
settings.DATA_DIR = Path(tmpdir)
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.add(
|
||||||
|
"Private audio test",
|
||||||
|
source_kind=SourceKind.FILE,
|
||||||
|
user_id=OWNER_USER_ID,
|
||||||
|
share_mode="private",
|
||||||
|
)
|
||||||
|
await transcripts_controller.update(transcript, {"status": "ended"})
|
||||||
|
|
||||||
|
# Copy a real mp3 to the expected location
|
||||||
|
audio_filename = transcript.audio_mp3_filename
|
||||||
|
mp3_source = Path(__file__).parent / "records" / "test_mathieu_hello.mp3"
|
||||||
|
audio_filename.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
shutil.copy(mp3_source, audio_filename)
|
||||||
|
|
||||||
|
yield transcript
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Core access control tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_private_no_auth_returns_403(private_transcript, client):
|
||||||
|
"""Without auth, accessing a private transcript's audio returns 403."""
|
||||||
|
response = await client.get(f"/transcripts/{private_transcript.id}/audio/mp3")
|
||||||
|
assert response.status_code == 403
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_with_bearer_header(private_transcript, client):
|
||||||
|
"""Owner accessing audio via Authorization header works."""
|
||||||
|
from reflector.app import app
|
||||||
|
from reflector.auth import current_user_optional
|
||||||
|
|
||||||
|
# Temporarily override to simulate the owner being authenticated
|
||||||
|
app.dependency_overrides[current_user_optional] = lambda: {
|
||||||
|
"sub": OWNER_USER_ID,
|
||||||
|
"email": "test@example.com",
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
response = await client.get(f"/transcripts/{private_transcript.id}/audio/mp3")
|
||||||
|
finally:
|
||||||
|
del app.dependency_overrides[current_user_optional]
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.headers["content-type"] == "audio/mpeg"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_public_transcript_no_auth_ok(tmpdir, client):
|
||||||
|
"""Public transcripts are accessible without any auth."""
|
||||||
|
from reflector.db.transcripts import SourceKind, transcripts_controller
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
settings.DATA_DIR = Path(tmpdir)
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.add(
|
||||||
|
"Public audio test",
|
||||||
|
source_kind=SourceKind.FILE,
|
||||||
|
user_id=OWNER_USER_ID,
|
||||||
|
share_mode="public",
|
||||||
|
)
|
||||||
|
await transcripts_controller.update(transcript, {"status": "ended"})
|
||||||
|
|
||||||
|
audio_filename = transcript.audio_mp3_filename
|
||||||
|
mp3_source = Path(__file__).parent / "records" / "test_mathieu_hello.mp3"
|
||||||
|
audio_filename.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
shutil.copy(mp3_source, audio_filename)
|
||||||
|
|
||||||
|
response = await client.get(f"/transcripts/{transcript.id}/audio/mp3")
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.headers["content-type"] == "audio/mpeg"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Password auth backend tests (?token= with HS256)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_password_token_query_param(private_transcript, client):
|
||||||
|
"""Password backend: valid HS256 ?token= grants access to private audio."""
|
||||||
|
from reflector.auth.auth_password import UserInfo
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
token = _create_hs256_token(OWNER_USER_ID, settings.SECRET_KEY)
|
||||||
|
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
mock_verify.return_value = UserInfo(sub=OWNER_USER_ID, email="test@example.com")
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3?token={token}"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.headers["content-type"] == "audio/mpeg"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_password_expired_token_returns_401(private_transcript, client):
|
||||||
|
"""Password backend: expired HS256 ?token= returns 401."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
expired_token = _create_hs256_token(
|
||||||
|
OWNER_USER_ID, settings.SECRET_KEY, expired=True
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
mock_verify.side_effect = jwt.ExpiredSignatureError("token expired")
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3" f"?token={expired_token}"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 401
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_password_wrong_user_returns_403(private_transcript, client):
|
||||||
|
"""Password backend: valid token for a different user returns 403."""
|
||||||
|
from reflector.auth.auth_password import UserInfo
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
token = _create_hs256_token("other-user-id", settings.SECRET_KEY)
|
||||||
|
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
mock_verify.return_value = UserInfo(
|
||||||
|
sub="other-user-id", email="other@example.com"
|
||||||
|
)
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3?token={token}"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 403
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_invalid_token_returns_401(private_transcript, client):
|
||||||
|
"""Garbage token string returns 401."""
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
mock_verify.return_value = None
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3" "?token=not-a-real-token"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 401
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# JWT/Authentik auth backend tests (?token= with RS256)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_authentik_token_query_param(private_transcript, client):
|
||||||
|
"""Authentik backend: valid RS256 ?token= grants access to private audio."""
|
||||||
|
from reflector.auth.auth_password import UserInfo
|
||||||
|
|
||||||
|
private_key, _ = _generate_rsa_keypair()
|
||||||
|
token = _create_rs256_token("authentik-abc123", private_key, "test-audience")
|
||||||
|
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
# Authentik flow maps authentik_uid -> internal user id
|
||||||
|
mock_verify.return_value = UserInfo(
|
||||||
|
sub=OWNER_USER_ID, email="authentik-user@example.com"
|
||||||
|
)
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3?token={token}"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.headers["content-type"] == "audio/mpeg"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_authentik_expired_token_returns_401(
|
||||||
|
private_transcript, client
|
||||||
|
):
|
||||||
|
"""Authentik backend: expired RS256 ?token= returns 401."""
|
||||||
|
private_key, _ = _generate_rsa_keypair()
|
||||||
|
expired_token = _create_rs256_token(
|
||||||
|
"authentik-abc123", private_key, "test-audience", expired=True
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
mock_verify.side_effect = jwt.ExpiredSignatureError("token expired")
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3" f"?token={expired_token}"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 401
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_audio_mp3_authentik_wrong_user_returns_403(private_transcript, client):
|
||||||
|
"""Authentik backend: valid RS256 token for different user returns 403."""
|
||||||
|
from reflector.auth.auth_password import UserInfo
|
||||||
|
|
||||||
|
private_key, _ = _generate_rsa_keypair()
|
||||||
|
token = _create_rs256_token("authentik-other", private_key, "test-audience")
|
||||||
|
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
mock_verify.return_value = UserInfo(
|
||||||
|
sub="different-user-id", email="other@example.com"
|
||||||
|
)
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3?token={token}"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 403
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _generate_local_audio_link produces HS256 tokens — must be verifiable
|
||||||
|
# by any auth backend
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_local_audio_link_token_works_with_authentik_backend(
|
||||||
|
private_transcript, client
|
||||||
|
):
|
||||||
|
"""_generate_local_audio_link creates an HS256 token via create_access_token.
|
||||||
|
|
||||||
|
When the Authentik (RS256) auth backend is active, verify_raw_token uses
|
||||||
|
JWTAuth which expects RS256 + public key. The HS256 token created by
|
||||||
|
_generate_local_audio_link will fail verification, returning 401.
|
||||||
|
|
||||||
|
This test documents the bug: the internal audio URL generated for the
|
||||||
|
diarization pipeline is unusable under the JWT auth backend.
|
||||||
|
"""
|
||||||
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
|
# Generate the internal audio link (uses create_access_token → HS256)
|
||||||
|
url = private_transcript._generate_local_audio_link()
|
||||||
|
parsed = urlparse(url)
|
||||||
|
token = parse_qs(parsed.query)["token"][0]
|
||||||
|
|
||||||
|
# Simulate what happens when the JWT/Authentik backend tries to verify
|
||||||
|
# this HS256 token: JWTAuth.verify_token expects RS256, so it raises.
|
||||||
|
with patch("reflector.auth.verify_raw_token") as mock_verify:
|
||||||
|
mock_verify.side_effect = jwt.exceptions.InvalidAlgorithmError(
|
||||||
|
"the specified alg value is not allowed"
|
||||||
|
)
|
||||||
|
response = await client.get(
|
||||||
|
f"/transcripts/{private_transcript.id}/audio/mp3?token={token}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# BUG: this should be 200 (the token was created by our own server),
|
||||||
|
# but the Authentik backend rejects it because it's HS256, not RS256.
|
||||||
|
assert response.status_code == 200
|
||||||
@@ -5,6 +5,8 @@ from unittest.mock import AsyncMock, patch
|
|||||||
import pytest
|
import pytest
|
||||||
from httpx import ASGITransport, AsyncClient
|
from httpx import ASGITransport, AsyncClient
|
||||||
|
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
async def app_lifespan():
|
async def app_lifespan():
|
||||||
@@ -36,7 +38,11 @@ async def test_transcript_process(
|
|||||||
dummy_file_diarization,
|
dummy_file_diarization,
|
||||||
dummy_storage,
|
dummy_storage,
|
||||||
client,
|
client,
|
||||||
|
monkeypatch,
|
||||||
):
|
):
|
||||||
|
# public mode: this test uses an anonymous client; allow anonymous transcript creation
|
||||||
|
monkeypatch.setattr(settings, "PUBLIC_MODE", True)
|
||||||
|
|
||||||
# create a transcript
|
# create a transcript
|
||||||
response = await client.post("/transcripts", json={"name": "test"})
|
response = await client.post("/transcripts", json={"name": "test"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@@ -106,12 +112,17 @@ async def test_transcript_process(
|
|||||||
|
|
||||||
@pytest.mark.usefixtures("setup_database")
|
@pytest.mark.usefixtures("setup_database")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_whereby_recording_uses_file_pipeline(client):
|
async def test_whereby_recording_uses_file_pipeline(monkeypatch, client):
|
||||||
"""Test that Whereby recordings (bucket_name but no track_keys) use file pipeline"""
|
"""Test that Whereby recordings (bucket_name but no track_keys) use file pipeline"""
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from reflector.db.recordings import Recording, recordings_controller
|
from reflector.db.recordings import Recording, recordings_controller
|
||||||
from reflector.db.transcripts import transcripts_controller
|
from reflector.db.transcripts import transcripts_controller
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
|
|
||||||
# Create transcript with Whereby recording (has bucket_name, no track_keys)
|
# Create transcript with Whereby recording (has bucket_name, no track_keys)
|
||||||
transcript = await transcripts_controller.add(
|
transcript = await transcripts_controller.add(
|
||||||
@@ -157,13 +168,18 @@ async def test_whereby_recording_uses_file_pipeline(client):
|
|||||||
|
|
||||||
@pytest.mark.usefixtures("setup_database")
|
@pytest.mark.usefixtures("setup_database")
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
async def test_dailyco_recording_uses_multitrack_pipeline(monkeypatch, client):
|
||||||
"""Test that Daily.co recordings (bucket_name + track_keys) use multitrack pipeline"""
|
"""Test that Daily.co recordings (bucket_name + track_keys) use multitrack pipeline"""
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
from reflector.db.recordings import Recording, recordings_controller
|
from reflector.db.recordings import Recording, recordings_controller
|
||||||
from reflector.db.rooms import rooms_controller
|
from reflector.db.rooms import rooms_controller
|
||||||
from reflector.db.transcripts import transcripts_controller
|
from reflector.db.transcripts import transcripts_controller
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
|
|
||||||
room = await rooms_controller.add(
|
room = await rooms_controller.add(
|
||||||
name="test-room",
|
name="test-room",
|
||||||
@@ -231,3 +247,86 @@ async def test_dailyco_recording_uses_multitrack_pipeline(client):
|
|||||||
{"s3_key": k} for k in track_keys
|
{"s3_key": k} for k in track_keys
|
||||||
]
|
]
|
||||||
mock_file_pipeline.delay.assert_not_called()
|
mock_file_pipeline.delay.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.usefixtures("setup_database")
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_reprocess_error_transcript_passes_force(monkeypatch, client):
|
||||||
|
"""When transcript status is 'error', reprocess passes force=True to start fresh workflow."""
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from reflector.db.recordings import Recording, recordings_controller
|
||||||
|
from reflector.db.rooms import rooms_controller
|
||||||
|
from reflector.db.transcripts import transcripts_controller
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
|
|
||||||
|
room = await rooms_controller.add(
|
||||||
|
name="test-room",
|
||||||
|
user_id="test-user",
|
||||||
|
zulip_auto_post=False,
|
||||||
|
zulip_stream="",
|
||||||
|
zulip_topic="",
|
||||||
|
is_locked=False,
|
||||||
|
room_mode="normal",
|
||||||
|
recording_type="cloud",
|
||||||
|
recording_trigger="automatic-2nd-participant",
|
||||||
|
is_shared=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.add(
|
||||||
|
"",
|
||||||
|
source_kind="room",
|
||||||
|
source_language="en",
|
||||||
|
target_language="en",
|
||||||
|
user_id="test-user",
|
||||||
|
share_mode="public",
|
||||||
|
room_id=room.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
track_keys = ["recordings/test-room/track1.webm"]
|
||||||
|
recording = await recordings_controller.create(
|
||||||
|
Recording(
|
||||||
|
bucket_name="daily-bucket",
|
||||||
|
object_key="recordings/test-room",
|
||||||
|
meeting_id="test-meeting",
|
||||||
|
track_keys=track_keys,
|
||||||
|
recorded_at=datetime.now(timezone.utc),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript,
|
||||||
|
{
|
||||||
|
"recording_id": recording.id,
|
||||||
|
"status": "error",
|
||||||
|
"workflow_run_id": "old-failed-run",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"reflector.services.transcript_process.task_is_scheduled_or_active"
|
||||||
|
) as mock_celery,
|
||||||
|
patch(
|
||||||
|
"reflector.services.transcript_process.HatchetClientManager"
|
||||||
|
) as mock_hatchet,
|
||||||
|
patch(
|
||||||
|
"reflector.views.transcripts_process.dispatch_transcript_processing",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_dispatch,
|
||||||
|
):
|
||||||
|
mock_celery.return_value = False
|
||||||
|
from hatchet_sdk.clients.rest.models import V1TaskStatus
|
||||||
|
|
||||||
|
mock_hatchet.get_workflow_run_status = AsyncMock(
|
||||||
|
return_value=V1TaskStatus.FAILED
|
||||||
|
)
|
||||||
|
response = await client.post(f"/transcripts/{transcript.id}/process")
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
mock_dispatch.assert_called_once()
|
||||||
|
assert mock_dispatch.call_args.kwargs["force"] is True
|
||||||
|
|||||||
@@ -133,10 +133,17 @@ async def test_transcript_rtc_and_websocket(
|
|||||||
fake_mp3_upload,
|
fake_mp3_upload,
|
||||||
appserver,
|
appserver,
|
||||||
client,
|
client,
|
||||||
|
monkeypatch,
|
||||||
):
|
):
|
||||||
# goal: start the server, exchange RTC, receive websocket events
|
# goal: start the server, exchange RTC, receive websocket events
|
||||||
# because of that, we need to start the server in a thread
|
# because of that, we need to start the server in a thread
|
||||||
# to be able to connect with aiortc
|
# to be able to connect with aiortc
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
|
|
||||||
server, host, port = appserver
|
server, host, port = appserver
|
||||||
|
|
||||||
# create a transcript
|
# create a transcript
|
||||||
@@ -298,11 +305,18 @@ async def test_transcript_rtc_and_websocket_and_fr(
|
|||||||
fake_mp3_upload,
|
fake_mp3_upload,
|
||||||
appserver,
|
appserver,
|
||||||
client,
|
client,
|
||||||
|
monkeypatch,
|
||||||
):
|
):
|
||||||
# goal: start the server, exchange RTC, receive websocket events
|
# goal: start the server, exchange RTC, receive websocket events
|
||||||
# because of that, we need to start the server in a thread
|
# because of that, we need to start the server in a thread
|
||||||
# to be able to connect with aiortc
|
# to be able to connect with aiortc
|
||||||
# with target french language
|
# with target french language
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
|
|
||||||
server, host, port = appserver
|
server, host, port = appserver
|
||||||
|
|
||||||
# create a transcript
|
# create a transcript
|
||||||
|
|||||||
@@ -1,8 +1,13 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_transcript_create_default_translation(client):
|
async def test_transcript_create_default_translation(monkeypatch, client):
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post("/transcripts", json={"name": "test en"})
|
response = await client.post("/transcripts", json={"name": "test en"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert response.json()["name"] == "test en"
|
assert response.json()["name"] == "test en"
|
||||||
@@ -18,7 +23,10 @@ async def test_transcript_create_default_translation(client):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_transcript_create_en_fr_translation(client):
|
async def test_transcript_create_en_fr_translation(monkeypatch, client):
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
"/transcripts", json={"name": "test en/fr", "target_language": "fr"}
|
"/transcripts", json={"name": "test en/fr", "target_language": "fr"}
|
||||||
)
|
)
|
||||||
@@ -36,7 +44,10 @@ async def test_transcript_create_en_fr_translation(client):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_transcript_create_fr_en_translation(client):
|
async def test_transcript_create_fr_en_translation(monkeypatch, client):
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
"/transcripts", json={"name": "test fr/en", "source_language": "fr"}
|
"/transcripts", json={"name": "test fr/en", "source_language": "fr"}
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -16,7 +16,13 @@ async def test_transcript_upload_file(
|
|||||||
dummy_file_diarization,
|
dummy_file_diarization,
|
||||||
dummy_storage,
|
dummy_storage,
|
||||||
client,
|
client,
|
||||||
|
monkeypatch,
|
||||||
):
|
):
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
settings, "PUBLIC_MODE", True
|
||||||
|
) # public mode: allow anonymous transcript creation for this test
|
||||||
# create a transcript
|
# create a transcript
|
||||||
response = await client.post("/transcripts", json={"name": "test"})
|
response = await client.post("/transcripts", json={"name": "test"})
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
|
|||||||
@@ -141,33 +141,19 @@ async def test_user_ws_accepts_valid_token_and_receives_events(appserver_ws_user
|
|||||||
await asyncio.sleep(0.2)
|
await asyncio.sleep(0.2)
|
||||||
|
|
||||||
# Emit an event to the user's room via a standard HTTP action
|
# Emit an event to the user's room via a standard HTTP action
|
||||||
|
# Use a real HTTP request to the server with the JWT token so that
|
||||||
|
# current_user_optional_if_public_mode is exercised without dependency overrides
|
||||||
from httpx import AsyncClient
|
from httpx import AsyncClient
|
||||||
|
|
||||||
from reflector.app import app
|
async with AsyncClient(base_url=f"http://{host}:{port}/v1") as ac:
|
||||||
from reflector.auth import current_user, current_user_optional
|
resp = await ac.post(
|
||||||
|
"/transcripts",
|
||||||
# Override auth dependencies so HTTP request is performed as the same user
|
json={"name": "WS Test"},
|
||||||
# Use the internal user.id (not the Authentik UID)
|
headers={"Authorization": f"Bearer {token}"},
|
||||||
app.dependency_overrides[current_user] = lambda: {
|
)
|
||||||
"sub": user.id,
|
|
||||||
"email": "user-abc@example.com",
|
|
||||||
}
|
|
||||||
app.dependency_overrides[current_user_optional] = lambda: {
|
|
||||||
"sub": user.id,
|
|
||||||
"email": "user-abc@example.com",
|
|
||||||
}
|
|
||||||
|
|
||||||
# Use in-memory client (global singleton makes it share ws_manager)
|
|
||||||
async with AsyncClient(app=app, base_url=f"http://{host}:{port}/v1") as ac:
|
|
||||||
# Create a transcript as this user so that the server publishes TRANSCRIPT_CREATED to user room
|
|
||||||
resp = await ac.post("/transcripts", json={"name": "WS Test"})
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
|
|
||||||
# Receive the published event
|
# Receive the published event
|
||||||
msg = await ws.receive_json()
|
msg = await ws.receive_json()
|
||||||
assert msg["event"] == "TRANSCRIPT_CREATED"
|
assert msg["event"] == "TRANSCRIPT_CREATED"
|
||||||
assert "id" in msg["data"]
|
assert "id" in msg["data"]
|
||||||
|
|
||||||
# Clean overrides
|
|
||||||
del app.dependency_overrides[current_user]
|
|
||||||
del app.dependency_overrides[current_user_optional]
|
|
||||||
|
|||||||
857
server/uv.lock
generated
857
server/uv.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,26 @@
|
|||||||
|
/**
|
||||||
|
* Reconnection policy for WebSocket.
|
||||||
|
* Ensures exponential backoff is applied and capped at 30s.
|
||||||
|
*/
|
||||||
|
import { getReconnectDelayMs, MAX_RETRIES } from "../webSocketReconnect";
|
||||||
|
|
||||||
|
describe("webSocketReconnect", () => {
|
||||||
|
describe("getReconnectDelayMs", () => {
|
||||||
|
it("returns exponential backoff: 1s, 2s, 4s, 8s, 16s, then cap 30s", () => {
|
||||||
|
expect(getReconnectDelayMs(0)).toBe(1000);
|
||||||
|
expect(getReconnectDelayMs(1)).toBe(2000);
|
||||||
|
expect(getReconnectDelayMs(2)).toBe(4000);
|
||||||
|
expect(getReconnectDelayMs(3)).toBe(8000);
|
||||||
|
expect(getReconnectDelayMs(4)).toBe(16000);
|
||||||
|
expect(getReconnectDelayMs(5)).toBe(30000); // 32s capped to 30s
|
||||||
|
expect(getReconnectDelayMs(6)).toBe(30000);
|
||||||
|
expect(getReconnectDelayMs(9)).toBe(30000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("never exceeds 30s for any retry index", () => {
|
||||||
|
for (let i = 0; i <= MAX_RETRIES; i++) {
|
||||||
|
expect(getReconnectDelayMs(i)).toBeLessThanOrEqual(30000);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -78,7 +78,9 @@ const useMp3 = (transcriptId: string, waiting?: boolean): Mp3Response => {
|
|||||||
|
|
||||||
// Audio is not deleted, proceed to load it
|
// Audio is not deleted, proceed to load it
|
||||||
audioElement = document.createElement("audio");
|
audioElement = document.createElement("audio");
|
||||||
const audioUrl = `${API_URL}/v1/transcripts/${transcriptId}/audio/mp3`;
|
const audioUrl = accessTokenInfo
|
||||||
|
? `${API_URL}/v1/transcripts/${transcriptId}/audio/mp3?token=${encodeURIComponent(accessTokenInfo)}`
|
||||||
|
: `${API_URL}/v1/transcripts/${transcriptId}/audio/mp3`;
|
||||||
audioElement.src = audioUrl;
|
audioElement.src = audioUrl;
|
||||||
audioElement.crossOrigin = "anonymous";
|
audioElement.crossOrigin = "anonymous";
|
||||||
audioElement.preload = "auto";
|
audioElement.preload = "auto";
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import {
|
|||||||
} from "../../lib/apiHooks";
|
} from "../../lib/apiHooks";
|
||||||
import { useAuth } from "../../lib/AuthProvider";
|
import { useAuth } from "../../lib/AuthProvider";
|
||||||
import { parseNonEmptyString } from "../../lib/utils";
|
import { parseNonEmptyString } from "../../lib/utils";
|
||||||
|
import { getReconnectDelayMs, MAX_RETRIES } from "./webSocketReconnect";
|
||||||
|
|
||||||
type TranscriptWsEvent =
|
type TranscriptWsEvent =
|
||||||
operations["v1_transcript_get_websocket_events"]["responses"][200]["content"]["application/json"];
|
operations["v1_transcript_get_websocket_events"]["responses"][200]["content"]["application/json"];
|
||||||
@@ -338,7 +339,6 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
if (!transcriptId) return;
|
if (!transcriptId) return;
|
||||||
const tsId = parseNonEmptyString(transcriptId);
|
const tsId = parseNonEmptyString(transcriptId);
|
||||||
|
|
||||||
const MAX_RETRIES = 10;
|
|
||||||
const url = `${WEBSOCKET_URL}/v1/transcripts/${transcriptId}/events`;
|
const url = `${WEBSOCKET_URL}/v1/transcripts/${transcriptId}/events`;
|
||||||
let ws: WebSocket | null = null;
|
let ws: WebSocket | null = null;
|
||||||
let retryCount = 0;
|
let retryCount = 0;
|
||||||
@@ -472,7 +472,7 @@ export const useWebSockets = (transcriptId: string | null): UseWebSockets => {
|
|||||||
if (normalCodes.includes(event.code)) return;
|
if (normalCodes.includes(event.code)) return;
|
||||||
|
|
||||||
if (retryCount < MAX_RETRIES) {
|
if (retryCount < MAX_RETRIES) {
|
||||||
const delay = Math.min(1000 * Math.pow(2, retryCount), 30000);
|
const delay = getReconnectDelayMs(retryCount);
|
||||||
console.log(
|
console.log(
|
||||||
`WebSocket reconnecting in ${delay}ms (attempt ${retryCount + 1}/${MAX_RETRIES})`,
|
`WebSocket reconnecting in ${delay}ms (attempt ${retryCount + 1}/${MAX_RETRIES})`,
|
||||||
);
|
);
|
||||||
|
|||||||
10
www/app/(app)/transcripts/webSocketReconnect.ts
Normal file
10
www/app/(app)/transcripts/webSocketReconnect.ts
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
/** Reconnection policy for WebSocket: exponential backoff, capped at 30s. */
|
||||||
|
export const MAX_RETRIES = 10;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delay in ms before reconnecting. retryIndex is 0-based (0 = first retry).
|
||||||
|
* Returns 1000, 2000, 4000, ... up to 30000 max.
|
||||||
|
*/
|
||||||
|
export function getReconnectDelayMs(retryIndex: number): number {
|
||||||
|
return Math.min(1000 * Math.pow(2, retryIndex), 30000);
|
||||||
|
}
|
||||||
@@ -10,7 +10,8 @@
|
|||||||
"lint": "next lint",
|
"lint": "next lint",
|
||||||
"format": "prettier --write .",
|
"format": "prettier --write .",
|
||||||
"openapi": "openapi-typescript http://127.0.0.1:1250/openapi.json -o ./app/reflector-api.d.ts",
|
"openapi": "openapi-typescript http://127.0.0.1:1250/openapi.json -o ./app/reflector-api.d.ts",
|
||||||
"test": "jest"
|
"test": "jest",
|
||||||
|
"typecheck": "tsc --noEmit"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@chakra-ui/react": "^3.33.0",
|
"@chakra-ui/react": "^3.33.0",
|
||||||
@@ -72,7 +73,9 @@
|
|||||||
"overrides": {
|
"overrides": {
|
||||||
"minimatch@>=5.0.0 <5.1.8": "5.1.8",
|
"minimatch@>=5.0.0 <5.1.8": "5.1.8",
|
||||||
"js-yaml@<4.1.1": "4.1.1",
|
"js-yaml@<4.1.1": "4.1.1",
|
||||||
"webpack": "5.105.3"
|
"webpack": "5.105.3",
|
||||||
|
"serialize-javascript": "7.0.4",
|
||||||
|
"immutable": "5.1.5"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
76
www/pnpm-lock.yaml
generated
76
www/pnpm-lock.yaml
generated
@@ -8,6 +8,8 @@ overrides:
|
|||||||
minimatch@>=5.0.0 <5.1.8: 5.1.8
|
minimatch@>=5.0.0 <5.1.8: 5.1.8
|
||||||
js-yaml@<4.1.1: 4.1.1
|
js-yaml@<4.1.1: 4.1.1
|
||||||
webpack: 5.105.3
|
webpack: 5.105.3
|
||||||
|
serialize-javascript: 7.0.4
|
||||||
|
immutable: 5.1.5
|
||||||
|
|
||||||
importers:
|
importers:
|
||||||
.:
|
.:
|
||||||
@@ -760,6 +762,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-libvips-linux-arm@1.2.4":
|
"@img/sharp-libvips-linux-arm@1.2.4":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -768,6 +771,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-libvips-linux-ppc64@1.2.4":
|
"@img/sharp-libvips-linux-ppc64@1.2.4":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -776,6 +780,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [ppc64]
|
cpu: [ppc64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-libvips-linux-riscv64@1.2.4":
|
"@img/sharp-libvips-linux-riscv64@1.2.4":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -784,6 +789,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [riscv64]
|
cpu: [riscv64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-libvips-linux-s390x@1.2.4":
|
"@img/sharp-libvips-linux-s390x@1.2.4":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -792,6 +798,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [s390x]
|
cpu: [s390x]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-libvips-linux-x64@1.2.4":
|
"@img/sharp-libvips-linux-x64@1.2.4":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -800,6 +807,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-libvips-linuxmusl-arm64@1.2.4":
|
"@img/sharp-libvips-linuxmusl-arm64@1.2.4":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -808,6 +816,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@img/sharp-libvips-linuxmusl-x64@1.2.4":
|
"@img/sharp-libvips-linuxmusl-x64@1.2.4":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -816,6 +825,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@img/sharp-linux-arm64@0.34.5":
|
"@img/sharp-linux-arm64@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -825,6 +835,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-linux-arm@0.34.5":
|
"@img/sharp-linux-arm@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -834,6 +845,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-linux-ppc64@0.34.5":
|
"@img/sharp-linux-ppc64@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -843,6 +855,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [ppc64]
|
cpu: [ppc64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-linux-riscv64@0.34.5":
|
"@img/sharp-linux-riscv64@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -852,6 +865,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [riscv64]
|
cpu: [riscv64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-linux-s390x@0.34.5":
|
"@img/sharp-linux-s390x@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -861,6 +875,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [s390x]
|
cpu: [s390x]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-linux-x64@0.34.5":
|
"@img/sharp-linux-x64@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -870,6 +885,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@img/sharp-linuxmusl-arm64@0.34.5":
|
"@img/sharp-linuxmusl-arm64@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -879,6 +895,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@img/sharp-linuxmusl-x64@0.34.5":
|
"@img/sharp-linuxmusl-x64@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -888,6 +905,7 @@ packages:
|
|||||||
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
engines: { node: ^18.17.0 || ^20.3.0 || >=21.0.0 }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@img/sharp-wasm32@0.34.5":
|
"@img/sharp-wasm32@0.34.5":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1194,6 +1212,7 @@ packages:
|
|||||||
engines: { node: ">= 10" }
|
engines: { node: ">= 10" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@next/swc-linux-arm64-musl@16.1.6":
|
"@next/swc-linux-arm64-musl@16.1.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1203,6 +1222,7 @@ packages:
|
|||||||
engines: { node: ">= 10" }
|
engines: { node: ">= 10" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@next/swc-linux-x64-gnu@16.1.6":
|
"@next/swc-linux-x64-gnu@16.1.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1212,6 +1232,7 @@ packages:
|
|||||||
engines: { node: ">= 10" }
|
engines: { node: ">= 10" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@next/swc-linux-x64-musl@16.1.6":
|
"@next/swc-linux-x64-musl@16.1.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1221,6 +1242,7 @@ packages:
|
|||||||
engines: { node: ">= 10" }
|
engines: { node: ">= 10" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@next/swc-win32-arm64-msvc@16.1.6":
|
"@next/swc-win32-arm64-msvc@16.1.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1645,6 +1667,7 @@ packages:
|
|||||||
engines: { node: ">= 10.0.0" }
|
engines: { node: ">= 10.0.0" }
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@parcel/watcher-linux-arm-musl@2.5.6":
|
"@parcel/watcher-linux-arm-musl@2.5.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1654,6 +1677,7 @@ packages:
|
|||||||
engines: { node: ">= 10.0.0" }
|
engines: { node: ">= 10.0.0" }
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@parcel/watcher-linux-arm64-glibc@2.5.6":
|
"@parcel/watcher-linux-arm64-glibc@2.5.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1663,6 +1687,7 @@ packages:
|
|||||||
engines: { node: ">= 10.0.0" }
|
engines: { node: ">= 10.0.0" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@parcel/watcher-linux-arm64-musl@2.5.6":
|
"@parcel/watcher-linux-arm64-musl@2.5.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1672,6 +1697,7 @@ packages:
|
|||||||
engines: { node: ">= 10.0.0" }
|
engines: { node: ">= 10.0.0" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@parcel/watcher-linux-x64-glibc@2.5.6":
|
"@parcel/watcher-linux-x64-glibc@2.5.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1681,6 +1707,7 @@ packages:
|
|||||||
engines: { node: ">= 10.0.0" }
|
engines: { node: ">= 10.0.0" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@parcel/watcher-linux-x64-musl@2.5.6":
|
"@parcel/watcher-linux-x64-musl@2.5.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -1690,6 +1717,7 @@ packages:
|
|||||||
engines: { node: ">= 10.0.0" }
|
engines: { node: ">= 10.0.0" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@parcel/watcher-win32-arm64@2.5.6":
|
"@parcel/watcher-win32-arm64@2.5.6":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2143,6 +2171,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@rollup/rollup-linux-arm-musleabihf@4.59.0":
|
"@rollup/rollup-linux-arm-musleabihf@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2151,6 +2180,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm]
|
cpu: [arm]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@rollup/rollup-linux-arm64-gnu@4.59.0":
|
"@rollup/rollup-linux-arm64-gnu@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2159,6 +2189,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@rollup/rollup-linux-arm64-musl@4.59.0":
|
"@rollup/rollup-linux-arm64-musl@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2167,6 +2198,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@rollup/rollup-linux-loong64-gnu@4.59.0":
|
"@rollup/rollup-linux-loong64-gnu@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2175,6 +2207,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [loong64]
|
cpu: [loong64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@rollup/rollup-linux-loong64-musl@4.59.0":
|
"@rollup/rollup-linux-loong64-musl@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2183,6 +2216,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [loong64]
|
cpu: [loong64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@rollup/rollup-linux-ppc64-gnu@4.59.0":
|
"@rollup/rollup-linux-ppc64-gnu@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2191,6 +2225,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [ppc64]
|
cpu: [ppc64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@rollup/rollup-linux-ppc64-musl@4.59.0":
|
"@rollup/rollup-linux-ppc64-musl@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2199,6 +2234,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [ppc64]
|
cpu: [ppc64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@rollup/rollup-linux-riscv64-gnu@4.59.0":
|
"@rollup/rollup-linux-riscv64-gnu@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2207,6 +2243,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [riscv64]
|
cpu: [riscv64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@rollup/rollup-linux-riscv64-musl@4.59.0":
|
"@rollup/rollup-linux-riscv64-musl@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2215,6 +2252,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [riscv64]
|
cpu: [riscv64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@rollup/rollup-linux-s390x-gnu@4.59.0":
|
"@rollup/rollup-linux-s390x-gnu@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2223,6 +2261,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [s390x]
|
cpu: [s390x]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@rollup/rollup-linux-x64-gnu@4.59.0":
|
"@rollup/rollup-linux-x64-gnu@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2231,6 +2270,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@rollup/rollup-linux-x64-musl@4.59.0":
|
"@rollup/rollup-linux-x64-musl@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2239,6 +2279,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@rollup/rollup-openbsd-x64@4.59.0":
|
"@rollup/rollup-openbsd-x64@4.59.0":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2668,6 +2709,7 @@ packages:
|
|||||||
engines: { node: ">= 20" }
|
engines: { node: ">= 20" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@tailwindcss/oxide-linux-arm64-musl@4.2.1":
|
"@tailwindcss/oxide-linux-arm64-musl@4.2.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2677,6 +2719,7 @@ packages:
|
|||||||
engines: { node: ">= 20" }
|
engines: { node: ">= 20" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@tailwindcss/oxide-linux-x64-gnu@4.2.1":
|
"@tailwindcss/oxide-linux-x64-gnu@4.2.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2686,6 +2729,7 @@ packages:
|
|||||||
engines: { node: ">= 20" }
|
engines: { node: ">= 20" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@tailwindcss/oxide-linux-x64-musl@4.2.1":
|
"@tailwindcss/oxide-linux-x64-musl@4.2.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -2695,6 +2739,7 @@ packages:
|
|||||||
engines: { node: ">= 20" }
|
engines: { node: ">= 20" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@tailwindcss/oxide-wasm32-wasi@4.2.1":
|
"@tailwindcss/oxide-wasm32-wasi@4.2.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3162,6 +3207,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@unrs/resolver-binding-linux-arm64-musl@1.11.1":
|
"@unrs/resolver-binding-linux-arm64-musl@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3170,6 +3216,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@unrs/resolver-binding-linux-ppc64-gnu@1.11.1":
|
"@unrs/resolver-binding-linux-ppc64-gnu@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3178,6 +3225,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [ppc64]
|
cpu: [ppc64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@unrs/resolver-binding-linux-riscv64-gnu@1.11.1":
|
"@unrs/resolver-binding-linux-riscv64-gnu@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3186,6 +3234,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [riscv64]
|
cpu: [riscv64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@unrs/resolver-binding-linux-riscv64-musl@1.11.1":
|
"@unrs/resolver-binding-linux-riscv64-musl@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3194,6 +3243,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [riscv64]
|
cpu: [riscv64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@unrs/resolver-binding-linux-s390x-gnu@1.11.1":
|
"@unrs/resolver-binding-linux-s390x-gnu@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3202,6 +3252,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [s390x]
|
cpu: [s390x]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@unrs/resolver-binding-linux-x64-gnu@1.11.1":
|
"@unrs/resolver-binding-linux-x64-gnu@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3210,6 +3261,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
"@unrs/resolver-binding-linux-x64-musl@1.11.1":
|
"@unrs/resolver-binding-linux-x64-musl@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -3218,6 +3270,7 @@ packages:
|
|||||||
}
|
}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
"@unrs/resolver-binding-wasm32-wasi@1.11.1":
|
"@unrs/resolver-binding-wasm32-wasi@1.11.1":
|
||||||
resolution:
|
resolution:
|
||||||
@@ -5602,10 +5655,10 @@ packages:
|
|||||||
integrity: sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw==,
|
integrity: sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw==,
|
||||||
}
|
}
|
||||||
|
|
||||||
immutable@5.1.4:
|
immutable@5.1.5:
|
||||||
resolution:
|
resolution:
|
||||||
{
|
{
|
||||||
integrity: sha512-p6u1bG3YSnINT5RQmx/yRZBpenIl30kVxkTLDyHLIMk0gict704Q9n+thfDI7lTRm9vXdDYutVzXhzcThxTnXA==,
|
integrity: sha512-t7xcm2siw+hlUM68I+UEOK+z84RzmN59as9DZ7P1l0994DKUWV7UXBMQZVxaoMSRQ+PBZbHCOoBt7a2wxOMt+A==,
|
||||||
}
|
}
|
||||||
|
|
||||||
import-fresh@3.3.1:
|
import-fresh@3.3.1:
|
||||||
@@ -6402,6 +6455,7 @@ packages:
|
|||||||
engines: { node: ">= 12.0.0" }
|
engines: { node: ">= 12.0.0" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
lightningcss-linux-arm64-musl@1.31.1:
|
lightningcss-linux-arm64-musl@1.31.1:
|
||||||
resolution:
|
resolution:
|
||||||
@@ -6411,6 +6465,7 @@ packages:
|
|||||||
engines: { node: ">= 12.0.0" }
|
engines: { node: ">= 12.0.0" }
|
||||||
cpu: [arm64]
|
cpu: [arm64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
lightningcss-linux-x64-gnu@1.31.1:
|
lightningcss-linux-x64-gnu@1.31.1:
|
||||||
resolution:
|
resolution:
|
||||||
@@ -6420,6 +6475,7 @@ packages:
|
|||||||
engines: { node: ">= 12.0.0" }
|
engines: { node: ">= 12.0.0" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [glibc]
|
||||||
|
|
||||||
lightningcss-linux-x64-musl@1.31.1:
|
lightningcss-linux-x64-musl@1.31.1:
|
||||||
resolution:
|
resolution:
|
||||||
@@ -6429,6 +6485,7 @@ packages:
|
|||||||
engines: { node: ">= 12.0.0" }
|
engines: { node: ">= 12.0.0" }
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
os: [linux]
|
os: [linux]
|
||||||
|
libc: [musl]
|
||||||
|
|
||||||
lightningcss-win32-arm64-msvc@1.31.1:
|
lightningcss-win32-arm64-msvc@1.31.1:
|
||||||
resolution:
|
resolution:
|
||||||
@@ -7875,11 +7932,12 @@ packages:
|
|||||||
engines: { node: ">=10" }
|
engines: { node: ">=10" }
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
serialize-javascript@6.0.2:
|
serialize-javascript@7.0.4:
|
||||||
resolution:
|
resolution:
|
||||||
{
|
{
|
||||||
integrity: sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==,
|
integrity: sha512-DuGdB+Po43Q5Jxwpzt1lhyFSYKryqoNjQSA9M92tyw0lyHIOur+XCalOUe0KTJpyqzT8+fQ5A0Jf7vCx/NKmIg==,
|
||||||
}
|
}
|
||||||
|
engines: { node: ">=20.0.0" }
|
||||||
|
|
||||||
set-function-length@1.2.2:
|
set-function-length@1.2.2:
|
||||||
resolution:
|
resolution:
|
||||||
@@ -12920,7 +12978,7 @@ snapshots:
|
|||||||
|
|
||||||
immer@11.1.4: {}
|
immer@11.1.4: {}
|
||||||
|
|
||||||
immutable@5.1.4: {}
|
immutable@5.1.5: {}
|
||||||
|
|
||||||
import-fresh@3.3.1:
|
import-fresh@3.3.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -14539,7 +14597,7 @@ snapshots:
|
|||||||
sass@1.97.3:
|
sass@1.97.3:
|
||||||
dependencies:
|
dependencies:
|
||||||
chokidar: 4.0.3
|
chokidar: 4.0.3
|
||||||
immutable: 5.1.4
|
immutable: 5.1.5
|
||||||
source-map-js: 1.2.1
|
source-map-js: 1.2.1
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
"@parcel/watcher": 2.5.6
|
"@parcel/watcher": 2.5.6
|
||||||
@@ -14561,9 +14619,7 @@ snapshots:
|
|||||||
|
|
||||||
semver@7.7.4: {}
|
semver@7.7.4: {}
|
||||||
|
|
||||||
serialize-javascript@6.0.2:
|
serialize-javascript@7.0.4: {}
|
||||||
dependencies:
|
|
||||||
randombytes: 2.1.0
|
|
||||||
|
|
||||||
set-function-length@1.2.2:
|
set-function-length@1.2.2:
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -14861,7 +14917,7 @@ snapshots:
|
|||||||
"@jridgewell/trace-mapping": 0.3.31
|
"@jridgewell/trace-mapping": 0.3.31
|
||||||
jest-worker: 27.5.1
|
jest-worker: 27.5.1
|
||||||
schema-utils: 4.3.3
|
schema-utils: 4.3.3
|
||||||
serialize-javascript: 6.0.2
|
serialize-javascript: 7.0.4
|
||||||
terser: 5.46.0
|
terser: 5.46.0
|
||||||
webpack: 5.105.3
|
webpack: 5.105.3
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,13 @@
|
|||||||
let authToken = null;
|
let authToken = null;
|
||||||
|
|
||||||
|
self.addEventListener("install", () => {
|
||||||
|
self.skipWaiting();
|
||||||
|
});
|
||||||
|
|
||||||
|
self.addEventListener("activate", (event) => {
|
||||||
|
event.waitUntil(self.clients.claim());
|
||||||
|
});
|
||||||
|
|
||||||
self.addEventListener("message", (event) => {
|
self.addEventListener("message", (event) => {
|
||||||
if (event.data && event.data.type === "SET_AUTH_TOKEN") {
|
if (event.data && event.data.type === "SET_AUTH_TOKEN") {
|
||||||
authToken = event.data.token;
|
authToken = event.data.token;
|
||||||
@@ -7,8 +15,8 @@ self.addEventListener("message", (event) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
self.addEventListener("fetch", function (event) {
|
self.addEventListener("fetch", function (event) {
|
||||||
// Check if the request is for a media file
|
// Check if the request is for a media file (allow optional query params)
|
||||||
if (/\/v1\/transcripts\/.*\/audio\/mp3$/.test(event.request.url)) {
|
if (/\/v1\/transcripts\/.*\/audio\/mp3(\?|$)/.test(event.request.url)) {
|
||||||
// Modify the request to add the Authorization header
|
// Modify the request to add the Authorization header
|
||||||
const modifiedHeaders = new Headers(event.request.headers);
|
const modifiedHeaders = new Headers(event.request.headers);
|
||||||
if (authToken) {
|
if (authToken) {
|
||||||
|
|||||||
Reference in New Issue
Block a user