Mirror of https://github.com/Monadical-SAS/reflector.git (synced 2025-12-21 04:39:06 +00:00)

Compare commits: 0.1.0...mathieu/ca (58 commits)
Commits in this range:
311d453e41, f286f0882c, ffcafb3bf2, 27075d840c, 30b5cd45e3, 2fccd81bcd, 1311714451, b9d891d342, 9eab952c63, 6fb5cb21c2, a42ed12982, 1aa52a99b6, 2a97290f2e, 7963cc8a52, d12424848d, 6e765875d5, e0f4acf28b, 12359ea4eb, 267b7401ea, aea9de393c, dc177af3ff, 5bd8233657, 28ac031ff6, 1878834ce6, f5b82d44e3, ad56165b54, 4ee19ed015, 406164033d, 81d316cb56, db3beae5cd, 03b9a18c1b, 7e3027adb6, 27b43d85ab, 2289a1a231, d0e130eb13, 24fabe3e86, 6fedbbe63f, b39175cdc9, 2a2af5fff2, ad44492cae, 901a239952, d77b5611f8, fc38345d65, 5a1d662dc4, 033bd4bc48, 0eb670ca19, 4a340c797b, c1e10f4dab, 2516d4085f, 4d21fd1754, b05fc9c36a, 0e2ae5fca8, 86ce68651f, 4895160181, d3498ae669, 4764dfc219, 9b67deb9fe, aea8773057

.github/pull_request_template.md (vendored, 30 lines changed)

@@ -1,19 +1,21 @@
-<!--- Provide a general summary of your changes in the Title above -->
-
-## Description
-<!--- Describe your changes in detail -->
-
-## Related Issue
-<!--- This project only accepts pull requests related to open issues -->
-<!--- If suggesting a new feature or change, please discuss it in an issue first -->
-<!--- If fixing a bug, there should be an issue describing it with steps to reproduce -->
-<!--- Please link to the issue here: -->
-
-## Motivation and Context
-<!--- Why is this change required? What problem does it solve? -->
-<!--- If it fixes an open issue, please link to the issue here. -->
-
-## How Has This Been Tested?
-<!--- Please describe in detail how you tested your changes. -->
-<!--- Include details of your testing environment, and the tests you ran to -->
-<!--- see how your change affects other areas of the code, etc. -->
-
-## Screenshots (if appropriate):
+## ⚠️ Insert the PR TITLE replacing this text ⚠️
+
+⚠️ Describe your PR replacing this text. Post screenshots or videos whenever possible. ⚠️
+
+### Checklist
+
+- [ ] My branch is updated with main (mandatory)
+- [ ] I wrote unit tests for this (if applies)
+- [ ] I have included migrations and tested them locally (if applies)
+- [ ] I have manually tested this feature locally
+
+> IMPORTANT: Remember that you are responsible for merging this PR after it's been reviewed, and once deployed
+> you should perform manual testing to make sure everything went smoothly.
+
+### Urgency
+
+- [ ] Urgent (deploy ASAP)
+- [ ] Non-urgent (deploying in next release is ok)

.github/workflows/conventional_commit_pr_title.yml (vendored, new file, 21 lines)

@@ -0,0 +1,21 @@
name: "Lint PR"

on:
  pull_request_target:
    types:
      - opened
      - edited
      - synchronize
      - reopened

permissions:
  pull-requests: read

jobs:
  main:
    name: Validate PR title
    runs-on: ubuntu-latest
    steps:
      - uses: amannn/action-semantic-pull-request@v5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/db_migrations.yml (vendored, 55 lines changed)

@@ -17,39 +17,60 @@ on:
 jobs:
   test-migrations:
     runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: postgres:17
+        env:
+          POSTGRES_USER: reflector
+          POSTGRES_PASSWORD: reflector
+          POSTGRES_DB: reflector
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd pg_isready -h 127.0.0.1 -p 5432
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+    env:
+      DATABASE_URL: postgresql://reflector:reflector@localhost:5432/reflector
     steps:
       - uses: actions/checkout@v4

-      - name: Install poetry
-        run: pipx install poetry
-      - name: Set up Python 3.x
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
-          cache: "poetry"
-          cache-dependency-path: "server/poetry.lock"
-      - name: Install requirements
-        working-directory: ./server
-        run: |
-          poetry install --no-root
+      - name: Install PostgreSQL client
+        run: sudo apt-get update && sudo apt-get install -y postgresql-client | cat
+      - name: Wait for Postgres
+        run: |
+          for i in {1..30}; do
+            if pg_isready -h localhost -p 5432; then
+              echo "Postgres is ready"
+              break
+            fi
+            echo "Waiting for Postgres... ($i)" && sleep 1
+          done
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          working-directory: server

       - name: Test migrations from scratch
-        working-directory: ./server
+        working-directory: server
         run: |
           echo "Testing migrations from clean database..."
-          poetry run alembic upgrade head
+          uv run alembic upgrade head
           echo "✅ Fresh migration successful"

       - name: Test migration rollback and re-apply
-        working-directory: ./server
+        working-directory: server
         run: |
           echo "Testing rollback to base..."
-          poetry run alembic downgrade base
+          uv run alembic downgrade base
           echo "✅ Rollback successful"

           echo "Testing re-apply of all migrations..."
-          poetry run alembic upgrade head
+          uv run alembic upgrade head
           echo "✅ Re-apply successful"

.github/workflows/pre-commit.yml (vendored, new file, 24 lines)

@@ -0,0 +1,24 @@
name: pre-commit

on:
  pull_request:
  push:
    branches: [main]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v5
      - uses: pnpm/action-setup@v4
        with:
          version: 10
      - uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: "pnpm"
          cache-dependency-path: "www/pnpm-lock.yaml"
      - name: Install dependencies
        run: cd www && pnpm install --frozen-lockfile
      - uses: pre-commit/action@v3.0.1

.github/workflows/release-please.yml (vendored, new file, 19 lines)

@@ -0,0 +1,19 @@
on:
  push:
    branches:
      - main

permissions:
  contents: write
  pull-requests: write

name: release-please

jobs:
  release-please:
    runs-on: ubuntu-latest
    steps:
      - uses: googleapis/release-please-action@v4
        with:
          token: ${{ secrets.MY_RELEASE_PLEASE_TOKEN }}
          release-type: simple

.github/workflows/test_server.yml (vendored, 50 lines changed)

@@ -17,56 +17,22 @@ jobs:
         ports:
           - 6379:6379
     steps:
-      - uses: actions/checkout@v3
-      - name: Install poetry
-        run: pipx install poetry
-      - name: Set up Python 3.x
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
-          cache: "poetry"
-          cache-dependency-path: "server/poetry.lock"
-      - name: Install requirements
-        run: |
-          cd server
-          poetry install --no-root
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          working-directory: server
       - name: Tests
         run: |
           cd server
-          poetry run python -m pytest -v tests
-
-  formatting:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python 3.x
-        uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - name: Validate formatting
-        run: |
-          pip install black
-          cd server
-          black --check reflector tests
-
-  linting:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python 3.x
-        uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - name: Validate formatting
-        run: |
-          pip install ruff
-          cd server
-          ruff check reflector tests
+          uv run -m pytest -v tests

   docker:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
       - name: Set up Docker Buildx

.gitignore (vendored, 7 lines changed)

@@ -9,4 +9,9 @@ dump.rdb
 ngrok.log
 .claude/settings.local.json
 restart-dev.sh
 *.log
+data/
+www/REFACTOR.md
+www/reload-frontend
+server/test.sqlite
+CLAUDE.local.md

.pre-commit-config.yaml

@@ -3,10 +3,10 @@
 repos:
   - repo: local
     hooks:
-      - id: yarn-format
-        name: run yarn format
+      - id: format
+        name: run format
         language: system
-        entry: bash -c 'cd www && yarn format'
+        entry: bash -c 'cd www && pnpm format'
         pass_filenames: false
         files: ^www/

@@ -15,25 +15,15 @@ repos:
     hooks:
       - id: debug-statements
       - id: trailing-whitespace
-        exclude: ^server/trials
       - id: detect-private-key

-  - repo: https://github.com/psf/black
-    rev: 24.1.1
-    hooks:
-      - id: black
-        files: ^server/(reflector|tests)/
-
-  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-        name: isort (python)
-        files: ^server/(gpu|evaluate|reflector)/
-        args: [ "--profile", "black", "--filter-files" ]
-
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.6.5
+    rev: v0.8.2
     hooks:
       - id: ruff
-        files: ^server/(reflector|tests)/
+        args:
+          - --fix
+        # Uses select rules from server/pyproject.toml
+        files: ^server/
+      - id: ruff-format
+        files: ^server/

@@ -1 +0,0 @@
-3.11.6

CHANGELOG.md (new file, 105 lines)

@@ -0,0 +1,105 @@
# Changelog

## [0.6.1](https://github.com/Monadical-SAS/reflector/compare/v0.6.0...v0.6.1) (2025-08-06)

### Bug Fixes

* delayed waveform loading ([#538](https://github.com/Monadical-SAS/reflector/issues/538)) ([ef64146](https://github.com/Monadical-SAS/reflector/commit/ef64146325d03f64dd9a1fe40234fb3e7e957ae2))

## [0.6.0](https://github.com/Monadical-SAS/reflector/compare/v0.5.0...v0.6.0) (2025-08-05)

### ⚠ BREAKING CHANGES

* Configuration keys have changed. Update your .env file:
  - TRANSCRIPT_MODAL_API_KEY → TRANSCRIPT_API_KEY
  - LLM_MODAL_API_KEY → (removed, use TRANSCRIPT_API_KEY)
  - Add DIARIZATION_API_KEY and TRANSLATE_API_KEY if using those services

### Features

* implement service-specific Modal API keys with auto processor pattern ([#528](https://github.com/Monadical-SAS/reflector/issues/528)) ([650befb](https://github.com/Monadical-SAS/reflector/commit/650befb291c47a1f49e94a01ab37d8fdfcd2b65d))
* use llamaindex everywhere ([#525](https://github.com/Monadical-SAS/reflector/issues/525)) ([3141d17](https://github.com/Monadical-SAS/reflector/commit/3141d172bc4d3b3d533370c8e6e351ea762169bf))

### Miscellaneous Chores

* **main:** release 0.6.0 ([ecdbf00](https://github.com/Monadical-SAS/reflector/commit/ecdbf003ea2476c3e95fd231adaeb852f2943df0))

## [0.5.0](https://github.com/Monadical-SAS/reflector/compare/v0.4.0...v0.5.0) (2025-07-31)

### Features

* new summary using phi-4 and llama-index ([#519](https://github.com/Monadical-SAS/reflector/issues/519)) ([1bf9ce0](https://github.com/Monadical-SAS/reflector/commit/1bf9ce07c12f87f89e68a1dbb3b2c96c5ee62466))

### Bug Fixes

* remove unused settings and utils files ([#522](https://github.com/Monadical-SAS/reflector/issues/522)) ([2af4790](https://github.com/Monadical-SAS/reflector/commit/2af4790e4be9e588f282fbc1bb171c88a03d6479))

## [0.4.0](https://github.com/Monadical-SAS/reflector/compare/v0.3.2...v0.4.0) (2025-07-25)

### Features

* Diarization cli ([#509](https://github.com/Monadical-SAS/reflector/issues/509)) ([ffc8003](https://github.com/Monadical-SAS/reflector/commit/ffc8003e6dad236930a27d0fe3e2f2adfb793890))

### Bug Fixes

* remove faulty import Meeting ([#512](https://github.com/Monadical-SAS/reflector/issues/512)) ([0e68c79](https://github.com/Monadical-SAS/reflector/commit/0e68c798434e1b481f9482cc3a4702ea00365df4))
* room concurrency (theoretically) ([#511](https://github.com/Monadical-SAS/reflector/issues/511)) ([7bb3676](https://github.com/Monadical-SAS/reflector/commit/7bb367653afeb2778cff697a0eb217abf0b81b84))

## [0.3.2](https://github.com/Monadical-SAS/reflector/compare/v0.3.1...v0.3.2) (2025-07-22)

### Bug Fixes

* match font size for the filter sidebar ([#507](https://github.com/Monadical-SAS/reflector/issues/507)) ([4b8ba5d](https://github.com/Monadical-SAS/reflector/commit/4b8ba5db1733557e27b098ad3d1cdecadf97ae52))
* whereby consent not displaying ([#505](https://github.com/Monadical-SAS/reflector/issues/505)) ([1120552](https://github.com/Monadical-SAS/reflector/commit/1120552c2c83d084d3a39272ad49b6aeda1af98f))

## [0.3.1](https://github.com/Monadical-SAS/reflector/compare/v0.3.0...v0.3.1) (2025-07-22)

### Bug Fixes

* remove fief out of the source code ([#502](https://github.com/Monadical-SAS/reflector/issues/502)) ([890dd15](https://github.com/Monadical-SAS/reflector/commit/890dd15ba5a2be10dbb841e9aeb75d377885f4af))
* remove primary color for room action menu ([#504](https://github.com/Monadical-SAS/reflector/issues/504)) ([2e33f89](https://github.com/Monadical-SAS/reflector/commit/2e33f89c0f9e5fbaafa80e8d2ae9788450ea2f31))

## [0.3.0](https://github.com/Monadical-SAS/reflector/compare/v0.2.1...v0.3.0) (2025-07-21)

### Features

* migrate from chakra 2 to chakra 3 ([#500](https://github.com/Monadical-SAS/reflector/issues/500)) ([a858464](https://github.com/Monadical-SAS/reflector/commit/a858464c7a80e5497acf801d933bf04092f8b526))

## [0.2.1](https://github.com/Monadical-SAS/reflector/compare/v0.2.0...v0.2.1) (2025-07-18)

### Bug Fixes

* separate browsing page into different components, limit to 10 by default ([#498](https://github.com/Monadical-SAS/reflector/issues/498)) ([c752da6](https://github.com/Monadical-SAS/reflector/commit/c752da6b97c96318aff079a5b2a6eceadfbfcad1))

## [0.2.0](https://github.com/Monadical-SAS/reflector/compare/0.1.1...v0.2.0) (2025-07-17)

### Features

* improve transcript listing with room_id ([#496](https://github.com/Monadical-SAS/reflector/issues/496)) ([d2b5de5](https://github.com/Monadical-SAS/reflector/commit/d2b5de543fc0617fc220caa6a8a290e4040cb10b))

### Bug Fixes

* don't attempt to load waveform/mp3 if audio was deleted ([#495](https://github.com/Monadical-SAS/reflector/issues/495)) ([f4578a7](https://github.com/Monadical-SAS/reflector/commit/f4578a743fd0f20312fbd242fa9cccdfaeb20a9e))

## [0.1.1](https://github.com/Monadical-SAS/reflector/compare/0.1.0...v0.1.1) (2025-07-17)

### Bug Fixes

* postgres database not connecting in worker ([#492](https://github.com/Monadical-SAS/reflector/issues/492)) ([123d09f](https://github.com/Monadical-SAS/reflector/commit/123d09fdacef7f5a84541cf01732d4f5b6b9d2d0))
* process meetings with utc ([#493](https://github.com/Monadical-SAS/reflector/issues/493)) ([f3c85e1](https://github.com/Monadical-SAS/reflector/commit/f3c85e1eb97cd893840125ed056dcb290fccb612))
* punkt -> punkt_tab + pre-download nltk packages to prevent runtime not working ([#489](https://github.com/Monadical-SAS/reflector/issues/489)) ([c22487b](https://github.com/Monadical-SAS/reflector/commit/c22487b41f311a3fdba2eac04c7637bd396cccee))
* rename averaged_perceptron_tagger to averaged_perceptron_tagger_eng ([#491](https://github.com/Monadical-SAS/reflector/issues/491)) ([a7b7846](https://github.com/Monadical-SAS/reflector/commit/a7b78462419b3af81c6dbf1ddfccb3d532f660a3))

CLAUDE.md (new file, 180 lines)

@@ -0,0 +1,180 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Reflector is an AI-powered audio transcription and meeting analysis platform with real-time processing capabilities. The system consists of:

- **Frontend**: Next.js 14 React application (`www/`) with Chakra UI, real-time WebSocket integration
- **Backend**: Python FastAPI server (`server/`) with async database operations and background processing
- **Processing**: GPU-accelerated ML pipeline for transcription, diarization, summarization via Modal.com
- **Infrastructure**: Redis, PostgreSQL/SQLite, Celery workers, WebRTC streaming

## Development Commands

### Backend (Python) - `cd server/`

**Setup and Dependencies:**
```bash
# Install dependencies
uv sync

# Database migrations (first run or schema changes)
uv run alembic upgrade head

# Start services
docker compose up -d redis
```

**Development:**
```bash
# Start FastAPI server
uv run -m reflector.app --reload

# Start Celery worker for background tasks
uv run celery -A reflector.worker.app worker --loglevel=info

# Start Celery beat scheduler (optional, for cron jobs)
uv run celery -A reflector.worker.app beat
```

**Testing:**
```bash
# Run all tests with coverage
uv run pytest

# Run specific test file
uv run pytest tests/test_transcripts.py

# Run tests with verbose output
uv run pytest -v
```

**Process Audio Files:**
```bash
# Process local audio file manually
uv run python -m reflector.tools.process path/to/audio.wav
```

### Frontend (Next.js) - `cd www/`

**Setup:**
```bash
# Install dependencies
pnpm install

# Copy configuration templates
cp .env_template .env
cp config-template.ts config.ts
```

**Development:**
```bash
# Start development server
pnpm dev

# Generate TypeScript API client from OpenAPI spec
pnpm openapi

# Lint code
pnpm lint

# Format code
pnpm format

# Build for production
pnpm build
```

### Docker Compose (Full Stack)

```bash
# Start all services
docker compose up -d

# Start specific services
docker compose up -d redis server worker
```

## Architecture Overview

### Backend Processing Pipeline

The audio processing follows a modular pipeline architecture:

1. **Audio Input**: WebRTC streaming, file upload, or cloud recording ingestion
2. **Chunking**: Audio split into processable segments (`AudioChunkerProcessor`)
3. **Transcription**: Whisper or Modal.com GPU processing (`AudioTranscriptAutoProcessor`)
4. **Diarization**: Speaker identification (`AudioDiarizationAutoProcessor`)
5. **Text Processing**: Formatting, translation, topic detection
6. **Summarization**: AI-powered summaries and title generation
7. **Storage**: Database persistence with optional S3 backend

### Database Models

Core entities:
- `transcript`: Main table with processing results, summaries, topics, participants
- `meeting`: Live meeting sessions with consent management
- `room`: Virtual meeting spaces with configuration
- `recording`: Audio/video file metadata and processing status

### API Structure

All endpoints prefixed `/v1/`:
- `transcripts/` - CRUD operations for transcripts
- `transcripts_audio/` - Audio streaming and download
- `transcripts_webrtc/` - Real-time WebRTC endpoints
- `transcripts_websocket/` - WebSocket for live updates
- `meetings/` - Meeting lifecycle management
- `rooms/` - Virtual room management

### Frontend Architecture

- **App Router**: Next.js 14 with route groups for organization
- **State**: React Context pattern, no Redux
- **Real-time**: WebSocket integration for live transcription updates
- **Auth**: NextAuth.js with Authentik OAuth/OIDC provider
- **UI**: Chakra UI components with Tailwind CSS utilities

## Key Configuration

### Environment Variables

**Backend** (`server/.env`):
- `DATABASE_URL` - Database connection string
- `REDIS_URL` - Redis broker for Celery
- `TRANSCRIPT_BACKEND=modal` + `TRANSCRIPT_MODAL_API_KEY` - Modal.com transcription
- `DIARIZATION_BACKEND=modal` + `DIARIZATION_MODAL_API_KEY` - Modal.com diarization
- `TRANSLATION_BACKEND=modal` + `TRANSLATION_MODAL_API_KEY` - Modal.com translation
- `WHEREBY_API_KEY` - Video platform integration
- `REFLECTOR_AUTH_BACKEND` - Authentication method (none, jwt)

**Frontend** (`www/.env`):
- `NEXTAUTH_URL`, `NEXTAUTH_SECRET` - Authentication configuration
- `NEXT_PUBLIC_REFLECTOR_API_URL` - Backend API endpoint
- `REFLECTOR_DOMAIN_CONFIG` - Feature flags and domain settings

## Testing Strategy

- **Backend**: pytest with async support, HTTP client mocking, audio processing tests
- **Frontend**: No current test suite - opportunities for Jest/React Testing Library
- **Coverage**: Backend maintains test coverage reports in `htmlcov/`

## GPU Processing

Modal.com integration for scalable ML processing:
- Deploy changes: `modal run server/gpu/path/to/model.py`
- Requires Modal account with `REFLECTOR_GPU_APIKEY` secret
- Fallback to local processing when Modal unavailable

## Common Issues

- **Permissions**: Browser microphone access required in System Preferences
- **Audio Routing**: Use BlackHole (Mac) for merging multiple audio sources
- **WebRTC**: Ensure proper CORS configuration for cross-origin streaming
- **Database**: Run `uv run alembic upgrade head` after pulling schema changes

## Pipeline/worker related info

If you need to do any worker/pipeline related work, search for "Pipeline" classes and their "create" or "build" methods to find the main processor sequence. Look for task orchestration patterns (like "chord", "group", or "chain") to identify the post-processing flow with parallel execution chains. This gives an abstract view of how the processing pipeline is organized; a sketch of that orchestration pattern follows.
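
A minimal sketch of that Celery orchestration style, with made-up task names (the real flow is built by the repository's Pipeline classes and processors, not these functions):

```python
from celery import chain, chord, group, shared_task


@shared_task
def transcribe(transcript_id): ...


@shared_task
def diarize(transcript_id): ...


@shared_task
def detect_topics(transcript_id): ...


@shared_task
def summarize(results, transcript_id): ...


def build_post_processing(transcript_id: str):
    # Sequential steps go in a chain; independent steps run in parallel as a
    # group, and a chord fires the callback once every branch has finished.
    return chain(
        transcribe.si(transcript_id),
        chord(
            group(diarize.si(transcript_id), detect_topics.si(transcript_id)),
            summarize.s(transcript_id),
        ),
    )
```

Calling `build_post_processing("some-id").delay()` queues the whole flow on the worker started with `uv run celery -A reflector.worker.app worker`.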

ICS_IMPLEMENTATION.md (new file, 497 lines)

@@ -0,0 +1,497 @@
# ICS Calendar Integration - Implementation Guide

## Overview
This document provides detailed implementation guidance for integrating ICS calendar feeds with Reflector rooms. Unlike CalDAV which requires complex authentication and protocol handling, ICS integration uses simple HTTP(S) fetching of calendar files.

## Key Differences from CalDAV Approach

| Aspect | CalDAV | ICS |
|--------|--------|-----|
| Protocol | WebDAV extension | HTTP/HTTPS GET |
| Authentication | Username/password, OAuth | Tokens embedded in URL |
| Data Access | Selective event queries | Full calendar download |
| Implementation | Complex (caldav library) | Simple (requests + icalendar) |
| Real-time Updates | Supported | Polling only |
| Write Access | Yes | No (read-only) |

## Technical Architecture

### 1. ICS Fetching Service

```python
# reflector/services/ics_sync.py

import requests
from icalendar import Calendar
from typing import List, Optional
from datetime import datetime, timedelta


class ICSFetchService:
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Reflector/1.0'})

    def fetch_ics(self, url: str) -> str:
        """Fetch ICS file from URL (authentication via URL token if needed)."""
        response = self.session.get(url, timeout=30)
        response.raise_for_status()
        return response.text

    def parse_ics(self, ics_content: str) -> Calendar:
        """Parse ICS content into calendar object."""
        return Calendar.from_ical(ics_content)

    def extract_room_events(self, calendar: Calendar, room_url: str) -> List[dict]:
        """Extract events that match the room URL."""
        events = []

        for component in calendar.walk():
            if component.name == "VEVENT":
                # Check if event matches this room
                if self._event_matches_room(component, room_url):
                    events.append(self._parse_event(component))

        return events

    def _event_matches_room(self, event, room_url: str) -> bool:
        """Check if event location or description contains room URL."""
        location = str(event.get('LOCATION', ''))
        description = str(event.get('DESCRIPTION', ''))

        # Support various URL formats
        patterns = [
            room_url,
            room_url.replace('https://', ''),
            room_url.split('/')[-1],  # Just room name
        ]

        for pattern in patterns:
            if pattern in location or pattern in description:
                return True

        return False
```
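
`extract_room_events` calls a `_parse_event` helper that this guide does not show. A standalone sketch of what it could return, assuming the "Event Field Mapping" table and the `normalize_datetime` helper defined later in this document (the codebase's actual implementation may differ):

```python
def parse_event(event) -> dict:
    """What ICSFetchService._parse_event could return for one VEVENT component."""
    attendees = []
    raw = event.get('ATTENDEE', [])
    if not isinstance(raw, list):
        raw = [raw]
    for prop in raw:
        params = getattr(prop, 'params', {})
        attendees.append({
            'email': str(prop).replace('MAILTO:', '').replace('mailto:', ''),
            'name': str(params.get('CN', '')),
            'status': str(params.get('PARTSTAT', '')),
        })

    return {
        'external_id': str(event.get('UID', '')),
        'title': str(event.get('SUMMARY', '')),
        'description': str(event.get('DESCRIPTION', '')),
        'start_time': normalize_datetime(event.get('DTSTART')),
        'end_time': normalize_datetime(event.get('DTEND')),
        'location': str(event.get('LOCATION', '')),
        'attendees': attendees,
        'ics_raw_data': event.to_ical().decode(),
    }
```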

### 2. Database Schema

```sql
-- Modify room table
ALTER TABLE room ADD COLUMN ics_url TEXT; -- encrypted to protect embedded tokens
ALTER TABLE room ADD COLUMN ics_fetch_interval INTEGER DEFAULT 300; -- seconds
ALTER TABLE room ADD COLUMN ics_enabled BOOLEAN DEFAULT FALSE;
ALTER TABLE room ADD COLUMN ics_last_sync TIMESTAMP;
ALTER TABLE room ADD COLUMN ics_last_etag TEXT; -- for caching

-- Calendar events table
CREATE TABLE calendar_event (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    room_id UUID REFERENCES room(id) ON DELETE CASCADE,
    external_id TEXT NOT NULL, -- ICS UID
    title TEXT,
    description TEXT,
    start_time TIMESTAMP NOT NULL,
    end_time TIMESTAMP NOT NULL,
    attendees JSONB,
    location TEXT,
    ics_raw_data TEXT, -- Store raw VEVENT for reference
    last_synced TIMESTAMP DEFAULT NOW(),
    is_deleted BOOLEAN DEFAULT FALSE,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW(),
    UNIQUE(room_id, external_id)
);

-- Index for efficient queries
CREATE INDEX idx_calendar_event_room_start ON calendar_event(room_id, start_time);
CREATE INDEX idx_calendar_event_deleted ON calendar_event(is_deleted) WHERE NOT is_deleted;
```
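
If these changes go through Alembic (which the project uses for migrations, per CLAUDE.md), the upgrade step might look roughly like the following; the revision scaffolding is omitted and the SQLAlchemy types are an assumption mirroring the SQL above:

```python
# Hypothetical Alembic upgrade mirroring the SQL above (not the actual migration file).
import sqlalchemy as sa
from alembic import op


def upgrade() -> None:
    op.add_column("room", sa.Column("ics_url", sa.Text(), nullable=True))
    op.add_column("room", sa.Column("ics_fetch_interval", sa.Integer(), server_default="300"))
    op.add_column("room", sa.Column("ics_enabled", sa.Boolean(), server_default=sa.false()))
    op.add_column("room", sa.Column("ics_last_sync", sa.DateTime(), nullable=True))
    op.add_column("room", sa.Column("ics_last_etag", sa.Text(), nullable=True))

    op.create_table(
        "calendar_event",
        sa.Column("id", sa.Uuid(), primary_key=True),
        sa.Column("room_id", sa.Uuid(), sa.ForeignKey("room.id", ondelete="CASCADE")),
        sa.Column("external_id", sa.Text(), nullable=False),
        sa.Column("title", sa.Text()),
        sa.Column("description", sa.Text()),
        sa.Column("start_time", sa.DateTime(), nullable=False),
        sa.Column("end_time", sa.DateTime(), nullable=False),
        sa.Column("attendees", sa.JSON()),
        sa.Column("location", sa.Text()),
        sa.Column("ics_raw_data", sa.Text()),
        sa.Column("last_synced", sa.DateTime()),
        sa.Column("is_deleted", sa.Boolean(), server_default=sa.false()),
        sa.UniqueConstraint("room_id", "external_id"),
    )
    op.create_index("idx_calendar_event_room_start", "calendar_event", ["room_id", "start_time"])
```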

### 3. Background Tasks

```python
# reflector/worker/tasks/ics_sync.py

from celery import shared_task
from datetime import datetime, timedelta
import hashlib


@shared_task
def sync_ics_calendars():
    """Sync all enabled ICS calendars based on their fetch intervals."""
    rooms = Room.query.filter_by(ics_enabled=True).all()

    for room in rooms:
        # Check if it's time to sync based on fetch interval
        if should_sync(room):
            sync_room_calendar.delay(room.id)


@shared_task
def sync_room_calendar(room_id: str):
    """Sync calendar for a specific room."""
    room = Room.query.get(room_id)
    if not room or not room.ics_enabled:
        return

    try:
        # Fetch ICS file (decrypt URL first)
        service = ICSFetchService()
        decrypted_url = decrypt_ics_url(room.ics_url)
        ics_content = service.fetch_ics(decrypted_url)

        # Check if content changed (using ETag or hash)
        content_hash = hashlib.md5(ics_content.encode()).hexdigest()
        if room.ics_last_etag == content_hash:
            logger.info(f"No changes in ICS for room {room_id}")
            return

        # Parse and extract events
        calendar = service.parse_ics(ics_content)
        events = service.extract_room_events(calendar, room.url)

        # Update database
        sync_events_to_database(room_id, events)

        # Update sync metadata
        room.ics_last_sync = datetime.utcnow()
        room.ics_last_etag = content_hash
        db.session.commit()

    except Exception as e:
        logger.error(f"Failed to sync ICS for room {room_id}: {e}")


def should_sync(room) -> bool:
    """Check if room calendar should be synced."""
    if not room.ics_last_sync:
        return True

    time_since_sync = datetime.utcnow() - room.ics_last_sync
    return time_since_sync.total_seconds() >= room.ics_fetch_interval
```
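
`sync_room_calendar` delegates to a `sync_events_to_database` function that the guide does not define. A minimal sketch in the same illustrative ORM style as the tasks above (upsert by `room_id` + `external_id`, soft-delete future events that disappeared from the feed, keep past events):

```python
from datetime import datetime


def sync_events_to_database(room_id: str, events: list[dict]) -> None:
    """Upsert fetched events and soft-delete future events missing from the feed."""
    seen_ids = set()
    for data in events:
        seen_ids.add(data["external_id"])
        existing = CalendarEvent.query.filter_by(
            room_id=room_id, external_id=data["external_id"]
        ).first()
        if existing:
            for field, value in data.items():
                setattr(existing, field, value)
            existing.last_synced = datetime.utcnow()
        else:
            db.session.add(CalendarEvent(room_id=room_id, **data))

    # Future events that no longer appear in the feed are soft-deleted;
    # past events are kept for the historical record.
    stale = CalendarEvent.query.filter(
        CalendarEvent.room_id == room_id,
        CalendarEvent.start_time > datetime.utcnow(),
        CalendarEvent.external_id.notin_(seen_ids),
    )
    for event in stale:
        event.is_deleted = True

    db.session.commit()
```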

### 4. Celery Beat Schedule

```python
# reflector/worker/celeryconfig.py

from celery.schedules import crontab

beat_schedule = {
    'sync-ics-calendars': {
        'task': 'reflector.worker.tasks.ics_sync.sync_ics_calendars',
        'schedule': 60.0,  # Check every minute which calendars need syncing
    },
    'pre-create-meetings': {
        'task': 'reflector.worker.tasks.ics_sync.pre_create_calendar_meetings',
        'schedule': 60.0,  # Check every minute for upcoming meetings
    },
}
```
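
The schedule references a `pre_create_calendar_meetings` task that is not shown in this guide. A sketch consistent with the plan (create the meeting roughly one minute before a calendar event starts), where `Meeting`, `CalendarEvent`, `db`, and `create_whereby_meeting` are assumed stand-ins for the project's actual models and Whereby helper:

```python
from datetime import datetime, timedelta

from celery import shared_task


@shared_task
def pre_create_calendar_meetings():
    """Pre-create meetings for calendar events starting within the next minute."""
    now = datetime.utcnow()
    window_end = now + timedelta(minutes=1)

    upcoming = CalendarEvent.query.filter(
        CalendarEvent.start_time >= now,
        CalendarEvent.start_time <= window_end,
        CalendarEvent.is_deleted.is_(False),
    )
    for event in upcoming:
        # Skip events that already have a meeting attached.
        if Meeting.query.filter_by(calendar_event_id=event.id).first():
            continue
        meeting = create_whereby_meeting(event.room_id)  # assumed helper wrapping the Whereby API
        meeting.calendar_event_id = event.id
        meeting.calendar_metadata = {"title": event.title, "attendees": event.attendees}
        db.session.add(meeting)

    db.session.commit()
```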

## API Endpoints

### Room ICS Configuration

```python
# PATCH /v1/rooms/{room_id}
{
    "ics_url": "https://calendar.google.com/calendar/ical/.../private-token/basic.ics",
    "ics_fetch_interval": 300,  # seconds
    "ics_enabled": true
    # URL will be encrypted in database to protect embedded tokens
}
```

### Manual Sync Trigger

```python
# POST /v1/rooms/{room_name}/ics/sync
# Response:
{
    "status": "syncing",
    "last_sync": "2024-01-15T10:30:00Z",
    "events_found": 5
}
```

### ICS Status

```python
# GET /v1/rooms/{room_name}/ics/status
# Response:
{
    "enabled": true,
    "last_sync": "2024-01-15T10:30:00Z",
    "next_sync": "2024-01-15T10:35:00Z",
    "fetch_interval": 300,
    "events_count": 12,
    "upcoming_events": 3
}
```

## ICS Parsing Details

### Event Field Mapping

| ICS Field | Database Field | Notes |
|-----------|---------------|-------|
| UID | external_id | Unique identifier |
| SUMMARY | title | Event title |
| DESCRIPTION | description | Full description |
| DTSTART | start_time | Convert to UTC |
| DTEND | end_time | Convert to UTC |
| LOCATION | location | Check for room URL |
| ATTENDEE | attendees | Parse into JSON |
| ORGANIZER | attendees | Add as organizer |
| STATUS | (internal) | Filter cancelled events |

### Handling Recurring Events

```python
def expand_recurring_events(event, start_date, end_date):
    """Expand recurring events into individual occurrences."""
    from dateutil.rrule import rrulestr

    if 'RRULE' not in event:
        return [event]

    # Parse recurrence rule
    rrule_str = event['RRULE'].to_ical().decode()
    dtstart = event['DTSTART'].dt

    # Generate occurrences
    rrule = rrulestr(rrule_str, dtstart=dtstart)
    occurrences = []

    for dt in rrule.between(start_date, end_date):
        # Clone event with new date
        occurrence = event.copy()
        occurrence['DTSTART'].dt = dt
        if 'DTEND' in event:
            duration = event['DTEND'].dt - event['DTSTART'].dt
            occurrence['DTEND'].dt = dt + duration

        # Unique ID for each occurrence
        occurrence['UID'] = f"{event['UID']}_{dt.isoformat()}"
        occurrences.append(occurrence)

    return occurrences
```

### Timezone Handling

```python
def normalize_datetime(dt):
    """Convert various datetime formats to UTC."""
    import pytz
    from datetime import datetime

    if hasattr(dt, 'dt'):  # icalendar property
        dt = dt.dt

    if isinstance(dt, datetime):
        if dt.tzinfo is None:
            # Assume local timezone if naive
            dt = pytz.timezone('UTC').localize(dt)
        else:
            # Convert to UTC
            dt = dt.astimezone(pytz.UTC)

    return dt
```

## Security Considerations

### 1. URL Validation

```python
def validate_ics_url(url: str) -> bool:
    """Validate ICS URL for security."""
    from urllib.parse import urlparse

    parsed = urlparse(url)

    # Must be HTTPS in production
    if not settings.DEBUG and parsed.scheme != 'https':
        return False

    # Prevent local file access
    if parsed.scheme in ('file', 'ftp'):
        return False

    # Prevent internal network access
    if is_internal_ip(parsed.hostname):
        return False

    return True
```

### 2. Rate Limiting

```python
# Implement per-room rate limiting
RATE_LIMITS = {
    'min_fetch_interval': 60,  # Minimum 1 minute between fetches
    'max_requests_per_hour': 60,  # Max 60 requests per hour per room
    'max_file_size': 10 * 1024 * 1024,  # Max 10MB ICS file
}
```

### 3. ICS URL Encryption

```python
from cryptography.fernet import Fernet


class URLEncryption:
    def __init__(self):
        self.cipher = Fernet(settings.ENCRYPTION_KEY)

    def encrypt_url(self, url: str) -> str:
        """Encrypt ICS URL to protect embedded tokens."""
        return self.cipher.encrypt(url.encode()).decode()

    def decrypt_url(self, encrypted: str) -> str:
        """Decrypt ICS URL for fetching."""
        return self.cipher.decrypt(encrypted.encode()).decode()

    def mask_url(self, url: str) -> str:
        """Mask sensitive parts of URL for display."""
        from urllib.parse import urlparse, urlunparse

        parsed = urlparse(url)
        # Keep scheme, host, and path structure but mask tokens
        if '/private-' in parsed.path:
            # Google Calendar format
            parts = parsed.path.split('/private-')
            masked_path = parts[0] + '/private-***' + parts[1].split('/')[-1]
        elif 'token=' in url:
            # Query parameter token
            masked_path = parsed.path
            parsed = parsed._replace(query='token=***')
        else:
            # Generic masking of path segments that look like tokens
            import re
            masked_path = re.sub(r'/[a-zA-Z0-9]{20,}/', '/***/', parsed.path)

        return urlunparse(parsed._replace(path=masked_path))
```

## Testing Strategy

### 1. Unit Tests

```python
# tests/test_ics_sync.py

def test_ics_parsing():
    """Test ICS file parsing."""
    ics_content = """BEGIN:VCALENDAR
VERSION:2.0
BEGIN:VEVENT
UID:test-123
SUMMARY:Team Meeting
LOCATION:https://reflector.monadical.com/engineering
DTSTART:20240115T100000Z
DTEND:20240115T110000Z
END:VEVENT
END:VCALENDAR"""

    service = ICSFetchService()
    calendar = service.parse_ics(ics_content)
    events = service.extract_room_events(
        calendar,
        "https://reflector.monadical.com/engineering"
    )

    assert len(events) == 1
    assert events[0]['title'] == 'Team Meeting'
```

### 2. Integration Tests

```python
def test_full_sync_flow():
    """Test complete sync workflow."""
    # Create room with ICS URL (encrypt URL to protect tokens)
    encryption = URLEncryption()
    room = Room(
        name="test-room",
        ics_url=encryption.encrypt_url("https://example.com/calendar.ics?token=secret"),
        ics_enabled=True
    )

    # Mock ICS fetch
    with patch('requests.get') as mock_get:
        mock_get.return_value.text = sample_ics_content

        # Run sync
        sync_room_calendar(room.id)

        # Verify events created
        events = CalendarEvent.query.filter_by(room_id=room.id).all()
        assert len(events) > 0
```

## Common ICS Provider Configurations

### Google Calendar
- URL Format: `https://calendar.google.com/calendar/ical/{calendar_id}/private-{token}/basic.ics`
- Authentication via token embedded in URL
- Updates every 3-8 hours by default

### Outlook/Office 365
- URL Format: `https://outlook.office365.com/owa/calendar/{id}/calendar.ics`
- May include token in URL path or query parameters
- Real-time updates

### Apple iCloud
- URL Format: `webcal://p{XX}-caldav.icloud.com/published/2/{token}`
- Convert webcal:// to https://
- Token embedded in URL path
- Public calendars only

### Nextcloud/ownCloud
- URL Format: `https://cloud.example.com/remote.php/dav/public-calendars/{token}`
- Token embedded in URL path
- Configurable update frequency

## Migration from CalDAV

If migrating from an existing CalDAV implementation:

1. **Database Migration**: Rename fields from `caldav_*` to `ics_*`
2. **URL Conversion**: Most CalDAV servers provide ICS export endpoints
3. **Authentication**: Convert from username/password to URL-embedded tokens
4. **Remove Dependencies**: Uninstall caldav library, add icalendar
5. **Update Background Tasks**: Replace CalDAV sync with ICS fetch

## Performance Optimizations

1. **Caching**: Use ETag/Last-Modified headers to avoid refetching unchanged calendars (see the sketch after this list)
2. **Incremental Sync**: Store last sync timestamp, only process new/modified events
3. **Batch Processing**: Process multiple room calendars in parallel
4. **Connection Pooling**: Reuse HTTP connections for multiple requests
5. **Compression**: Support gzip encoding for large ICS files
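
A minimal sketch of the conditional fetch from item 1, using only `requests`; the helper name and return shape are illustrative, not the project's API:

```python
import requests


def fetch_if_changed(session: requests.Session, url: str,
                     last_etag: str | None, last_modified: str | None):
    """Conditional GET: returns (content, etag, last_modified); content is None when unchanged."""
    headers = {}
    if last_etag:
        headers["If-None-Match"] = last_etag
    if last_modified:
        headers["If-Modified-Since"] = last_modified

    response = session.get(url, headers=headers, timeout=30)
    if response.status_code == 304:  # Not Modified: skip parsing entirely
        return None, last_etag, last_modified

    response.raise_for_status()
    return response.text, response.headers.get("ETag"), response.headers.get("Last-Modified")
```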

## Monitoring and Debugging

### Metrics to Track
- Sync success/failure rate per room
- Average sync duration
- ICS file sizes
- Number of events processed
- Failed event matches

### Debug Logging
```python
logger.debug(f"Fetching ICS from {room.ics_url}")
logger.debug(f"ICS content size: {len(ics_content)} bytes")
logger.debug(f"Found {len(events)} matching events")
logger.debug(f"Event UIDs: {[e['external_id'] for e in events]}")
```

### Common Issues
1. **SSL Certificate Errors**: Add certificate validation options
2. **Timeout Issues**: Increase timeout for large calendars
3. **Encoding Problems**: Handle various character encodings
4. **Timezone Mismatches**: Always convert to UTC
5. **Memory Issues**: Stream large ICS files instead of loading entirely

LICENSE (new file, 9 lines)

@@ -0,0 +1,9 @@
MIT License

Copyright (c) 2025 Monadical SAS

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

PLAN.md (new file, 337 lines)

@@ -0,0 +1,337 @@
# ICS Calendar Integration Plan

## Core Concept
ICS calendar URLs are attached to rooms (not users) to enable automatic meeting tracking and management through periodic fetching of calendar data.

## Database Schema Updates

### 1. Add ICS configuration to rooms
- Add `ics_url` field to room table (URL to .ics file, may include auth token)
- Add `ics_fetch_interval` field to room table (default: 5 minutes, configurable)
- Add `ics_enabled` boolean field to room table
- Add `ics_last_sync` timestamp field to room table

### 2. Create calendar_events table
- `id` - UUID primary key
- `room_id` - Foreign key to room
- `external_id` - ICS event UID
- `title` - Event title
- `description` - Event description
- `start_time` - Event start timestamp
- `end_time` - Event end timestamp
- `attendees` - JSON field with attendee list and status
- `location` - Meeting location (should contain room name)
- `last_synced` - Last sync timestamp
- `is_deleted` - Boolean flag for soft delete (preserve past events)
- `ics_raw_data` - TEXT field to store raw VEVENT data for reference

### 3. Update meeting table
- Add `calendar_event_id` - Foreign key to calendar_events
- Add `calendar_metadata` - JSON field for additional calendar data
- Remove unique constraint on room_id + active status (allow multiple active meetings per room)

## Backend Implementation

### 1. ICS Sync Service
- Create background task that runs based on room's `ics_fetch_interval` (default: 5 minutes)
- For each room with ICS enabled, fetch the .ics file via HTTP/HTTPS
- Parse ICS file using icalendar library
- Extract VEVENT components and filter events looking for room URL (e.g., "https://reflector.monadical.com/max")
- Store matching events in calendar_events table
- Mark events as "upcoming" if start_time is within next 30 minutes
- Pre-create Whereby meetings 1 minute before start (ensures no delay when users join)
- Soft-delete future events that were removed from calendar (set is_deleted=true)
- Never delete past events (preserve for historical record)
- Support authenticated ICS feeds via tokens embedded in URL

### 2. Meeting Management Updates
- Allow multiple active meetings per room
- Pre-create meeting record 1 minute before calendar event starts (ensures meeting is ready)
- Link meeting to calendar_event for metadata
- Keep meeting active for 15 minutes after last participant leaves (grace period)
- Don't auto-close if new participant joins within grace period

### 3. API Endpoints
- `GET /v1/rooms/{room_name}/meetings` - List all active and upcoming meetings for a room
  - Returns filtered data based on user role (owner vs participant)
- `GET /v1/rooms/{room_name}/meetings/upcoming` - List upcoming meetings (next 30 min)
  - Returns filtered data based on user role
- `POST /v1/rooms/{room_name}/meetings/{meeting_id}/join` - Join specific meeting
- `PATCH /v1/rooms/{room_id}` - Update room settings (including ICS configuration)
  - ICS fields only visible/editable by room owner
- `POST /v1/rooms/{room_name}/ics/sync` - Trigger manual ICS sync
  - Only accessible by room owner
- `GET /v1/rooms/{room_name}/ics/status` - Get ICS sync status and last fetch time
  - Only accessible by room owner

## Frontend Implementation

### 1. Room Settings Page
- Add ICS configuration section
- Field for ICS URL (e.g., Google Calendar private URL, Outlook ICS export)
- Field for fetch interval (dropdown: 1 min, 5 min, 10 min, 30 min, 1 hour)
- Test connection button (validates ICS file can be fetched and parsed)
- Manual sync button
- Show last sync time and next scheduled sync

### 2. Meeting Selection Page (New)
- Show when accessing `/room/{room_name}`
- **Host view** (room owner):
  - Full calendar event details
  - Meeting title and description
  - Complete attendee list with RSVP status
  - Number of current participants
  - Duration (how long it's been running)
- **Participant view** (non-owners):
  - Meeting title only
  - Date and time
  - Number of current participants
  - Duration (how long it's been running)
  - No attendee list or description (privacy)
- Display upcoming meetings (visible 30min before):
  - Show countdown to start
  - Can click to join early → redirected to waiting page
  - Waiting page shows countdown until meeting starts
  - Meeting pre-created by background task (ready when users arrive)
- Option to create unscheduled meeting (uses existing flow)

### 3. Meeting Room Updates
- Show calendar metadata in meeting info
- Display invited attendees vs actual participants
- Show meeting title from calendar event

## Meeting Lifecycle

### 1. Meeting Creation
- Automatic: Pre-created 1 minute before calendar event starts (ensures Whereby room is ready)
- Manual: User creates unscheduled meeting (existing `/rooms/{room_name}/meeting` endpoint)
- Background task handles pre-creation to avoid delays when users join

### 2. Meeting Join Rules
- Can join active meetings immediately
- Can see upcoming meetings 30 minutes before start
- Can click to join upcoming meetings early → sent to waiting page
- Waiting page automatically transitions to meeting at scheduled time
- Unscheduled meetings always joinable (current behavior)

### 3. Meeting Closure Rules
- All meetings: 15-minute grace period after last participant leaves (see the sketch after this list)
- If participant rejoins within grace period, keep meeting active
- Calendar meetings: Force close 30 minutes after scheduled end time
- Unscheduled meetings: Keep active for 8 hours (current behavior)
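
A minimal sketch of that closure check; `last_participant_left_at` and `grace_period_minutes` come from the Phase 2 notes below, while the helper name and its callers are illustrative (the 8-hour cap for unscheduled meetings is omitted):

```python
from datetime import datetime, timedelta


def should_close_meeting(meeting, calendar_event=None) -> bool:
    """Apply the closure rules above to one meeting."""
    now = datetime.utcnow()

    # Calendar meetings: force close 30 minutes after the scheduled end.
    if calendar_event is not None and now > calendar_event.end_time + timedelta(minutes=30):
        return True

    # Grace period: close only once the room has been empty long enough.
    if meeting.last_participant_left_at is None:
        return False  # participants are still (or have again become) present
    empty_for = now - meeting.last_participant_left_at
    return empty_for > timedelta(minutes=meeting.grace_period_minutes)
```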

## ICS Parsing Logic

### 1. Event Matching
- Parse ICS file using Python icalendar library
- Iterate through VEVENT components
- Check LOCATION field for full FQDN URL (e.g., "https://reflector.monadical.com/max")
- Check DESCRIPTION for room URL or mention
- Support multiple formats:
  - Full URL: "https://reflector.monadical.com/max"
  - With /room path: "https://reflector.monadical.com/room/max"
  - Partial paths: "room/max", "/max room"

### 2. Attendee Extraction
- Parse ATTENDEE properties from VEVENT
- Extract email (MAILTO), name (CN parameter), and RSVP status (PARTSTAT)
- Store as JSON in calendar_events.attendees (see the sketch after this list)
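
A sketch of that extraction with the icalendar library; the function name is illustrative, and the ORGANIZER handling follows the field mapping in ICS_IMPLEMENTATION.md:

```python
def extract_attendees(vevent) -> list[dict]:
    """Parse ATTENDEE/ORGANIZER properties into the JSON stored on calendar_event."""
    attendees = []

    raw = vevent.get("ATTENDEE", [])
    if not isinstance(raw, list):
        raw = [raw]
    for prop in raw:
        params = getattr(prop, "params", {})
        attendees.append({
            "email": str(prop).removeprefix("MAILTO:").removeprefix("mailto:"),
            "name": str(params.get("CN", "")),
            "status": str(params.get("PARTSTAT", "")),
        })

    organizer = vevent.get("ORGANIZER")
    if organizer is not None:
        params = getattr(organizer, "params", {})
        attendees.append({
            "email": str(organizer).removeprefix("MAILTO:").removeprefix("mailto:"),
            "name": str(params.get("CN", "")),
            "role": "organizer",
        })

    return attendees
```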
|
|
||||||
|
### 3. Sync Strategy
|
||||||
|
- Fetch complete ICS file (contains all events)
|
||||||
|
- Filter events from (now - 1 hour) to (now + 24 hours) for processing
|
||||||
|
- Update existing events if LAST-MODIFIED or SEQUENCE changed
|
||||||
|
- Delete future events that no longer exist in ICS (start_time > now)
|
||||||
|
- Keep past events for historical record (never delete if start_time < now)
|
||||||
|
- Handle recurring events (RRULE) - expand to individual instances
|
||||||
|
- Track deleted calendar events to clean up future meetings
|
||||||
|
- Cache ICS file hash to detect changes and skip unnecessary processing
|
||||||
|
|
||||||
|
## Security Considerations
|
||||||
|
|
||||||
|
### 1. ICS URL Security
|
||||||
|
- ICS URLs may contain authentication tokens (e.g., Google Calendar private URLs)
|
||||||
|
- Store full ICS URLs encrypted using Fernet to protect embedded tokens
|
||||||
|
- Validate ICS URLs (must be HTTPS for production)
|
||||||
|
- Never expose full ICS URLs in API responses (return masked version)
|
||||||
|
- Rate limit ICS fetching to prevent abuse
|
||||||
|
|
||||||
|
### 2. Room Access
|
||||||
|
- Only room owner can configure ICS URL
|
||||||
|
- ICS URL shown as masked version to room owner (hides embedded tokens)
|
||||||
|
- ICS settings not visible to other users
|
||||||
|
- Meeting list visible to all room participants
|
||||||
|
- ICS fetch logs only visible to room owner
|
||||||
|
|
||||||
|
### 3. Meeting Privacy
|
||||||
|
- Full calendar details visible only to room owner
|
||||||
|
- Participants see limited info: title, date/time only
|
||||||
|
- Attendee list and description hidden from non-owners
|
||||||
|
- Meeting titles visible in room listing to all
|
||||||
|
|
||||||
|
## Implementation Phases

### Phase 1: Database and ICS Setup (Week 1) ✅ COMPLETED (2025-08-18)

1. ✅ Created database migrations for ICS fields and calendar_events table
   - Added ics_url, ics_fetch_interval, ics_enabled, ics_last_sync, ics_last_etag to room table
   - Created calendar_event table with ics_uid (instead of external_id) and proper typing
   - Added calendar_event_id and calendar_metadata (JSONB) to meeting table
   - Removed server_default from datetime fields for consistency
2. ✅ Installed icalendar Python library for ICS parsing
   - Added icalendar>=6.0.0 to dependencies
   - No encryption needed - ICS URLs are read-only
3. ✅ Built ICS fetch and sync service
   - Simple HTTP fetching without unnecessary validation
   - Proper TypedDict typing for event data structures
   - Supports any standard ICS format
   - Event matching on full room URL only
4. ✅ API endpoints for ICS configuration
   - Room model updated to support ICS fields via existing PATCH endpoint
   - POST /v1/rooms/{room_name}/ics/sync - Trigger manual sync (owner only)
   - GET /v1/rooms/{room_name}/ics/status - Get sync status (owner only)
   - GET /v1/rooms/{room_name}/meetings - List meetings with privacy controls
   - GET /v1/rooms/{room_name}/meetings/upcoming - List upcoming meetings
5. ✅ Celery background tasks for periodic sync
   - sync_room_ics - Sync individual room calendar
   - sync_all_ics_calendars - Check all rooms and queue sync based on fetch intervals
   - pre_create_upcoming_meetings - Pre-create Whereby meetings 1 minute before start
   - Tasks scheduled in beat schedule (every minute for checking, respects individual intervals)
6. ✅ Tests written and passing
   - 6 tests for Room ICS fields
   - 7 tests for CalendarEvent model
   - 7 tests for ICS sync service
   - 11 tests for API endpoints
   - 6 tests for background tasks
   - All 31 ICS-related tests passing
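For item 5, the beat schedule could look roughly like the sketch below. The dotted task paths and the 60-second cadence are assumptions based on the file layout described later in this document; only the task names come from the list above:

```python
# Illustrative Celery beat entries: a dispatcher task runs every minute and
# decides per room whether its ics_fetch_interval has elapsed.
beat_schedule = {
    "sync-all-ics-calendars": {
        # assumed dotted path; the real registered task name may differ
        "task": "reflector.worker.ics_sync.sync_all_ics_calendars",
        "schedule": 60.0,  # seconds
    },
    "pre-create-upcoming-meetings": {
        "task": "reflector.worker.ics_sync.pre_create_upcoming_meetings",
        "schedule": 60.0,
    },
}
```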
### Phase 2: Meeting Management (Week 2) ✅ COMPLETED (2025-08-19)

1. ✅ Updated meeting lifecycle logic with grace period support
   - 15-minute grace period after last participant leaves
   - Automatic reactivation when participants rejoin
   - Force close calendar meetings 30 minutes after scheduled end
2. ✅ Support multiple active meetings per room
   - Removed unique constraint on active meetings
   - Added get_all_active_for_room() method
   - Added get_active_by_calendar_event() method
3. ✅ Implemented grace period logic
   - Added last_participant_left_at and grace_period_minutes fields
   - Process meetings task handles grace period checking
   - Whereby webhooks clear grace period on participant join
4. ✅ Link meetings to calendar events
   - Pre-created meetings properly linked via calendar_event_id
   - Calendar metadata stored with meeting
   - API endpoints for listing and joining specific meetings

### Phase 3: Frontend Meeting Selection (Week 3)

1. Build meeting selection page
2. Show active and upcoming meetings
3. Implement waiting page for early joiners
4. Add automatic transition from waiting to meeting
5. Support unscheduled meeting creation

### Phase 4: Calendar Integration UI (Week 4)

1. Add ICS settings to room configuration
2. Display calendar metadata in meetings
3. Show attendee information
4. Add sync status indicators
5. Show fetch interval and next sync time

## Success Metrics

- Zero merged meetings from consecutive calendar events
- Successful ICS sync from major providers (Google Calendar, Outlook, Apple Calendar, Nextcloud)
- Meeting join accuracy: correct meeting 100% of the time
- Grace period prevents 90% of accidental meeting closures
- Configurable fetch intervals reduce unnecessary API calls

## Design Decisions

1. **ICS attached to room, not user** - Prevents duplicate meetings from multiple calendars
2. **Multiple active meetings per room** - Supported with meeting selection page
3. **Grace period for rejoining** - 15 minutes after last participant leaves
4. **Upcoming meeting visibility** - Show 30 minutes before, join only on time
5. **Calendar data storage** - Attached to meeting record for full context
6. **No "ad-hoc" meetings** - Use existing meeting creation flow (unscheduled meetings)
7. **ICS configuration via room PATCH** - Reuse existing room configuration endpoint
8. **Event deletion handling** - Soft-delete future events, preserve past meetings
9. **Configurable fetch interval** - Balance between freshness and server load
10. **ICS over CalDAV** - Simpler implementation, wider compatibility, no complex auth
## Phase 2 Implementation Files

### Database Migrations

- `/server/migrations/versions/6025e9b2bef2_remove_one_active_meeting_per_room_.py` - Remove unique constraint
- `/server/migrations/versions/d4a1c446458c_add_grace_period_fields_to_meeting.py` - Add grace period fields

### Updated Models

- `/server/reflector/db/meetings.py` - Added grace period fields and new query methods

### Updated Services

- `/server/reflector/worker/process.py` - Enhanced with grace period logic and multiple meeting support

### Updated API

- `/server/reflector/views/rooms.py` - Added endpoints for listing active meetings and joining specific meetings
- `/server/reflector/views/whereby.py` - Clear grace period on participant join

### Tests

- `/server/tests/test_multiple_active_meetings.py` - Comprehensive tests for Phase 2 features (5 tests)

## Phase 1 Implementation Files Created

### Database Models

- `/server/reflector/db/rooms.py` - Updated with ICS fields (url, fetch_interval, enabled, last_sync, etag)
- `/server/reflector/db/calendar_events.py` - New CalendarEvent model with ics_uid and proper typing
- `/server/reflector/db/meetings.py` - Updated with calendar_event_id and calendar_metadata (JSONB)

### Services

- `/server/reflector/services/ics_sync.py` - ICS fetching and parsing with TypedDict for proper typing

### API Endpoints

- `/server/reflector/views/rooms.py` - Added ICS management endpoints with privacy controls

### Background Tasks

- `/server/reflector/worker/ics_sync.py` - Celery tasks for automatic periodic sync
- `/server/reflector/worker/app.py` - Updated beat schedule for ICS tasks

### Tests

- `/server/tests/test_room_ics.py` - Room model ICS fields tests (6 tests)
- `/server/tests/test_calendar_event.py` - CalendarEvent model tests (7 tests)
- `/server/tests/test_ics_sync.py` - ICS sync service tests (7 tests)
- `/server/tests/test_room_ics_api.py` - API endpoint tests (11 tests)
- `/server/tests/test_ics_background_tasks.py` - Background task tests (6 tests)

### Key Design Decisions

- No encryption needed - ICS URLs are read-only access
- Using ics_uid instead of external_id for clarity
- Proper TypedDict typing for event data structures
- Removed unnecessary URL validation and webcal handling
- calendar_metadata in meetings stores flexible calendar data (organizer, recurrence, etc)
- Background tasks query all rooms directly to avoid filtering issues
- Sync intervals respected per-room configuration
## Implementation Approach

### ICS Fetching vs CalDAV

- **ICS Benefits**:
  - Simpler implementation (HTTP GET vs CalDAV protocol)
  - Wider compatibility (all calendar apps can export ICS)
  - No authentication complexity (simple URL with optional token)
  - Easier debugging (ICS is plain text)
  - Lower server requirements (no CalDAV library dependencies)

### Supported Calendar Providers

1. **Google Calendar**: Private ICS URL from calendar settings
2. **Outlook/Office 365**: ICS export URL from calendar sharing
3. **Apple Calendar**: Published calendar ICS URL
4. **Nextcloud**: Public/private calendar ICS export
5. **Any CalDAV server**: Via ICS export endpoint

### ICS URL Examples

- Google: `https://calendar.google.com/calendar/ical/{calendar_id}/private-{token}/basic.ics`
- Outlook: `https://outlook.live.com/owa/calendar/{id}/calendar.ics`
- Custom: `https://example.com/calendars/room-schedule.ics`
### Fetch Interval Configuration

- 1 minute: For critical/high-activity rooms
- 5 minutes (default): Balance of freshness and efficiency
- 10 minutes: Standard meeting rooms
- 30 minutes: Low-activity rooms
- 1 hour: Rarely-used rooms or stable schedules
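A sketch of how the dispatcher can honor these intervals per room, assuming the Room ICS fields listed in Phase 1 and that `ics_fetch_interval` is expressed in minutes:

```python
from datetime import datetime, timedelta, timezone


def is_sync_due(room, now: datetime | None = None) -> bool:
    """True when ics_fetch_interval minutes have elapsed since the last sync."""
    if not room.ics_enabled or not room.ics_url:
        return False
    now = now or datetime.now(timezone.utc)
    if room.ics_last_sync is None:
        return True
    return now - room.ics_last_sync >= timedelta(minutes=room.ics_fetch_interval)
```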
README.md (206 lines changed)

@@ -1,48 +1,52 @@
+<div align="center">
+
 # Reflector
 
 Reflector Audio Management and Analysis is a cutting-edge web application under development by Monadical. It utilizes AI to record meetings, providing a permanent record with transcripts, translations, and automated summaries.
 
+[](https://github.com/monadical-sas/reflector/actions/workflows/pytests.yml)
+[](https://opensource.org/licenses/MIT)
+</div>
+
+## Screenshots
+<table>
+<tr>
+<td>
+<a href="https://github.com/user-attachments/assets/3a976930-56c1-47ef-8c76-55d3864309e3">
+<img width="700" alt="image" src="https://github.com/user-attachments/assets/3a976930-56c1-47ef-8c76-55d3864309e3" />
+</a>
+</td>
+<td>
+<a href="https://github.com/user-attachments/assets/bfe3bde3-08af-4426-a9a1-11ad5cd63b33">
+<img width="700" alt="image" src="https://github.com/user-attachments/assets/bfe3bde3-08af-4426-a9a1-11ad5cd63b33" />
+</a>
+</td>
+<td>
+<a href="https://github.com/user-attachments/assets/7b60c9d0-efe4-474f-a27b-ea13bd0fabdc">
+<img width="700" alt="image" src="https://github.com/user-attachments/assets/7b60c9d0-efe4-474f-a27b-ea13bd0fabdc" />
+</a>
+</td>
+</tr>
+</table>
+
+## Background
 
 The project architecture consists of three primary components:
 
 - **Front-End**: NextJS React project hosted on Vercel, located in `www/`.
 - **Back-End**: Python server that offers an API and data persistence, found in `server/`.
 - **GPU implementation**: Providing services such as speech-to-text transcription, topic generation, automated summaries, and translations. Most reliable option is Modal deployment
 
-It also uses https://github.com/fief-dev for authentication, and Vercel for deployment and configuration of the front-end.
+It also uses authentik for authentication if activated, and Vercel for deployment and configuration of the front-end.
 
-## Table of Contents
+## Contribution Guidelines
 
-- [Reflector](#reflector)
-- [Table of Contents](#table-of-contents)
-- [Miscellaneous](#miscellaneous)
-- [Contribution Guidelines](#contribution-guidelines)
-- [How to Install Blackhole (Mac Only)](#how-to-install-blackhole-mac-only)
-- [Front-End](#front-end)
-- [Installation](#installation)
-- [Run the Application](#run-the-application)
-- [OpenAPI Code Generation](#openapi-code-generation)
-- [Back-End](#back-end)
-- [Installation](#installation-1)
-- [Start the API/Backend](#start-the-apibackend)
-- [Redis (Mac)](#redis-mac)
-- [Redis (Windows)](#redis-windows)
-- [Update the database schema (run on first install, and after each pull containing a migration)](#update-the-database-schema-run-on-first-install-and-after-each-pull-containing-a-migration)
-- [Main Server](#main-server)
-- [Crontab (optional)](#crontab-optional)
-- [Using docker](#using-docker)
-- [Using local GPT4All](#using-local-gpt4all)
-- [Using local files](#using-local-files)
-- [AI Models](#ai-models)
+All new contributions should be made in a separate branch, and goes through a Pull Request.
+[Conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) must be used for the PR title and commits.
 
-## Miscellaneous
+## Usage
 
-### Contribution Guidelines
-
-All new contributions should be made in a separate branch. Before any code is merged into `main`, it requires a code review.
-
-### Usage instructions
-
-To record both your voice and the meeting you're taking part in, you need :
+To record both your voice and the meeting you're taking part in, you need:
 
 - For an in-person meeting, make sure your microphone is in range of all participants.
 - If using several microphones, make sure to merge the audio feeds into one with an external tool.
@@ -66,156 +70,82 @@ Note: We currently do not have instructions for Windows users.
 - Then goto `System Preferences -> Sound` and choose the devices created from the Output and Input tabs.
 - The input from your local microphone, the browser run meeting should be aggregated into one virtual stream to listen to and the output should be fed back to your specified output devices if everything is configured properly.
 
-## Front-End
+## Installation
+
+### Frontend
 
 Start with `cd www`.
 
-### Installation
+**Installation**
 
-To install the application, run:
-
 ```bash
-yarn install
+pnpm install
 cp .env_template .env
 cp config-template.ts config.ts
 ```
 
 Then, fill in the environment variables in `.env` and the configuration in `config.ts` as needed. If you are unsure on how to proceed, ask in Zulip.
 
-### Run the Application
+**Run in development mode**
 
-To run the application in development mode, run:
-
 ```bash
-yarn dev
+pnpm dev
 ```
 
 Then (after completing server setup and starting it) open [http://localhost:3000](http://localhost:3000) to view it in the browser.
 
-### OpenAPI Code Generation
+**OpenAPI Code Generation**
 
 To generate the TypeScript files from the openapi.json file, make sure the python server is running, then run:
 
 ```bash
-yarn openapi
+pnpm openapi
 ```
 
-## Back-End
+### Backend
 
 Start with `cd server`.
 
-### Quick-run instructions (only if you installed everything already)
+**Run in development mode**
 
-```bash
-redis-server # Mac
-docker compose up -d redis # Windows
-poetry run celery -A reflector.worker.app worker --loglevel=info
-poetry run python -m reflector.app
-```
-
-### Installation
-
-Download [Python 3.11 from the official website](https://www.python.org/downloads/) and ensure you have version 3.11 by running `python --version`.
-
-Run:
-
-```bash
-python --version # It should say 3.11
-pip install poetry
-poetry install --no-root
-cp .env_template .env
-```
-
-Then fill `.env` with the omitted values (ask in Zulip). At the moment of this writing, the only value omitted is `AUTH_FIEF_CLIENT_SECRET`.
-
-### Start the API/Backend
-
-Start the background worker:
-
-```bash
-poetry run celery -A reflector.worker.app worker --loglevel=info
-```
-
-### Redis (Mac)
-
-```bash
-yarn add redis
-poetry run celery -A reflector.worker.app worker --loglevel=info
-redis-server
-```
-
-### Redis (Windows)
-
-**Option 1**
-
 ```bash
 docker compose up -d redis
+
+# on the first run, or if the schemas changed
+uv run alembic upgrade head
+
+# start the worker
+uv run celery -A reflector.worker.app worker --loglevel=info
+
+# start the app
+uv run -m reflector.app --reload
 ```
 
-**Option 2**
+Then fill `.env` with the omitted values (ask in Zulip).
 
-Install:
+**Crontab (optional)**
 
-- [Git for Windows](https://gitforwindows.org/)
-- [Windows Subsystem for Linux (WSL)](https://docs.microsoft.com/en-us/windows/wsl/install)
-- Install your preferred Linux distribution via the Microsoft Store (e.g., Ubuntu).
-
-Open your Linux distribution and update the package list:
-
-```bash
-sudo apt update
-sudo apt install redis-server
-redis-server
-```
-
-## Update the database schema (run on first install, and after each pull containing a migration)
-
-```bash
-poetry run alembic heads
-```
-
-## Main Server
-
-```bash
-poetry run python -m reflector.app
-```
-
-### Crontab (optional)
-
 For crontab (only healthcheck for now), start the celery beat (you don't need it on your local dev environment):
 
 ```bash
-poetry run celery -A reflector.worker.app beat
+uv run celery -A reflector.worker.app beat
 ```
 
-#### Using docker
+### GPU models
 
-Use:
+Currently, reflector heavily use custom local models, deployed on modal. All the micro services are available in server/gpu/
 
-```bash
-docker-compose up server
-```
-
-### Using local GPT4All
-
-- Start GPT4All with any model you want
-- Ensure the API server is activated in GPT4all
-- Run with: `LLM_BACKEND=openai LLM_URL=http://localhost:4891/v1/completions LLM_OPENAI_MODEL="GPT4All Falcon" python -m reflector.app`
-
-### Using local files
-
-```
-poetry run python -m reflector.tools.process path/to/audio.wav
-```
-
-## AI Models
-
-### Modal
-
-To deploy llm changes to modal, you need.
+To deploy llm changes to modal, you need:
 
 - a modal account
 - set up the required secret in your modal account (REFLECTOR_GPU_APIKEY)
 - install the modal cli
 - connect your modal cli to your account if not done previously
 - `modal run path/to/required/llm`
 
-_(Documentation for this section is pending.)_
+## Using local files
+
+You can manually process an audio file by calling the process tool:
+
+```bash
+uv run python -m reflector.tools.process path/to/audio.wav
+```
compose.yml (18 lines changed)

@@ -39,10 +39,26 @@ services:
     image: node:18
     ports:
      - "3000:3000"
-    command: sh -c "yarn install && yarn dev"
+    command: sh -c "corepack enable && pnpm install && pnpm dev"
     restart: unless-stopped
     working_dir: /app
     volumes:
      - ./www:/app/
+     - /app/node_modules
     env_file:
      - ./www/.env.local
 
+  postgres:
+    image: postgres:17
+    ports:
+     - 5432:5432
+    environment:
+      POSTGRES_USER: reflector
+      POSTGRES_PASSWORD: reflector
+      POSTGRES_DB: reflector
+    volumes:
+     - ./data/postgres:/var/lib/postgresql/data
+
+networks:
+  default:
+    attachable: true
@@ -1,21 +0,0 @@
-TRANSCRIPT_BACKEND=modal
-TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
-
-LLM_BACKEND=modal
-LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
-
-AUTH_BACKEND=fief
-AUTH_FIEF_URL=https://auth.reflector.media/reflector-local
-AUTH_FIEF_CLIENT_ID=***REMOVED***
-AUTH_FIEF_CLIENT_SECRET=<ask in zulip> <-----------------------------------------------------------------------------------------
-
-TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
-ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
-DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
-
-BASE_URL=https://xxxxx.ngrok.app
-DIARIZATION_ENABLED=false
-
-SQS_POLLING_TIMEOUT_SECONDS=60
server/.gitignore (vendored, 1 line changed)

@@ -180,3 +180,4 @@ reflector.sqlite3
 data/
 
 dump.rdb
@@ -1 +1 @@
-3.11.6
+3.12
@@ -1,30 +1,29 @@
-FROM python:3.11-slim as base
+FROM python:3.12-slim
 
-ENV PIP_DEFAULT_TIMEOUT=100 \
-    PIP_DISABLE_PIP_VERSION_CHECK=1 \
-    PIP_NO_CACHE_DIR=1 \
-    PYTHONDONTWRITEBYTECODE=1 \
-    PYTHONUNBUFFERED=1 \
-    POETRY_VERSION=1.3.1
+ENV PYTHONUNBUFFERED=1 \
+    UV_LINK_MODE=copy
 
 # builder install base dependencies
-FROM base AS builder
 WORKDIR /tmp
-RUN pip install "poetry==$POETRY_VERSION"
-RUN python -m venv /venv
+RUN apt-get update && apt-get install -y curl && apt-get clean
+ADD https://astral.sh/uv/install.sh /uv-installer.sh
+RUN sh /uv-installer.sh && rm /uv-installer.sh
+ENV PATH="/root/.local/bin/:$PATH"
 
 # install application dependencies
-COPY pyproject.toml poetry.lock /tmp
-RUN . /venv/bin/activate && poetry config virtualenvs.create false
-RUN . /venv/bin/activate && poetry install --only main,aws --no-root --no-interaction --no-ansi
+RUN mkdir -p /app
+WORKDIR /app
+COPY pyproject.toml uv.lock /app/
+RUN touch README.md && env uv sync --compile-bytecode --locked
+
+# pre-download nltk packages
+RUN uv run python -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')"
 
 # bootstrap
-FROM base AS final
-COPY --from=builder /venv /venv
-RUN mkdir -p /app
-COPY reflector /app/reflector
-COPY migrations /app/migrations
-COPY images /app/images
 COPY alembic.ini runserver.sh /app/
+COPY images /app/images
+COPY migrations /app/migrations
+COPY reflector /app/reflector
 WORKDIR /app
 
 CMD ["./runserver.sh"]
@@ -20,3 +20,23 @@ Polls SQS every 60 seconds via /server/reflector/worker/process.py:24-62:
 # Every 60 seconds, check for new recordings
 sqs = boto3.client("sqs", ...)
 response = sqs.receive_message(QueueUrl=queue_url, ...)
+
+# Requeue
+
+```bash
+uv run /app/requeue_uploaded_file.py TRANSCRIPT_ID
+```
+
+## Pipeline Management
+
+### Continue stuck pipeline from final summaries (identify_participants) step:
+
+```bash
+uv run python -c "from reflector.pipelines.main_live_pipeline import task_pipeline_final_summaries; result = task_pipeline_final_summaries.delay(transcript_id='TRANSCRIPT_ID'); print(f'Task queued: {result.id}')"
+```
+
+### Run full post-processing pipeline (continues to completion):
+
+```bash
+uv run python -c "from reflector.pipelines.main_live_pipeline import pipeline_post; pipeline_post(transcript_id='TRANSCRIPT_ID')"
+```
@@ -7,11 +7,9 @@
 ## User authentication
 ## =======================================================
 
-## Using fief (fief.dev)
+## Using jwt/authentik
-AUTH_BACKEND=fief
+AUTH_BACKEND=jwt
-AUTH_FIEF_URL=https://auth.reflector.media/reflector-local
+AUTH_JWT_AUDIENCE=
-AUTH_FIEF_CLIENT_ID=***REMOVED***
-AUTH_FIEF_CLIENT_SECRET=<ask in zulip>
 
 ## =======================================================
 ## Transcription backend
@@ -22,24 +20,24 @@ AUTH_FIEF_CLIENT_SECRET=<ask in zulip>
 
 ## Using local whisper
 #TRANSCRIPT_BACKEND=whisper
-#WHISPER_MODEL_SIZE=tiny
 
 ## Using serverless modal.com (require reflector-gpu-modal deployed)
 #TRANSCRIPT_BACKEND=modal
 #TRANSCRIPT_URL=https://xxxxx--reflector-transcriber-web.modal.run
-#TRANSLATE_URL=https://xxxxx--reflector-translator-web.modal.run
 #TRANSCRIPT_MODAL_API_KEY=xxxxx
 
 TRANSCRIPT_BACKEND=modal
 TRANSCRIPT_URL=https://monadical-sas--reflector-transcriber-web.modal.run
-TRANSCRIPT_MODAL_API_KEY=***REMOVED***
+TRANSCRIPT_MODAL_API_KEY=
 
 ## =======================================================
-## Transcription backend
+## Translation backend
 ##
 ## Only available in modal atm
 ## =======================================================
+TRANSLATION_BACKEND=modal
 TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
+#TRANSLATION_MODAL_API_KEY=xxxxx
 
 ## =======================================================
 ## LLM backend
@@ -49,28 +47,11 @@ TRANSLATE_URL=https://monadical-sas--reflector-translator-web.modal.run
 ## llm backend implementation
 ## =======================================================
 
-## Using serverless modal.com (require reflector-gpu-modal deployed)
-LLM_BACKEND=modal
-LLM_URL=https://monadical-sas--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=***REMOVED***
-ZEPHYR_LLM_URL=https://monadical-sas--reflector-llm-zephyr-web.modal.run
-
-## Using OpenAI
-#LLM_BACKEND=openai
-#LLM_OPENAI_KEY=xxx
-#LLM_OPENAI_MODEL=gpt-3.5-turbo
-
-## Using GPT4ALL
-#LLM_BACKEND=openai
-#LLM_URL=http://localhost:4891/v1/completions
-#LLM_OPENAI_MODEL="GPT4All Falcon"
-
-## Default LLM MODEL NAME
-#DEFAULT_LLM=lmsys/vicuna-13b-v1.5
-
-## Cache directory to store models
-CACHE_DIR=data
+## Context size for summary generation (tokens)
+# LLM_MODEL=microsoft/phi-4
+LLM_CONTEXT_WINDOW=16000
+LLM_URL=
+LLM_API_KEY=sk-
 
 ## =======================================================
 ## Diarization
@@ -79,7 +60,9 @@ CACHE_DIR=data
 ## To allow diarization, you need to expose expose the files to be dowloded by the pipeline
 ## =======================================================
 DIARIZATION_ENABLED=false
+DIARIZATION_BACKEND=modal
 DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
+#DIARIZATION_MODAL_API_KEY=xxxxx
 
 ## =======================================================
@@ -88,4 +71,3 @@ DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
 
 ## Sentry DSN configuration
 #SENTRY_DSN=
@@ -1,204 +0,0 @@
|
|||||||
import re
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, List
|
|
||||||
|
|
||||||
from jiwer import wer
|
|
||||||
from Levenshtein import distance
|
|
||||||
from pydantic import BaseModel, Field, field_validator
|
|
||||||
from tqdm.auto import tqdm
|
|
||||||
from whisper.normalizers import EnglishTextNormalizer
|
|
||||||
|
|
||||||
|
|
||||||
class EvaluationResult(BaseModel):
|
|
||||||
"""
|
|
||||||
Result object of the model evaluation
|
|
||||||
"""
|
|
||||||
accuracy: float = Field(default=0.0)
|
|
||||||
total_test_samples: int = Field(default=0)
|
|
||||||
|
|
||||||
|
|
||||||
class EvaluationTestSample(BaseModel):
|
|
||||||
"""
|
|
||||||
Represents one test sample
|
|
||||||
"""
|
|
||||||
|
|
||||||
reference_text: str
|
|
||||||
predicted_text: str
|
|
||||||
|
|
||||||
def update(self, reference_text:str, predicted_text:str) -> None:
|
|
||||||
self.reference_text = reference_text
|
|
||||||
self.predicted_text = predicted_text
|
|
||||||
|
|
||||||
|
|
||||||
class TestDatasetLoader(BaseModel):
|
|
||||||
"""
|
|
||||||
Test samples loader
|
|
||||||
"""
|
|
||||||
|
|
||||||
test_dir: Path = Field(default=Path(__file__).parent)
|
|
||||||
total_samples: int = Field(default=0)
|
|
||||||
|
|
||||||
@field_validator("test_dir")
|
|
||||||
def validate_file_path(cls, path):
|
|
||||||
"""
|
|
||||||
Check the file path
|
|
||||||
"""
|
|
||||||
if not path.exists():
|
|
||||||
raise ValueError("Path does not exist")
|
|
||||||
return path
|
|
||||||
|
|
||||||
def _load_test_data(self) -> tuple[Path, Path]:
|
|
||||||
"""
|
|
||||||
Loader function to validate input files and generate samples
|
|
||||||
"""
|
|
||||||
PREDICTED_TEST_SAMPLES_DIR = self.test_dir / "predicted_texts"
|
|
||||||
REFERENCE_TEST_SAMPLES_DIR = self.test_dir / "reference_texts"
|
|
||||||
|
|
||||||
for filename in PREDICTED_TEST_SAMPLES_DIR.iterdir():
|
|
||||||
match = re.search(r"(\d+)\.txt$", filename.as_posix())
|
|
||||||
if match:
|
|
||||||
sample_id = match.group(1)
|
|
||||||
pred_file_path = PREDICTED_TEST_SAMPLES_DIR / filename
|
|
||||||
ref_file_name = "ref_sample_" + str(sample_id) + ".txt"
|
|
||||||
ref_file_path = REFERENCE_TEST_SAMPLES_DIR / ref_file_name
|
|
||||||
if ref_file_path.exists():
|
|
||||||
self.total_samples += 1
|
|
||||||
yield ref_file_path, pred_file_path
|
|
||||||
|
|
||||||
def __iter__(self) -> EvaluationTestSample:
|
|
||||||
"""
|
|
||||||
Iter method for the test loader
|
|
||||||
"""
|
|
||||||
for pred_file_path, ref_file_path in self._load_test_data():
|
|
||||||
with open(pred_file_path, "r", encoding="utf-8") as file:
|
|
||||||
pred_text = file.read()
|
|
||||||
with open(ref_file_path, "r", encoding="utf-8") as file:
|
|
||||||
ref_text = file.read()
|
|
||||||
yield EvaluationTestSample(reference_text=ref_text, predicted_text=pred_text)
|
|
||||||
|
|
||||||
|
|
||||||
class EvaluationConfig(BaseModel):
|
|
||||||
"""
|
|
||||||
Model for evaluation parameters
|
|
||||||
"""
|
|
||||||
insertion_penalty: int = Field(default=1)
|
|
||||||
substitution_penalty: int = Field(default=1)
|
|
||||||
deletion_penalty: int = Field(default=1)
|
|
||||||
normalizer: Any = Field(default=EnglishTextNormalizer())
|
|
||||||
test_directory: str = Field(default=str(Path(__file__).parent))
|
|
||||||
|
|
||||||
|
|
||||||
class ModelEvaluator:
|
|
||||||
"""
|
|
||||||
Class that comprises all model evaluation related processes and methods
|
|
||||||
"""
|
|
||||||
|
|
||||||
# The 2 popular methods of WER differ slightly. More dimensions of accuracy
|
|
||||||
# will be added. For now, the average of these 2 will serve as the metric.
|
|
||||||
WEIGHTED_WER_LEVENSHTEIN = 0.0
|
|
||||||
WER_LEVENSHTEIN = []
|
|
||||||
WEIGHTED_WER_JIWER = 0.0
|
|
||||||
WER_JIWER = []
|
|
||||||
|
|
||||||
evaluation_result = EvaluationResult()
|
|
||||||
test_dataset_loader = None
|
|
||||||
evaluation_config = None
|
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
self.evaluation_config = EvaluationConfig(**kwargs)
|
|
||||||
self.test_dataset_loader = TestDatasetLoader(test_dir=self.evaluation_config.test_directory)
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"ModelEvaluator({self.evaluation_config})"
|
|
||||||
|
|
||||||
def describe(self) -> dict:
|
|
||||||
"""
|
|
||||||
Returns the parameters defining the evaluator
|
|
||||||
"""
|
|
||||||
return self.evaluation_config.model_dump()
|
|
||||||
|
|
||||||
def _normalize(self, sample: EvaluationTestSample) -> None:
|
|
||||||
"""
|
|
||||||
Normalize both reference and predicted text
|
|
||||||
"""
|
|
||||||
sample.update(
|
|
||||||
self.evaluation_config.normalizer(sample.reference_text),
|
|
||||||
self.evaluation_config.normalizer(sample.predicted_text),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _calculate_wer(self, sample: EvaluationTestSample) -> float:
|
|
||||||
"""
|
|
||||||
Based on weights for (insert, delete, substitute), calculate
|
|
||||||
the Word Error Rate
|
|
||||||
"""
|
|
||||||
levenshtein_distance = distance(
|
|
||||||
s1=sample.reference_text,
|
|
||||||
s2=sample.predicted_text,
|
|
||||||
weights=(
|
|
||||||
self.evaluation_config.insertion_penalty,
|
|
||||||
self.evaluation_config.deletion_penalty,
|
|
||||||
self.evaluation_config.substitution_penalty,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
wer = levenshtein_distance / len(sample.reference_text)
|
|
||||||
return wer
|
|
||||||
|
|
||||||
def _calculate_wers(self) -> None:
|
|
||||||
"""
|
|
||||||
Compute WER
|
|
||||||
"""
|
|
||||||
for sample in tqdm(self.test_dataset_loader, desc="Evaluating"):
|
|
||||||
self._normalize(sample)
|
|
||||||
wer_item_l = {
|
|
||||||
"wer": self._calculate_wer(sample),
|
|
||||||
"no_of_words": len(sample.reference_text),
|
|
||||||
}
|
|
||||||
wer_item_j = {
|
|
||||||
"wer": wer(sample.reference_text, sample.predicted_text),
|
|
||||||
"no_of_words": len(sample.reference_text),
|
|
||||||
}
|
|
||||||
self.WER_LEVENSHTEIN.append(wer_item_l)
|
|
||||||
self.WER_JIWER.append(wer_item_j)
|
|
||||||
|
|
||||||
def _calculate_weighted_wer(self, wers: List[float]) -> float:
|
|
||||||
"""
|
|
||||||
Calculate the weighted WER from WER
|
|
||||||
"""
|
|
||||||
total_wer = 0.0
|
|
||||||
total_words = 0.0
|
|
||||||
for item in wers:
|
|
||||||
total_wer += item["no_of_words"] * item["wer"]
|
|
||||||
total_words += item["no_of_words"]
|
|
||||||
return total_wer / total_words
|
|
||||||
|
|
||||||
def _calculate_model_accuracy(self) -> None:
|
|
||||||
"""
|
|
||||||
Compute model accuracy
|
|
||||||
"""
|
|
||||||
self._calculate_wers()
|
|
||||||
weighted_wer_levenshtein = self._calculate_weighted_wer(self.WER_LEVENSHTEIN)
|
|
||||||
weighted_wer_jiwer = self._calculate_weighted_wer(self.WER_JIWER)
|
|
||||||
|
|
||||||
final_weighted_wer = (weighted_wer_levenshtein + weighted_wer_jiwer) / 2
|
|
||||||
self.evaluation_result.accuracy = (1 - final_weighted_wer) * 100
|
|
||||||
|
|
||||||
def evaluate(self, recalculate: bool = False) -> EvaluationResult:
|
|
||||||
"""
|
|
||||||
Triggers the model evaluation
|
|
||||||
"""
|
|
||||||
if not self.evaluation_result.accuracy or recalculate:
|
|
||||||
self._calculate_model_accuracy()
|
|
||||||
return EvaluationResult(
|
|
||||||
accuracy=self.evaluation_result.accuracy,
|
|
||||||
total_test_samples=self.test_dataset_loader.total_samples
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
eval_config = {"insertion_penalty": 1, "deletion_penalty": 2, "substitution_penalty": 1}
|
|
||||||
|
|
||||||
evaluator = ModelEvaluator(**eval_config)
|
|
||||||
evaluation = evaluator.evaluate()
|
|
||||||
|
|
||||||
print(evaluator)
|
|
||||||
print(evaluation)
|
|
||||||
print("Model accuracy : {:.2f} %".format(evaluation.accuracy))
|
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
@@ -1,620 +0,0 @@
|
|||||||
Technologies ticker symbol w-e-l-l on
|
|
||||||
|
|
||||||
the TSX recently reported its 2023 q1
|
|
||||||
|
|
||||||
results beating the streets consensus
|
|
||||||
|
|
||||||
estimate for revenue and adjusted ebitda
|
|
||||||
|
|
||||||
and in a report issued this week Raymond
|
|
||||||
|
|
||||||
James analyst said quote we're impressed
|
|
||||||
|
|
||||||
by Wells capacity to drive powerful
|
|
||||||
|
|
||||||
growth across its diverse business units
|
|
||||||
|
|
||||||
in the absence of M A joining me today
|
|
||||||
|
|
||||||
is CEO Hamed chabazi to look at what's
|
|
||||||
|
|
||||||
next for well health good to see you sir
|
|
||||||
|
|
||||||
how are you great to see you Richard
|
|
||||||
|
|
||||||
thanks very much for having me great to
|
|
||||||
|
|
||||||
have you uh congratulations on your 17th
|
|
||||||
|
|
||||||
consecutive quarter of record Revenue
|
|
||||||
|
|
||||||
can you share some insights into what's
|
|
||||||
|
|
||||||
Driven these results historically and in
|
|
||||||
|
|
||||||
the past quarter as well
|
|
||||||
|
|
||||||
yeah thank you we we're very excited
|
|
||||||
|
|
||||||
about our uh q1 2023 results and as you
|
|
||||||
|
|
||||||
mentioned uh we've had a long you know
|
|
||||||
|
|
||||||
successful uh string of of uh you know
|
|
||||||
|
|
||||||
continued growth and record growth
|
|
||||||
|
|
||||||
um we also had accelerating organic
|
|
||||||
|
|
||||||
growth and I think um a big part of the
|
|
||||||
|
|
||||||
success of our franchise here is the
|
|
||||||
|
|
||||||
incredibly sticky and predictable
|
|
||||||
|
|
||||||
Revenue that we have you know well over
|
|
||||||
|
|
||||||
90 of our business is either highly
|
|
||||||
|
|
||||||
reoccurring as in uh the you know highly
|
|
||||||
|
|
||||||
predictable uh results of our two-sided
|
|
||||||
|
|
||||||
network of patients and providers or
|
|
||||||
|
|
||||||
truly recurring as in scheduled or
|
|
||||||
|
|
||||||
subscribed revenues and this allows us
|
|
||||||
|
|
||||||
to essentially make sure that that uh
|
|
||||||
|
|
||||||
you know we're on track it obviously you
|
|
||||||
|
|
||||||
know like any other business things
|
|
||||||
|
|
||||||
happen uh and sometimes it's hard to
|
|
||||||
|
|
||||||
meet those results but what's really
|
|
||||||
|
|
||||||
being unique about our platform is we do
|
|
||||||
|
|
||||||
have exposure to all kinds of different
|
|
||||||
|
|
||||||
aspects of healthcare you know we have
|
|
||||||
|
|
||||||
Prime primary care and Specialized Care
|
|
||||||
|
|
||||||
on both sides of the Border in the US
|
|
||||||
|
|
||||||
and Canada so we have exposure to
|
|
||||||
|
|
||||||
different types of business models we
|
|
||||||
|
|
||||||
have exposure to the U.S payer Network
|
|
||||||
|
|
||||||
which has higher per unit economics than
|
|
||||||
|
|
||||||
Canada and of course the stability and
|
|
||||||
|
|
||||||
uh and and sort of higher Fidelity uh
|
|
||||||
|
|
||||||
kind of Collections and revenue cycle
|
|
||||||
|
|
||||||
process that Canada has over the United
|
|
||||||
|
|
||||||
States where you don't have to kind of
|
|
||||||
|
|
||||||
deal with all of that uh at that payment
|
|
||||||
|
|
||||||
noise so just a lot of I think strength
|
|
||||||
|
|
||||||
built into the platform because of the
|
|
||||||
|
|
||||||
diversity of different Healthcare
|
|
||||||
|
|
||||||
businesses that we support
|
|
||||||
|
|
||||||
and uh where do you see Well's future
|
|
||||||
|
|
||||||
growth coming from which part of the
|
|
||||||
|
|
||||||
business uh excites you the most right
|
|
||||||
|
|
||||||
now yeah well look the centrifugal force
|
|
||||||
|
|
||||||
of well is the healthcare provider and
|
|
||||||
|
|
||||||
we exist to uh Tech enable and
|
|
||||||
|
|
||||||
ameliorate the business of that of that
|
|
||||||
|
|
||||||
Tech of that healthcare provider uh and
|
|
||||||
|
|
||||||
and and that's what we're laser focused
|
|
||||||
|
|
||||||
on and and what we're seeing is
|
|
||||||
|
|
||||||
providers not wanting to run businesses
|
|
||||||
|
|
||||||
anymore it's very simple and so we have
|
|
||||||
|
|
||||||
a digital platform and providers can
|
|
||||||
|
|
||||||
either acquire what they want and need
|
|
||||||
|
|
||||||
from our digital platform and implement
|
|
||||||
|
|
||||||
it themselves
|
|
||||||
|
|
||||||
or they can decide that they don't want
|
|
||||||
|
|
||||||
to run a business anymore they don't
|
|
||||||
|
|
||||||
want to configure and manage technology
|
|
||||||
|
|
||||||
which is becoming a bigger and bigger
|
|
||||||
|
|
||||||
part of their world every single day and
|
|
||||||
|
|
||||||
when we see what we've seen with that
|
|
||||||
|
|
||||||
Dynamic is that uh is that a lot of them
|
|
||||||
|
|
||||||
are now just wanting to work in a place
|
|
||||||
|
|
||||||
where where all the technology is
|
|
||||||
|
|
||||||
configured for them it's wrapped around
|
|
||||||
|
|
||||||
them and they have a competent operating
|
|
||||||
|
|
||||||
partner that is supporting the organ the
|
|
||||||
|
|
||||||
the practice uh and and taking care of
|
|
||||||
|
|
||||||
the front office in the back office so
|
|
||||||
|
|
||||||
that they can focus on providing care
|
|
||||||
|
|
||||||
this results in them seeing more
|
|
||||||
|
|
||||||
patients uh and and being happier
|
|
||||||
|
|
||||||
because you know they became doctors to
|
|
||||||
|
|
||||||
see patients not so they can manage uh
|
|
||||||
|
|
||||||
workers and and deal with HR issues and
|
|
||||||
|
|
||||||
deal with labs and all that kind of
|
|
||||||
|
|
||||||
stuff excellent and I know too that
|
|
||||||
|
|
||||||
Acquisitions have played a key role in
|
|
||||||
|
|
||||||
well can you share any insights into how
|
|
||||||
|
|
||||||
the Acquisitions fit into Wells growth
|
|
||||||
|
|
||||||
strategy
|
|
||||||
|
|
||||||
sure in in look in 2020 and 2021 we did
|
|
||||||
|
|
||||||
a lot of Acquisitions in 2022 we took a
|
|
||||||
|
|
||||||
bit of a breather and we've really
|
|
||||||
|
|
||||||
focused on integration and I think
|
|
||||||
|
|
||||||
that's one of the reasons why you saw
|
|
||||||
|
|
||||||
this accelerating organic growth we
|
|
||||||
|
|
||||||
really were able to demonstrate that we
|
|
||||||
|
|
||||||
could bring together the different
|
|
||||||
|
|
||||||
elements of our technology platform we
|
|
||||||
|
|
||||||
started to sell bundles we started to
|
|
||||||
|
|
||||||
really derive Synergy uh and activate uh
|
|
||||||
|
|
||||||
you know more sales as a result of
|
|
||||||
|
|
||||||
selling uh all the different products
|
|
||||||
|
|
||||||
and services with one voice with One
|
|
||||||
|
|
||||||
Vision uh so we made it easier for
|
|
||||||
|
|
||||||
providers to use their technology and I
|
|
||||||
|
|
||||||
think that was a big reason uh for our
|
|
||||||
|
|
||||||
growth now M A as you know where Capital
|
|
||||||
|
|
||||||
allocation company we're never far from
|
|
||||||
|
|
||||||
it and so we did continue to have you
|
|
||||||
|
|
||||||
know tuck-ins here and there and in fact
|
|
||||||
|
|
||||||
today uh we announced that we've
|
|
||||||
|
|
||||||
acquired uh the Alberta operations of uh
|
|
||||||
|
|
||||||
MCI one Health and other publicly traded
|
|
||||||
|
|
||||||
company uh who was looking to raise
|
|
||||||
|
|
||||||
funds to support their business we're
|
|
||||||
|
|
||||||
very pleased with with this acquisition
|
|
||||||
|
|
||||||
it just demonstrates our continued
|
|
||||||
|
|
||||||
discipline these are you know great
|
|
||||||
|
|
||||||
primary care clinics in in Canada right
|
|
||||||
|
|
||||||
in the greater Calgary area and uh uh
|
|
||||||
|
|
||||||
you know just allows us to grow our
|
|
||||||
|
|
||||||
footprint in Alberta which is an
|
|
||||||
|
|
||||||
important Province for us and it it's
|
|
||||||
|
|
||||||
it's if you look at the price if you
|
|
||||||
|
|
||||||
look at what we're getting uh you know
|
|
||||||
|
|
||||||
it's just demonstrative of our continued
|
|
||||||
|
|
||||||
uh discipline and just you know a few
|
|
||||||
|
|
||||||
days ago at our conference call I
|
|
||||||
|
|
||||||
mentioned uh that we had you know a
|
|
||||||
|
|
||||||
really strong lineup of Acquisitions uh
|
|
||||||
|
|
||||||
and you know they're starting to uh uh I
|
|
||||||
|
|
||||||
think uh come to fruition for us
|
|
||||||
|
|
||||||
a company on the grown-up question I you
|
|
||||||
|
|
||||||
recently announced a new AI investment
|
|
||||||
|
|
||||||
program last month what specific areas
|
|
||||||
|
|
||||||
of healthcare technology or AI are you
|
|
||||||
|
|
||||||
focusing on and what's the strategy when
|
|
||||||
|
|
||||||
it comes to AI
|
|
||||||
|
|
||||||
yes uh look AI as as I'm sure you're
|
|
||||||
|
|
||||||
aware is it's become you know really uh
|
|
||||||
|
|
||||||
an incredibly important topic in in all
|
|
||||||
|
|
||||||
aspects of of business and and you know
|
|
||||||
|
|
||||||
not just business socially as well
|
|
||||||
|
|
||||||
everyone's talking about uh this this
|
|
||||||
|
|
||||||
new breakthrough disruptive technology
|
|
||||||
|
|
||||||
the large language models and generative
|
|
||||||
|
|
||||||
AI
|
|
||||||
|
|
||||||
um I mean look AI uh has been about a 80
|
|
||||||
|
|
||||||
year old overnight success a lot of
|
|
||||||
|
|
||||||
people have been working on this for a
|
|
||||||
|
|
||||||
long time generative AI is just sort of
|
|
||||||
|
|
||||||
you know the culmination of a lot of
|
|
||||||
|
|
||||||
things coming together and working uh
|
|
||||||
|
|
||||||
but it is uncorked enormous uh
|
|
||||||
|
|
||||||
Innovation and and we think that um this
|
|
||||||
|
|
||||||
there's a very good news story about
|
|
||||||
|
|
||||||
this in healthcare particularly where we
|
|
||||||
|
|
||||||
were looking to look we were looking to
|
|
||||||
|
|
||||||
unlock uh the value of of the data that
|
|
||||||
|
|
||||||
that we all produce every single day
|
|
||||||
|
|
||||||
um as as humans and and so we've
|
|
||||||
|
|
||||||
established an AI investment program
|
|
||||||
|
|
||||||
because no one company can can tackle
|
|
||||||
|
|
||||||
all of these Innovations themselves and
|
|
||||||
|
|
||||||
what well has done too is it's taken a
|
|
||||||
|
|
||||||
very much an ecosystem approach by
|
|
||||||
|
|
||||||
establishing its apps.health Marketplace
|
|
||||||
|
|
||||||
and so we're very excited about not only
|
|
||||||
|
|
||||||
uh allocating Capital into promising
|
|
||||||
|
|
||||||
young AI companies that are focused on
|
|
||||||
|
|
||||||
digital health and solving Healthcare
|
|
||||||
|
|
||||||
problems but also giving them access to
|
|
||||||
|
|
||||||
um you know safely and securely to our
|
|
||||||
|
|
||||||
provider Network to our uh you know to
|
|
||||||
|
|
||||||
to our Outpatient Clinic Network which
|
|
||||||
|
|
||||||
is the largest owned and operated
|
|
||||||
|
|
||||||
Network in Canada by far uh so
|
|
||||||
|
|
||||||
um and and when these and it's it was
|
|
||||||
|
|
||||||
remarkable when we announced this
|
|
||||||
|
|
||||||
program we've had just in the in the
|
|
||||||
|
|
||||||
first uh week to 10 days we've had over
|
|
||||||
|
|
||||||
a hundred uh inbound prospects come in
|
|
||||||
|
|
||||||
uh that that wanted to you know
|
|
||||||
|
|
||||||
collaborate with us and again I don't
|
|
||||||
|
|
||||||
think that's necessarily for the money
|
|
||||||
|
|
||||||
you know we're saying we would invest a
|
|
||||||
|
|
||||||
minimum of a quarter of a million
|
|
||||||
|
|
||||||
dollars you know a lot of them will
|
|
||||||
|
|
||||||
likely be higher than a quarter of a
|
|
||||||
|
|
||||||
million dollars
|
|
||||||
|
|
||||||
so it's not life-changing money but but
|
|
||||||
|
|
||||||
our structural advantages and and and
|
|
||||||
|
|
||||||
the benefits that we have in the Well
|
|
||||||
|
|
||||||
Network those are extremely hard to come
|
|
||||||
|
|
||||||
by, and I think you'll see us help some of these companies succeed, and they will help us drive more innovation that helps the provider.

But speaking of this very interesting AI: I know your company just launched WELL AI Voice. This is super interesting. Tell me what it is and the impact it could have on healthcare providers.

Yeah, thanks for asking, Richard. Our providers are thrilled with this. We've had a number of our own WELL providers testing this technology, and it really feels like magic to them. It's essentially an ambient, AI-powered scribe: a service that, with the consent of the parties involved, listens to the conversation between a patient and provider and then condenses it into a medically relevant note for the chart files. Typically that is a lengthy process. A doctor has to transcribe notes, then review those notes and make sure that an appropriate, medically oriented and structured note is prepared and put into the chart, and that can sometimes take more time than the actual consultation. So we believe that, on average, if it's used regularly and consistently, this can give providers back at least a third of their day.

It's just a game changer, and we have now gone into general release with this product. It's widely available in Canada, and it has been integrated into our EMR, which makes it even more valuable. Tools like this are going to start popping up, but if they're not integrated into your practice management system, then you have to keep data in more than one place and move it around a bit, which makes things a little more difficult, especially with HIPAA requirements and regulations. So again, I think this is the first of many types of products and services that allow doctors to place more emphasis and focus on the patient experience. Instead of having their head in a laptop and looking at you once in a while, they'll be looking at you and speaking to their practice management system. Think about it as Alexa for our doctors: the ability to speak and have a voice-driven AI assistant that does things like this is going to be incredibly helpful and valuable for healthcare providers.

Super fascinating. I mean, we're just hearing more about AI, maybe hearing about AI for the first time, but here you are with a product already on the market in the healthcare field. That's going to be pretty attractive, being out there right ahead of many other people, right?

Thank you, Richard, thanks for that recognition. That's been our intention. We want to demonstrate that we're all in on ensuring that technology that benefits providers is accelerated, de-risked and provided in a timely way. Providers need this help. We have a healthcare crisis in the country that is generally characterized as a lack of doctors, so imagine if we can get our doctors to be 20 or 30 percent more productive through the use of these types of tools: they're going to see more patients, and that's going to help all of us. And look, if you step back, WELL's business model is all about having exposure to the success of doctors and doing our best to help them be more successful, because we're in a revenue-share relationship with most of the doctors we work with. So this is good for the ecosystem, it's great for the provider, and it's great for WELL as well.

Super fascinating. Hamed Shahbazi, CEO of WELL Health Technologies, ticker W-E-L-L. Great to catch up again. Thank you, sir.

Thank you, Richard, appreciate you having me.

[Music]

thank you
@@ -1,970 +0,0 @@
|
|||||||
learning medicine is hard work osmosis
|
|
||||||
|
|
||||||
makes it easy it takes our lectures and
|
|
||||||
|
|
||||||
notes to create a personalized study
|
|
||||||
|
|
||||||
plan with exclusive videos practice
|
|
||||||
|
|
||||||
questions and flashcards and so much
|
|
||||||
|
|
||||||
more try it free today
|
|
||||||
|
|
||||||
in diabetes mellitus your body has
|
|
||||||
|
|
||||||
trouble moving glucose which is the type
|
|
||||||
|
|
||||||
of sugar from your blood into your cells
|
|
||||||
|
|
||||||
this leads to high levels of glucose in
|
|
||||||
|
|
||||||
your blood and not enough of it in your
|
|
||||||
|
|
||||||
cells and remember that your cells need
|
|
||||||
|
|
||||||
glucose as a source of energy so not
|
|
||||||
|
|
||||||
letting the glucose enter means that the
|
|
||||||
|
|
||||||
cells starve for energy despite having
|
|
||||||
|
|
||||||
glucose right on their doorstep in
|
|
||||||
|
|
||||||
general the body controls how much
|
|
||||||
|
|
||||||
glucose is in the blood relative to how
|
|
||||||
|
|
||||||
much gets into the cells with two
|
|
||||||
|
|
||||||
hormones insulin and glucagon insulin is
|
|
||||||
|
|
||||||
used to reduce blood glucose levels and
|
|
||||||
|
|
||||||
glucagon is used to increase blood
|
|
||||||
|
|
||||||
glucose levels both of these hormones
|
|
||||||
|
|
||||||
are produced by clusters of cells in the
|
|
||||||
|
|
||||||
pancreas called islets of langerhans
|
|
||||||
|
|
||||||
insulin is secreted by beta cells in the
|
|
||||||
|
|
||||||
center of these islets and glucagon is
|
|
||||||
|
|
||||||
secreted by alpha cells in the periphery
|
|
||||||
|
|
||||||
of the islets insulin reduces the amount
|
|
||||||
|
|
||||||
of glucose in the blood by binding to
|
|
||||||
|
|
||||||
insulin receptors embedded in the cell
|
|
||||||
|
|
||||||
membrane of various insulin responsive
|
|
||||||
|
|
||||||
tissues like muscle cells in adipose
|
|
||||||
|
|
||||||
tissue when activated the insulin
|
|
||||||
|
|
||||||
receptors cause vesicles containing
|
|
||||||
|
|
||||||
glucose transporter that are inside the
|
|
||||||
|
|
||||||
cell to fuse with the cell membrane
|
|
||||||
|
|
||||||
allowing glucose to be transported into
|
|
||||||
|
|
||||||
the cell glucagon does exactly the
|
|
||||||
|
|
||||||
opposite it raises the blood glucose
|
|
||||||
|
|
||||||
levels by getting the liver to generate
|
|
||||||
|
|
||||||
new molecules of glucose from other
|
|
||||||
|
|
||||||
molecules and also break down glycogen
|
|
||||||
|
|
||||||
into glucose so that it can all get
|
|
||||||
|
|
||||||
dumped into the blood diabetes mellitus
|
|
||||||
|
|
||||||
is diagnosed when blood glucose levels
|
|
||||||
|
|
||||||
get too high and this is seen among 10
|
|
||||||
|
|
||||||
percent of the world population there
|
|
||||||
|
|
||||||
are two types of diabetes type 1 and
|
|
||||||
|
|
||||||
type 2 and the main difference between
|
|
||||||
|
|
||||||
them is the underlying mechanism that
|
|
||||||
|
|
||||||
causes the blood glucose levels to rise
|
|
||||||
|
|
||||||
about 10% of people with diabetes have
|
|
||||||
|
|
||||||
type 1 and the remaining 90% of people
|
|
||||||
|
|
||||||
with diabetes have type 2 let's start
|
|
||||||
|
|
||||||
with type 1 diabetes mellitus sometimes
|
|
||||||
|
|
||||||
just called type 1 diabetes in this
|
|
||||||
|
|
||||||
situation the body doesn't make enough
|
|
||||||
|
|
||||||
insulin the reason this happens is that
|
|
||||||
|
|
||||||
in type 1 diabetes there's a type 4
|
|
||||||
|
|
||||||
hypersensitivity response or a cell
|
|
||||||
|
|
||||||
mediated immune response where a
|
|
||||||
|
|
||||||
person's own T cells attack
|
|
||||||
|
|
||||||
the pancreas as a quick review remember
|
|
||||||
|
|
||||||
that the immune system has T cells that
|
|
||||||
|
|
||||||
react to all sorts of antigens which are
|
|
||||||
|
|
||||||
usually small peptides polysaccharides
|
|
||||||
|
|
||||||
or lipids and that some of these
|
|
||||||
|
|
||||||
antigens are part of our own body cells
|
|
||||||
|
|
||||||
it doesn't make sense to allow T cells
|
|
||||||
|
|
||||||
that will attack our own cells to hang
|
|
||||||
|
|
||||||
around until there's this process to
|
|
||||||
|
|
||||||
eliminate them called self tolerance in
|
|
||||||
|
|
||||||
type 1 diabetes there's a genetic
|
|
||||||
|
|
||||||
abnormality that causes a loss of self
|
|
||||||
|
|
||||||
tolerance among T cells that
|
|
||||||
|
|
||||||
specifically target the beta cell
|
|
||||||
|
|
||||||
antigens losing self tolerance means
|
|
||||||
|
|
||||||
that these T cells are allowed to
|
|
||||||
|
|
||||||
recruit other immune cells and
|
|
||||||
|
|
||||||
coordinate an attack on these beta cells
|
|
||||||
|
|
||||||
losing beta cells means less insulin and
|
|
||||||
|
|
||||||
less insulin means that glucose piles up
|
|
||||||
|
|
||||||
in the blood because it can't enter the
|
|
||||||
|
|
||||||
body's cells one really important group
|
|
||||||
|
|
||||||
of genes involved in regulation of the
|
|
||||||
|
|
||||||
immune response is the human leukocyte
|
|
||||||
|
|
||||||
antigen system or HLA system even though
|
|
||||||
|
|
||||||
it's called a system it's basically this
|
|
||||||
|
|
||||||
group of genes on chromosome 6 that
|
|
||||||
|
|
||||||
encode the major histocompatibility
|
|
||||||
|
|
||||||
complex or MHC which is a protein that's
|
|
||||||
|
|
||||||
extremely important in helping the
|
|
||||||
|
|
||||||
immune system recognize foreign
|
|
||||||
|
|
||||||
molecules as well as maintaining self
|
|
||||||
|
|
||||||
tolerance MHC is like the serving
|
|
||||||
|
|
||||||
platter that antigens are presented to
|
|
||||||
|
|
||||||
the immune cells on interestingly people
|
|
||||||
|
|
||||||
with type 1 diabetes often have specific
|
|
||||||
|
|
||||||
HLA genes in common with each other one
|
|
||||||
|
|
||||||
called
|
|
||||||
|
|
||||||
HLA dr3 and another called HLA dr4 but
|
|
||||||
|
|
||||||
this is just a genetic clue right
|
|
||||||
|
|
||||||
because not everyone with HLA dr3 and
|
|
||||||
|
|
||||||
HLA dr4 develops diabetes in diabetes
|
|
||||||
|
|
||||||
mellitus type 1 destruction of beta
|
|
||||||
|
|
||||||
cells usually starts early in life but
|
|
||||||
|
|
||||||
sometimes up to 90% of the beta cells
|
|
||||||
|
|
||||||
are destroyed before symptoms crop up
|
|
||||||
|
|
||||||
four clinical symptoms of uncontrolled
|
|
||||||
|
|
||||||
diabetes that all sound similar are
|
|
||||||
|
|
||||||
polyphagia glycosuria polyuria and
|
|
||||||
|
|
||||||
polydipsia let's go through them one by
|
|
||||||
|
|
||||||
one even though there's a lot of glucose
|
|
||||||
|
|
||||||
in the blood it cannot get into the
|
|
||||||
|
|
||||||
cells which leaves cells starved for
|
|
||||||
|
|
||||||
energy so in response adipose tissue
|
|
||||||
|
|
||||||
starts breaking down fat called
|
|
||||||
|
|
||||||
lipolysis
|
|
||||||
|
|
||||||
and muscle tissue starts breaking down
|
|
||||||
|
|
||||||
proteins both of which results in weight
|
|
||||||
|
|
||||||
loss for someone with uncontrolled
|
|
||||||
|
|
||||||
diabetes this catabolic state leaves
|
|
||||||
|
|
||||||
people feeling hungry
|
|
||||||
|
|
||||||
also known as polyphagia phagia means
|
|
||||||
|
|
||||||
eating and poly means a lot now with
|
|
||||||
|
|
||||||
high glucose levels that means that when
|
|
||||||
|
|
||||||
blood gets filtered through the kidneys
|
|
||||||
|
|
||||||
some of it starts to spill into the
|
|
||||||
|
|
||||||
urine called glycosuria glyco refers to
|
|
||||||
|
|
||||||
glucose and uria to the urine since glucose
|
|
||||||
|
|
||||||
is osmotically active water tends to
|
|
||||||
|
|
||||||
follow it resulting in an increase in
|
|
||||||
|
|
||||||
urination or polyuria poly again refers
|
|
||||||
|
|
||||||
to a lot and urea again refers to urine
|
|
||||||
|
|
||||||
finally because there's so much
|
|
||||||
|
|
||||||
urination people with uncontrolled
|
|
||||||
|
|
||||||
diabetes become dehydrated and thirsty
|
|
||||||
|
|
||||||
or polydipsia poly means a lot and dip
|
|
||||||
|
|
||||||
SIA means thirst even though people with
|
|
||||||
|
|
||||||
diabetes are not able to produce their
|
|
||||||
|
|
||||||
own insulin they can still respond to
|
|
||||||
|
|
||||||
insulin so treatment involves lifelong
|
|
||||||
|
|
||||||
insulin therapy to regulate their blood
|
|
||||||
|
|
||||||
glucose levels and basically enable
|
|
||||||
|
|
||||||
their cells to use glucose
|
|
||||||
|
|
||||||
one really serious complication with
|
|
||||||
|
|
||||||
type 1 diabetes is called diabetic
|
|
||||||
|
|
||||||
ketoacidosis or DKA to understand it
|
|
||||||
|
|
||||||
let's go back to the process of
|
|
||||||
|
|
||||||
lipolysis where fat is broken down into
|
|
||||||
|
|
||||||
free fatty acids after that happens the
|
|
||||||
|
|
||||||
liver turns the fatty acids into ketone
|
|
||||||
|
|
||||||
bodies like acetoacetic acid and beta
|
|
||||||
|
|
||||||
hydroxybutyric acid acetoacetic
|
|
||||||
|
|
||||||
acid is a keto acid because it has a
|
|
||||||
|
|
||||||
ketone group and a carboxylic acid group
|
|
||||||
|
|
||||||
beta hydroxybutyric acid on the other
|
|
||||||
|
|
||||||
hand even though it's still one of the
|
|
||||||
|
|
||||||
ketone bodies isn't technically a keto
|
|
||||||
|
|
||||||
acid since its ketone group has been
|
|
||||||
|
|
||||||
reduced to a hydroxyl group these ketone
|
|
||||||
|
|
||||||
bodies are important because they can be
|
|
||||||
|
|
||||||
used by cells for energy but they also
|
|
||||||
|
|
||||||
increase the acidity of the blood which
|
|
||||||
|
|
||||||
is why it's called ketoacidosis and the
|
|
||||||
|
|
||||||
blood becoming really acidic can have
|
|
||||||
|
|
||||||
major effects throughout the body
|
|
||||||
|
|
||||||
individuals can develop Kussmaul
|
|
||||||
|
|
||||||
respiration which is a deep and labored
|
|
||||||
|
|
||||||
breathing as the body tries to move
|
|
||||||
|
|
||||||
carbon dioxide out of the blood in an
|
|
||||||
|
|
||||||
effort to reduce its acidity cells also
|
|
||||||
|
|
||||||
have a transporter that exchanges
|
|
||||||
|
|
||||||
hydrogen ions or protons for potassium
|
|
||||||
|
|
||||||
when the blood gets acidic it's by
|
|
||||||
|
|
||||||
definition loaded with protons that get
|
|
||||||
|
|
||||||
sent into cells while potassium gets
|
|
||||||
|
|
||||||
sent into the fluid outside cells
|
|
||||||
|
|
||||||
another thing to keep in mind is that in
|
|
||||||
|
|
||||||
addition to helping glucose enter cells
|
|
||||||
|
|
||||||
insulin stimulates the sodium potassium
|
|
||||||
|
|
||||||
ATPase --is which help potassium get
|
|
||||||
|
|
||||||
into the cells and so without insulin
|
|
||||||
|
|
||||||
more potassium stays in the fluid
|
|
||||||
|
|
||||||
outside cells both of these mechanisms
|
|
||||||
|
|
||||||
lead to increased potassium in the fluid
|
|
||||||
|
|
||||||
outside cells which quickly makes it
|
|
||||||
|
|
||||||
into the blood and causes hyperkalemia
|
|
||||||
|
|
||||||
the potassium is then excreted so over
|
|
||||||
|
|
||||||
time even though the blood potassium
|
|
||||||
|
|
||||||
levels remain high over all stores of
|
|
||||||
|
|
||||||
potassium in the body which include
|
|
||||||
|
|
||||||
potassium inside cells starts to run low
|
|
||||||
|
|
||||||
individuals will also have a high anion
|
|
||||||
|
|
||||||
gap which reflects a large difference in
|
|
||||||
|
|
||||||
the unmeasured negative and positive
|
|
||||||
|
|
||||||
ions in the serum largely due to the
|
|
||||||
|
|
||||||
build-up of ketoacids
|
|
||||||
|
|
||||||
diabetic ketoacidosis can happen even in
|
|
||||||
|
|
||||||
people who have already been diagnosed
|
|
||||||
|
|
||||||
with diabetes and currently have some
|
|
||||||
|
|
||||||
sort of insulin therapy
|
|
||||||
|
|
||||||
in states of stress like an infection
|
|
||||||
|
|
||||||
the body releases epinephrine which in
|
|
||||||
|
|
||||||
turn stimulates the release of glucagon
|
|
||||||
|
|
||||||
too much glucagon can tip the delicate
|
|
||||||
|
|
||||||
hormonal balance of glucagon and insulin
|
|
||||||
|
|
||||||
in favor of elevating blood sugars and
|
|
||||||
|
|
||||||
can lead to a cascade of events we just
|
|
||||||
|
|
||||||
described increased glucose in the blood
|
|
||||||
|
|
||||||
loss of glucose in the urine loss of
|
|
||||||
|
|
||||||
water dehydration and in parallel and
|
|
||||||
|
|
||||||
need for alternative energy generation
|
|
||||||
|
|
||||||
of ketone bodies and ketoacidosis
|
|
||||||
|
|
||||||
interestingly both ketone bodies break
|
|
||||||
|
|
||||||
down into acetone and escape as a gas by
|
|
||||||
|
|
||||||
getting breathed out the lungs which
|
|
||||||
|
|
||||||
gives us sweet fruity smell to a
|
|
||||||
|
|
||||||
person's breath in general though that's
|
|
||||||
|
|
||||||
the only sweet thing about this illness
|
|
||||||
|
|
||||||
which also causes nausea vomiting and if
|
|
||||||
|
|
||||||
severe mental status changes and acute
|
|
||||||
|
|
||||||
cerebral edema
|
|
||||||
|
|
||||||
treatment of a DKA episode involves
|
|
||||||
|
|
||||||
giving plenty of fluids which helps with
|
|
||||||
|
|
||||||
dehydration insulin which helps lower
|
|
||||||
|
|
||||||
blood glucose levels and replacement of
|
|
||||||
|
|
||||||
electrolytes like potassium all of which
|
|
||||||
|
|
||||||
help to reverse the acidosis now let's
|
|
||||||
|
|
||||||
switch gears and talk about type 2
|
|
||||||
|
|
||||||
diabetes which is where the body makes
|
|
||||||
|
|
||||||
insulin but the tissues don't respond as
|
|
||||||
|
|
||||||
well to it the exact reason why cells
|
|
||||||
|
|
||||||
don't respond isn't fully understood
|
|
||||||
|
|
||||||
essentially the body's providing the
|
|
||||||
|
|
||||||
normal amount of insulin but the cells
|
|
||||||
|
|
||||||
don't move their glucose transporters to
|
|
||||||
|
|
||||||
their membrane in response which
|
|
||||||
|
|
||||||
remember is needed for the glucose to
|
|
||||||
|
|
||||||
get into the cells these cells therefore
|
|
||||||
|
|
||||||
have insulin resistance some risk
|
|
||||||
|
|
||||||
factors for insulin resistance are
|
|
||||||
|
|
||||||
obesity lack of exercise and
|
|
||||||
|
|
||||||
hypertension the exact mechanisms are
|
|
||||||
|
|
||||||
still being explored for example an
|
|
||||||
|
|
||||||
excess of adipose tissue or fat is
|
|
||||||
|
|
||||||
thought to cause the release of free
|
|
||||||
|
|
||||||
fatty acids and so-called adipokines
|
|
||||||
|
|
||||||
which are signaling molecules that can
|
|
||||||
|
|
||||||
cause inflammation which seems related
|
|
||||||
|
|
||||||
to insulin resistance
|
|
||||||
|
|
||||||
however many people that are obese are
|
|
||||||
|
|
||||||
not diabetic so genetic factors probably
|
|
||||||
|
|
||||||
play a major role as well we see this
|
|
||||||
|
|
||||||
when we look at twin studies as well
|
|
||||||
|
|
||||||
where having a twin with type-2 diabetes
|
|
||||||
|
|
||||||
increases the risk of developing type 2
|
|
||||||
|
|
||||||
diabetes completely independently of
|
|
||||||
|
|
||||||
other environmental risk factors in type
|
|
||||||
|
|
||||||
2 diabetes since tissues don't respond
|
|
||||||
|
|
||||||
as well to normal levels of insulin the
|
|
||||||
|
|
||||||
body ends up producing more insulin in
|
|
||||||
|
|
||||||
order to get the same effect and move
|
|
||||||
|
|
||||||
glucose out of the blood
|
|
||||||
|
|
||||||
they do this through beta cell
|
|
||||||
|
|
||||||
hyperplasia an increased number of beta
|
|
||||||
|
|
||||||
cells and beta cell hypertrophy where
|
|
||||||
|
|
||||||
they actually grow in size all in this
|
|
||||||
|
|
||||||
attempt to pump out more insulin this
|
|
||||||
|
|
||||||
works for a while and by keeping insulin
|
|
||||||
|
|
||||||
levels higher than normal blood glucose
|
|
||||||
|
|
||||||
levels can be kept normal called normal
|
|
||||||
|
|
||||||
glycemia now along with insulin beta
|
|
||||||
|
|
||||||
cells also secrete islet amyloid
|
|
||||||
|
|
||||||
polypeptide or amylin so while beta
|
|
||||||
|
|
||||||
cells are cranking out insulin they also
|
|
||||||
|
|
||||||
secrete an increased amount of amylin
|
|
||||||
|
|
||||||
over time amylin builds up and aggregates
|
|
||||||
|
|
||||||
in the islets this beta cell
|
|
||||||
|
|
||||||
compensation though is not sustainable
|
|
||||||
|
|
||||||
and over time those maxed out beta cells
|
|
||||||
|
|
||||||
get exhausted and they become
|
|
||||||
|
|
||||||
dysfunctional and undergo hypotrophy
|
|
||||||
|
|
||||||
and get smaller as well as hypoplasia
|
|
||||||
|
|
||||||
and die off as beta cells are lost and
|
|
||||||
|
|
||||||
insulin levels decrease glucose levels
|
|
||||||
|
|
||||||
in the blood start to increase and
|
|
||||||
|
|
||||||
patients develop hyperglycemia which
|
|
||||||
|
|
||||||
leads to similar clinical signs that we
|
|
||||||
|
|
||||||
mentioned before like polyphagia
|
|
||||||
|
|
||||||
glycosuria polyuria polydipsia but
|
|
||||||
|
|
||||||
unlike type 1 diabetes there's generally
|
|
||||||
|
|
||||||
some circulating insulin in type 2
|
|
||||||
|
|
||||||
diabetes from the beta cells that are
|
|
||||||
|
|
||||||
trying to compensate for the insulin
|
|
||||||
|
|
||||||
resistance this means that the insulin
|
|
||||||
|
|
||||||
glucagon balance is such that diabetic
|
|
||||||
|
|
||||||
ketoacidosis does not usually develop
|
|
||||||
|
|
||||||
having said that a complication called
|
|
||||||
|
|
||||||
hyperosmolar hyperglycemic state or HHS
|
|
||||||
|
|
||||||
is much more common in type 2 diabetes
|
|
||||||
|
|
||||||
than type 1 diabetes and it causes
|
|
||||||
|
|
||||||
increased plasma osmolarity due to
|
|
||||||
|
|
||||||
extreme dehydration and concentration of
|
|
||||||
|
|
||||||
the blood to help understand this
|
|
||||||
|
|
||||||
remember that glucose is a polar
|
|
||||||
|
|
||||||
molecule that cannot passively diffuse
|
|
||||||
|
|
||||||
across cell membranes which means that
|
|
||||||
|
|
||||||
it acts as a solute so when levels of
|
|
||||||
|
|
||||||
glucose are super high in the blood
|
|
||||||
|
|
||||||
meaning it's a hyperosmolar State water
|
|
||||||
|
|
||||||
starts to leave the body cells and enter
|
|
||||||
|
|
||||||
the blood vessels leaving the cells were
|
|
||||||
|
|
||||||
relatively dry and shriveled rather than
|
|
||||||
|
|
||||||
plump and juicy blood vessels that are
|
|
||||||
|
|
||||||
full of water lead to increased
|
|
||||||
|
|
||||||
urination and total body dehydration and
|
|
||||||
|
|
||||||
this is a very serious situation because
|
|
||||||
|
|
||||||
the dehydration of the body's cells and
|
|
||||||
|
|
||||||
in particular the brain can cause a
|
|
||||||
|
|
||||||
number of symptoms including mental
|
|
||||||
|
|
||||||
status changes in HHS you can sometimes
|
|
||||||
|
|
||||||
see mild ketonemia and acidosis but
|
|
||||||
|
|
||||||
not to the extent that it's seen in DKA
|
|
||||||
|
|
||||||
and in DKA you can see some hyper
|
|
||||||
|
|
||||||
osmolarity so there's definitely overlap
|
|
||||||
|
|
||||||
between these two syndromes
|
|
||||||
|
|
||||||
besides type 1 and type 2 diabetes there
|
|
||||||
|
|
||||||
are also a couple other subtypes of
|
|
||||||
|
|
||||||
diabetes mellitus gestational diabetes
|
|
||||||
|
|
||||||
is when pregnant women have increased
|
|
||||||
|
|
||||||
blood glucose which is particularly
|
|
||||||
|
|
||||||
during the third trimester although
|
|
||||||
|
|
||||||
ultimately unknown the cause is thought
|
|
||||||
|
|
||||||
to be related to pregnancy hormones that
|
|
||||||
|
|
||||||
interfere with insulins action on
|
|
||||||
|
|
||||||
insulin receptors also sometimes people
|
|
||||||
|
|
||||||
can develop drug-induced diabetes which
|
|
||||||
|
|
||||||
is where medications have side effects
|
|
||||||
|
|
||||||
that tend to increase blood glucose
|
|
||||||
|
|
||||||
levels the mechanism for both of these
|
|
||||||
|
|
||||||
is thought to be related to insulin
|
|
||||||
|
|
||||||
resistance like type 2 diabetes rather
|
|
||||||
|
|
||||||
than an autoimmune destruction process
|
|
||||||
|
|
||||||
like in type 1 diabetes diagnosing type
|
|
||||||
|
|
||||||
1 or type 2 diabetes is done by getting
|
|
||||||
|
|
||||||
a sense for how much glucose is floating
|
|
||||||
|
|
||||||
around in the blood and has specific
|
|
||||||
|
|
||||||
standards that the World Health
|
|
||||||
|
|
||||||
Organization uses very commonly a
|
|
||||||
|
|
||||||
fasting glucose test is taken where the
|
|
||||||
|
|
||||||
person doesn't eat or drink except the
|
|
||||||
|
|
||||||
water that's okay for a total of eight
|
|
||||||
|
|
||||||
hours and then has their blood tested
|
|
||||||
|
|
||||||
for glucose levels levels of 100
|
|
||||||
|
|
||||||
milligrams per deciliter to 125
|
|
||||||
|
|
||||||
milligrams per deciliter indicates
|
|
||||||
|
|
||||||
pre-diabetes and 126 milligrams per
|
|
||||||
|
|
||||||
deciliter or higher indicates diabetes a
|
|
||||||
|
|
||||||
non fasting or random glucose test can be
|
|
||||||
|
|
||||||
done at any time with 200 milligrams per
|
|
||||||
|
|
||||||
deciliter or higher being a red flag for
|
|
||||||
|
|
||||||
diabetes another test is called an oral
|
|
||||||
|
|
||||||
glucose tolerance test where person is
|
|
||||||
|
|
||||||
given glucose and then blood samples are
|
|
||||||
|
|
||||||
taken at time intervals to figure out
|
|
||||||
|
|
||||||
how well it's being cleared from the
|
|
||||||
|
|
||||||
blood the most important interval being
|
|
||||||
|
|
||||||
two hours later levels of 140 milligrams
|
|
||||||
|
|
||||||
per deciliter to 199 milligrams per
|
|
||||||
|
|
||||||
deciliter indicate pre-diabetes
|
|
||||||
|
|
||||||
and 200 or above indicates diabetes
|
|
||||||
|
|
||||||
another thing to know is that when blood
|
|
||||||
|
|
||||||
glucose levels get high the glucose can
|
|
||||||
|
|
||||||
also stick to proteins that are floating
|
|
||||||
|
|
||||||
around in the blood or in cells so that
|
|
||||||
|
|
||||||
brings us to another type of test that
|
|
||||||
|
|
||||||
can be done which is the hba1c test
|
|
||||||
|
|
||||||
which tests for the proportion of
|
|
||||||
|
|
||||||
hemoglobin in red blood cells that has
|
|
||||||
|
|
||||||
glucose stuck to it called glycated
|
|
||||||
|
|
||||||
hemoglobin hba1c levels of 5.7% to 6.4%
|
|
||||||
|
|
||||||
indicate pre-diabetes
|
|
||||||
|
|
||||||
and 6.5 percent or higher indicates
|
|
||||||
|
|
||||||
diabetes this proportion of glycated
|
|
||||||
|
|
||||||
hemoglobin doesn't change day to day so
|
|
||||||
|
|
||||||
it gives a sense for whether the blood
|
|
||||||
|
|
||||||
glucose levels have been high over the
|
|
||||||
|
|
||||||
past two to three months finally we have
|
|
||||||
|
|
||||||
the c-peptide test which tests for
|
|
||||||
|
|
||||||
byproducts of insulin production if the
|
|
||||||
|
|
||||||
level of c-peptide is low or absent it
|
|
||||||
|
|
||||||
means the pancreas is no longer
|
|
||||||
|
|
||||||
producing enough insulin and the glucose
|
|
||||||
|
|
||||||
cannot enter the cells
|
|
||||||
|
|
||||||
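The diagnostic cut-offs quoted above (fasting glucose, the two-hour oral glucose tolerance test, and HbA1c) are easier to compare side by side. As a rough illustration only, not part of the original video and not clinical guidance, they can be written out as a small lookup in Python:

```python
# Illustrative summary of the cut-offs quoted in the transcript above.
# Glucose values are mg/dL, HbA1c is a percentage; for illustration only.
def classify_fasting_glucose(mg_dl: float) -> str:
    if mg_dl >= 126:
        return "diabetes"
    if 100 <= mg_dl <= 125:
        return "pre-diabetes"
    return "normal"

def classify_ogtt_2h(mg_dl: float) -> str:
    if mg_dl >= 200:
        return "diabetes"
    if 140 <= mg_dl <= 199:
        return "pre-diabetes"
    return "normal"

def classify_hba1c(percent: float) -> str:
    if percent >= 6.5:
        return "diabetes"
    if 5.7 <= percent <= 6.4:
        return "pre-diabetes"
    return "normal"

print(classify_fasting_glucose(118))  # pre-diabetes
print(classify_ogtt_2h(210))          # diabetes
print(classify_hba1c(5.4))            # normal
```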
for type one diabetes insulin is the
|
|
||||||
|
|
||||||
only treatment option for type 2
|
|
||||||
|
|
||||||
diabetes on the other hand lifestyle
|
|
||||||
|
|
||||||
changes like weight loss and exercise
|
|
||||||
|
|
||||||
along with a healthy diet and an oral
|
|
||||||
|
|
||||||
anti-diabetic medication like metformin
|
|
||||||
|
|
||||||
in several other classes can sometimes
|
|
||||||
|
|
||||||
be enough to reverse some of that
|
|
||||||
|
|
||||||
insulin resistance and keep blood sugar
|
|
||||||
|
|
||||||
levels in check however if oral
|
|
||||||
|
|
||||||
anti-diabetic medications fail type 2
|
|
||||||
|
|
||||||
diabetes can also be treated with
|
|
||||||
|
|
||||||
insulin something to bear in mind is
|
|
||||||
|
|
||||||
that insulin treatment comes with a risk
|
|
||||||
|
|
||||||
of hypoglycemia especially if insulin is
|
|
||||||
|
|
||||||
taken without a meal symptoms of
|
|
||||||
|
|
||||||
hypoglycemia can be mild like weakness
|
|
||||||
|
|
||||||
hunger and shaking but they can progress
|
|
||||||
|
|
||||||
to a loss of consciousness in seizures
|
|
||||||
|
|
||||||
in severe cases in mild cases drinking
|
|
||||||
|
|
||||||
juices or eating candy or sugar might be
|
|
||||||
|
|
||||||
enough to bring blood sugar up but in
|
|
||||||
|
|
||||||
severe cases intravenous glucose should
|
|
||||||
|
|
||||||
be given as soon as possible
|
|
||||||
|
|
||||||
the FDA has also recently approved
|
|
||||||
|
|
||||||
intranasal glucagon as a treatment for
|
|
||||||
|
|
||||||
severe hypoglycemia all right now over
|
|
||||||
|
|
||||||
time high glucose levels can cause
|
|
||||||
|
|
||||||
damage to tiny blood vessels or the
|
|
||||||
|
|
||||||
microvasculature in arterioles a
|
|
||||||
|
|
||||||
process called hyaline
|
|
||||||
|
|
||||||
arteriolosclerosis is where the walls of
|
|
||||||
|
|
||||||
the arterioles develop hyaline deposits
|
|
||||||
|
|
||||||
which are deposits of proteins and these
|
|
||||||
|
|
||||||
make them hard and inflexible in
|
|
||||||
|
|
||||||
capillaries the basement membrane can
|
|
||||||
|
|
||||||
thicken and make it difficult for oxygen
|
|
||||||
|
|
||||||
to easily move from the capillary to the
|
|
||||||
|
|
||||||
tissues causing hypoxia
|
|
||||||
|
|
||||||
one of the most significant effects is
|
|
||||||
|
|
||||||
that diabetes increases the risk of
|
|
||||||
|
|
||||||
medium and large arterial wall damage
|
|
||||||
|
|
||||||
and subsequent atherosclerosis which can
|
|
||||||
|
|
||||||
lead to heart attacks and strokes which
|
|
||||||
|
|
||||||
are major causes of morbidity and
|
|
||||||
|
|
||||||
mortality for patients with diabetes in
|
|
||||||
|
|
||||||
the eyes diabetes can lead to
|
|
||||||
|
|
||||||
retinopathy and evidence of that can be
|
|
||||||
|
|
||||||
seen on a funduscopic exam that shows
|
|
||||||
|
|
||||||
cotton-wool spots or flame hemorrhages
|
|
||||||
|
|
||||||
and can eventually cause blindness in
|
|
||||||
|
|
||||||
the kidneys the afferent and efferent
|
|
||||||
|
|
||||||
arterioles as well as the glomerulus
|
|
||||||
|
|
||||||
itself can get damaged which can lead to
|
|
||||||
|
|
||||||
a nephrotic syndrome that slowly
|
|
||||||
|
|
||||||
diminishes the kidneys ability to filter
|
|
||||||
|
|
||||||
blood over time and can ultimately lead
|
|
||||||
|
|
||||||
to dialysis diabetes can also affect the
|
|
||||||
|
|
||||||
function of nerves causing symptoms like
|
|
||||||
|
|
||||||
a decrease in sensation in the toes and
|
|
||||||
|
|
||||||
fingers sometimes called a stocking
|
|
||||||
|
|
||||||
glove distribution as well as causes the
|
|
||||||
|
|
||||||
autonomic nervous system to malfunction
|
|
||||||
|
|
||||||
and that system controls a number of
|
|
||||||
|
|
||||||
body functions
|
|
||||||
|
|
||||||
everything from sweating to passing gas
|
|
||||||
|
|
||||||
finally both the poor blood supply and
|
|
||||||
|
|
||||||
nerve damage can lead to ulcers
|
|
||||||
|
|
||||||
typically on the feet that don't heal
|
|
||||||
|
|
||||||
quickly and can get pretty severe and
|
|
||||||
|
|
||||||
need to be amputated these are some of
|
|
||||||
|
|
||||||
the complications of uncontrolled
|
|
||||||
|
|
||||||
diabetes which is why it's important to
|
|
||||||
|
|
||||||
diagnose and control diabetes through a
|
|
||||||
|
|
||||||
healthy lifestyle medications to reduce
|
|
||||||
|
|
||||||
insulin resistance and even insulin
|
|
||||||
|
|
||||||
therapy if beta cells have been
|
|
||||||
|
|
||||||
exhausted while type 1 diabetes cannot
|
|
||||||
|
|
||||||
be prevented type 2 diabetes can in fact
|
|
||||||
|
|
||||||
many people with diabetes can control
|
|
||||||
|
|
||||||
their blood sugar levels really
|
|
||||||
|
|
||||||
effectively and live a full and active
|
|
||||||
|
|
||||||
life without any of the complications
|
|
||||||
|
|
||||||
thanks for watching if you're interested
|
|
||||||
|
|
||||||
in a deeper dive on this topic take a
|
|
||||||
|
|
||||||
look at osmosis.org where we have
|
|
||||||
|
|
||||||
flashcards questions and other awesome
|
|
||||||
|
|
||||||
tools to help you learn medicine
|
|
||||||
|
|
||||||
you
|
|
||||||
|
|
||||||
@@ -3,8 +3,9 @@
 This repository hold an API for the GPU implementation of the Reflector API service,
 and use [Modal.com](https://modal.com)

-- `reflector_llm.py` - LLM API
+- `reflector_diarizer.py` - Diarization API
 - `reflector_transcriber.py` - Transcription API
+- `reflector_translator.py` - Translation API

 ## Modal.com deployment

@@ -23,16 +24,20 @@ $ modal deploy reflector_llm.py
 └── 🔨 Created web => https://xxxx--reflector-llm-web.modal.run
 ```

-Then in your reflector api configuration `.env`, you can set theses keys:
+Then in your reflector api configuration `.env`, you can set these keys:

 ```
 TRANSCRIPT_BACKEND=modal
 TRANSCRIPT_URL=https://xxxx--reflector-transcriber-web.modal.run
 TRANSCRIPT_MODAL_API_KEY=REFLECTOR_APIKEY

-LLM_BACKEND=modal
-LLM_URL=https://xxxx--reflector-llm-web.modal.run
-LLM_MODAL_API_KEY=REFLECTOR_APIKEY
+DIARIZATION_BACKEND=modal
+DIARIZATION_URL=https://xxxx--reflector-diarizer-web.modal.run
+DIARIZATION_MODAL_API_KEY=REFLECTOR_APIKEY
+
+TRANSLATION_BACKEND=modal
+TRANSLATION_URL=https://xxxx--reflector-translator-web.modal.run
+TRANSLATION_MODAL_API_KEY=REFLECTOR_APIKEY
 ```

 ## API
|
|||||||
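For orientation, here is a minimal sketch of how a caller might combine the keys above. The URL is the placeholder from this README, and the route name and payload are assumptions: the transcriber's actual endpoint is not shown in this diff, only the bearer-token scheme used by the GPU backends below.

```python
# Illustrative only: the "/transcribe" path and file field are assumptions,
# not something documented in this diff.
import os
import requests

url = os.environ["TRANSCRIPT_URL"]           # e.g. https://xxxx--reflector-transcriber-web.modal.run
api_key = os.environ["TRANSCRIPT_MODAL_API_KEY"]
headers = {"Authorization": f"Bearer {api_key}"}

with open("sample.wav", "rb") as f:
    resp = requests.post(f"{url}/transcribe", headers=headers, files={"file": f}, timeout=300)
resp.raise_for_status()
print(resp.json())
```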
@@ -1,214 +0,0 @@
|
|||||||
"""
|
|
||||||
Reflector GPU backend - LLM
|
|
||||||
===========================
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import threading
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import modal
|
|
||||||
from modal import App, Image, Secret, asgi_app, enter, exit, method
|
|
||||||
|
|
||||||
# LLM
|
|
||||||
LLM_MODEL: str = "lmsys/vicuna-13b-v1.5"
|
|
||||||
LLM_LOW_CPU_MEM_USAGE: bool = True
|
|
||||||
LLM_TORCH_DTYPE: str = "bfloat16"
|
|
||||||
LLM_MAX_NEW_TOKENS: int = 300
|
|
||||||
|
|
||||||
IMAGE_MODEL_DIR = "/root/llm_models"
|
|
||||||
|
|
||||||
app = App(name="reflector-llm")
|
|
||||||
|
|
||||||
|
|
||||||
def download_llm():
|
|
||||||
from huggingface_hub import snapshot_download
|
|
||||||
|
|
||||||
print("Downloading LLM model")
|
|
||||||
snapshot_download(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)
|
|
||||||
print("LLM model downloaded")
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_cache_llm():
|
|
||||||
"""
|
|
||||||
XXX The cache for model files in Transformers v4.22.0 has been updated.
|
|
||||||
Migrating your old cache. This is a one-time only operation. You can
|
|
||||||
interrupt this and resume the migration later on by calling
|
|
||||||
`transformers.utils.move_cache()`.
|
|
||||||
"""
|
|
||||||
from transformers.utils.hub import move_cache
|
|
||||||
|
|
||||||
print("Moving LLM cache")
|
|
||||||
move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)
|
|
||||||
print("LLM cache moved")
|
|
||||||
|
|
||||||
|
|
||||||
llm_image = (
|
|
||||||
Image.debian_slim(python_version="3.10.8")
|
|
||||||
.apt_install("git")
|
|
||||||
.pip_install(
|
|
||||||
"transformers",
|
|
||||||
"torch",
|
|
||||||
"sentencepiece",
|
|
||||||
"protobuf",
|
|
||||||
"jsonformer==0.12.0",
|
|
||||||
"accelerate==0.21.0",
|
|
||||||
"einops==0.6.1",
|
|
||||||
"hf-transfer~=0.1",
|
|
||||||
"huggingface_hub==0.16.4",
|
|
||||||
)
|
|
||||||
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
|
|
||||||
.run_function(download_llm)
|
|
||||||
.run_function(migrate_cache_llm)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.cls(
|
|
||||||
gpu="A100",
|
|
||||||
timeout=60 * 5,
|
|
||||||
scaledown_window=60 * 5,
|
|
||||||
allow_concurrent_inputs=15,
|
|
||||||
image=llm_image,
|
|
||||||
)
|
|
||||||
class LLM:
|
|
||||||
@enter()
|
|
||||||
def enter(self):
|
|
||||||
import torch
|
|
||||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
|
||||||
|
|
||||||
print("Instance llm model")
|
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
|
||||||
LLM_MODEL,
|
|
||||||
torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
|
|
||||||
low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
|
|
||||||
cache_dir=IMAGE_MODEL_DIR,
|
|
||||||
local_files_only=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# JSONFormer doesn't yet support generation configs
|
|
||||||
print("Instance llm generation config")
|
|
||||||
model.config.max_new_tokens = LLM_MAX_NEW_TOKENS
|
|
||||||
|
|
||||||
# generation configuration
|
|
||||||
gen_cfg = GenerationConfig.from_model_config(model.config)
|
|
||||||
gen_cfg.max_new_tokens = LLM_MAX_NEW_TOKENS
|
|
||||||
|
|
||||||
# load tokenizer
|
|
||||||
print("Instance llm tokenizer")
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(
|
|
||||||
LLM_MODEL, cache_dir=IMAGE_MODEL_DIR, local_files_only=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# move model to gpu
|
|
||||||
print("Move llm model to GPU")
|
|
||||||
model = model.cuda()
|
|
||||||
|
|
||||||
print("Warmup llm done")
|
|
||||||
self.model = model
|
|
||||||
self.tokenizer = tokenizer
|
|
||||||
self.gen_cfg = gen_cfg
|
|
||||||
self.GenerationConfig = GenerationConfig
|
|
||||||
|
|
||||||
self.lock = threading.Lock()
|
|
||||||
|
|
||||||
@exit()
|
|
||||||
def exit():
|
|
||||||
print("Exit llm")
|
|
||||||
|
|
||||||
@method()
|
|
||||||
def generate(
|
|
||||||
self, prompt: str, gen_schema: str | None, gen_cfg: str | None
|
|
||||||
) -> dict:
|
|
||||||
"""
|
|
||||||
Perform a generation action using the LLM
|
|
||||||
"""
|
|
||||||
print(f"Generate {prompt=}")
|
|
||||||
if gen_cfg:
|
|
||||||
gen_cfg = self.GenerationConfig.from_dict(json.loads(gen_cfg))
|
|
||||||
else:
|
|
||||||
gen_cfg = self.gen_cfg
|
|
||||||
|
|
||||||
# If a gen_schema is given, conform to gen_schema
|
|
||||||
with self.lock:
|
|
||||||
if gen_schema:
|
|
||||||
import jsonformer
|
|
||||||
|
|
||||||
print(f"Schema {gen_schema=}")
|
|
||||||
jsonformer_llm = jsonformer.Jsonformer(
|
|
||||||
model=self.model,
|
|
||||||
tokenizer=self.tokenizer,
|
|
||||||
json_schema=json.loads(gen_schema),
|
|
||||||
prompt=prompt,
|
|
||||||
max_string_token_length=gen_cfg.max_new_tokens,
|
|
||||||
)
|
|
||||||
response = jsonformer_llm()
|
|
||||||
else:
|
|
||||||
# If no gen_schema, perform prompt only generation
|
|
||||||
|
|
||||||
# tokenize prompt
|
|
||||||
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
|
|
||||||
self.model.device
|
|
||||||
)
|
|
||||||
output = self.model.generate(input_ids, generation_config=gen_cfg)
|
|
||||||
|
|
||||||
# decode output
|
|
||||||
response = self.tokenizer.decode(
|
|
||||||
output[0].cpu(), skip_special_tokens=True
|
|
||||||
)
|
|
||||||
response = response[len(prompt) :]
|
|
||||||
print(f"Generated {response=}")
|
|
||||||
return {"text": response}
|
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------------------------------------------
|
|
||||||
# Web API
|
|
||||||
# -------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
@app.function(
|
|
||||||
scaledown_window=60 * 10,
|
|
||||||
timeout=60 * 5,
|
|
||||||
allow_concurrent_inputs=45,
|
|
||||||
secrets=[
|
|
||||||
Secret.from_name("reflector-gpu"),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
@asgi_app()
|
|
||||||
def web():
|
|
||||||
from fastapi import Depends, FastAPI, HTTPException, status
|
|
||||||
from fastapi.security import OAuth2PasswordBearer
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
llmstub = LLM()
|
|
||||||
|
|
||||||
app = FastAPI()
|
|
||||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
|
||||||
|
|
||||||
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
|
|
||||||
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
|
||||||
detail="Invalid API key",
|
|
||||||
headers={"WWW-Authenticate": "Bearer"},
|
|
||||||
)
|
|
||||||
|
|
||||||
class LLMRequest(BaseModel):
|
|
||||||
prompt: str
|
|
||||||
gen_schema: Optional[dict] = None
|
|
||||||
gen_cfg: Optional[dict] = None
|
|
||||||
|
|
||||||
@app.post("/llm", dependencies=[Depends(apikey_auth)])
|
|
||||||
def llm(
|
|
||||||
req: LLMRequest,
|
|
||||||
):
|
|
||||||
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None
|
|
||||||
gen_cfg = json.dumps(req.gen_cfg) if req.gen_cfg else None
|
|
||||||
func = llmstub.generate.spawn(
|
|
||||||
prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg
|
|
||||||
)
|
|
||||||
result = func.get()
|
|
||||||
return result
|
|
||||||
|
|
||||||
return app
|
|
||||||
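Since the diff removes this file without showing a caller, here is a minimal client sketch against the `/llm` route defined above. The deployment URL is the placeholder from the README, and the bearer token is assumed to match the `REFLECTOR_GPU_APIKEY` secret checked by `apikey_auth`.

```python
# Minimal client sketch for the (now removed) reflector-llm Modal app.
import requests

LLM_URL = "https://xxxx--reflector-llm-web.modal.run"  # placeholder from the README
API_KEY = "REFLECTOR_APIKEY"                            # must match REFLECTOR_GPU_APIKEY

payload = {
    "prompt": "Summarize the meeting in one sentence.",
    # Optional: constrain the output with a JSON schema (handled by jsonformer)
    "gen_schema": {"type": "object", "properties": {"summary": {"type": "string"}}},
    # Optional: override generation settings (parsed into a GenerationConfig)
    "gen_cfg": {"max_new_tokens": 100},
}

resp = requests.post(
    f"{LLM_URL}/llm",
    json=payload,
    headers={"Authorization": f"Bearer {API_KEY}"},
    timeout=300,
)
resp.raise_for_status()
print(resp.json()["text"])
```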
@@ -1,220 +0,0 @@
|
|||||||
"""
|
|
||||||
Reflector GPU backend - LLM
|
|
||||||
===========================
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import threading
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import modal
|
|
||||||
from modal import App, Image, Secret, asgi_app, enter, exit, method
|
|
||||||
|
|
||||||
# LLM
|
|
||||||
LLM_MODEL: str = "HuggingFaceH4/zephyr-7b-alpha"
|
|
||||||
LLM_LOW_CPU_MEM_USAGE: bool = True
|
|
||||||
LLM_TORCH_DTYPE: str = "bfloat16"
|
|
||||||
LLM_MAX_NEW_TOKENS: int = 300
|
|
||||||
|
|
||||||
IMAGE_MODEL_DIR = "/root/llm_models/zephyr"
|
|
||||||
|
|
||||||
app = App(name="reflector-llm-zephyr")
|
|
||||||
|
|
||||||
|
|
||||||
def download_llm():
|
|
||||||
from huggingface_hub import snapshot_download
|
|
||||||
|
|
||||||
print("Downloading LLM model")
|
|
||||||
snapshot_download(LLM_MODEL, cache_dir=IMAGE_MODEL_DIR)
|
|
||||||
print("LLM model downloaded")
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_cache_llm():
|
|
||||||
"""
|
|
||||||
XXX The cache for model files in Transformers v4.22.0 has been updated.
|
|
||||||
Migrating your old cache. This is a one-time only operation. You can
|
|
||||||
interrupt this and resume the migration later on by calling
|
|
||||||
`transformers.utils.move_cache()`.
|
|
||||||
"""
|
|
||||||
from transformers.utils.hub import move_cache
|
|
||||||
|
|
||||||
print("Moving LLM cache")
|
|
||||||
move_cache(cache_dir=IMAGE_MODEL_DIR, new_cache_dir=IMAGE_MODEL_DIR)
|
|
||||||
print("LLM cache moved")
|
|
||||||
|
|
||||||
|
|
||||||
llm_image = (
|
|
||||||
Image.debian_slim(python_version="3.10.8")
|
|
||||||
.apt_install("git")
|
|
||||||
.pip_install(
|
|
||||||
"transformers==4.34.0",
|
|
||||||
"torch",
|
|
||||||
"sentencepiece",
|
|
||||||
"protobuf",
|
|
||||||
"jsonformer==0.12.0",
|
|
||||||
"accelerate==0.21.0",
|
|
||||||
"einops==0.6.1",
|
|
||||||
"hf-transfer~=0.1",
|
|
||||||
"huggingface_hub==0.16.4",
|
|
||||||
)
|
|
||||||
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
|
|
||||||
.run_function(download_llm)
|
|
||||||
.run_function(migrate_cache_llm)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.cls(
|
|
||||||
gpu="A10G",
|
|
||||||
timeout=60 * 5,
|
|
||||||
scaledown_window=60 * 5,
|
|
||||||
allow_concurrent_inputs=10,
|
|
||||||
image=llm_image,
|
|
||||||
)
|
|
||||||
class LLM:
|
|
||||||
@enter()
|
|
||||||
def enter(self):
|
|
||||||
import torch
|
|
||||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
|
||||||
|
|
||||||
print("Instance llm model")
|
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
|
||||||
LLM_MODEL,
|
|
||||||
torch_dtype=getattr(torch, LLM_TORCH_DTYPE),
|
|
||||||
low_cpu_mem_usage=LLM_LOW_CPU_MEM_USAGE,
|
|
||||||
cache_dir=IMAGE_MODEL_DIR,
|
|
||||||
local_files_only=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# JSONFormer doesn't yet support generation configs
|
|
||||||
print("Instance llm generation config")
|
|
||||||
model.config.max_new_tokens = LLM_MAX_NEW_TOKENS
|
|
||||||
|
|
||||||
# generation configuration
|
|
||||||
gen_cfg = GenerationConfig.from_model_config(model.config)
|
|
||||||
gen_cfg.max_new_tokens = LLM_MAX_NEW_TOKENS
|
|
||||||
|
|
||||||
# load tokenizer
|
|
||||||
print("Instance llm tokenizer")
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(
|
|
||||||
LLM_MODEL, cache_dir=IMAGE_MODEL_DIR, local_files_only=True
|
|
||||||
)
|
|
||||||
gen_cfg.pad_token_id = tokenizer.eos_token_id
|
|
||||||
gen_cfg.eos_token_id = tokenizer.eos_token_id
|
|
||||||
tokenizer.pad_token = tokenizer.eos_token
|
|
||||||
model.config.pad_token_id = tokenizer.eos_token_id
|
|
||||||
|
|
||||||
# move model to gpu
|
|
||||||
print("Move llm model to GPU")
|
|
||||||
model = model.cuda()
|
|
||||||
|
|
||||||
print("Warmup llm done")
|
|
||||||
self.model = model
|
|
||||||
self.tokenizer = tokenizer
|
|
||||||
self.gen_cfg = gen_cfg
|
|
||||||
self.GenerationConfig = GenerationConfig
|
|
||||||
self.lock = threading.Lock()
|
|
||||||
|
|
||||||
@exit()
|
|
||||||
def exit():
|
|
||||||
print("Exit llm")
|
|
||||||
|
|
||||||
@method()
|
|
||||||
def generate(
|
|
||||||
self, prompt: str, gen_schema: str | None, gen_cfg: str | None
|
|
||||||
) -> dict:
|
|
||||||
"""
|
|
||||||
Perform a generation action using the LLM
|
|
||||||
"""
|
|
||||||
print(f"Generate {prompt=}")
|
|
||||||
if gen_cfg:
|
|
||||||
gen_cfg = self.GenerationConfig.from_dict(json.loads(gen_cfg))
|
|
||||||
gen_cfg.pad_token_id = self.tokenizer.eos_token_id
|
|
||||||
gen_cfg.eos_token_id = self.tokenizer.eos_token_id
|
|
||||||
else:
|
|
||||||
gen_cfg = self.gen_cfg
|
|
||||||
|
|
||||||
# If a gen_schema is given, conform to gen_schema
|
|
||||||
with self.lock:
|
|
||||||
if gen_schema:
|
|
||||||
import jsonformer
|
|
||||||
|
|
||||||
print(f"Schema {gen_schema=}")
|
|
||||||
jsonformer_llm = jsonformer.Jsonformer(
|
|
||||||
model=self.model,
|
|
||||||
tokenizer=self.tokenizer,
|
|
||||||
json_schema=json.loads(gen_schema),
|
|
||||||
prompt=prompt,
|
|
||||||
max_string_token_length=gen_cfg.max_new_tokens,
|
|
||||||
)
|
|
||||||
response = jsonformer_llm()
|
|
||||||
else:
|
|
||||||
# If no gen_schema, perform prompt only generation
|
|
||||||
|
|
||||||
# tokenize prompt
|
|
||||||
input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
|
|
||||||
self.model.device
|
|
||||||
)
|
|
||||||
output = self.model.generate(input_ids, generation_config=gen_cfg)
|
|
||||||
|
|
||||||
# decode output
|
|
||||||
response = self.tokenizer.decode(
|
|
||||||
output[0].cpu(), skip_special_tokens=True
|
|
||||||
)
|
|
||||||
response = response[len(prompt) :]
|
|
||||||
response = {"long_summary": response}
|
|
||||||
print(f"Generated {response=}")
|
|
||||||
return {"text": response}
|
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------------------------------------------
|
|
||||||
# Web API
|
|
||||||
# -------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
@app.function(
|
|
||||||
scaledown_window=60 * 10,
|
|
||||||
timeout=60 * 5,
|
|
||||||
allow_concurrent_inputs=30,
|
|
||||||
secrets=[
|
|
||||||
Secret.from_name("reflector-gpu"),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
@asgi_app()
|
|
||||||
def web():
|
|
||||||
from fastapi import Depends, FastAPI, HTTPException, status
|
|
||||||
from fastapi.security import OAuth2PasswordBearer
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
llmstub = LLM()
|
|
||||||
|
|
||||||
app = FastAPI()
|
|
||||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
|
||||||
|
|
||||||
def apikey_auth(apikey: str = Depends(oauth2_scheme)):
|
|
||||||
if apikey != os.environ["REFLECTOR_GPU_APIKEY"]:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
|
||||||
detail="Invalid API key",
|
|
||||||
headers={"WWW-Authenticate": "Bearer"},
|
|
||||||
)
|
|
||||||
|
|
||||||
class LLMRequest(BaseModel):
|
|
||||||
prompt: str
|
|
||||||
gen_schema: Optional[dict] = None
|
|
||||||
gen_cfg: Optional[dict] = None
|
|
||||||
|
|
||||||
@app.post("/llm", dependencies=[Depends(apikey_auth)])
|
|
||||||
def llm(
|
|
||||||
req: LLMRequest,
|
|
||||||
):
|
|
||||||
gen_schema = json.dumps(req.gen_schema) if req.gen_schema else None
|
|
||||||
gen_cfg = json.dumps(req.gen_cfg) if req.gen_cfg else None
|
|
||||||
func = llmstub.generate.spawn(
|
|
||||||
prompt=req.prompt, gen_schema=gen_schema, gen_cfg=gen_cfg
|
|
||||||
)
|
|
||||||
result = func.get()
|
|
||||||
return result
|
|
||||||
|
|
||||||
return app
|
|
||||||
@@ -1,171 +0,0 @@
|
|||||||
# # Run an OpenAI-Compatible vLLM Server
|
|
||||||
|
|
||||||
import modal
|
|
||||||
|
|
||||||
MODELS_DIR = "/llamas"
|
|
||||||
MODEL_NAME = "NousResearch/Hermes-3-Llama-3.1-8B"
|
|
||||||
N_GPU = 1
|
|
||||||
|
|
||||||
|
|
||||||
def download_llm():
|
|
||||||
from huggingface_hub import snapshot_download
|
|
||||||
|
|
||||||
print("Downloading LLM model")
|
|
||||||
snapshot_download(
|
|
||||||
MODEL_NAME,
|
|
||||||
local_dir=f"{MODELS_DIR}/{MODEL_NAME}",
|
|
||||||
ignore_patterns=[
|
|
||||||
"*.pt",
|
|
||||||
"*.bin",
|
|
||||||
"*.pth",
|
|
||||||
"original/*",
|
|
||||||
], # Ensure safetensors
|
|
||||||
)
|
|
||||||
print("LLM model downloaded")
|
|
||||||
|
|
||||||
|
|
||||||
def move_cache():
|
|
||||||
from transformers.utils import move_cache as transformers_move_cache
|
|
||||||
|
|
||||||
transformers_move_cache()
|
|
||||||
|
|
||||||
|
|
||||||
vllm_image = (
|
|
||||||
modal.Image.debian_slim(python_version="3.10")
|
|
||||||
.pip_install("vllm==0.5.3post1")
|
|
||||||
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
|
|
||||||
.pip_install(
|
|
||||||
# "accelerate==0.34.2",
|
|
||||||
"einops==0.8.0",
|
|
||||||
"hf-transfer~=0.1",
|
|
||||||
)
|
|
||||||
.run_function(download_llm)
|
|
||||||
.run_function(move_cache)
|
|
||||||
.pip_install(
|
|
||||||
"bitsandbytes>=0.42.9",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
app = modal.App("reflector-vllm-hermes3")
|
|
||||||
|
|
||||||
|
|
||||||
@app.function(
|
|
||||||
image=vllm_image,
|
|
||||||
gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
|
|
||||||
timeout=60 * 5,
|
|
||||||
scaledown_window=60 * 5,
|
|
||||||
allow_concurrent_inputs=100,
|
|
||||||
secrets=[
|
|
||||||
modal.Secret.from_name("reflector-gpu"),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
@modal.asgi_app()
|
|
||||||
def serve():
|
|
||||||
import os
|
|
||||||
|
|
||||||
import fastapi
|
|
||||||
import vllm.entrypoints.openai.api_server as api_server
|
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
|
||||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
|
||||||
from vllm.entrypoints.logger import RequestLogger
|
|
||||||
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
|
|
||||||
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
|
||||||
from vllm.usage.usage_lib import UsageContext
|
|
||||||
|
|
||||||
TOKEN = os.environ["REFLECTOR_GPU_APIKEY"]
|
|
||||||
|
|
||||||
# create a fastAPI app that uses vLLM's OpenAI-compatible router
|
|
||||||
web_app = fastapi.FastAPI(
|
|
||||||
title=f"OpenAI-compatible {MODEL_NAME} server",
|
|
||||||
description="Run an OpenAI-compatible LLM server with vLLM on modal.com",
|
|
||||||
version="0.0.1",
|
|
||||||
docs_url="/docs",
|
|
||||||
)
|
|
||||||
|
|
||||||
# security: CORS middleware for external requests
|
|
||||||
http_bearer = fastapi.security.HTTPBearer(
|
|
||||||
scheme_name="Bearer Token",
|
|
||||||
description="See code for authentication details.",
|
|
||||||
)
|
|
||||||
web_app.add_middleware(
|
|
||||||
fastapi.middleware.cors.CORSMiddleware,
|
|
||||||
allow_origins=["*"],
|
|
||||||
allow_credentials=True,
|
|
||||||
allow_methods=["*"],
|
|
||||||
allow_headers=["*"],
|
|
||||||
)
|
|
||||||
|
|
||||||
# security: inject dependency on authed routes
|
|
||||||
async def is_authenticated(api_key: str = fastapi.Security(http_bearer)):
|
|
||||||
if api_key.credentials != TOKEN:
|
|
||||||
raise fastapi.HTTPException(
|
|
||||||
status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
|
|
||||||
detail="Invalid authentication credentials",
|
|
||||||
)
|
|
||||||
return {"username": "authenticated_user"}
|
|
||||||
|
|
||||||
router = fastapi.APIRouter(dependencies=[fastapi.Depends(is_authenticated)])
|
|
||||||
|
|
||||||
# wrap vllm's router in auth router
|
|
||||||
router.include_router(api_server.router)
|
|
||||||
# add authed vllm to our fastAPI app
|
|
||||||
web_app.include_router(router)
|
|
||||||
|
|
||||||
engine_args = AsyncEngineArgs(
|
|
||||||
model=MODELS_DIR + "/" + MODEL_NAME,
|
|
||||||
tensor_parallel_size=N_GPU,
|
|
||||||
gpu_memory_utilization=0.90,
|
|
||||||
# max_model_len=8096,
|
|
||||||
enforce_eager=False, # capture the graph for faster inference, but slower cold starts (30s > 20s)
|
|
||||||
# --- 4 bits load
|
|
||||||
# quantization="bitsandbytes",
|
|
||||||
# load_format="bitsandbytes",
|
|
||||||
)
|
|
||||||
|
|
||||||
engine = AsyncLLMEngine.from_engine_args(
|
|
||||||
engine_args, usage_context=UsageContext.OPENAI_API_SERVER
|
|
||||||
)
|
|
||||||
|
|
||||||
model_config = get_model_config(engine)
|
|
||||||
|
|
||||||
request_logger = RequestLogger(max_log_len=2048)
|
|
||||||
|
|
||||||
api_server.openai_serving_chat = OpenAIServingChat(
|
|
||||||
engine,
|
|
||||||
model_config=model_config,
|
|
||||||
served_model_names=[MODEL_NAME],
|
|
||||||
chat_template=None,
|
|
||||||
response_role="assistant",
|
|
||||||
lora_modules=[],
|
|
||||||
prompt_adapters=[],
|
|
||||||
request_logger=request_logger,
|
|
||||||
)
|
|
||||||
api_server.openai_serving_completion = OpenAIServingCompletion(
|
|
||||||
engine,
|
|
||||||
model_config=model_config,
|
|
||||||
served_model_names=[MODEL_NAME],
|
|
||||||
lora_modules=[],
|
|
||||||
prompt_adapters=[],
|
|
||||||
request_logger=request_logger,
|
|
||||||
)
|
|
||||||
|
|
||||||
return web_app
|
|
||||||
|
|
||||||
|
|
||||||
def get_model_config(engine):
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
try: # adapted from vLLM source -- https://github.com/vllm-project/vllm/blob/507ef787d85dec24490069ffceacbd6b161f4f72/vllm/entrypoints/openai/api_server.py#L235C1-L247C1
|
|
||||||
event_loop = asyncio.get_running_loop()
|
|
||||||
except RuntimeError:
|
|
||||||
event_loop = None
|
|
||||||
|
|
||||||
if event_loop is not None and event_loop.is_running():
|
|
||||||
# If the current is instanced by Ray Serve,
|
|
||||||
# there is already a running event loop
|
|
||||||
model_config = event_loop.run_until_complete(engine.get_model_config())
|
|
||||||
else:
|
|
||||||
# When using single vLLM without engine_use_ray
|
|
||||||
model_config = asyncio.run(engine.get_model_config())
|
|
||||||
|
|
||||||
return model_config
|
|
||||||
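This app wraps vLLM's stock OpenAI-compatible router behind the same bearer-token check, so a standard OpenAI client can talk to it. A sketch, with a placeholder base URL and the assumption that the key matches the `REFLECTOR_GPU_APIKEY` secret:

```python
# Sketch of a client call to the OpenAI-compatible vLLM app above.
from openai import OpenAI

client = OpenAI(
    base_url="https://xxxx--reflector-vllm-hermes3-serve.modal.run/v1",  # placeholder URL
    api_key="REFLECTOR_APIKEY",  # must match the secret checked by is_authenticated()
)

completion = client.chat.completions.create(
    model="NousResearch/Hermes-3-Llama-3.1-8B",
    messages=[{"role": "user", "content": "Give me a two-sentence meeting summary."}],
    max_tokens=100,
)
print(completion.choices[0].message.content)
```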
@@ -1 +1,3 @@
 Generic single-database configuration.
+
+Both data migrations and schema migrations must be in migrations.
@@ -1,9 +1,10 @@
|
|||||||
from logging.config import fileConfig
|
from logging.config import fileConfig
|
||||||
|
|
||||||
from alembic import context
|
from alembic import context
|
||||||
|
from sqlalchemy import engine_from_config, pool
|
||||||
|
|
||||||
from reflector.db import metadata
|
from reflector.db import metadata
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from sqlalchemy import engine_from_config, pool
|
|
||||||
|
|
||||||
# this is the Alembic Config object, which provides
|
# this is the Alembic Config object, which provides
|
||||||
# access to the values within the .ini file in use.
|
# access to the values within the .ini file in use.
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ Create Date: 2024-09-24 16:12:56.944133
|
|||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
import sqlalchemy as sa
|
|
||||||
from alembic import op
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
|
|||||||
@@ -0,0 +1,25 @@
|
|||||||
|
"""add_webvtt_field_to_transcript
|
||||||
|
|
||||||
|
Revision ID: 0bc0f3ff0111
|
||||||
|
Revises: b7df9609542c
|
||||||
|
Create Date: 2025-08-05 19:36:41.740957
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "0bc0f3ff0111"
|
||||||
|
down_revision: Union[str, None] = "b7df9609542c"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.add_column("transcript", sa.Column("webvtt", sa.Text(), nullable=True))
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_column("transcript", "webvtt")
|
||||||
@@ -5,11 +5,11 @@ Revises: f819277e5169
 Create Date: 2023-11-07 11:12:21.614198
 
 """
 
 from typing import Sequence, Union
 
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
 
 # revision identifiers, used by Alembic.
 revision: str = "0fea6d96b096"
@@ -0,0 +1,46 @@
+"""add_full_text_search
+
+Revision ID: 116b2f287eab
+Revises: 0bc0f3ff0111
+Create Date: 2025-08-07 11:27:38.473517
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+
+revision: str = "116b2f287eab"
+down_revision: Union[str, None] = "0bc0f3ff0111"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+    if conn.dialect.name != "postgresql":
+        return
+
+    op.execute("""
+        ALTER TABLE transcript ADD COLUMN search_vector_en tsvector
+        GENERATED ALWAYS AS (
+            setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
+            setweight(to_tsvector('english', coalesce(webvtt, '')), 'B')
+        ) STORED
+    """)
+
+    op.create_index(
+        "idx_transcript_search_vector_en",
+        "transcript",
+        ["search_vector_en"],
+        postgresql_using="gin",
+    )
+
+
+def downgrade() -> None:
+    conn = op.get_bind()
+    if conn.dialect.name != "postgresql":
+        return
+
+    op.drop_index("idx_transcript_search_vector_en", table_name="transcript")
+    op.drop_column("transcript", "search_vector_en")
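Aside (sketch, not part of the diff): once the generated search_vector_en column and its GIN index exist, a ranked full-text query could look roughly like the snippet below. Only the table, column, and index names come from the migration; the query shape and the helper name are assumptions.

from sqlalchemy import text

SEARCH_SQL = text(
    """
    SELECT id, title,
           ts_rank(search_vector_en, plainto_tsquery('english', :q)) AS rank
    FROM transcript
    WHERE search_vector_en @@ plainto_tsquery('english', :q)
    ORDER BY rank DESC
    LIMIT 20
    """
)

def search_transcripts(conn, query):
    # conn is any SQLAlchemy connection or engine bind; the GIN index added
    # by the migration is what keeps the @@ match cheap.
    return conn.execute(SEARCH_SQL, {"q": query}).fetchall()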
@@ -5,26 +5,26 @@ Revises: 0fea6d96b096
|
|||||||
Create Date: 2023-11-30 15:56:03.341466
|
Create Date: 2023-11-30 15:56:03.341466
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = '125031f7cb78'
|
revision: str = "125031f7cb78"
|
||||||
down_revision: Union[str, None] = '0fea6d96b096'
|
down_revision: Union[str, None] = "0fea6d96b096"
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.add_column('transcript', sa.Column('participants', sa.JSON(), nullable=True))
|
op.add_column("transcript", sa.Column("participants", sa.JSON(), nullable=True))
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.drop_column('transcript', 'participants')
|
op.drop_column("transcript", "participants")
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ Revises: f819277e5169
|
|||||||
Create Date: 2025-06-17 14:00:03.000000
|
Create Date: 2025-06-17 14:00:03.000000
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
@@ -19,16 +20,16 @@ depends_on: Union[str, Sequence[str], None] = None
|
|||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
op.create_table(
|
op.create_table(
|
||||||
'meeting_consent',
|
"meeting_consent",
|
||||||
sa.Column('id', sa.String(), nullable=False),
|
sa.Column("id", sa.String(), nullable=False),
|
||||||
sa.Column('meeting_id', sa.String(), nullable=False),
|
sa.Column("meeting_id", sa.String(), nullable=False),
|
||||||
sa.Column('user_id', sa.String(), nullable=True),
|
sa.Column("user_id", sa.String(), nullable=True),
|
||||||
sa.Column('consent_given', sa.Boolean(), nullable=False),
|
sa.Column("consent_given", sa.Boolean(), nullable=False),
|
||||||
sa.Column('consent_timestamp', sa.DateTime(), nullable=False),
|
sa.Column("consent_timestamp", sa.DateTime(), nullable=False),
|
||||||
sa.PrimaryKeyConstraint('id'),
|
sa.PrimaryKeyConstraint("id"),
|
||||||
sa.ForeignKeyConstraint(['meeting_id'], ['meeting.id']),
|
sa.ForeignKeyConstraint(["meeting_id"], ["meeting.id"]),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
op.drop_table('meeting_consent')
|
op.drop_table("meeting_consent")
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ Revises: 20250617140003
|
|||||||
Create Date: 2025-06-18 14:00:00.000000
|
Create Date: 2025-06-18 14:00:00.000000
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
@@ -22,4 +23,4 @@ def upgrade() -> None:
|
|||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
op.drop_column("transcript", "audio_deleted")
|
op.drop_column("transcript", "audio_deleted")
|
||||||
|
|||||||
@@ -5,36 +5,40 @@ Revises: ccd68dc784ff
|
|||||||
Create Date: 2025-07-15 16:53:40.397394
|
Create Date: 2025-07-15 16:53:40.397394
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = '2cf0b60a9d34'
|
revision: str = "2cf0b60a9d34"
|
||||||
down_revision: Union[str, None] = 'ccd68dc784ff'
|
down_revision: Union[str, None] = "ccd68dc784ff"
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
with op.batch_alter_table('transcript', schema=None) as batch_op:
|
with op.batch_alter_table("transcript", schema=None) as batch_op:
|
||||||
batch_op.alter_column('duration',
|
batch_op.alter_column(
|
||||||
existing_type=sa.INTEGER(),
|
"duration",
|
||||||
type_=sa.Float(),
|
existing_type=sa.INTEGER(),
|
||||||
existing_nullable=True)
|
type_=sa.Float(),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
with op.batch_alter_table('transcript', schema=None) as batch_op:
|
with op.batch_alter_table("transcript", schema=None) as batch_op:
|
||||||
batch_op.alter_column('duration',
|
batch_op.alter_column(
|
||||||
existing_type=sa.Float(),
|
"duration",
|
||||||
type_=sa.INTEGER(),
|
existing_type=sa.Float(),
|
||||||
existing_nullable=True)
|
type_=sa.INTEGER(),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|||||||
@@ -5,17 +5,17 @@ Revises: 9920ecfe2735
|
|||||||
Create Date: 2023-11-02 19:53:09.116240
|
Create Date: 2023-11-02 19:53:09.116240
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
from sqlalchemy.sql import table, column
|
from alembic import op
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.sql import column, table
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = '38a927dcb099'
|
revision: str = "38a927dcb099"
|
||||||
down_revision: Union[str, None] = '9920ecfe2735'
|
down_revision: Union[str, None] = "9920ecfe2735"
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|||||||
@@ -5,13 +5,13 @@ Revises: 38a927dcb099
|
|||||||
Create Date: 2023-11-10 18:12:17.886522
|
Create Date: 2023-11-10 18:12:17.886522
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
from sqlalchemy.sql import table, column
|
from alembic import op
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.sql import column, table
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "4814901632bc"
|
revision: str = "4814901632bc"
|
||||||
@@ -24,9 +24,11 @@ def upgrade() -> None:
|
|||||||
# for all the transcripts, calculate the duration from the mp3
|
# for all the transcripts, calculate the duration from the mp3
|
||||||
# and update the duration column
|
# and update the duration column
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from reflector.settings import settings
|
|
||||||
import av
|
import av
|
||||||
|
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
bind = op.get_bind()
|
bind = op.get_bind()
|
||||||
transcript = table(
|
transcript = table(
|
||||||
"transcript", column("id", sa.String), column("duration", sa.Float)
|
"transcript", column("id", sa.String), column("duration", sa.Float)
|
||||||
|
|||||||
@@ -5,14 +5,11 @@ Revises:
|
|||||||
Create Date: 2023-08-29 10:54:45.142974
|
Create Date: 2023-08-29 10:54:45.142974
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
|
||||||
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = '543ed284d69a'
|
revision: str = "543ed284d69a"
|
||||||
down_revision: Union[str, None] = None
|
down_revision: Union[str, None] = None
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|||||||
@@ -0,0 +1,53 @@
|
|||||||
|
"""remove_one_active_meeting_per_room_constraint
|
||||||
|
|
||||||
|
Revision ID: 6025e9b2bef2
|
||||||
|
Revises: 9f5c78d352d6
|
||||||
|
Create Date: 2025-08-18 18:45:44.418392
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "6025e9b2bef2"
|
||||||
|
down_revision: Union[str, None] = "9f5c78d352d6"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
# Remove the unique constraint that prevents multiple active meetings per room
|
||||||
|
# This is needed to support calendar integration with overlapping meetings
|
||||||
|
# Check if index exists before trying to drop it
|
||||||
|
from alembic import context
|
||||||
|
|
||||||
|
if context.get_context().dialect.name == "postgresql":
|
||||||
|
conn = op.get_bind()
|
||||||
|
result = conn.execute(
|
||||||
|
sa.text(
|
||||||
|
"SELECT 1 FROM pg_indexes WHERE indexname = 'idx_one_active_meeting_per_room'"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if result.fetchone():
|
||||||
|
op.drop_index("idx_one_active_meeting_per_room", table_name="meeting")
|
||||||
|
else:
|
||||||
|
# For SQLite, just try to drop it
|
||||||
|
try:
|
||||||
|
op.drop_index("idx_one_active_meeting_per_room", table_name="meeting")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
# Restore the unique constraint
|
||||||
|
op.create_index(
|
||||||
|
"idx_one_active_meeting_per_room",
|
||||||
|
"meeting",
|
||||||
|
["room_id"],
|
||||||
|
unique=True,
|
||||||
|
postgresql_where=sa.text("is_active = true"),
|
||||||
|
sqlite_where=sa.text("is_active = 1"),
|
||||||
|
)
|
||||||
@@ -8,9 +8,8 @@ Create Date: 2025-06-27 09:04:21.006823
|
|||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "62dea3db63a5"
|
revision: str = "62dea3db63a5"
|
||||||
@@ -33,7 +32,7 @@ def upgrade() -> None:
|
|||||||
sa.Column("user_id", sa.String(), nullable=True),
|
sa.Column("user_id", sa.String(), nullable=True),
|
||||||
sa.Column("room_id", sa.String(), nullable=True),
|
sa.Column("room_id", sa.String(), nullable=True),
|
||||||
sa.Column(
|
sa.Column(
|
||||||
"is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False
|
"is_locked", sa.Boolean(), server_default=sa.text("false"), nullable=False
|
||||||
),
|
),
|
||||||
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
|
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
|
||||||
sa.Column(
|
sa.Column(
|
||||||
@@ -54,12 +53,15 @@ def upgrade() -> None:
|
|||||||
sa.Column("user_id", sa.String(), nullable=False),
|
sa.Column("user_id", sa.String(), nullable=False),
|
||||||
sa.Column("created_at", sa.DateTime(), nullable=False),
|
sa.Column("created_at", sa.DateTime(), nullable=False),
|
||||||
sa.Column(
|
sa.Column(
|
||||||
"zulip_auto_post", sa.Boolean(), server_default=sa.text("0"), nullable=False
|
"zulip_auto_post",
|
||||||
|
sa.Boolean(),
|
||||||
|
server_default=sa.text("false"),
|
||||||
|
nullable=False,
|
||||||
),
|
),
|
||||||
sa.Column("zulip_stream", sa.String(), nullable=True),
|
sa.Column("zulip_stream", sa.String(), nullable=True),
|
||||||
sa.Column("zulip_topic", sa.String(), nullable=True),
|
sa.Column("zulip_topic", sa.String(), nullable=True),
|
||||||
sa.Column(
|
sa.Column(
|
||||||
"is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False
|
"is_locked", sa.Boolean(), server_default=sa.text("false"), nullable=False
|
||||||
),
|
),
|
||||||
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
|
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
|
||||||
sa.Column(
|
sa.Column(
|
||||||
|
|||||||
@@ -20,11 +20,14 @@ depends_on: Union[str, Sequence[str], None] = None
|
|||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
sourcekind_enum = sa.Enum("room", "live", "file", name="sourcekind")
|
||||||
|
sourcekind_enum.create(op.get_bind())
|
||||||
|
|
||||||
op.add_column(
|
op.add_column(
|
||||||
"transcript",
|
"transcript",
|
||||||
sa.Column(
|
sa.Column(
|
||||||
"source_kind",
|
"source_kind",
|
||||||
sa.Enum("ROOM", "LIVE", "FILE", name="sourcekind"),
|
sourcekind_enum,
|
||||||
nullable=True,
|
nullable=True,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
@@ -43,6 +46,8 @@ def upgrade() -> None:
|
|||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.drop_column("transcript", "source_kind")
|
op.drop_column("transcript", "source_kind")
|
||||||
|
sourcekind_enum = sa.Enum(name="sourcekind")
|
||||||
|
sourcekind_enum.drop(op.get_bind())
|
||||||
|
|
||||||
|
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|||||||
@@ -5,26 +5,28 @@ Revises: 62dea3db63a5
|
|||||||
Create Date: 2024-09-06 14:02:06.649665
|
Create Date: 2024-09-06 14:02:06.649665
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = '764ce6db4388'
|
revision: str = "764ce6db4388"
|
||||||
down_revision: Union[str, None] = '62dea3db63a5'
|
down_revision: Union[str, None] = "62dea3db63a5"
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.add_column('transcript', sa.Column('zulip_message_id', sa.Integer(), nullable=True))
|
op.add_column(
|
||||||
|
"transcript", sa.Column("zulip_message_id", sa.Integer(), nullable=True)
|
||||||
|
)
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.drop_column('transcript', 'zulip_message_id')
|
op.drop_column("transcript", "zulip_message_id")
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|||||||
@@ -0,0 +1,106 @@
|
|||||||
|
"""populate_webvtt_from_topics
|
||||||
|
|
||||||
|
Revision ID: 8120ebc75366
|
||||||
|
Revises: 116b2f287eab
|
||||||
|
Create Date: 2025-08-11 19:11:01.316947
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "8120ebc75366"
|
||||||
|
down_revision: Union[str, None] = "116b2f287eab"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def topics_to_webvtt(topics):
|
||||||
|
"""Convert topics list to WebVTT format string."""
|
||||||
|
if not topics:
|
||||||
|
return None
|
||||||
|
|
||||||
|
lines = ["WEBVTT", ""]
|
||||||
|
|
||||||
|
for topic in topics:
|
||||||
|
start_time = format_timestamp(topic.get("start"))
|
||||||
|
end_time = format_timestamp(topic.get("end"))
|
||||||
|
text = topic.get("text", "").strip()
|
||||||
|
|
||||||
|
if start_time and end_time and text:
|
||||||
|
lines.append(f"{start_time} --> {end_time}")
|
||||||
|
lines.append(text)
|
||||||
|
lines.append("")
|
||||||
|
|
||||||
|
return "\n".join(lines).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def format_timestamp(seconds):
|
||||||
|
"""Format seconds to WebVTT timestamp format (HH:MM:SS.mmm)."""
|
||||||
|
if seconds is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
hours = int(seconds // 3600)
|
||||||
|
minutes = int((seconds % 3600) // 60)
|
||||||
|
secs = seconds % 60
|
||||||
|
|
||||||
|
return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
"""Populate WebVTT field for all transcripts with topics."""
|
||||||
|
|
||||||
|
# Get connection
|
||||||
|
connection = op.get_bind()
|
||||||
|
|
||||||
|
# Query all transcripts with topics
|
||||||
|
result = connection.execute(
|
||||||
|
text("SELECT id, topics FROM transcript WHERE topics IS NOT NULL")
|
||||||
|
)
|
||||||
|
|
||||||
|
rows = result.fetchall()
|
||||||
|
print(f"Found {len(rows)} transcripts with topics")
|
||||||
|
|
||||||
|
updated_count = 0
|
||||||
|
error_count = 0
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
transcript_id = row[0]
|
||||||
|
topics_data = row[1]
|
||||||
|
|
||||||
|
if not topics_data:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Parse JSON if it's a string
|
||||||
|
if isinstance(topics_data, str):
|
||||||
|
topics_data = json.loads(topics_data)
|
||||||
|
|
||||||
|
# Convert topics to WebVTT format
|
||||||
|
webvtt_content = topics_to_webvtt(topics_data)
|
||||||
|
|
||||||
|
if webvtt_content:
|
||||||
|
# Update the webvtt field
|
||||||
|
connection.execute(
|
||||||
|
text("UPDATE transcript SET webvtt = :webvtt WHERE id = :id"),
|
||||||
|
{"webvtt": webvtt_content, "id": transcript_id},
|
||||||
|
)
|
||||||
|
updated_count += 1
|
||||||
|
print(f"✓ Updated transcript {transcript_id}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
error_count += 1
|
||||||
|
print(f"✗ Error updating transcript {transcript_id}: {e}")
|
||||||
|
|
||||||
|
print(f"\nMigration complete!")
|
||||||
|
print(f" Updated: {updated_count}")
|
||||||
|
print(f" Errors: {error_count}")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
"""Clear WebVTT field for all transcripts."""
|
||||||
|
op.execute(text("UPDATE transcript SET webvtt = NULL"))
|
||||||
@@ -9,8 +9,6 @@ Create Date: 2025-07-15 19:30:19.876332
|
|||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
from alembic import op
|
||||||
import sqlalchemy as sa
|
|
||||||
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "88d292678ba2"
|
revision: str = "88d292678ba2"
|
||||||
@@ -21,7 +19,7 @@ depends_on: Union[str, Sequence[str], None] = None
|
|||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
|
|
||||||
# Get database connection
|
# Get database connection
|
||||||
@@ -58,7 +56,9 @@ def upgrade() -> None:
|
|||||||
fixed_events = json.dumps(jevents)
|
fixed_events = json.dumps(jevents)
|
||||||
assert "NaN" not in fixed_events
|
assert "NaN" not in fixed_events
|
||||||
except (json.JSONDecodeError, AssertionError) as e:
|
except (json.JSONDecodeError, AssertionError) as e:
|
||||||
print(f"Warning: Invalid JSON for transcript {transcript_id}, skipping: {e}")
|
print(
|
||||||
|
f"Warning: Invalid JSON for transcript {transcript_id}, skipping: {e}"
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Update the record with fixed JSON
|
# Update the record with fixed JSON
|
||||||
|
|||||||
@@ -5,13 +5,13 @@ Revises: 99365b0cd87b
|
|||||||
Create Date: 2023-11-02 18:55:17.019498
|
Create Date: 2023-11-02 18:55:17.019498
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
from sqlalchemy.sql import table, column
|
from alembic import op
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.sql import column, table
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "9920ecfe2735"
|
revision: str = "9920ecfe2735"
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ Create Date: 2023-09-01 20:19:47.216334
|
|||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "99365b0cd87b"
|
revision: str = "99365b0cd87b"
|
||||||
@@ -22,7 +22,7 @@ def upgrade() -> None:
|
|||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.execute(
|
op.execute(
|
||||||
"UPDATE transcript SET events = "
|
"UPDATE transcript SET events = "
|
||||||
'REPLACE(events, \'"event": "SUMMARY"\', \'"event": "LONG_SUMMARY"\');'
|
'REPLACE(events::text, \'"event": "SUMMARY"\', \'"event": "LONG_SUMMARY"\')::json;'
|
||||||
)
|
)
|
||||||
op.alter_column("transcript", "summary", new_column_name="long_summary")
|
op.alter_column("transcript", "summary", new_column_name="long_summary")
|
||||||
op.add_column("transcript", sa.Column("title", sa.String(), nullable=True))
|
op.add_column("transcript", sa.Column("title", sa.String(), nullable=True))
|
||||||
@@ -34,7 +34,7 @@ def downgrade() -> None:
|
|||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.execute(
|
op.execute(
|
||||||
"UPDATE transcript SET events = "
|
"UPDATE transcript SET events = "
|
||||||
'REPLACE(events, \'"event": "LONG_SUMMARY"\', \'"event": "SUMMARY"\');'
|
'REPLACE(events::text, \'"event": "LONG_SUMMARY"\', \'"event": "SUMMARY"\')::json;'
|
||||||
)
|
)
|
||||||
with op.batch_alter_table("transcript", schema=None) as batch_op:
|
with op.batch_alter_table("transcript", schema=None) as batch_op:
|
||||||
batch_op.alter_column("long_summary", nullable=True, new_column_name="summary")
|
batch_op.alter_column("long_summary", nullable=True, new_column_name="summary")
|
||||||
|
|||||||
121
server/migrations/versions/9f5c78d352d6_datetime_timezone.py
Normal file
121
server/migrations/versions/9f5c78d352d6_datetime_timezone.py
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
"""datetime timezone
|
||||||
|
|
||||||
|
Revision ID: 9f5c78d352d6
|
||||||
|
Revises: 8120ebc75366
|
||||||
|
Create Date: 2025-08-13 19:18:27.113593
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "9f5c78d352d6"
|
||||||
|
down_revision: Union[str, None] = "8120ebc75366"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table("meeting", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"start_date",
|
||||||
|
existing_type=postgresql.TIMESTAMP(),
|
||||||
|
type_=sa.DateTime(timezone=True),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
batch_op.alter_column(
|
||||||
|
"end_date",
|
||||||
|
existing_type=postgresql.TIMESTAMP(),
|
||||||
|
type_=sa.DateTime(timezone=True),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"consent_timestamp",
|
||||||
|
existing_type=postgresql.TIMESTAMP(),
|
||||||
|
type_=sa.DateTime(timezone=True),
|
||||||
|
existing_nullable=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("recording", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"recorded_at",
|
||||||
|
existing_type=postgresql.TIMESTAMP(),
|
||||||
|
type_=sa.DateTime(timezone=True),
|
||||||
|
existing_nullable=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("room", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"created_at",
|
||||||
|
existing_type=postgresql.TIMESTAMP(),
|
||||||
|
type_=sa.DateTime(timezone=True),
|
||||||
|
existing_nullable=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("transcript", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"created_at",
|
||||||
|
existing_type=postgresql.TIMESTAMP(),
|
||||||
|
type_=sa.DateTime(timezone=True),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table("transcript", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"created_at",
|
||||||
|
existing_type=sa.DateTime(timezone=True),
|
||||||
|
type_=postgresql.TIMESTAMP(),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("room", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"created_at",
|
||||||
|
existing_type=sa.DateTime(timezone=True),
|
||||||
|
type_=postgresql.TIMESTAMP(),
|
||||||
|
existing_nullable=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("recording", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"recorded_at",
|
||||||
|
existing_type=sa.DateTime(timezone=True),
|
||||||
|
type_=postgresql.TIMESTAMP(),
|
||||||
|
existing_nullable=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("meeting_consent", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"consent_timestamp",
|
||||||
|
existing_type=sa.DateTime(timezone=True),
|
||||||
|
type_=postgresql.TIMESTAMP(),
|
||||||
|
existing_nullable=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
with op.batch_alter_table("meeting", schema=None) as batch_op:
|
||||||
|
batch_op.alter_column(
|
||||||
|
"end_date",
|
||||||
|
existing_type=sa.DateTime(timezone=True),
|
||||||
|
type_=postgresql.TIMESTAMP(),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
batch_op.alter_column(
|
||||||
|
"start_date",
|
||||||
|
existing_type=sa.DateTime(timezone=True),
|
||||||
|
type_=postgresql.TIMESTAMP(),
|
||||||
|
existing_nullable=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
||||||
@@ -25,7 +25,7 @@ def upgrade() -> None:
|
|||||||
sa.Column(
|
sa.Column(
|
||||||
"is_shared",
|
"is_shared",
|
||||||
sa.Boolean(),
|
sa.Boolean(),
|
||||||
server_default=sa.text("0"),
|
server_default=sa.text("false"),
|
||||||
nullable=False,
|
nullable=False,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,8 +9,6 @@ Create Date: 2025-07-15 20:09:40.253018
|
|||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
from alembic import op
|
||||||
import sqlalchemy as sa
|
|
||||||
from sqlalchemy.dialects import postgresql
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "a9c9c229ee36"
|
revision: str = "a9c9c229ee36"
|
||||||
|
|||||||
@@ -5,30 +5,37 @@ Revises: 6ea59639f30e
|
|||||||
Create Date: 2025-01-28 10:06:50.446233
|
Create Date: 2025-01-28 10:06:50.446233
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = 'b0e5f7876032'
|
revision: str = "b0e5f7876032"
|
||||||
down_revision: Union[str, None] = '6ea59639f30e'
|
down_revision: Union[str, None] = "6ea59639f30e"
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
with op.batch_alter_table('meeting', schema=None) as batch_op:
|
with op.batch_alter_table("meeting", schema=None) as batch_op:
|
||||||
batch_op.add_column(sa.Column('is_active', sa.Boolean(), server_default=sa.text('1'), nullable=False))
|
batch_op.add_column(
|
||||||
|
sa.Column(
|
||||||
|
"is_active",
|
||||||
|
sa.Boolean(),
|
||||||
|
server_default=sa.text("true"),
|
||||||
|
nullable=False,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
with op.batch_alter_table('meeting', schema=None) as batch_op:
|
with op.batch_alter_table("meeting", schema=None) as batch_op:
|
||||||
batch_op.drop_column('is_active')
|
batch_op.drop_column("is_active")
|
||||||
|
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|||||||
@@ -8,9 +8,8 @@ Create Date: 2025-06-27 08:57:16.306940
|
|||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "b3df9681cae9"
|
revision: str = "b3df9681cae9"
|
||||||
|
|||||||
@@ -8,9 +8,8 @@ Create Date: 2024-10-11 13:45:28.914902
|
|||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "b469348df210"
|
revision: str = "b469348df210"
|
||||||
|
|||||||
@@ -0,0 +1,35 @@
|
|||||||
|
"""add_unique_constraint_one_active_meeting_per_room
|
||||||
|
|
||||||
|
Revision ID: b7df9609542c
|
||||||
|
Revises: d7fbb74b673b
|
||||||
|
Create Date: 2025-07-25 16:27:06.959868
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "b7df9609542c"
|
||||||
|
down_revision: Union[str, None] = "d7fbb74b673b"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
# Create a partial unique index that ensures only one active meeting per room
|
||||||
|
# This works for both PostgreSQL and SQLite
|
||||||
|
op.create_index(
|
||||||
|
"idx_one_active_meeting_per_room",
|
||||||
|
"meeting",
|
||||||
|
["room_id"],
|
||||||
|
unique=True,
|
||||||
|
postgresql_where=sa.text("is_active = true"),
|
||||||
|
sqlite_where=sa.text("is_active = 1"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_index("idx_one_active_meeting_per_room", table_name="meeting")
|
||||||
@@ -5,25 +5,31 @@ Revises: 125031f7cb78
|
|||||||
Create Date: 2023-12-13 15:37:51.303970
|
Create Date: 2023-12-13 15:37:51.303970
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = 'b9348748bbbc'
|
revision: str = "b9348748bbbc"
|
||||||
down_revision: Union[str, None] = '125031f7cb78'
|
down_revision: Union[str, None] = "125031f7cb78"
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.add_column('transcript', sa.Column('reviewed', sa.Boolean(), server_default=sa.text('0'), nullable=False))
|
op.add_column(
|
||||||
|
"transcript",
|
||||||
|
sa.Column(
|
||||||
|
"reviewed", sa.Boolean(), server_default=sa.text("false"), nullable=False
|
||||||
|
),
|
||||||
|
)
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
op.drop_column('transcript', 'reviewed')
|
op.drop_column("transcript", "reviewed")
|
||||||
# ### end Alembic commands ###
|
# ### end Alembic commands ###
|
||||||
|
|||||||
@@ -9,8 +9,6 @@ Create Date: 2025-07-15 11:48:42.854741
|
|||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
from alembic import op
|
||||||
import sqlalchemy as sa
|
|
||||||
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "ccd68dc784ff"
|
revision: str = "ccd68dc784ff"
|
||||||
|
|||||||
@@ -8,9 +8,8 @@ Create Date: 2025-06-27 09:27:25.302152
|
|||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "d3ff3a39297f"
|
revision: str = "d3ff3a39297f"
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
"""add_grace_period_fields_to_meeting
|
||||||
|
|
||||||
|
Revision ID: d4a1c446458c
|
||||||
|
Revises: 6025e9b2bef2
|
||||||
|
Create Date: 2025-08-18 18:50:37.768052
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "d4a1c446458c"
|
||||||
|
down_revision: Union[str, None] = "6025e9b2bef2"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
# Add fields to track when participants left for grace period logic
|
||||||
|
op.add_column(
|
||||||
|
"meeting", sa.Column("last_participant_left_at", sa.DateTime(timezone=True))
|
||||||
|
)
|
||||||
|
op.add_column(
|
||||||
|
"meeting",
|
||||||
|
sa.Column("grace_period_minutes", sa.Integer, server_default=sa.text("15")),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_column("meeting", "grace_period_minutes")
|
||||||
|
op.drop_column("meeting", "last_participant_left_at")
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
"""Add room_id to transcript
|
||||||
|
|
||||||
|
Revision ID: d7fbb74b673b
|
||||||
|
Revises: a9c9c229ee36
|
||||||
|
Create Date: 2025-07-17 12:00:00.000000
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "d7fbb74b673b"
|
||||||
|
down_revision: Union[str, None] = "a9c9c229ee36"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
# Add room_id column to transcript table
|
||||||
|
op.add_column("transcript", sa.Column("room_id", sa.String(), nullable=True))
|
||||||
|
|
||||||
|
# Add index for room_id for better query performance
|
||||||
|
op.create_index("idx_transcript_room_id", "transcript", ["room_id"])
|
||||||
|
|
||||||
|
# Populate room_id for existing ROOM-type transcripts
|
||||||
|
# This joins through recording -> meeting -> room to get the room_id
|
||||||
|
op.execute("""
|
||||||
|
UPDATE transcript AS t
|
||||||
|
SET room_id = r.id
|
||||||
|
FROM recording rec
|
||||||
|
JOIN meeting m ON rec.meeting_id = m.id
|
||||||
|
JOIN room r ON m.room_id = r.id
|
||||||
|
WHERE t.recording_id = rec.id
|
||||||
|
AND t.source_kind = 'room'
|
||||||
|
AND t.room_id IS NULL
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Fix missing meeting_id for ROOM-type transcripts
|
||||||
|
# The meeting_id field exists but was never populated
|
||||||
|
op.execute("""
|
||||||
|
UPDATE transcript AS t
|
||||||
|
SET meeting_id = rec.meeting_id
|
||||||
|
FROM recording rec
|
||||||
|
WHERE t.recording_id = rec.id
|
||||||
|
AND t.source_kind = 'room'
|
||||||
|
AND t.meeting_id IS NULL
|
||||||
|
AND rec.meeting_id IS NOT NULL
|
||||||
|
""")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
# Drop the index first
|
||||||
|
op.drop_index("idx_transcript_room_id", "transcript")
|
||||||
|
|
||||||
|
# Drop the room_id column
|
||||||
|
op.drop_column("transcript", "room_id")
|
||||||
@@ -5,11 +5,11 @@ Revises: 4814901632bc
|
|||||||
Create Date: 2023-11-16 10:29:09.351664
|
Create Date: 2023-11-16 10:29:09.351664
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "f819277e5169"
|
revision: str = "f819277e5169"
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
4607
server/poetry.lock
generated
4607
server/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,84 +1,111 @@
|
|||||||
[tool.poetry]
|
[project]
|
||||||
name = "reflector-server"
|
name = "reflector"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = ""
|
description = ""
|
||||||
authors = ["Monadical team <ops@monadical.com>"]
|
authors = [{ name = "Monadical team", email = "ops@monadical.com" }]
|
||||||
|
requires-python = ">=3.11, <3.13"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
packages = []
|
dependencies = [
|
||||||
|
"aiohttp>=3.9.0",
|
||||||
|
"aiohttp-cors>=0.7.0",
|
||||||
|
"av>=10.0.0",
|
||||||
|
"requests>=2.31.0",
|
||||||
|
"aiortc>=1.5.0",
|
||||||
|
"sortedcontainers>=2.4.0",
|
||||||
|
"loguru>=0.7.0",
|
||||||
|
"pydantic-settings>=2.0.2",
|
||||||
|
"structlog>=23.1.0",
|
||||||
|
"uvicorn[standard]>=0.23.1",
|
||||||
|
"fastapi[standard]>=0.100.1",
|
||||||
|
"sentry-sdk[fastapi]>=1.29.2",
|
||||||
|
"httpx>=0.24.1",
|
||||||
|
"fastapi-pagination>=0.12.6",
|
||||||
|
"databases[aiosqlite, asyncpg]>=0.7.0",
|
||||||
|
"sqlalchemy<1.5",
|
||||||
|
"alembic>=1.11.3",
|
||||||
|
"nltk>=3.8.1",
|
||||||
|
"prometheus-fastapi-instrumentator>=6.1.0",
|
||||||
|
"sentencepiece>=0.1.99",
|
||||||
|
"protobuf>=4.24.3",
|
||||||
|
"profanityfilter>=2.0.6",
|
||||||
|
"celery>=5.3.4",
|
||||||
|
"redis>=5.0.1",
|
||||||
|
"python-jose[cryptography]>=3.3.0",
|
||||||
|
"python-multipart>=0.0.6",
|
||||||
|
"faster-whisper>=0.10.0",
|
||||||
|
"transformers>=4.36.2",
|
||||||
|
"jsonschema>=4.23.0",
|
||||||
|
"openai>=1.59.7",
|
||||||
|
"psycopg2-binary>=2.9.10",
|
||||||
|
"llama-index>=0.12.52",
|
||||||
|
"llama-index-llms-openai-like>=0.4.0",
|
||||||
|
"pytest-env>=1.1.5",
|
||||||
|
"webvtt-py>=0.5.0",
|
||||||
|
"icalendar>=6.0.0",
|
||||||
|
]
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[dependency-groups]
|
||||||
python = "^3.11"
|
dev = [
|
||||||
aiohttp = "^3.9.0"
|
"black>=24.1.1",
|
||||||
aiohttp-cors = "^0.7.0"
|
"stamina>=23.1.0",
|
||||||
av = "^10.0.0"
|
"pyinstrument>=4.6.1",
|
||||||
requests = "^2.31.0"
|
]
|
||||||
aiortc = "^1.5.0"
|
tests = [
|
||||||
sortedcontainers = "^2.4.0"
|
"pytest-cov>=4.1.0",
|
||||||
loguru = "^0.7.0"
|
"pytest-aiohttp>=1.0.4",
|
||||||
pydantic-settings = "^2.0.2"
|
"pytest-asyncio>=0.21.1",
|
||||||
structlog = "^23.1.0"
|
"pytest>=7.4.0",
|
||||||
uvicorn = {extras = ["standard"], version = "^0.23.1"}
|
"httpx-ws>=0.4.1",
|
||||||
fastapi = "^0.100.1"
|
"pytest-httpx>=0.23.1",
|
||||||
sentry-sdk = {extras = ["fastapi"], version = "^1.29.2"}
|
"pytest-celery>=0.0.0",
|
||||||
httpx = "^0.24.1"
|
"pytest-docker>=3.2.3",
|
||||||
fastapi-pagination = "^0.12.6"
|
"asgi-lifespan>=2.1.0",
|
||||||
databases = {extras = ["aiosqlite", "asyncpg"], version = "^0.7.0"}
|
]
|
||||||
sqlalchemy = "<1.5"
|
aws = ["aioboto3>=11.2.0"]
|
||||||
fief-client = {extras = ["fastapi"], version = "^0.17.0"}
|
evaluation = [
|
||||||
alembic = "^1.11.3"
|
"jiwer>=3.0.2",
|
||||||
nltk = "^3.8.1"
|
"levenshtein>=0.21.1",
|
||||||
prometheus-fastapi-instrumentator = "^6.1.0"
|
"tqdm>=4.66.0",
|
||||||
sentencepiece = "^0.1.99"
|
"pydantic>=2.1.1",
|
||||||
protobuf = "^4.24.3"
|
]
|
||||||
profanityfilter = "^2.0.6"
|
|
||||||
celery = "^5.3.4"
|
|
||||||
redis = "^5.0.1"
|
|
||||||
python-jose = {extras = ["cryptography"], version = "^3.3.0"}
|
|
||||||
python-multipart = "^0.0.6"
|
|
||||||
faster-whisper = "^0.10.0"
|
|
||||||
transformers = "^4.36.2"
|
|
||||||
black = "24.1.1"
|
|
||||||
jsonschema = "^4.23.0"
|
|
||||||
openai = "^1.59.7"
|
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
[tool.poetry.group.dev.dependencies]
|
default-groups = [
|
||||||
black = "^24.1.1"
|
"dev",
|
||||||
stamina = "^23.1.0"
|
"tests",
|
||||||
pyinstrument = "^4.6.1"
|
"aws",
|
||||||
|
"evaluation",
|
||||||
|
]
|
||||||
[tool.poetry.group.tests.dependencies]
|
|
||||||
pytest-cov = "^4.1.0"
|
|
||||||
pytest-aiohttp = "^1.0.4"
|
|
||||||
pytest-asyncio = "^0.21.1"
|
|
||||||
pytest = "^7.4.0"
|
|
||||||
httpx-ws = "^0.4.1"
|
|
||||||
pytest-httpx = "^0.23.1"
|
|
||||||
pytest-celery = "^0.0.0"
|
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.aws.dependencies]
|
|
||||||
aioboto3 = "^11.2.0"
|
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.evaluation.dependencies]
|
|
||||||
jiwer = "^3.0.2"
|
|
||||||
levenshtein = "^0.21.1"
|
|
||||||
tqdm = "^4.66.0"
|
|
||||||
pydantic = "^2.1.1"
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["hatchling"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.wheel]
|
||||||
|
packages = ["reflector"]
|
||||||
|
|
||||||
[tool.coverage.run]
|
[tool.coverage.run]
|
||||||
source = ["reflector"]
|
source = ["reflector"]
|
||||||
|
|
||||||
|
[tool.pytest_env]
|
||||||
|
ENVIRONMENT = "pytest"
|
||||||
|
DATABASE_URL = "postgresql://test_user:test_password@localhost:15432/reflector_test"
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
|
addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
|
||||||
testpaths = ["tests"]
|
testpaths = ["tests"]
|
||||||
asyncio_mode = "auto"
|
asyncio_mode = "auto"
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = [
|
||||||
|
"I", # isort - import sorting
|
||||||
|
"F401", # unused imports
|
||||||
|
"PLC0415", # import-outside-top-level - detect inline imports
|
||||||
|
]
|
||||||
|
|
||||||
[tool.ruff.lint.per-file-ignores]
|
[tool.ruff.lint.per-file-ignores]
|
||||||
"reflector/processors/summary/summary_builder.py" = ["E501"]
|
"reflector/processors/summary/summary_builder.py" = ["E501"]
|
||||||
|
"gpu/**.py" = ["PLC0415"]
|
||||||
|
"reflector/tools/**.py" = ["PLC0415"]
|
||||||
|
"migrations/versions/**.py" = ["PLC0415"]
|
||||||
|
"tests/**.py" = ["PLC0415"]
|
||||||
|
|||||||
@@ -1,34 +0,0 @@
|
|||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
# Get the input file name from the command line argument
|
|
||||||
input_file = sys.argv[1]
|
|
||||||
# example use: python 0-reflector-local.py input.m4a agenda.txt
|
|
||||||
|
|
||||||
# Get the agenda file name from the command line argument if provided
|
|
||||||
if len(sys.argv) > 2:
|
|
||||||
agenda_file = sys.argv[2]
|
|
||||||
else:
|
|
||||||
agenda_file = "agenda.txt"
|
|
||||||
# example use: python 0-reflector-local.py input.m4a my_agenda.txt
|
|
||||||
|
|
||||||
# Check if the agenda file exists
|
|
||||||
if not os.path.exists(agenda_file):
|
|
||||||
logger.error("agenda_file is missing")
|
|
||||||
|
|
||||||
# Check if the input file is .m4a, if so convert to .mp4
|
|
||||||
if input_file.endswith(".m4a"):
|
|
||||||
subprocess.run(["ffmpeg", "-i", input_file, f"{input_file}.mp4"])
|
|
||||||
input_file = f"{input_file}.mp4"
|
|
||||||
|
|
||||||
# Run the first script to generate the transcript
|
|
||||||
subprocess.run(["python3", "1-transcript-generator.py", input_file, f"{input_file}_transcript.txt"])
|
|
||||||
|
|
||||||
# Run the second script to compare the transcript to the agenda
|
|
||||||
subprocess.run(["python3", "2-agenda-transcript-diff.py", agenda_file, f"{input_file}_transcript.txt"])
|
|
||||||
|
|
||||||
# Run the third script to summarize the transcript
|
|
||||||
subprocess.run(["python3", "3-transcript-summarizer.py", f"{input_file}_transcript.txt", f"{input_file}_summary.txt"])
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
import argparse
|
|
||||||
import os
|
|
||||||
|
|
||||||
import moviepy.editor
|
|
||||||
import whisper
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
WHISPER_MODEL_SIZE = "base"
|
|
||||||
|
|
||||||
|
|
||||||
def init_argparse() -> argparse.ArgumentParser:
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
usage="%(prog)s <LOCATION> <OUTPUT>",
|
|
||||||
description="Creates a transcript of a video or audio file using the OpenAI Whisper model"
|
|
||||||
)
|
|
||||||
parser.add_argument("location", help="Location of the media file")
|
|
||||||
parser.add_argument("output", help="Output file path")
|
|
||||||
return parser
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
import sys
|
|
||||||
sys.setrecursionlimit(10000)
|
|
||||||
|
|
||||||
parser = init_argparse()
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
media_file = args.location
|
|
||||||
logger.info(f"Processing file: {media_file}")
|
|
||||||
|
|
||||||
# Check if the media file is a valid audio or video file
|
|
||||||
if os.path.isfile(media_file) and not media_file.endswith(
|
|
||||||
('.mp3', '.wav', '.ogg', '.flac', '.mp4', '.avi', '.flv')):
|
|
||||||
logger.error(f"Invalid file format: {media_file}")
|
|
||||||
return
|
|
||||||
|
|
||||||
# If the media file we just retrieved is an audio file then skip extraction step
|
|
||||||
audio_filename = media_file
|
|
||||||
logger.info(f"Found audio-only file, skipping audio extraction")
|
|
||||||
|
|
||||||
audio = moviepy.editor.AudioFileClip(audio_filename)
|
|
||||||
|
|
||||||
logger.info("Selected extracted audio")
|
|
||||||
|
|
||||||
# Transcribe the audio file using the OpenAI Whisper model
|
|
||||||
logger.info("Loading Whisper speech-to-text model")
|
|
||||||
whisper_model = whisper.load_model(WHISPER_MODEL_SIZE)
|
|
||||||
|
|
||||||
logger.info(f"Transcribing file: {media_file}")
|
|
||||||
whisper_result = whisper_model.transcribe(media_file)
|
|
||||||
|
|
||||||
logger.info("Finished transcribing file")
|
|
||||||
|
|
||||||
# Save the transcript to the specified file.
|
|
||||||
logger.info(f"Saving transcript to: {args.output}")
|
|
||||||
transcript_file = open(args.output, "w")
|
|
||||||
transcript_file.write(whisper_result["text"])
|
|
||||||
transcript_file.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
import argparse
|
|
||||||
|
|
||||||
import spacy
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
|
|
||||||
# Define the paths for agenda and transcription files
|
|
||||||
def init_argparse() -> argparse.ArgumentParser:
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
usage="%(prog)s <AGENDA> <TRANSCRIPTION>",
|
|
||||||
description="Compares the transcript of a video or audio file to an agenda using the SpaCy model"
|
|
||||||
)
|
|
||||||
parser.add_argument("agenda", help="Location of the agenda file")
|
|
||||||
parser.add_argument("transcription", help="Location of the transcription file")
|
|
||||||
return parser
|
|
||||||
|
|
||||||
|
|
||||||
args = init_argparse().parse_args()
|
|
||||||
agenda_path = args.agenda
|
|
||||||
transcription_path = args.transcription
|
|
||||||
|
|
||||||
# Load the spaCy model and add the sentencizer
|
|
||||||
spaCy_model = "en_core_web_md"
|
|
||||||
nlp = spacy.load(spaCy_model)
|
|
||||||
nlp.add_pipe('sentencizer')
|
|
||||||
logger.info("Loaded spaCy model " + spaCy_model)
|
|
||||||
|
|
||||||
# Load the agenda
|
|
||||||
with open(agenda_path, "r") as f:
|
|
||||||
agenda = [line.strip() for line in f.readlines() if line.strip()]
|
|
||||||
logger.info("Loaded agenda items")
|
|
||||||
|
|
||||||
# Load the transcription
|
|
||||||
with open(transcription_path, "r") as f:
|
|
||||||
transcription = f.read()
|
|
||||||
logger.info("Loaded transcription")
|
|
||||||
|
|
||||||
# Tokenize the transcription using spaCy
|
|
||||||
doc_transcription = nlp(transcription)
|
|
||||||
logger.info("Tokenized transcription")
|
|
||||||
|
|
||||||
# Find the items covered in the transcription
|
|
||||||
covered_items = {}
|
|
||||||
for item in agenda:
|
|
||||||
item_doc = nlp(item)
|
|
||||||
for sent in doc_transcription.sents:
|
|
||||||
if not sent or not all(token.has_vector for token in sent):
|
|
||||||
# Skip an empty span or one without any word vectors
|
|
||||||
continue
|
|
||||||
similarity = sent.similarity(item_doc)
|
|
||||||
similarity_threshold = 0.7
|
|
||||||
if similarity > similarity_threshold: # Set the threshold to determine what is considered a match
|
|
||||||
covered_items[item] = True
|
|
||||||
break
|
|
||||||
|
|
||||||
# Count the number of items covered and calculatre the percentage
|
|
||||||
num_covered_items = sum(covered_items.values())
|
|
||||||
percentage_covered = num_covered_items / len(agenda) * 100
|
|
||||||
|
|
||||||
# Print the results
|
|
||||||
print("💬 Agenda items covered in the transcription:")
|
|
||||||
for item in agenda:
|
|
||||||
if item in covered_items and covered_items[item]:
|
|
||||||
print("✅ ", item)
|
|
||||||
else:
|
|
||||||
print("❌ ", item)
|
|
||||||
print("📊 Coverage: {:.2f}%".format(percentage_covered))
|
|
||||||
logger.info("Finished comparing agenda to transcription with similarity threshold of " + str(similarity_threshold))
|
|
||||||
@@ -1,94 +0,0 @@
import argparse

import nltk

nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from heapq import nlargest
from loguru import logger


# Function to initialize the argument parser
def init_argparse():
    parser = argparse.ArgumentParser(
        usage="%(prog)s <TRANSCRIPT> <SUMMARY>",
        description="Summarization"
    )
    parser.add_argument("transcript", type=str, default="transcript.txt", help="Path to the input transcript file")
    parser.add_argument("summary", type=str, default="summary.txt", help="Path to the output summary file")
    parser.add_argument("--num_sentences", type=int, default=5, help="Number of sentences to include in the summary")
    return parser


# Function to read the input transcript file
def read_transcript(file_path):
    with open(file_path, "r") as file:
        transcript = file.read()
    return transcript


# Function to preprocess the text by removing stop words and special characters
def preprocess_text(text):
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)
    words = [w.lower() for w in words if w.isalpha() and w.lower() not in stop_words]
    return words


# Function to score each sentence based on the frequency of its words and return the top sentences
def summarize_text(text, num_sentences):
    # Tokenize the text into sentences
    sentences = sent_tokenize(text)

    # Preprocess the text by removing stop words and special characters
    words = preprocess_text(text)

    # Calculate the frequency of each word in the text
    word_freq = nltk.FreqDist(words)

    # Calculate the score for each sentence based on the frequency of its words
    sentence_scores = {}
    for i, sentence in enumerate(sentences):
        sentence_words = preprocess_text(sentence)
        for word in sentence_words:
            if word in word_freq:
                if i not in sentence_scores:
                    sentence_scores[i] = word_freq[word]
                else:
                    sentence_scores[i] += word_freq[word]

    # Select the top sentences based on their scores
    top_sentences = nlargest(num_sentences, sentence_scores, key=sentence_scores.get)

    # Sort the top sentences in the order they appeared in the original text
    summary_sent = sorted(top_sentences)
    summary = [sentences[i] for i in summary_sent]

    return " ".join(summary)


def main():
    # Initialize the argument parser and parse the arguments
    parser = init_argparse()
    args = parser.parse_args()

    # Read the input transcript file
    logger.info(f"Reading transcript from: {args.transcript}")
    transcript = read_transcript(args.transcript)

    # Summarize the transcript using the nltk library
    logger.info("Summarizing transcript")
    summary = summarize_text(transcript, args.num_sentences)

    # Write the summary to the output file
    logger.info(f"Writing summary to: {args.summary}")
    with open(args.summary, "w") as f:
        f.write("Summary of: " + args.transcript + "\n\n")
        f.write(summary)

    logger.info("Summarization completed")


if __name__ == "__main__":
    main()
@@ -1,4 +0,0 @@
# Deloitte HR @ NYS Cybersecurity Conference
- ways to retain and grow your workforce
- how to enable cybersecurity professionals to do their best work
- low-budget activities that can be implemented starting tomorrow
File diff suppressed because one or more lines are too long
@@ -1,3 +0,0 @@
Summary of: 30min-CyberHR/30min-CyberHR.m4a.mp4_transcript.txt

Since the workforce is an organization's most valuable asset, investing in workforce experience activities, we've found has lead to more productive work, more efficient work, more innovative approaches to the work, and more engaged teams which ultimately results in better mission outcomes for your organization. And this one really focuses on not just pulsing a workforce once a year through an annual HR survey of, how do you really feel like, you know, what leadership considerations should we implement or, you know, how can we enhance the performance management process. We've just found that, you know, by investing in this and putting the workforce as, you know, the center part of what you invest in as an organization and leaders, it's not only about retention, talent, you know, the cyber workforce crisis, but people want to do work well and they're able to get more done and achieve more without you, you know, directly supervising and micromanaging or looking at everything because, you know, you know, you know, you're not going to be able to do anything. I hope there was a little bit of, you know, the landscape of the cyber workforce with some practical tips that you can take away for how to just think about, you know, improving the overall workforce experience and investing in your employees. So with this, you know, we know that all of you are in the trenches every day, you're facing this, you're living this, and we are just interested to hear from all of you, you know, just to start, like, what's one thing that has worked well in your organization in terms of enhancing or investing in the workforce experience?
File diff suppressed because one or more lines are too long
@@ -1,47 +0,0 @@
AGENDA: Most important things to look for in a start up

TAM: Make sure the market is sufficiently large that once they win they can get rewarded
- Medium sized markets that should be winner take all can work
- TAM needs to be realistic of direct market size

Product market fit: Being in a good market with a product that can satisfy that market
- Solves a problem
- Builds a solution a customer wants to buy
- Either saves the customer something (time/money/pain) or gives them something (revenue/enjoyment)

Unit economics: Profit for delivering all-in cost must be attractive (% or $ amount)
- Revenue minus direct costs
- Raw input costs (materials, variable labour), direct cost of delivering and servicing the sale
- Attractive as a % of sales so it can contribute to fixed overhead
- Look for high incremental contribution margin

LTV CAC: Life-time value (revenue contribution) vs cost to acquire customer must be healthy
- LTV = Purchase value x number of purchases x customer lifespan
- CAC = All-in costs of sales + marketing over number of new customer additions
- Strong reputation leads to referrals leads to lower CAC. Want customers evangelizing product/service
- Rule of thumb higher than 3

Churn: Fits into LTV, low churn leads to higher LTV and helps keep future CAC down
- Selling to replenish revenue every year is hard
- Can run through entire customer base over time
- Low churn builds strong net dollar retention

Business: Must have sufficient barriers to entry to ward off copy-cats once established
- High switching costs (lock-in)
- Addictive
- Steep learning curve once adopted (form of switching cost)
- Two sided liquidity
- Patents, IP, Branding
- No hyper-scaler who can roll over you quickly
- Scale could be a barrier to entry but works against most start-ups, not for them
- Once developed, answer question: Could a well funded competitor starting up today easily duplicate this business or is it cheaper to buy the start up?

Founders: Must be religious about their product. Believe they will change the world against all odds.
- Just money in the bank is not enough to build a successful company. Just good tech is not enough to build a successful company
- Founders must be motivated to build something, not (all) about money. They would be doing this for free because they believe in it. Not looking for quick score
- Founders must be persuasive. They will be asking others to sacrifice to make their dream come to life. They will need to convince investors this company can work and deserves funding.
- Must understand who the customer is and what problem they are helping to solve.
- Founders aren’t expected to know all the preceding points in this document but have an understanding of most of this, and be able to offer a vision.
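Since the LTV and CAC definitions above are plain arithmetic, a hedged worked example (hypothetical numbers, not from the source) makes the "higher than 3" rule of thumb concrete:

# Hypothetical illustration of the LTV/CAC rule of thumb described above.
purchase_value = 50.0        # average revenue per purchase, in dollars
purchases_per_year = 4       # purchases per customer per year
customer_lifespan_years = 3  # how long a typical customer sticks around

ltv = purchase_value * purchases_per_year * customer_lifespan_years  # 600.0

sales_and_marketing_spend = 30_000.0  # all-in acquisition spend for the period
new_customers = 200                   # customers acquired in the same period
cac = sales_and_marketing_spend / new_customers  # 150.0

ratio = ltv / cac
print(f"LTV={ltv:.0f}, CAC={cac:.0f}, LTV/CAC={ratio:.1f}")  # 4.0, above the ~3 rule of thumb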
@@ -1,8 +0,0 @@
AGENDA: Most important things to look for in a start up
TAM: Make sure the market is sufficiently large that once they win they can get rewarded
Product market fit: Being in a good market with a product that can satisfy that market
Unit economics: Profit for delivering all-in cost must be attractive (% or $ amount)
LTV CAC: Life-time value (revenue contribution) vs cost to acquire customer must be healthy
Churn: Fits into LTV, low churn leads to higher LTV and helps keep future CAC down
Business: Must have sufficient barriers to entry to ward off copy-cats once established
Founders: Must be religious about their product. Believe they will change the world against all odds.
@@ -1,10 +0,0 @@
Summary of: recordings/42min-StartupsTechTalk.mp4

The speaker discusses their plan to launch an investment company, which will sit on a pool of cash raised from various partners and investors. They will take equity stakes in startups that they believe have the potential to scale and become successful. The speaker emphasizes the importance of investing in companies that have a large total addressable market (TAM) and good product-market fit. They also discuss the concept of unit economics and how it is important to ensure that the profit from selling a product or service outweighs the cost of producing it. The speaker encourages their team to keep an eye out for interesting startups and to send them their way if they come across any.

The conversation is about the importance of unit economics, incremental margin, lifetime value, customer acquisition costs, churn, and barriers to entry in evaluating businesses for investment. The speaker explains that companies with good unit economics and high incremental contribution margins are ideal for investment. Lifetime value measures how much a customer will spend on a business over their entire existence, while customer acquisition costs measure the cost of acquiring a new customer. Churn refers to the rate at which customers leave a business, and businesses with low churn tend to have high lifetime values. High barriers to entry, such as high switching costs, can make it difficult for competitors to enter the market and kill established businesses.

The speaker discusses various factors that can contribute to a company's success and create a competitive advantage. These include making the product addictive, having steep learning curves, creating two-sided liquidity for marketplaces, having patents or intellectual property, strong branding, and scale as a barrier to entry. The speaker also emphasizes the importance of founders having a plan to differentiate themselves from competitors and avoid being rolled over by larger companies. Additionally, the speaker mentions MasterCard and Visa as examples of companies that invented their markets, while Apple was able to build a strong brand despite starting with no developers or users.

The speaker discusses the importance of founders in building successful companies, emphasizing that they must be passionate and believe in their product. They should also be charismatic and able to persuade others to work towards their vision. The speaker cites examples of successful CEOs such as Zuckerberg, Steve Jobs, Elon Musk, Bill Gates, Jeff Bezos, Travis Kalanick, and emphasizes that luck is also a factor in success. The speaker encourages listeners to have a critical eye when evaluating startups and to look for those with a clear understanding of their customers and the problem they are solving.
File diff suppressed because one or more lines are too long
@@ -1,3 +0,0 @@
Summary of: 42min-StartupsTechTalk/42min-StartupsTechTalk.mp4_transcript.txt

If you had perfect knowledge, and you need like one more piece of advertising, drove like 0.2 customers in each customer generates, like let's say you wanted to completely maximize, you'd make it say your contribution margin, on incremental sales, is just over what you're spending on ad revenue. Like if you're, I don't know, well, let's see, I got like you don't really want to advertise a ton in the huge and everywhere, and then getting to ubiquitous, because you grab it, damage your brands, but just like an economic textbook theory, and be like, it'd be that basic math. And the table's like exactly, we're going to be really cautious to like be able to move in a year if we need to, but Google's goal is going to be giving away foundational models, lock everyone in, make them use Google Cloud, make them use Google Tools, and it's going to be very hard to switch off. Like if you were starting to develop Figma, you might say, okay, well Adobe is just gonna eat my lunch, right, like right away. So when you see a startup or talk to a founder and he's saying these things in your head like, man, this isn't gonna work because of, you know, there's no tab or there's, you know, like Amazon's gonna roll these cuts over in like two days or whatever, you know, or the man, this is really interesting because not only they're not doing it and no one else is doing this, but like they're going after a big market.
File diff suppressed because one or more lines are too long
@@ -1,4 +0,0 @@
GitHub
Requirements
Junior Developers
Riding Elephants
@@ -1,4 +0,0 @@
Summary of: https://www.youtube.com/watch?v=DzRoYc2UGKI

Small Developer is a program that creates an entire project for you based on a prompt. It uses the JATGPT API to generate code and files, and it's easy to use. The program can be installed by cloning the GitHub repository and using modalcom. The program can create projects for various languages, including Python and Ruby. You can also create a prompt.md file to input your prompt instead of pasting it into the terminal. The program is useful for creating detailed specs that can be passed on to junior developers. Overall, Small Developer is a helpful tool for quickly generating code and projects.
File diff suppressed because one or more lines are too long
@@ -1,11 +0,0 @@
# Record on Voice Memos on iPhone

# Airdrop to MacBook Air

# Run Reflector on .m4a Recording and Agenda

python 0-reflector-local.py voicememo.m4a agenda.txt

OR - using 30min-CyberHR example:

python 0-reflector-local.py 30min-CyberHR/30min-CyberHR.m4a 30min-CyberHR/30min-CyberHR-agenda.txt
@@ -1,125 +0,0 @@
import argparse
import os
import tempfile

import moviepy.editor
import nltk
import whisper
from loguru import logger
from transformers import BartTokenizer, BartForConditionalGeneration

nltk.download('punkt', quiet=True)

WHISPER_MODEL_SIZE = "base"


def init_argparse() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        usage="%(prog)s [OPTIONS] <LOCATION> <OUTPUT>",
        description="Creates a transcript of a video or audio file, then summarizes it using BART."
    )

    parser.add_argument("location", help="Location of the media file")
    parser.add_argument("output", help="Output file path")

    parser.add_argument(
        "-t", "--transcript", help="Save a copy of the intermediary transcript file", type=str)
    parser.add_argument(
        "-l", "--language", help="Language that the summary should be written in",
        type=str, default="english", choices=['english', 'spanish', 'french', 'german', 'romanian'])
    parser.add_argument(
        "-m", "--model_name", help="Name or path of the BART model",
        type=str, default="facebook/bart-large-cnn")

    return parser


# NLTK chunking function
def chunk_text(txt, max_chunk_length=500):
    "Split text into smaller chunks."
    sentences = nltk.sent_tokenize(txt)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) < max_chunk_length:
            current_chunk += f" {sentence.strip()}"
        else:
            chunks.append(current_chunk.strip())
            current_chunk = f"{sentence.strip()}"
    chunks.append(current_chunk.strip())
    return chunks


# BART summary function
def summarize_chunks(chunks, tokenizer, model):
    summaries = []
    for c in chunks:
        input_ids = tokenizer.encode(c, return_tensors='pt')
        summary_ids = model.generate(
            input_ids, num_beams=4, length_penalty=2.0, max_length=1024, no_repeat_ngram_size=3)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        summaries.append(summary)
    return summaries


def main():
    import sys
    sys.setrecursionlimit(10000)

    parser = init_argparse()
    args = parser.parse_args()

    media_file = args.location
    logger.info(f"Processing file: {media_file}")

    # If the media file we just retrieved is a video, extract its audio stream.
    if os.path.isfile(media_file) and media_file.endswith(('.mp4', '.avi', '.flv')):
        audio_filename = tempfile.NamedTemporaryFile(
            suffix=".mp3", delete=False).name
        logger.info(f"Extracting audio to: {audio_filename}")

        video = moviepy.editor.VideoFileClip(media_file)
        video.audio.write_audiofile(audio_filename, logger=None)

        logger.info("Finished extracting audio")
        media_file = audio_filename

    # Transcribe the audio file using the OpenAI Whisper model
    logger.info("Loading Whisper speech-to-text model")
    whisper_model = whisper.load_model(WHISPER_MODEL_SIZE)

    logger.info(f"Transcribing audio file: {media_file}")
    whisper_result = whisper_model.transcribe(media_file)

    logger.info("Finished transcribing file")

    # If we got the transcript parameter on the command line, save the transcript to the specified file.
    if args.transcript:
        logger.info(f"Saving transcript to: {args.transcript}")
        transcript_file = open(args.transcript, "w")
        transcript_file.write(whisper_result["text"])
        transcript_file.close()

    # Summarize the generated transcript using the BART model
    logger.info(f"Loading BART model: {args.model_name}")
    tokenizer = BartTokenizer.from_pretrained(args.model_name)
    model = BartForConditionalGeneration.from_pretrained(args.model_name)

    logger.info("Breaking transcript into smaller chunks")
    chunks = chunk_text(whisper_result['text'])

    logger.info(
        f"Transcript broken into {len(chunks)} chunks of at most 500 words")  # TODO fix variable

    logger.info(f"Writing summary text in {args.language} to: {args.output}")
    with open(args.output, 'w') as f:
        f.write('Summary of: ' + args.location + "\n\n")
        summaries = summarize_chunks(chunks, tokenizer, model)
        for summary in summaries:
            f.write(summary.strip() + "\n\n")

    logger.info("Summarization completed")


if __name__ == "__main__":
    main()
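Note that max_chunk_length in chunk_text above counts characters, not words (hence the "# TODO fix variable" comment in the log line); a hedged toy check of that behaviour, assuming NLTK's punkt data is downloadable:

import nltk

nltk.download("punkt", quiet=True)

toy = "One short sentence. " * 30  # roughly 600 characters of toy transcript
sentences = nltk.sent_tokenize(toy)

# Re-create the chunking rule from chunk_text: start a new chunk once the running
# character count would reach the limit (100 here), so it is a character budget.
chunks, current = [], ""
for sentence in sentences:
    if len(current) + len(sentence) < 100:
        current += f" {sentence.strip()}"
    else:
        chunks.append(current.strip())
        current = sentence.strip()
chunks.append(current.strip())

print([len(c) for c in chunks])  # each chunk stays under ~100 characters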
@@ -1,12 +1,13 @@
from contextlib import asynccontextmanager
-import reflector.auth  # noqa
-import reflector.db  # noqa
+
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.routing import APIRoute
from fastapi_pagination import add_pagination
from prometheus_fastapi_instrumentator import Instrumentator

+import reflector.auth  # noqa
+import reflector.db  # noqa
from reflector.events import subscribers_shutdown, subscribers_startup
from reflector.logger import logger
from reflector.metrics import metrics_init
@@ -147,6 +148,10 @@ if settings.PROFILING:
if __name__ == "__main__":
+    import sys
+
    import uvicorn

-    uvicorn.run("reflector.app:app", host="0.0.0.0", port=1250, reload=True)
+    should_reload = "--reload" in sys.argv
+
+    uvicorn.run("reflector.app:app", host="0.0.0.0", port=1250, reload=should_reload)
@@ -1,7 +1,8 @@
-from reflector.settings import settings
-from reflector.logger import logger
import importlib

+from reflector.logger import logger
+from reflector.settings import settings
+
logger.info(f"User authentication using {settings.AUTH_BACKEND}")
module_name = f"reflector.auth.auth_{settings.AUTH_BACKEND}"
auth_module = importlib.import_module(module_name)
@@ -1,25 +0,0 @@
from fastapi.security import OAuth2AuthorizationCodeBearer
from fief_client import FiefAccessTokenInfo, FiefAsync, FiefUserInfo
from fief_client.integrations.fastapi import FiefAuth
from reflector.settings import settings

fief = FiefAsync(
    settings.AUTH_FIEF_URL,
    settings.AUTH_FIEF_CLIENT_ID,
    settings.AUTH_FIEF_CLIENT_SECRET,
)

scheme = OAuth2AuthorizationCodeBearer(
    f"{settings.AUTH_FIEF_URL}/authorize",
    f"{settings.AUTH_FIEF_URL}/api/token",
    scopes={"openid": "openid", "offline_access": "offline_access"},
    auto_error=False,
)

auth = FiefAuth(fief, scheme)

UserInfo = FiefUserInfo
AccessTokenInfo = FiefAccessTokenInfo
authenticated = auth.authenticated()
current_user = auth.current_user()
current_user_optional = auth.current_user(optional=True)
@@ -4,6 +4,7 @@ from fastapi import Depends, HTTPException
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
from pydantic import BaseModel
+
from reflector.logger import logger
from reflector.settings import settings

@@ -1,7 +1,8 @@
-from pydantic import BaseModel
from typing import Annotated

from fastapi import Depends
from fastapi.security import OAuth2PasswordBearer
+from pydantic import BaseModel

oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token", auto_error=False)

@@ -1,12 +1,12 @@
import argparse
import asyncio
import signal
+from typing import NoReturn

from aiortc.contrib.signaling import add_signaling_arguments, create_signaling

from reflector.logger import logger
from reflector.stream_client import StreamClient
-from typing import NoReturn


async def main() -> NoReturn:
@@ -51,7 +51,7 @@ async def main() -> NoReturn:
        logger.info(f"Cancelling {len(tasks)} outstanding tasks")
        await asyncio.gather(*tasks, return_exceptions=True)
-        logger.info(f'{"Flushing metrics"}')
+        logger.info(f"{'Flushing metrics'}")
        loop.stop()

    signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
@@ -1,28 +1,48 @@
+import contextvars
+from typing import Optional
+
import databases
import sqlalchemy

from reflector.events import subscribers_shutdown, subscribers_startup
from reflector.settings import settings

-database = databases.Database(settings.DATABASE_URL)
metadata = sqlalchemy.MetaData()

+_database_context: contextvars.ContextVar[Optional[databases.Database]] = (
+    contextvars.ContextVar("database", default=None)
+)
+
+
+def get_database() -> databases.Database:
+    """Get database instance for current asyncio context"""
+    db = _database_context.get()
+    if db is None:
+        db = databases.Database(settings.DATABASE_URL)
+        _database_context.set(db)
+    return db
+
+
# import models
+import reflector.db.calendar_events  # noqa
import reflector.db.meetings  # noqa
import reflector.db.recordings  # noqa
import reflector.db.rooms  # noqa
import reflector.db.transcripts  # noqa

kwargs = {}
-if "sqlite" in settings.DATABASE_URL:
-    kwargs["connect_args"] = {"check_same_thread": False}
+if "postgres" not in settings.DATABASE_URL:
+    raise Exception("Only postgres database is supported in reflector")
engine = sqlalchemy.create_engine(settings.DATABASE_URL, **kwargs)


@subscribers_startup.append
async def database_connect(_):
+    database = get_database()
    await database.connect()


@subscribers_shutdown.append
async def database_disconnect(_):
+    database = get_database()
    await database.disconnect()
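The get_database() change above keeps one databases.Database per asyncio context via a ContextVar; a hedged, standalone sketch of the same pattern (a generic stand-in resource instead of a real database, so it runs without Postgres):

import asyncio
import contextvars
from typing import Optional


class Resource:
    """Stand-in for databases.Database; real code would hold a connection pool."""

    def __init__(self, name: str) -> None:
        self.name = name


_resource_context: contextvars.ContextVar[Optional[Resource]] = contextvars.ContextVar(
    "resource", default=None
)


def get_resource(name: str) -> Resource:
    # Reuse the instance bound to the current context, creating it lazily.
    res = _resource_context.get()
    if res is None:
        res = Resource(name)
        _resource_context.set(res)
    return res


async def worker(name: str) -> None:
    first = get_resource(name)
    second = get_resource(name)
    # Within one asyncio context the same instance comes back.
    print(name, first is second)


asyncio.run(worker("db-a"))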
server/reflector/db/calendar_events.py (new file, 193 lines)
@@ -0,0 +1,193 @@
from datetime import datetime, timezone
from typing import Any

import sqlalchemy as sa
from pydantic import BaseModel, Field
from sqlalchemy.dialects.postgresql import JSONB

from reflector.db import get_database, metadata
from reflector.utils import generate_uuid4

calendar_events = sa.Table(
    "calendar_event",
    metadata,
    sa.Column("id", sa.String, primary_key=True),
    sa.Column(
        "room_id",
        sa.String,
        sa.ForeignKey("room.id", ondelete="CASCADE"),
        nullable=False,
    ),
    sa.Column("ics_uid", sa.Text, nullable=False),
    sa.Column("title", sa.Text),
    sa.Column("description", sa.Text),
    sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
    sa.Column("end_time", sa.DateTime(timezone=True), nullable=False),
    sa.Column("attendees", JSONB),
    sa.Column("location", sa.Text),
    sa.Column("ics_raw_data", sa.Text),
    sa.Column("last_synced", sa.DateTime(timezone=True), nullable=False),
    sa.Column("is_deleted", sa.Boolean, nullable=False, server_default=sa.false()),
    sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
    sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
    sa.UniqueConstraint("room_id", "ics_uid", name="uq_room_calendar_event"),
    sa.Index("idx_calendar_event_room_start", "room_id", "start_time"),
    sa.Index(
        "idx_calendar_event_deleted",
        "is_deleted",
        postgresql_where=sa.text("NOT is_deleted"),
    ),
)


class CalendarEvent(BaseModel):
    id: str = Field(default_factory=generate_uuid4)
    room_id: str
    ics_uid: str
    title: str | None = None
    description: str | None = None
    start_time: datetime
    end_time: datetime
    attendees: list[dict[str, Any]] | None = None
    location: str | None = None
    ics_raw_data: str | None = None
    last_synced: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    is_deleted: bool = False
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))


class CalendarEventController:
    async def get_by_room(
        self,
        room_id: str,
        include_deleted: bool = False,
        start_after: datetime | None = None,
        end_before: datetime | None = None,
    ) -> list[CalendarEvent]:
        """Get calendar events for a room."""
        query = calendar_events.select().where(calendar_events.c.room_id == room_id)

        if not include_deleted:
            query = query.where(calendar_events.c.is_deleted == False)

        if start_after:
            query = query.where(calendar_events.c.start_time >= start_after)

        if end_before:
            query = query.where(calendar_events.c.end_time <= end_before)

        query = query.order_by(calendar_events.c.start_time.asc())

        results = await get_database().fetch_all(query)
        return [CalendarEvent(**result) for result in results]

    async def get_upcoming(
        self, room_id: str, minutes_ahead: int = 30
    ) -> list[CalendarEvent]:
        """Get upcoming events for a room within the specified minutes."""
        now = datetime.now(timezone.utc)
        future_time = now + timedelta(minutes=minutes_ahead)

        query = (
            calendar_events.select()
            .where(
                sa.and_(
                    calendar_events.c.room_id == room_id,
                    calendar_events.c.is_deleted == False,
                    calendar_events.c.start_time >= now,
                    calendar_events.c.start_time <= future_time,
                )
            )
            .order_by(calendar_events.c.start_time.asc())
        )

        results = await get_database().fetch_all(query)
        return [CalendarEvent(**result) for result in results]

    async def get_by_ics_uid(self, room_id: str, ics_uid: str) -> CalendarEvent | None:
        """Get a calendar event by its ICS UID."""
        query = calendar_events.select().where(
            sa.and_(
                calendar_events.c.room_id == room_id,
                calendar_events.c.ics_uid == ics_uid,
            )
        )
        result = await get_database().fetch_one(query)
        return CalendarEvent(**result) if result else None

    async def upsert(self, event: CalendarEvent) -> CalendarEvent:
        """Create or update a calendar event."""
        existing = await self.get_by_ics_uid(event.room_id, event.ics_uid)

        if existing:
            # Update existing event
            event.id = existing.id
            event.created_at = existing.created_at
            event.updated_at = datetime.now(timezone.utc)

            query = (
                calendar_events.update()
                .where(calendar_events.c.id == existing.id)
                .values(**event.model_dump())
            )
        else:
            # Insert new event
            query = calendar_events.insert().values(**event.model_dump())

        await get_database().execute(query)
        return event

    async def soft_delete_missing(
        self, room_id: str, current_ics_uids: list[str]
    ) -> int:
        """Soft delete future events that are no longer in the calendar."""
        now = datetime.now(timezone.utc)

        # First, get the IDs of events to delete
        select_query = calendar_events.select().where(
            sa.and_(
                calendar_events.c.room_id == room_id,
                calendar_events.c.start_time > now,
                calendar_events.c.is_deleted == False,
                calendar_events.c.ics_uid.notin_(current_ics_uids)
                if current_ics_uids
                else True,
            )
        )

        to_delete = await get_database().fetch_all(select_query)
        delete_count = len(to_delete)

        if delete_count > 0:
            # Now update them
            update_query = (
                calendar_events.update()
                .where(
                    sa.and_(
                        calendar_events.c.room_id == room_id,
                        calendar_events.c.start_time > now,
                        calendar_events.c.is_deleted == False,
                        calendar_events.c.ics_uid.notin_(current_ics_uids)
                        if current_ics_uids
                        else True,
                    )
                )
                .values(is_deleted=True, updated_at=now)
            )

            await get_database().execute(update_query)

        return delete_count

    async def delete_by_room(self, room_id: str) -> int:
        """Hard delete all events for a room (used when room is deleted)."""
        query = calendar_events.delete().where(calendar_events.c.room_id == room_id)
        result = await get_database().execute(query)
        return result.rowcount


# Add missing import
from datetime import timedelta

calendar_events_controller = CalendarEventController()
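A hedged usage sketch of the new controller above (the room id and event fields are hypothetical, a matching room row is assumed to exist, and DATABASE_URL must point at a reachable Postgres instance):

import asyncio
from datetime import datetime, timedelta, timezone

from reflector.db import get_database
from reflector.db.calendar_events import CalendarEvent, calendar_events_controller


async def sync_one_event() -> None:
    await get_database().connect()
    try:
        # Hypothetical event as it might come out of an ICS feed for room "room-123".
        now = datetime.now(timezone.utc)
        event = CalendarEvent(
            room_id="room-123",
            ics_uid="20250101T000000Z-abc@example.com",
            title="Weekly sync",
            start_time=now + timedelta(minutes=10),
            end_time=now + timedelta(minutes=40),
        )
        await calendar_events_controller.upsert(event)

        # Events starting in the next 30 minutes, soft-deleted ones excluded.
        upcoming = await calendar_events_controller.get_upcoming("room-123", minutes_ahead=30)
        print([e.title for e in upcoming])
    finally:
        await get_database().disconnect()


asyncio.run(sync_one_event())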
@@ -1,10 +1,12 @@
from datetime import datetime
-from typing import Literal
+from typing import Any, Literal

import sqlalchemy as sa
from fastapi import HTTPException
from pydantic import BaseModel, Field
-from reflector.db import database, metadata
+from sqlalchemy.dialects.postgresql import JSONB
+
+from reflector.db import get_database, metadata
from reflector.db.rooms import Room
from reflector.utils import generate_uuid4

@@ -15,8 +17,8 @@ meetings = sa.Table(
    sa.Column("room_name", sa.String),
    sa.Column("room_url", sa.String),
    sa.Column("host_room_url", sa.String),
-    sa.Column("start_date", sa.DateTime),
-    sa.Column("end_date", sa.DateTime),
+    sa.Column("start_date", sa.DateTime(timezone=True)),
+    sa.Column("end_date", sa.DateTime(timezone=True)),
    sa.Column("user_id", sa.String),
    sa.Column("room_id", sa.String),
    sa.Column("is_locked", sa.Boolean, nullable=False, server_default=sa.false()),
@@ -40,7 +42,16 @@ meetings = sa.Table(
        nullable=False,
        server_default=sa.true(),
    ),
+    sa.Column(
+        "calendar_event_id",
+        sa.String,
+        sa.ForeignKey("calendar_event.id", ondelete="SET NULL"),
+    ),
+    sa.Column("calendar_metadata", JSONB),
+    sa.Column("last_participant_left_at", sa.DateTime(timezone=True)),
+    sa.Column("grace_period_minutes", sa.Integer, server_default=sa.text("15")),
    sa.Index("idx_meeting_room_id", "room_id"),
+    sa.Index("idx_meeting_calendar_event", "calendar_event_id"),
)

meeting_consent = sa.Table(
@@ -50,7 +61,7 @@ meeting_consent = sa.Table(
    sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id"), nullable=False),
    sa.Column("user_id", sa.String),
    sa.Column("consent_given", sa.Boolean, nullable=False),
-    sa.Column("consent_timestamp", sa.DateTime, nullable=False),
+    sa.Column("consent_timestamp", sa.DateTime(timezone=True), nullable=False),
)


@@ -78,6 +89,11 @@ class Meeting(BaseModel):
        "none", "prompt", "automatic", "automatic-2nd-participant"
    ] = "automatic-2nd-participant"
    num_clients: int = 0
+    is_active: bool = True
+    calendar_event_id: str | None = None
+    calendar_metadata: dict[str, Any] | None = None
+    last_participant_left_at: datetime | None = None
+    grace_period_minutes: int = 15


class MeetingController:
@@ -91,6 +107,8 @@ class MeetingController:
        end_date: datetime,
        user_id: str,
        room: Room,
+        calendar_event_id: str | None = None,
+        calendar_metadata: dict[str, Any] | None = None,
    ):
        """
        Create a new meeting
@@ -108,9 +126,11 @@ class MeetingController:
            room_mode=room.room_mode,
            recording_type=room.recording_type,
            recording_trigger=room.recording_trigger,
+            calendar_event_id=calendar_event_id,
+            calendar_metadata=calendar_metadata,
        )
        query = meetings.insert().values(**meeting.model_dump())
-        await database.execute(query)
+        await get_database().execute(query)
        return meeting

    async def get_all_active(self) -> list[Meeting]:
@@ -118,7 +138,7 @@ class MeetingController:
        Get active meetings.
        """
        query = meetings.select().where(meetings.c.is_active)
-        return await database.fetch_all(query)
+        return await get_database().fetch_all(query)

    async def get_by_room_name(
        self,
@@ -128,7 +148,7 @@ class MeetingController:
        Get a meeting by room name.
        """
        query = meetings.select().where(meetings.c.room_name == room_name)
-        result = await database.fetch_one(query)
+        result = await get_database().fetch_one(query)
        if not result:
            return None

@@ -137,6 +157,7 @@ class MeetingController:
    async def get_active(self, room: Room, current_time: datetime) -> Meeting:
        """
        Get latest active meeting for a room.
+        For backward compatibility, returns the most recent active meeting.
        """
        end_date = getattr(meetings.c, "end_date")
        query = (
@@ -150,18 +171,59 @@ class MeetingController:
            )
            .order_by(end_date.desc())
        )
-        result = await database.fetch_one(query)
+        result = await get_database().fetch_one(query)
        if not result:
            return None

        return Meeting(**result)

+    async def get_all_active_for_room(
+        self, room: Room, current_time: datetime
+    ) -> list[Meeting]:
+        """
+        Get all active meetings for a room.
+        This supports multiple concurrent meetings per room.
+        """
+        end_date = getattr(meetings.c, "end_date")
+        query = (
+            meetings.select()
+            .where(
+                sa.and_(
+                    meetings.c.room_id == room.id,
+                    meetings.c.end_date > current_time,
+                    meetings.c.is_active,
+                )
+            )
+            .order_by(end_date.desc())
+        )
+        results = await get_database().fetch_all(query)
+        return [Meeting(**result) for result in results]
+
+    async def get_active_by_calendar_event(
+        self, room: Room, calendar_event_id: str, current_time: datetime
+    ) -> Meeting | None:
+        """
+        Get active meeting for a specific calendar event.
+        """
+        query = meetings.select().where(
+            sa.and_(
+                meetings.c.room_id == room.id,
+                meetings.c.calendar_event_id == calendar_event_id,
+                meetings.c.end_date > current_time,
+                meetings.c.is_active,
+            )
+        )
+        result = await get_database().fetch_one(query)
+        if not result:
+            return None
+        return Meeting(**result)
+
    async def get_by_id(self, meeting_id: str, **kwargs) -> Meeting | None:
        """
        Get a meeting by id
        """
        query = meetings.select().where(meetings.c.id == meeting_id)
-        result = await database.fetch_one(query)
+        result = await get_database().fetch_one(query)
        if not result:
            return None
        return Meeting(**result)
@@ -173,7 +235,7 @@ class MeetingController:
        If not found, it will raise a 404 error.
        """
        query = meetings.select().where(meetings.c.id == meeting_id)
-        result = await database.fetch_one(query)
+        result = await get_database().fetch_one(query)
        if not result:
            raise HTTPException(status_code=404, detail="Meeting not found")

@@ -183,9 +245,18 @@ class MeetingController:

        return meeting

+    async def get_by_calendar_event(self, calendar_event_id: str) -> Meeting | None:
+        query = meetings.select().where(
+            meetings.c.calendar_event_id == calendar_event_id
+        )
+        result = await get_database().fetch_one(query)
+        if not result:
+            return None
+        return Meeting(**result)
+
    async def update_meeting(self, meeting_id: str, **kwargs):
        query = meetings.update().where(meetings.c.id == meeting_id).values(**kwargs)
-        await database.execute(query)
+        await get_database().execute(query)


class MeetingConsentController:
@@ -193,7 +264,7 @@ class MeetingConsentController:
        query = meeting_consent.select().where(
            meeting_consent.c.meeting_id == meeting_id
        )
-        results = await database.fetch_all(query)
+        results = await get_database().fetch_all(query)
        return [MeetingConsent(**result) for result in results]

    async def get_by_meeting_and_user(
@@ -204,7 +275,7 @@ class MeetingConsentController:
            meeting_consent.c.meeting_id == meeting_id,
            meeting_consent.c.user_id == user_id,
        )
-        result = await database.fetch_one(query)
+        result = await get_database().fetch_one(query)
        if result is None:
            return None
        return MeetingConsent(**result) if result else None
@@ -226,14 +297,14 @@ class MeetingConsentController:
                    consent_timestamp=consent.consent_timestamp,
                )
            )
-            await database.execute(query)
+            await get_database().execute(query)

            existing.consent_given = consent.consent_given
            existing.consent_timestamp = consent.consent_timestamp
            return existing

        query = meeting_consent.insert().values(**consent.model_dump())
-        await database.execute(query)
+        await get_database().execute(query)
        return consent

    async def has_any_denial(self, meeting_id: str) -> bool:
@@ -242,7 +313,7 @@ class MeetingConsentController:
            meeting_consent.c.meeting_id == meeting_id,
            meeting_consent.c.consent_given.is_(False),
        )
-        result = await database.fetch_one(query)
+        result = await get_database().fetch_one(query)
        return result is not None


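A hedged sketch of how the new lookups above might be combined when someone joins a room (ids are hypothetical, the controller is instantiated directly because the module-level instance name is not shown in this diff, and a connected database is assumed as in the app's startup hooks):

from datetime import datetime, timezone

from reflector.db.meetings import Meeting, MeetingController
from reflector.db.rooms import Room

controller = MeetingController()


async def find_meeting(room: Room, calendar_event_id: str | None) -> Meeting | None:
    """Prefer the meeting tied to a calendar event, else fall back to any active one."""
    now = datetime.now(timezone.utc)
    if calendar_event_id:
        meeting = await controller.get_active_by_calendar_event(room, calendar_event_id, now)
        if meeting:
            return meeting
    # Multiple meetings may now be active concurrently; pick from the full list.
    active = await controller.get_all_active_for_room(room, now)
    return active[0] if active else None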
@@ -1,56 +0,0 @@
from reflector.db import database
from reflector.db.meetings import meetings
from reflector.db.rooms import rooms
from reflector.db.transcripts import transcripts

users_to_migrate = [
    ["123@lifex.pink", "63b727f5-485d-449f-b528-563d779b11ef", None],
    ["ana@monadical.com", "1bae2e4d-5c04-49c2-932f-a86266a6ca13", None],
    ["cspencer@sprocket.org", "614ed0be-392e-488c-bd19-6a9730fd0e9e", None],
    ["daniel.f.lopez.j@gmail.com", "ca9561bd-c989-4a1e-8877-7081cf62ae7f", None],
    ["jenalee@monadical.com", "c7c1e79e-b068-4b28-a9f4-29d98b1697ed", None],
    ["jennifer@rootandseed.com", "f5321727-7546-4b2b-b69d-095a931ef0c4", None],
    ["jose@monadical.com", "221f079c-7ce0-4677-90b7-0359b6315e27", None],
    ["labenclayton@gmail.com", "40078cd0-543c-40e4-9c2e-5ce57a686428", None],
    ["mathieu@monadical.com", "c7a36151-851e-4afa-9fab-aaca834bfd30", None],
    ["michal.flak.96@gmail.com", "3096eb5e-b590-41fc-a0d1-d152c1895402", None],
    ["sara@monadical.com", "31ab0cfe-5d2c-4c7a-84de-a29494714c99", None],
    ["sara@monadical.com", "b871e5f0-754e-447f-9c3d-19f629f0082b", None],
    ["sebastian@monadical.com", "f024f9d0-15d0-480f-8529-43959fc8b639", None],
    ["sergey@monadical.com", "5c4798eb-b9ab-4721-a540-bd96fc434156", None],
    ["sergey@monadical.com", "9dd8a6b4-247e-48fe-b1fb-4c84dd3c01bc", None],
    ["transient.tran@gmail.com", "617ba2d3-09b6-4b1f-a435-a7f41c3ce060", None],
]


async def migrate_user(email, user_id):
    # if the email matches an entry in the users_to_migrate list,
    # reassign all transcripts/rooms/meetings to the new user_id

    user_ids = [user[1] for user in users_to_migrate if user[0] == email]
    if not user_ids:
        return

    # do not migrate back
    if user_id in user_ids:
        return

    for old_user_id in user_ids:
        query = (
            transcripts.update()
            .where(transcripts.c.user_id == old_user_id)
            .values(user_id=user_id)
        )
        await database.execute(query)

        query = (
            rooms.update().where(rooms.c.user_id == old_user_id).values(user_id=user_id)
        )
        await database.execute(query)

        query = (
            meetings.update()
            .where(meetings.c.user_id == old_user_id)
            .values(user_id=user_id)
        )
        await database.execute(query)
Some files were not shown because too many files have changed in this diff.