sync with parent

Emit multitrack pipeline events
dailico track merge vibe
2025-12-21 12:49:06 +00:00 · 2025-10-21 11:59:26 -04:00 · 2025-10-21 16:31:31 +02:00 · 2025-10-21 10:30:19 -04:00 · 2025-10-21 13:33:31 +02:00 · 2025-10-17 10:00:40 -04:00
183 changed files with 10389 additions and 18344 deletions
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,90 @@
 # Manually triggered workflow that builds the `server` image once per CPU
 # architecture, pushes each build to Amazon ECR under an arch-specific tag,
 # and then publishes a combined multi-arch `latest` manifest.
 # NOTE(review): despite the name, the steps visible here only build and push
 # images to ECR; no ECS service update is performed in this view.
 name: Deploy to Amazon ECS
 # Runs only when started by hand (no push/PR triggers).
 on: [workflow_dispatch]
 env:
  # Full repository URI, for reference:
  # 950402358378.dkr.ecr.us-east-1.amazonaws.com/reflector
  AWS_REGION: us-east-1
  ECR_REPOSITORY: reflector
 jobs:
  # One build per matrix entry; each entry selects its own runner label so the
  # image is built on a runner chosen for that architecture.
  build:
    strategy:
      matrix:
        include:
          - platform: linux/amd64
            runner: linux-amd64
            arch: amd64
          - platform: linux/arm64
            runner: linux-arm64
            arch: arm64
    runs-on: ${{ matrix.runner }}
    permissions:
      contents: read
    # Exposes the ECR registry host from the login step as a job output.
    # The create-manifest job below does not consume it; it derives the
    # registry host itself.
    outputs:
      registry: ${{ steps.login-ecr.outputs.registry }}
    steps:
      - uses: actions/checkout@v4
      # Authenticates with access keys stored as repository secrets.
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # Builds the `server` directory for this entry's platform and pushes it
      # as `<registry>/<repository>:latest-<arch>`.
      - name: Build and push ${{ matrix.arch }}
        uses: docker/build-push-action@v5
        with:
          context: server
          platforms: ${{ matrix.platform }}
          push: true
          tags: ${{ steps.login-ecr.outputs.registry }}/${{ env.ECR_REPOSITORY }}:latest-${{ matrix.arch }}
          # GitHub Actions cache, scoped per architecture so the two builds
          # use separate cache entries.
          cache-from: type=gha,scope=${{ matrix.arch }}
          cache-to: type=gha,mode=max,scope=${{ matrix.arch }}
          github-token: ${{ secrets.GHA_CACHE_TOKEN }}
          # NOTE(review): presumably disabled so each per-arch tag is a plain
          # image manifest that `docker manifest create` can combine - confirm.
          provenance: false
  # Runs after every matrix build has finished and stitches the two
  # arch-specific tags into a single `latest` manifest list.
  create-manifest:
    runs-on: ubuntu-latest
    needs: [build]
    permissions:
      # NOTE(review): no step visible in this job uses the deployments
      # permission - confirm it is required.
      deployments: write
      contents: read
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Login to Amazon ECR
        uses: aws-actions/amazon-ecr-login@v2
      # The script builds the registry host as
      # `<registryId>.dkr.ecr.<region>.amazonaws.com`, creates the `latest`
      # manifest from the `latest-amd64` and `latest-arm64` tags, and pushes it.
      - name: Create and push multi-arch manifest
        run: |
          # Get the registry URL (since we can't easily access job outputs in matrix)
          ECR_REGISTRY=$(aws ecr describe-registry --query 'registryId' --output text).dkr.ecr.${{ env.AWS_REGION }}.amazonaws.com
          docker manifest create \
            $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest \
            $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest-amd64 \
            $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest-arm64
          docker manifest push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest
          echo "✅ Multi-arch manifest pushed: $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:latest"
--- a/.github/workflows/dockerhub-backend.yml
+++ b/.github/workflows/dockerhub-backend.yml
@@ -1,31 +1,35 @@
-name: Build and Push Backend Docker Image (Docker Hub)
+name: Build and Push Frontend Docker Image
 on:
  push:
-    tags:
+    branches:
-      - "v*"
+      - main
    paths:
      - 'www/**'
      - '.github/workflows/docker-frontend.yml'
  workflow_dispatch:
 env:
-  REGISTRY: docker.io
+  REGISTRY: ghcr.io
-  IMAGE_NAME: monadicalsas/reflector-backend
+  IMAGE_NAME: ${{ github.repository }}-frontend
 jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
-      - name: Log in to Docker Hub
+      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
-          username: monadicalsas
+          username: ${{ github.actor }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
@@ -34,7 +38,7 @@ jobs:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
-            type=ref,event=tag
+            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable={{is_default_branch}}
      - name: Set up Docker Buildx
@@ -43,8 +47,8 @@ jobs:
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
-          context: ./server
+          context: ./www
-          file: ./server/Dockerfile
+          file: ./www/Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/.github/workflows/dockerhub-frontend.yml
+++ b/.github/workflows/dockerhub-frontend.yml
@@ -1,70 +0,0 @@
 name: Build and Push Frontend Docker Image
 on:
  push:
    tags:
      - "v*"
  workflow_dispatch:
 env:
  REGISTRY: docker.io
  IMAGE_NAME: monadicalsas/reflector-frontend
 jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: monadicalsas
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=raw,value=latest,enable={{is_default_branch}}
          github-token: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: ./www
          file: ./www/Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          platforms: linux/amd64,linux/arm64
  deploy:
    needs: build-and-push
    runs-on: ubuntu-latest
    if: success()
    strategy:
      matrix:
        environment: [reflector-monadical, reflector-media]
    environment: ${{ matrix.environment }}
    steps:
      - name: Trigger Coolify deployment
        run: |
          curl -X POST "${{ secrets.COOLIFY_WEBHOOK_URL }}" \
            -H "Content-Type: application/json" \
            -H "Authorization: Bearer ${{ secrets.COOLIFY_WEBHOOK_TOKEN }}" \
            -f || (echo "Failed to trigger Coolify deployment for ${{ matrix.environment }}" && exit 1)
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,3 @@ CLAUDE.local.md
 www/.env.development
 www/.env.production
 .playwright-mcp
 .secrets
--- a/.secrets.example
+++ b/.secrets.example
@@ -1,24 +0,0 @@
 # Example secrets file for GitHub Actions workflows
 # Copy this to .secrets and fill in your values
 # These secrets should be configured in GitHub repository settings:
 # Settings > Secrets and variables > Actions
 # DockerHub Configuration (required for frontend and backend deployment)
 # Create a Docker Hub access token at https://hub.docker.com/settings/security
 # Username: monadicalsas
 DOCKERHUB_TOKEN=your-dockerhub-access-token
 # GitHub Token (required for frontend and backend deployment)
 # Used by docker/metadata-action for extracting image metadata
 # Can use the default GITHUB_TOKEN or create a personal access token
 GITHUB_TOKEN=your-github-token-or-use-default-GITHUB_TOKEN
 # Coolify Deployment Webhook (required for frontend deployment)
 # Used to trigger automatic deployment after image push
 # Configure these secrets in GitHub Environments:
 # Each environment should have:
 #   - COOLIFY_WEBHOOK_URL: The webhook URL for that specific deployment
 #   - COOLIFY_WEBHOOK_TOKEN: The webhook token (can be the same for both if using same token)
 # Optional: GitHub Actions Cache Token (for local testing with act)
 GHA_CACHE_TOKEN=your-github-token-or-empty
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,152 +1,5 @@
 # Changelog
 ## [0.24.0](https://github.com/Monadical-SAS/reflector/compare/v0.23.2...v0.24.0) (2025-12-18)
 ### Features
 * identify action items ([#790](https://github.com/Monadical-SAS/reflector/issues/790)) ([964cd78](https://github.com/Monadical-SAS/reflector/commit/964cd78bb699d83d012ae4b8c96565df25b90a5d))
 ### Bug Fixes
 * automatically reprocess daily recordings ([#797](https://github.com/Monadical-SAS/reflector/issues/797)) ([5f458aa](https://github.com/Monadical-SAS/reflector/commit/5f458aa4a7ec3d00ca5ec49d62fcc8ad232b138e))
 * daily video optimisation ([#789](https://github.com/Monadical-SAS/reflector/issues/789)) ([16284e1](https://github.com/Monadical-SAS/reflector/commit/16284e1ac3faede2b74f0d91b50c0b5612af2c35))
 * main menu login ([#800](https://github.com/Monadical-SAS/reflector/issues/800)) ([0bc971b](https://github.com/Monadical-SAS/reflector/commit/0bc971ba966a52d719c8c240b47dc7b3bdea4391))
 * retry on workflow timeout ([#798](https://github.com/Monadical-SAS/reflector/issues/798)) ([5f7dfad](https://github.com/Monadical-SAS/reflector/commit/5f7dfadabd3e8017406ad3720ba495a59963ee34))
 ## [0.23.2](https://github.com/Monadical-SAS/reflector/compare/v0.23.1...v0.23.2) (2025-12-11)
 ### Bug Fixes
 * build on push tags ([#785](https://github.com/Monadical-SAS/reflector/issues/785)) ([d7f140b](https://github.com/Monadical-SAS/reflector/commit/d7f140b7d1f4660d5da7a0da1357f68869e0b5cd))
 ## [0.23.1](https://github.com/Monadical-SAS/reflector/compare/v0.23.0...v0.23.1) (2025-12-11)
 ### Bug Fixes
 * populate room_name in transcript GET endpoint ([#783](https://github.com/Monadical-SAS/reflector/issues/783)) ([0eba147](https://github.com/Monadical-SAS/reflector/commit/0eba1470181c7b9e0a79964a1ef28c09bcbdd9d7))
 ## [0.23.0](https://github.com/Monadical-SAS/reflector/compare/v0.22.4...v0.23.0) (2025-12-10)
 ### Features
 * dockerhub ci ([#772](https://github.com/Monadical-SAS/reflector/issues/772)) ([00549f1](https://github.com/Monadical-SAS/reflector/commit/00549f153ade922cf4cb6c5358a7d11a39c426d2))
 * llm retries ([#739](https://github.com/Monadical-SAS/reflector/issues/739)) ([61f0e29](https://github.com/Monadical-SAS/reflector/commit/61f0e29d4c51eab54ee67af92141fbb171e8ccaa))
 ### Bug Fixes
 * celery inspect bug sidestep in restart script ([#766](https://github.com/Monadical-SAS/reflector/issues/766)) ([ec17ed7](https://github.com/Monadical-SAS/reflector/commit/ec17ed7b587cf6ee143646baaee67a7c017044d4))
 * deploy frontend to coolify ([#779](https://github.com/Monadical-SAS/reflector/issues/779)) ([91650ec](https://github.com/Monadical-SAS/reflector/commit/91650ec65f65713faa7ee0dcfb75af427b7c4ba0))
 * hide rooms settings instead of disabling ([#763](https://github.com/Monadical-SAS/reflector/issues/763)) ([3ad78be](https://github.com/Monadical-SAS/reflector/commit/3ad78be7628c0d029296b301a0e87236c76b7598))
 * return participant emails from transcript endpoint ([#769](https://github.com/Monadical-SAS/reflector/issues/769)) ([d3a5cd1](https://github.com/Monadical-SAS/reflector/commit/d3a5cd12d2d0d9c32af2d5bd9322e030ef69b85d))
 ## [0.22.4](https://github.com/Monadical-SAS/reflector/compare/v0.22.3...v0.22.4) (2025-12-02)
 ### Bug Fixes
 * Multitrack mixdown optimisation 2 ([#764](https://github.com/Monadical-SAS/reflector/issues/764)) ([bd5df1c](https://github.com/Monadical-SAS/reflector/commit/bd5df1ce2ebf35d7f3413b295e56937a9a28ef7b))
 ## [0.22.3](https://github.com/Monadical-SAS/reflector/compare/v0.22.2...v0.22.3) (2025-12-02)
 ### Bug Fixes
 * align daily room settings ([#759](https://github.com/Monadical-SAS/reflector/issues/759)) ([28f87c0](https://github.com/Monadical-SAS/reflector/commit/28f87c09dc459846873d0dde65b03e3d7b2b9399))
 ## [0.22.2](https://github.com/Monadical-SAS/reflector/compare/v0.22.1...v0.22.2) (2025-12-02)
 ### Bug Fixes
 * daily auto refresh fix ([#755](https://github.com/Monadical-SAS/reflector/issues/755)) ([fe47c46](https://github.com/Monadical-SAS/reflector/commit/fe47c46489c5aa0cc538109f7559cc9accb35c01))
 * Skip mixdown for multitrack ([#760](https://github.com/Monadical-SAS/reflector/issues/760)) ([b51b7aa](https://github.com/Monadical-SAS/reflector/commit/b51b7aa9176c1a53ba57ad99f5e976c804a1e80c))
 ## [0.22.1](https://github.com/Monadical-SAS/reflector/compare/v0.22.0...v0.22.1) (2025-11-27)
 ### Bug Fixes
 * participants update from daily ([#749](https://github.com/Monadical-SAS/reflector/issues/749)) ([7f0b728](https://github.com/Monadical-SAS/reflector/commit/7f0b728991c1b9f9aae702c96297eae63b561ef5))
 ## [0.22.0](https://github.com/Monadical-SAS/reflector/compare/v0.21.0...v0.22.0) (2025-11-26)
 ### Features
 * Multitrack segmentation ([#747](https://github.com/Monadical-SAS/reflector/issues/747)) ([d63040e](https://github.com/Monadical-SAS/reflector/commit/d63040e2fdc07e7b272e85a39eb2411cd6a14798))
 ## [0.21.0](https://github.com/Monadical-SAS/reflector/compare/v0.20.0...v0.21.0) (2025-11-26)
 ### Features
 * add transcript format parameter to GET endpoint ([#709](https://github.com/Monadical-SAS/reflector/issues/709)) ([f6ca075](https://github.com/Monadical-SAS/reflector/commit/f6ca07505f34483b02270a2ef3bd809e9d2e1045))
 ## [0.20.0](https://github.com/Monadical-SAS/reflector/compare/v0.19.0...v0.20.0) (2025-11-25)
 ### Features
 * link transcript participants ([#737](https://github.com/Monadical-SAS/reflector/issues/737)) ([9bec398](https://github.com/Monadical-SAS/reflector/commit/9bec39808fc6322612d8b87e922a6f7901fc01c1))
 * transcript restart script ([#742](https://github.com/Monadical-SAS/reflector/issues/742)) ([86d5e26](https://github.com/Monadical-SAS/reflector/commit/86d5e26224bb55a0f1cc785aeda52065bb92ee6f))
 ## [0.19.0](https://github.com/Monadical-SAS/reflector/compare/v0.18.0...v0.19.0) (2025-11-25)
 ### Features
 * dailyco api module ([#725](https://github.com/Monadical-SAS/reflector/issues/725)) ([4287f8b](https://github.com/Monadical-SAS/reflector/commit/4287f8b8aeee60e51db7539f4dcbda5f6e696bd8))
 * dailyco poll ([#730](https://github.com/Monadical-SAS/reflector/issues/730)) ([8e438ca](https://github.com/Monadical-SAS/reflector/commit/8e438ca285152bd48fdc42767e706fb448d3525c))
 * multitrack cli ([#735](https://github.com/Monadical-SAS/reflector/issues/735)) ([11731c9](https://github.com/Monadical-SAS/reflector/commit/11731c9d38439b04e93b1c3afbd7090bad11a11f))
 ### Bug Fixes
 * default platform fix ([#736](https://github.com/Monadical-SAS/reflector/issues/736)) ([c442a62](https://github.com/Monadical-SAS/reflector/commit/c442a627873ca667656eeaefb63e54ab10b8d19e))
 * parakeet vad not getting the end timestamp ([#728](https://github.com/Monadical-SAS/reflector/issues/728)) ([18ed713](https://github.com/Monadical-SAS/reflector/commit/18ed7133693653ef4ddac6c659a8c14b320d1657))
 * start raw tracks recording ([#729](https://github.com/Monadical-SAS/reflector/issues/729)) ([3e47c2c](https://github.com/Monadical-SAS/reflector/commit/3e47c2c0573504858e0d2e1798b6ed31f16b4a5d))
 ## [0.18.0](https://github.com/Monadical-SAS/reflector/compare/v0.17.0...v0.18.0) (2025-11-14)
 ### Features
 * daily QOL: participants dictionary ([#721](https://github.com/Monadical-SAS/reflector/issues/721)) ([b20cad7](https://github.com/Monadical-SAS/reflector/commit/b20cad76e69fb6a76405af299a005f1ddcf60eae))
 ### Bug Fixes
 * add processing page to file upload and reprocessing ([#650](https://github.com/Monadical-SAS/reflector/issues/650)) ([28a7258](https://github.com/Monadical-SAS/reflector/commit/28a7258e45317b78e60e6397be2bc503647eaace))
 * copy transcript ([#674](https://github.com/Monadical-SAS/reflector/issues/674)) ([a9a4f32](https://github.com/Monadical-SAS/reflector/commit/a9a4f32324f66c838e081eee42bb9502f38c1db1))
 ## [0.17.0](https://github.com/Monadical-SAS/reflector/compare/v0.16.0...v0.17.0) (2025-11-13)
 ### Features
 * add API key management UI ([#716](https://github.com/Monadical-SAS/reflector/issues/716)) ([372202b](https://github.com/Monadical-SAS/reflector/commit/372202b0e1a86823900b0aa77be1bfbc2893d8a1))
 * daily.co support as alternative to whereby ([#691](https://github.com/Monadical-SAS/reflector/issues/691)) ([1473fd8](https://github.com/Monadical-SAS/reflector/commit/1473fd82dc472c394cbaa2987212ad662a74bcac))
 ## [0.16.0](https://github.com/Monadical-SAS/reflector/compare/v0.15.0...v0.16.0) (2025-10-24)
 ### Features
 * search date filter ([#710](https://github.com/Monadical-SAS/reflector/issues/710)) ([962c40e](https://github.com/Monadical-SAS/reflector/commit/962c40e2b6428ac42fd10aea926782d7a6f3f902))
 ## [0.15.0](https://github.com/Monadical-SAS/reflector/compare/v0.14.0...v0.15.0) (2025-10-20)
 ### Features
 * api tokens  ([#705](https://github.com/Monadical-SAS/reflector/issues/705)) ([9a258ab](https://github.com/Monadical-SAS/reflector/commit/9a258abc0209b0ac3799532a507ea6a9125d703a))
 ## [0.14.0](https://github.com/Monadical-SAS/reflector/compare/v0.13.1...v0.14.0) (2025-10-08)
--- a/CODER_BRIEFING.md
+++ b/CODER_BRIEFING.md
@@ -0,0 +1,345 @@
 # Multi-Provider Video Platform Implementation - Coder Briefing
 ## Your Mission
 Implement multi-provider video platform support in Reflector, allowing the system to work with both Whereby and Daily.co video conferencing providers. The goal is to abstract the current Whereby-only implementation and add Daily.co as a second provider, with the ability to switch between them via environment variables.
 **Branch:** `igor/dailico-2` (you're already on it)
 **Estimated Time:** 12-16 hours (senior engineer)
 **Complexity:** Medium-High (requires careful integration with existing codebase)
 ---
 ## What You Have
 ### 1. **PLAN.md** - Your Technical Specification (2,452 lines)
   - Complete step-by-step implementation guide
   - All code examples you need
   - Architecture diagrams and design rationale
   - Testing strategy and success metrics
   - **Read this first** to understand the overall approach
 ### 2. **IMPLEMENTATION_GUIDE.md** - Your Practical Guide
   - What to copy vs. adapt vs. rewrite
   - Common pitfalls and how to avoid them
   - Verification checklists for each phase
   - Decision trees for implementation choices
   - **Use this as your day-to-day reference**
 ### 3. **Reference Implementation** - `./reflector-dailyco-reference/`
   - Working implementation from 2.5 months ago
   - Good architecture and patterns
   - **BUT:** 91 commits behind current main, DO NOT merge directly
   - Use for inspiration and code patterns only
 ---
 ## Critical Context: Why Not Just Merge?
 The reference branch (`origin/igor/feat-dailyco`) was started on August 1, 2025 and is now severely diverged from main:
 - **91 commits behind main**
 - Main has 12x more changes (45,840 insertions vs 3,689)
 - Main added: calendar integration, webhooks, full-text search, React Query migration, security fixes
 - Reference removed: features that main still has and needs
 **Merging would be a disaster.** We're implementing fresh on current main, using the reference for validated patterns.
 ---
 ## High-Level Approach
 ### Phase 1: Analysis (2 hours)
 - Study current Whereby integration
 - Define abstraction requirements
 - Create standard data models
 ### Phase 2: Abstraction Layer (4-5 hours)
 - Build platform abstraction (base class, registry, factory)
 - Extract Whereby into the abstraction
 - Update database schema (add `platform` field)
 - Integrate into rooms.py **without breaking calendar/webhooks**
 ### Phase 3: Daily.co Implementation (4-5 hours)
 - Implement Daily.co client
 - Add webhook handler
 - Create frontend components (rewrite API calls for React Query)
 - Add recording processing
 ### Phase 4: Testing (2-3 hours)
 - Unit tests for platform abstraction
 - Integration tests for webhooks
 - Manual testing with both providers
 ---
 ## Key Files You'll Touch
 ### Backend (New)
 ```
 server/reflector/video_platforms/
 ├── __init__.py
 ├── base.py              ← Abstract base class
 ├── models.py            ← Platform, MeetingData, VideoPlatformConfig
 ├── registry.py          ← Platform registration system
 ├── factory.py           ← Client creation and config
 ├── whereby.py           ← Whereby client wrapper
 ├── daily.py             ← Daily.co client
 └── mock.py              ← Mock client for testing
 server/reflector/views/daily.py       ← Daily.co webhooks
 server/tests/test_video_platforms.py  ← Platform tests
 server/tests/test_daily_webhook.py    ← Webhook tests
 ```
 ### Backend (Modified - Careful!)
 ```
 server/reflector/settings.py          ← Add Daily.co settings
 server/reflector/db/rooms.py          ← Add platform field, PRESERVE calendar fields
 server/reflector/db/meetings.py       ← Add platform field
 server/reflector/views/rooms.py       ← Integrate abstraction, PRESERVE calendar/webhooks
 server/reflector/worker/process.py    ← Add process_recording_from_url task
 server/reflector/app.py               ← Register daily router
 server/env.example                    ← Document new env vars
 ```
 ### Frontend (New)
 ```
 www/app/[roomName]/components/
 ├── RoomContainer.tsx    ← Platform router
 ├── DailyRoom.tsx        ← Daily.co component (rewrite API calls!)
 └── WherebyRoom.tsx      ← Extract existing logic
 ```
 ### Frontend (Modified)
 ```
 www/app/[roomName]/page.tsx           ← Use RoomContainer
 www/package.json                      ← Add @daily-co/daily-js
 ```
 ### Database
 ```
 server/migrations/versions/XXXXXX_add_platform_support.py  ← Generate fresh migration
 ```
 ---
 ## Critical Warnings ⚠️
 ### 1. **DO NOT Copy Database Migrations**
 The reference migration has the wrong `down_revision` and is based on old schema.
 ```bash
 # Instead:
 cd server
 uv run alembic revision -m "add_platform_support"
 # Then edit the generated file
 ```
 ### 2. **DO NOT Remove Main's Features**
 Main has calendar integration, webhooks, ICS sync that reference doesn't have.
 When modifying `rooms.py`, only change meeting creation logic, preserve everything else.
 ### 3. **DO NOT Copy Frontend API Calls**
 Reference uses old OpenAPI client. Main uses React Query.
 Check how main currently makes API calls and replicate that pattern.
 ### 4. **DO NOT Copy package.json/migrations**
 These files are severely outdated in reference.
 ### 5. **Preserve Type Safety**
 Use `TYPE_CHECKING` imports to avoid circular dependencies:
 ```python
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from reflector.db.rooms import Room
 ```
 ---
 ## How to Start
 ### Day 1 Morning: Setup & Understanding (2-3 hours)
 ```bash
 # 1. Verify you're on the right branch
 git branch
 # Should show: igor/dailico-2
 # 2. Read the docs (in order)
 # - PLAN.md (skim to understand scope, read Phase 1 carefully)
 # - IMPLEMENTATION_GUIDE.md (read fully, bookmark it)
 # 3. Study current Whereby integration
 cat server/reflector/views/rooms.py | grep -A 20 "whereby"
 cat 'www/app/[roomName]/page.tsx'
 # 4. Check reference implementation structure
 ls -la reflector-dailyco-reference/server/reflector/video_platforms/
 ```
 ### Day 1 Afternoon: Phase 1 Execution (2-3 hours)
 ```bash
 # 5. Copy video_platforms directory from reference
 cp -r reflector-dailyco-reference/server/reflector/video_platforms/ \
      server/reflector/
 # 6. Review and fix imports
 cd server
 uv run ruff check reflector/video_platforms/
 # 7. Add settings to settings.py (see PLAN.md Phase 2.7)
 # 8. Test imports work
 uv run python -c "from reflector.video_platforms import create_platform_client; print('OK')"
 ```
 ### Day 2: Phase 2 - Database & Integration (4-5 hours)
 ```bash
 # 9. Generate migration
 uv run alembic revision -m "add_platform_support"
 # Edit the file following PLAN.md Phase 2.8
 # 10. Update Room/Meeting models
 # Add platform field, PRESERVE all existing fields
 # 11. Integrate into rooms.py
 # Carefully modify meeting creation, preserve calendar/webhooks
 # 12. Add Daily.co webhook handler
 cp reflector-dailyco-reference/server/reflector/views/daily.py \
   server/reflector/views/
 # Register in app.py
 ```
 ### Day 3: Phase 3 - Frontend & Testing (4-5 hours)
 ```bash
 # 13. Create frontend components
 mkdir -p 'www/app/[roomName]/components'
 # 14. Add Daily.co dependency
 cd www
 pnpm add @daily-co/daily-js@^0.81.0
 # 15. Create RoomContainer, DailyRoom, WherebyRoom
 # IMPORTANT: Rewrite API calls using React Query patterns
 # 16. Regenerate types
 pnpm openapi
 # 17. Copy and adapt tests
 cp reflector-dailyco-reference/server/tests/test_*.py server/tests/
 # 18. Run tests
 cd server
 REDIS_HOST=localhost \
 CELERY_BROKER_URL=redis://localhost:6379/1 \
 uv run pytest tests/test_video_platforms.py -v
 ```
 ---
 ## Verification Checklist
 After implementation, all of these must pass:
 **Backend:**
 - [ ] `cd server && uv run ruff check .` passes
 - [ ] `uv run alembic upgrade head` works cleanly
 - [ ] `uv run pytest tests/test_video_platforms.py` passes
 - [ ] Can import: `from reflector.video_platforms import create_platform_client`
 - [ ] Settings has all Daily.co variables
 **Frontend:**
 - [ ] `cd www && pnpm lint` passes
 - [ ] No TypeScript errors
 - [ ] `pnpm openapi` generates platform field
 - [ ] No `@ts-ignore` for platform field
 **Integration:**
 - [ ] Whereby meetings still work (existing flow unchanged)
 - [ ] Calendar/webhook features still work in rooms.py
 - [ ] env.example documents all new variables
 ---
 ## When You're Stuck
 ### Check These Resources:
 1. **PLAN.md** - Detailed code examples for your exact scenario
 2. **IMPLEMENTATION_GUIDE.md** - Common pitfalls section
 3. **Reference code** - See how it was solved before
 4. **Git diff** - Compare reference to your implementation
 ### Compare Files:
 ```bash
 # See what reference did
 diff reflector-dailyco-reference/server/reflector/views/rooms.py \
     server/reflector/views/rooms.py
 # See what changed in main since reference branch
 git log --oneline --since="2025-08-01" -- server/reflector/views/rooms.py
 ```
 ### Common Issues:
 - **Circular imports:** Use `TYPE_CHECKING` pattern
 - **Tests fail with postgres error:** Use `REDIS_HOST=localhost` env vars
 - **Frontend API calls broken:** Check current React Query patterns in main
 - **Migrations fail:** Ensure you generated fresh, not copied
 ---
 ## Success Looks Like
 When you're done:
 - ✅ All tests pass
 - ✅ Linting passes
 - ✅ Can create Whereby meetings (unchanged behavior)
 - ✅ Can create Daily.co meetings (with env vars)
 - ✅ Calendar/webhooks still work
 - ✅ Frontend has no TypeScript errors
 - ✅ Platform selection via environment variables works
 ---
 ## Communication
 If you need clarification on requirements, have questions about architecture decisions, or find issues with the spec, document them clearly with:
 - What you expected
 - What you found
 - Your proposed solution
 The PLAN.md document is comprehensive but you may find edge cases. Use your engineering judgment and document decisions.
 ---
 ## Final Notes
 **This is not a simple copy-paste job.** You're doing careful integration work where you need to:
 - Understand the abstraction pattern (PLAN.md)
 - Preserve all of main's features
 - Adapt reference code to current patterns
 - Think about edge cases and testing
 Take your time with Phase 2 (rooms.py integration) - that's where most bugs will come from if you accidentally break calendar/webhook features.
 **Good luck! You've got comprehensive specs, working reference code, and a clean starting point. You can do this.**
 ---
 ## Quick Reference
 ```bash
 # Your workspace
 ├── PLAN.md                        ← Complete technical spec (read first)
 ├── IMPLEMENTATION_GUIDE.md        ← Practical guide (bookmark this)
 ├── CODER_BRIEFING.md             ← This file
 └── reflector-dailyco-reference/   ← Reference implementation (inspiration only)
 # Key commands
 cd server && uv run ruff check .                    # Lint backend
 cd www && pnpm lint                                  # Lint frontend
 cd server && uv run alembic revision -m "..."       # Create migration
 cd www && pnpm openapi                              # Regenerate types
 cd server && uv run pytest -v                       # Run tests
 ```
--- a/IMPLEMENTATION_GUIDE.md
+++ b/IMPLEMENTATION_GUIDE.md
@@ -0,0 +1,489 @@
 # Daily.co Implementation Guide
 ## Overview
 Implement multi-provider video platform support (Whereby + Daily.co) following PLAN.md.
 ## Reference Code Location
 - **Reference branch:** `origin/igor/feat-dailyco` (on remote)
 - **Worktree location:** `./reflector-dailyco-reference/`
 - **Status:** Reference only - DO NOT merge the branch; copy only the files marked copy-safe below and adapt everything else
 ## What Exists in Reference Branch (For Inspiration)
 ### ✅ Can Use As Reference (Well-Implemented)
 ```
 server/reflector/video_platforms/
 ├── base.py              ← Platform abstraction (good design, copy-safe)
 ├── models.py            ← Data models (copy-safe)
 ├── registry.py          ← Registry pattern (copy-safe)
 ├── factory.py           ← Factory pattern (needs settings updates)
 ├── whereby.py           ← Whereby client (needs adaptation)
 ├── daily.py             ← Daily.co client (needs adaptation)
 └── mock.py              ← Mock client (copy-safe for tests)
 server/reflector/views/daily.py       ← Webhook handler (needs adaptation)
 server/tests/test_video_platforms.py  ← Tests (good reference)
 server/tests/test_daily_webhook.py    ← Tests (good reference)
 www/app/[roomName]/components/
 ├── RoomContainer.tsx    ← Platform router (needs React Query)
 ├── DailyRoom.tsx        ← Daily component (needs React Query)
 └── WherebyRoom.tsx      ← Whereby extraction (needs React Query)
 ```
 ### ⚠️ Needs Significant Changes (Use Logic Only)
 - `server/reflector/db/rooms.py` - Reference removed calendar/webhook fields that main has
 - `server/reflector/db/meetings.py` - Same issue, and `user_id` handling differs (main removed the meeting user ID)
 - `server/reflector/views/rooms.py` - Main has calendar integration, webhooks, ICS sync
 - `server/reflector/worker/process.py` - Main has different recording flow
 - Migration files - Must regenerate against current main schema
 ### ❌ Do NOT Use (Outdated/Incompatible)
 - `package.json`/`pnpm-lock.yaml` - Main uses different dependency versions
 - Frontend API client calls - Main uses React Query (reference uses old OpenAPI client)
 - Database migrations - Must create new ones from scratch
 - Any files that delete features present in main (search, calendar, webhooks)
 ## Key Differences: Reference vs Current Main
 | Aspect | Reference Branch | Current Main | Action Required |
 |--------|------------------|--------------|-----------------|
 | **API client** | Old OpenAPI generated | React Query hooks | Rewrite all API calls |
 | **Database schema** | Simplified (removed features) | Has calendar, webhooks, full-text search | Merge carefully, preserve main features |
 | **Settings** | Aug 2025 structure | Current structure | Adapt carefully |
 | **Migrations** | Branched from Aug 1 | Current main (91+ commits ahead) | Regenerate from scratch |
 | **Frontend deps** | `@daily-co/daily-js@0.81.0` | Check current versions | Update to compatible versions |
 | **Package manager** | yarn | pnpm (maybe both?) | Use what main uses |
 ## Branch Divergence Analysis
 **The reference branch is 91 commits behind main and has severely diverged:**
 - Reference: 8 commits, 3,689 insertions, 425 deletions
 - Main since divergence: 320 files changed, 45,840 insertions, 16,827 deletions
 - **Main has 12x more changes**
 **Major features in main that reference lacks:**
 1. Calendar integration (ICS sync with rooms)
 2. Self-hosted GPU API infrastructure
 3. Frontend OpenAPI React Query migration
 4. Full-text search (backend + frontend)
 5. Webhook system for room events
 6. Environment variable migration
 7. Security fixes and auth improvements
 8. Docker production frontend
 9. Meeting user ID removal (schema change)
 10. NextJS version upgrades
 **High conflict risk files:**
 - `server/reflector/views/rooms.py` - 12x more changes in main
 - `server/reflector/db/rooms.py` - Main added 7+ fields
 - `www/package.json` - NextJS major version bump
 - Database migrations - 20+ new migrations in main
 ## Implementation Approach
 ### Phase 1: Copy Clean Abstractions (1-2 hours)
 **Files to copy directly from reference:**
 ```bash
 # Core abstraction (review but mostly safe to copy)
 cp -r reflector-dailyco-reference/server/reflector/video_platforms/ \
      server/reflector/
 # BUT review each file for:
 # - Import paths (make sure they match current main)
 # - Settings references (adapt to current settings.py)
 # - Type imports (ensure no circular dependencies)
 ```
 **After copying, immediately:**
 ```bash
 cd server
 # Check for issues
 uv run ruff check reflector/video_platforms/
 # Fix any import errors or type issues
 ```
 ### Phase 2: Adapt to Current Main (2-3 hours)
 **2.1 Settings Integration**
 File: `server/reflector/settings.py`
 Add at the appropriate location (near existing Whereby settings):
 ```python
 # Daily.co API Integration (NEW)
 DAILY_API_KEY: str | None = None
 DAILY_WEBHOOK_SECRET: str | None = None
 DAILY_SUBDOMAIN: str | None = None
 AWS_DAILY_S3_BUCKET: str | None = None
 AWS_DAILY_S3_REGION: str = "us-west-2"
 AWS_DAILY_ROLE_ARN: str | None = None
 # Platform Migration Feature Flags (NEW)
 DAILY_MIGRATION_ENABLED: bool = False  # Conservative default
 DAILY_MIGRATION_ROOM_IDS: list[str] = []
 DEFAULT_VIDEO_PLATFORM: Literal["whereby", "daily"] = "whereby"
 ```
 **2.2 Database Migration**
 ⚠️ **CRITICAL: Do NOT copy migration from reference**
 Generate new migration:
 ```bash
 cd server
 uv run alembic revision -m "add_platform_support"
 ```
 Edit the generated migration file to add `platform` column:
 ```python
 def upgrade():
    with op.batch_alter_table("room", schema=None) as batch_op:
        batch_op.add_column(
            sa.Column("platform", sa.String(), nullable=False, server_default="whereby")
        )
    with op.batch_alter_table("meeting", schema=None) as batch_op:
        batch_op.add_column(
            sa.Column("platform", sa.String(), nullable=False, server_default="whereby")
        )
 ```
 **2.3 Update Database Models**
 File: `server/reflector/db/rooms.py`
 Add platform field (preserve all existing fields from main):
 ```python
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from reflector.video_platforms.models import Platform
 class Room:
    # ... ALL existing fields from main (calendar, webhooks, etc.) ...
    # NEW: Platform field
    platform: "Platform" = sqlalchemy.Column(
        sqlalchemy.String,
        nullable=False,
        server_default="whereby",
    )
 ```
 File: `server/reflector/db/meetings.py`
 Same approach - add platform field, preserve everything from main.
 **2.4 Integrate Platform Abstraction into rooms.py**
 ⚠️ **This is the most delicate part - main has calendar/webhook features**
 File: `server/reflector/views/rooms.py`
 Strategy:
 1. Add imports at top
 2. Modify meeting creation logic only
 3. Preserve all calendar/webhook/ICS logic from main
 ```python
 # Add imports
 from reflector.video_platforms import (
    create_platform_client,
    get_platform_for_room,
 )
 # In create_meeting endpoint:
 # OLD: Direct Whereby API calls
 # NEW: Platform abstraction
 # Find the meeting creation section and replace:
 platform = get_platform_for_room(room.id)
 client = create_platform_client(platform)
 meeting_data = await client.create_meeting(
    room_name_prefix=room.name,
    end_date=meeting_data.end_date,
    room=room,
 )
 # Then create Meeting record with meeting_data.platform, meeting_data.meeting_id, etc.
 ```
 **2.5 Add Daily.co Webhook Handler**
 Copy from reference, minimal changes needed:
 ```bash
 cp reflector-dailyco-reference/server/reflector/views/daily.py \
   server/reflector/views/
 ```
 Register in `server/reflector/app.py`:
 ```python
 from reflector.views import daily
 app.include_router(daily.router, prefix="/v1/daily", tags=["daily"])
 ```
 **2.6 Add Recording Processing Task**
 File: `server/reflector/worker/process.py`
 Add the `process_recording_from_url` task from reference (copy the function).
 ### Phase 3: Frontend Adaptation (3-4 hours)
 **3.1 Determine Current API Client Pattern**
 First, check how main currently makes API calls:
 ```bash
 cd www
 grep -r "api\." app/ | head -20
 # Look for patterns like: api.v1Something()
 ```
 **3.2 Create Components**
 Copy component structure from reference but **rewrite all API calls**:
 ```bash
 mkdir -p www/app/[roomName]/components
 ```
 Files to create:
 - `RoomContainer.tsx` - Platform router (mostly copy-safe, just fix imports)
 - `DailyRoom.tsx` - Needs React Query API calls
 - `WherebyRoom.tsx` - Extract current room page logic
 **Example React Query pattern** (adapt to your actual API):
 ```typescript
 import { api } from '@/app/api/client'
 // In DailyRoom.tsx
 const handleConsent = async () => {
  try {
    await api.v1MeetingAudioConsent({
      path: { meeting_id: meeting.id },
      body: { consent: true },
    })
    // ...
  } catch (error) {
    // ...
  }
 }
 ```
 **3.3 Add Daily.co Dependency**
 Check current package manager:
 ```bash
 cd www
 ls package-lock.json yarn.lock pnpm-lock.yaml
 ```
 Then install:
 ```bash
 # If using pnpm
 pnpm add @daily-co/daily-js@^0.81.0
 # If using yarn
 yarn add @daily-co/daily-js@^0.81.0
 ```
 **3.4 Update TypeScript Types**
 After backend changes, regenerate types:
 ```bash
 cd www
 pnpm openapi  # or yarn openapi
 ```
 This should pick up the new `platform` field on Meeting type.
 ### Phase 4: Testing (2-3 hours)
 **4.1 Copy Test Structure**
 ```bash
 cp reflector-dailyco-reference/server/tests/test_video_platforms.py \
   server/tests/
 cp reflector-dailyco-reference/server/tests/test_daily_webhook.py \
   server/tests/
 ```
 **4.2 Fix Test Imports and Fixtures**
 Update imports to match current test infrastructure:
 - Check `server/tests/conftest.py` for fixture patterns
 - Update database access patterns if changed
 - Fix any import errors
 **4.3 Run Tests**
 ```bash
 cd server
 # Run with environment variables for Mac
 REDIS_HOST=localhost \
 CELERY_BROKER_URL=redis://localhost:6379/1 \
 CELERY_RESULT_BACKEND=redis://localhost:6379/1 \
 uv run pytest tests/test_video_platforms.py -v
 ```
 ### Phase 5: Environment Configuration
 **Update `server/env.example`:**
 Add at the end:
 ```bash
 # Daily.co API Integration
 DAILY_API_KEY=your-daily-api-key
 DAILY_WEBHOOK_SECRET=your-daily-webhook-secret
 DAILY_SUBDOMAIN=your-subdomain
 AWS_DAILY_S3_BUCKET=your-daily-bucket
 AWS_DAILY_S3_REGION=us-west-2
 AWS_DAILY_ROLE_ARN=arn:aws:iam::ACCOUNT:role/DailyRecording
 # Platform Selection
 DAILY_MIGRATION_ENABLED=false           # Master switch
 DAILY_MIGRATION_ROOM_IDS=[]            # Specific room IDs
 DEFAULT_VIDEO_PLATFORM=whereby          # Default platform
 ```
 ## Decision Tree: Copy vs Adapt vs Rewrite
 ```
 ┌─ Is it pure abstraction logic? (base.py, registry.py, models.py)
 │  YES → Copy directly, review imports
 │  NO  → Continue ↓
 │
 ├─ Does it touch database models?
 │  YES → Adapt carefully, preserve main's fields
 │  NO  → Continue ↓
 │
 ├─ Does it make API calls on frontend?
 │  YES → Rewrite using React Query
 │  NO  → Continue ↓
 │
 ├─ Is it a database migration?
 │  YES → Generate fresh from current schema
 │  NO  → Continue ↓
 │
 └─ Does it touch rooms.py or core business logic?
   YES → Merge carefully, preserve calendar/webhooks
   NO  → Safe to adapt from reference
 ```
 ## Verification Checklist
 After each phase, verify:
 **Phase 1 (Abstraction Layer):**
 - [ ] `uv run ruff check server/reflector/video_platforms/` passes
 - [ ] No circular import errors
 - [ ] Can import `from reflector.video_platforms import create_platform_client`
 **Phase 2 (Backend Integration):**
 - [ ] `uv run ruff check server/` passes
 - [ ] Migration file generated (not copied)
 - [ ] Room and Meeting models have platform field
 - [ ] rooms.py still has calendar/webhook features
 **Phase 3 (Frontend):**
 - [ ] `pnpm lint` passes
 - [ ] No TypeScript errors
 - [ ] No `@ts-ignore` for platform field
 - [ ] API calls use React Query patterns
 **Phase 4 (Testing):**
 - [ ] Tests can be collected: `pytest tests/test_video_platforms.py --collect-only`
 - [ ] Database fixtures work
 - [ ] Mock platform works
 **Phase 5 (Config):**
 - [ ] env.example has Daily.co variables
 - [ ] settings.py has all new variables
 - [ ] No duplicate variable definitions
 ## Common Pitfalls
 ### 1. Database Schema Conflicts
 **Problem:** Reference removed fields that main has (calendar, webhooks)
 **Solution:** Always preserve main's fields, only add platform field
 ### 2. Migration Conflicts
 **Problem:** Reference migration has wrong `down_revision`
 **Solution:** Always generate fresh migration from current main
 ### 3. Frontend API Calls
 **Problem:** Reference uses old API client patterns
 **Solution:** Check current main's API usage, replicate that pattern
 ### 4. Import Errors
 **Problem:** Circular imports between `video_platforms` and the Room/Meeting models
 **Solution:** Use `if TYPE_CHECKING:` for Room/Meeting imports in video_platforms
 ### 5. Test Database Issues
 **Problem:** Tests fail with "could not translate host name 'postgres'"
 **Solution:** Use environment variables: `REDIS_HOST=localhost DATABASE_URL=...`
 ### 6. Preserved Features Broken
 **Problem:** Calendar/webhook features stop working
 **Solution:** Carefully review rooms.py diff, only change meeting creation, not calendar logic
 ## File Modification Summary
 **New files (can copy):**
 - `server/reflector/video_platforms/*.py` (entire directory)
 - `server/reflector/views/daily.py`
 - `server/tests/test_video_platforms.py`
 - `server/tests/test_daily_webhook.py`
 - `www/app/[roomName]/components/RoomContainer.tsx`
 - `www/app/[roomName]/components/DailyRoom.tsx`
 - `www/app/[roomName]/components/WherebyRoom.tsx`
 **Modified files (careful merging):**
 - `server/reflector/settings.py` - Add Daily.co settings
 - `server/reflector/db/rooms.py` - Add platform field
 - `server/reflector/db/meetings.py` - Add platform field
 - `server/reflector/views/rooms.py` - Integrate platform abstraction
 - `server/reflector/worker/process.py` - Add process_recording_from_url
 - `server/reflector/app.py` - Register daily router
 - `server/env.example` - Add Daily.co variables
 - `www/app/[roomName]/page.tsx` - Use RoomContainer
 - `www/package.json` - Add @daily-co/daily-js
 **Generated files (do not copy):**
 - `server/migrations/versions/XXXXXX_add_platform_support.py` - Generate fresh
 ## Success Metrics
 Implementation is complete when:
 - [ ] All tests pass (including new platform tests)
 - [ ] Linting passes (ruff, pnpm lint)
 - [ ] Migration applies cleanly: `uv run alembic upgrade head`
 - [ ] Can create Whereby meeting (existing flow unchanged)
 - [ ] Can create Daily.co meeting (with env vars set)
 - [ ] Frontend loads without TypeScript errors
 - [ ] No features from main were accidentally removed
 ## Getting Help
 **Reference documentation locations:**
 - Implementation plan: `PLAN.md`
 - Reference implementation: `./reflector-dailyco-reference/`
 - Current main codebase: `./` (current directory)
 **Compare implementations:**
 ```bash
 # Compare specific files
 diff reflector-dailyco-reference/server/reflector/video_platforms/base.py \
     server/reflector/video_platforms/base.py
 # See what changed in rooms.py between reference branch point and now
 git log --oneline --since="2025-08-01" -- server/reflector/views/rooms.py
 ```
 **Key insight:** The reference branch validates the approach and provides working code patterns, but you're implementing fresh against current main to avoid merge conflicts and preserve all new features.
--- a/PLAN.md
+++ b/PLAN.md
--- a/README.md
+++ b/README.md
@@ -168,12 +168,6 @@ You can manually process an audio file by calling the process tool:
 uv run python -m reflector.tools.process path/to/audio.wav
 ```
 ## Reprocessing any transcription
 ```bash
 uv run -m reflector.tools.process_transcript 81ec38d1-9dd7-43d2-b3f8-51f4d34a07cd --sync
 ```
 ## Build-time env variables
 Next.js projects typically rely on `NEXT_PUBLIC_`-prefixed build-time variables. We don't use those because we need to serve a customizable, prebuilt Docker container.
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -3,8 +3,10 @@
 services:
  web:
-    image: monadicalsas/reflector-frontend:latest
+    build:
-    pull_policy: always
+      context: ./www
      dockerfile: Dockerfile
    image: reflector-frontend:latest
    environment:
      - KV_URL=${KV_URL:-redis://redis:6379}
      - SITE_URL=${SITE_URL}
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -34,20 +34,6 @@ services:
    environment:
      ENTRYPOINT: beat
  hatchet-worker:
    build:
      context: server
    volumes:
      - ./server/:/app/
      - /app/.venv
    env_file:
      - ./server/.env
    environment:
      ENTRYPOINT: hatchet-worker
    depends_on:
      hatchet:
        condition: service_healthy
  redis:
    image: redis:7.2
    ports:
@@ -69,7 +55,6 @@ services:
  postgres:
    image: postgres:17
    command: postgres -c 'max_connections=200'
    ports:
      - 5432:5432
    environment:
@@ -78,42 +63,6 @@ services:
      POSTGRES_DB: reflector
    volumes:
      - ./data/postgres:/var/lib/postgresql/data
      - ./server/docker/init-hatchet-db.sql:/docker-entrypoint-initdb.d/init-hatchet-db.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d reflector -U reflector"]
      interval: 10s
      timeout: 10s
      retries: 5
      start_period: 10s
  hatchet:
    image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
    ports:
      - "8889:8888"
      - "7078:7077"
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: "postgresql://reflector:reflector@postgres:5432/hatchet?sslmode=disable"
      SERVER_AUTH_COOKIE_DOMAIN: localhost
      SERVER_AUTH_COOKIE_INSECURE: "t"
      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
      SERVER_GRPC_INSECURE: "t"
      SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
      SERVER_GRPC_PORT: "7077"
      SERVER_URL: http://localhost:8889
      SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
      # SERVER_DEFAULT_ENGINE_VERSION: "V1"  # default
      SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
    volumes:
      - ./data/hatchet-config:/config
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
 networks:
  default:
--- a/docs/transcript.md
+++ b/docs/transcript.md
@@ -1,241 +0,0 @@
 # Transcript Formats
 The Reflector API provides multiple output formats for transcript data through the `transcript_format` query parameter on the GET `/v1/transcripts/{id}` endpoint.
 ## Overview
 When retrieving a transcript, you can specify the desired format using the `transcript_format` query parameter. The API supports four formats optimized for different use cases:
 - **text** - Plain text with speaker names (default)
 - **text-timestamped** - Timestamped text with speaker names
 - **webvtt-named** - WebVTT subtitle format with participant names
 - **json** - Structured JSON segments with full metadata
 All formats include participant information when available, resolving speaker IDs to actual names.
 ## Query Parameter Usage
 ```
 GET /v1/transcripts/{id}?transcript_format={format}
 ```
 ### Parameters
 - `transcript_format` (optional): The desired output format
  - Type: `"text" | "text-timestamped" | "webvtt-named" | "json"`
  - Default: `"text"`
 ## Format Descriptions
 ### Text Format (`text`)
 **Use case:** Simple, human-readable transcript for display or export.
 **Format:** Speaker names followed by their dialogue, one line per segment.
 **Example:**
 ```
 John Smith: Hello everyone
 Jane Doe: Hi there
 John Smith: How are you today?
 ```
 **Request:**
 ```bash
 GET /v1/transcripts/{id}?transcript_format=text
 ```
 **Response:**
 ```json
 {
  "id": "transcript_123",
  "name": "Meeting Recording",
  "transcript_format": "text",
  "transcript": "John Smith: Hello everyone\nJane Doe: Hi there\nJohn Smith: How are you today?",
  "participants": [
    {"id": "p1", "speaker": 0, "name": "John Smith"},
    {"id": "p2", "speaker": 1, "name": "Jane Doe"}
  ],
  ...
 }
 ```
 ### Text Timestamped Format (`text-timestamped`)
 **Use case:** Transcript with timing information for navigation or reference.
 **Format:** `[MM:SS]` timestamp prefix before each speaker and dialogue.
 **Example:**
 ```
 [00:00] John Smith: Hello everyone
 [00:05] Jane Doe: Hi there
 [00:12] John Smith: How are you today?
 ```
 **Request:**
 ```bash
 GET /v1/transcripts/{id}?transcript_format=text-timestamped
 ```
 **Response:**
 ```json
 {
  "id": "transcript_123",
  "name": "Meeting Recording",
  "transcript_format": "text-timestamped",
  "transcript": "[00:00] John Smith: Hello everyone\n[00:05] Jane Doe: Hi there\n[00:12] John Smith: How are you today?",
  "participants": [
    {"id": "p1", "speaker": 0, "name": "John Smith"},
    {"id": "p2", "speaker": 1, "name": "Jane Doe"}
  ],
  ...
 }
 ```
 ### WebVTT Named Format (`webvtt-named`)
 **Use case:** Subtitle files for video players, accessibility tools, or video editing.
 **Format:** Standard WebVTT subtitle format with voice tags using participant names.
 **Example:**
 ```
 WEBVTT
 00:00:00.000 --> 00:00:05.000
 <v John Smith>Hello everyone
 00:00:05.000 --> 00:00:12.000
 <v Jane Doe>Hi there
 00:00:12.000 --> 00:00:18.000
 <v John Smith>How are you today?
 ```
 **Request:**
 ```bash
 GET /v1/transcripts/{id}?transcript_format=webvtt-named
 ```
 **Response:**
 ```json
 {
  "id": "transcript_123",
  "name": "Meeting Recording",
  "transcript_format": "webvtt-named",
  "transcript": "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\n<v John Smith>Hello everyone\n\n...",
  "participants": [
    {"id": "p1", "speaker": 0, "name": "John Smith"},
    {"id": "p2", "speaker": 1, "name": "Jane Doe"}
  ],
  ...
 }
 ```
 ### JSON Format (`json`)
 **Use case:** Programmatic access with full timing and speaker metadata.
 **Format:** Array of segment objects with speaker information, text content, and precise timing.
 **Example:**
 ```json
 [
  {
    "speaker": 0,
    "speaker_name": "John Smith",
    "text": "Hello everyone",
    "start": 0.0,
    "end": 5.0
  },
  {
    "speaker": 1,
    "speaker_name": "Jane Doe",
    "text": "Hi there",
    "start": 5.0,
    "end": 12.0
  },
  {
    "speaker": 0,
    "speaker_name": "John Smith",
    "text": "How are you today?",
    "start": 12.0,
    "end": 18.0
  }
 ]
 ```
 **Request:**
 ```bash
 GET /v1/transcripts/{id}?transcript_format=json
 ```
 **Response:**
 ```json
 {
  "id": "transcript_123",
  "name": "Meeting Recording",
  "transcript_format": "json",
  "transcript": [
    {
      "speaker": 0,
      "speaker_name": "John Smith",
      "text": "Hello everyone",
      "start": 0.0,
      "end": 5.0
    },
    {
      "speaker": 1,
      "speaker_name": "Jane Doe",
      "text": "Hi there",
      "start": 5.0,
      "end": 12.0
    }
  ],
  "participants": [
    {"id": "p1", "speaker": 0, "name": "John Smith"},
    {"id": "p2", "speaker": 1, "name": "Jane Doe"}
  ],
  ...
 }
 ```
 ## Response Structure
 All formats return the same base transcript metadata with an additional `transcript_format` field and format-specific `transcript` field:
 ### Common Fields
 - `id`: Transcript identifier
 - `user_id`: Owner user ID (if authenticated)
 - `name`: Transcript name
 - `status`: Processing status
 - `locked`: Whether transcript is locked for editing
 - `duration`: Total duration in seconds
 - `title`: Auto-generated or custom title
 - `short_summary`: Brief summary
 - `long_summary`: Detailed summary
 - `created_at`: Creation timestamp
 - `share_mode`: Access control setting
 - `source_language`: Original audio language
 - `target_language`: Translation target language
 - `reviewed`: Whether transcript has been reviewed
 - `meeting_id`: Associated meeting ID (if applicable)
 - `source_kind`: Source type (live, file, room)
 - `room_id`: Associated room ID (if applicable)
 - `audio_deleted`: Whether audio has been deleted
 - `participants`: Array of participant objects with speaker mappings
 ### Format-Specific Fields
 - `transcript_format`: The format identifier (discriminator field)
 - `transcript`: The formatted transcript content (string for text/webvtt formats, array for json format)
 ## Speaker Name Resolution
 All formats resolve speaker IDs to participant names when available:
 - If a participant exists for the speaker ID, their name is used
 - If no participant exists, a default name like "Speaker 0" is generated
 - Speaker IDs are integers (0, 1, 2, etc.) assigned during diarization
--- a/gpu/modal_deployments/reflector_transcriber_parakeet.py
+++ b/gpu/modal_deployments/reflector_transcriber_parakeet.py
@@ -81,9 +81,9 @@ image = (
        "cuda-python==12.8.0",
        "fastapi==0.115.12",
        "numpy<2",
-        "librosa==0.11.0",
+        "librosa==0.10.1",
        "requests",
-        "silero-vad==6.2.0",
+        "silero-vad==5.1.0",
        "torch",
    )
    .entrypoint([])  # silence chatty logs by container on start
@@ -306,7 +306,6 @@ class TranscriberParakeetFile:
        ) -> Generator[TimeSegment, None, None]:
            """Generate speech segments using VAD with start/end sample indices"""
            vad_iterator = VADIterator(self.vad_model, sampling_rate=SAMPLERATE)
            audio_duration = len(audio_array) / float(SAMPLERATE)
            window_size = VAD_CONFIG["window_size"]
            start = None
@@ -333,10 +332,6 @@ class TranscriberParakeetFile:
                    yield TimeSegment(start_time, end_time)
                    start = None
            if start is not None:
                start_time = start / float(SAMPLERATE)
                yield TimeSegment(start_time, audio_duration)
            vad_iterator.reset_states()
        def batch_speech_segments(
--- a/server/DAILYCO_TEST.md
+++ b/server/DAILYCO_TEST.md
@@ -0,0 +1,613 @@
 # Daily.co Integration Test Plan
 ## ✅ IMPLEMENTATION STATUS: Real Transcription Active
 **This test validates Daily.co multitrack recording integration with REAL transcription/diarization.**
 The implementation includes complete audio processing pipeline:
 - **Multitrack recordings** from Daily.co S3 (separate audio stream per participant)
 - **PyAV-based audio mixdown** with PTS-based track alignment
 - **Real transcription** via Modal GPU backend (Whisper)
 - **Real diarization** via Modal GPU backend (speaker identification)
 - **Per-track transcription** with timestamp synchronization
 - **Complete database entities** (recording, transcript, topics, participants, words)
 **Processing pipeline** (`PipelineMainMultitrack`):
 1. Download all audio tracks from Daily.co S3
 2. Align tracks by PTS (presentation timestamp) to handle late joiners
 3. Mix tracks into single audio file for unified playback
 4. Transcribe each track individually with proper offset handling
 5. Perform diarization on mixed audio
 6. Generate topics, summaries, and word-level timestamps
 7. Convert audio to MP3 and generate waveform visualization
 **Note:** A stub processor (`process_daily_recording`) exists for testing webhook flow without GPU costs, but the production code path uses `process_multitrack_recording` with full ML pipeline.
 ---
 ## Prerequisites
 **1. Environment Variables** (check in `.env.development.local`):
 ```bash
 # Daily.co API Configuration
 DAILY_API_KEY=<key>
 DAILY_SUBDOMAIN=monadical
 DAILY_WEBHOOK_SECRET=<base64-encoded-secret>
 AWS_DAILY_S3_BUCKET=reflector-dailyco-local
 AWS_DAILY_S3_REGION=us-east-1
 AWS_DAILY_ROLE_ARN=arn:aws:iam::950402358378:role/DailyCo
 DAILY_MIGRATION_ENABLED=true
 DAILY_MIGRATION_ROOM_IDS=["552640fd-16f2-4162-9526-8cf40cd2357e"]
 # Transcription/Diarization Backend (Required for real processing)
 DIARIZATION_BACKEND=modal
 DIARIZATION_MODAL_API_KEY=<modal-api-key>
 # TRANSCRIPTION_BACKEND is not explicitly set (uses default/modal)
 ```
 **2. Services Running:**
 ```bash
 docker compose ps  # server, postgres, redis, worker, beat should be UP
 ```
 **IMPORTANT:** Worker and beat services MUST be running for transcription processing:
 ```bash
 docker compose up -d worker beat
 ```
 **3. ngrok Tunnel for Webhooks:**
 ```bash
 # Start ngrok (if not already running)
 ngrok http 1250 --log=stdout > /tmp/ngrok.log 2>&1 &
 # Get public URL
 curl -s http://localhost:4040/api/tunnels | python3 -c "import sys, json; data=json.load(sys.stdin); print(data['tunnels'][0]['public_url'])"
 ```
 **Current ngrok URL:** `https://0503947384a3.ngrok-free.app` (as of last registration)
 **4. Webhook Created:**
 ```bash
 cd server
 uv run python scripts/recreate_daily_webhook.py https://0503947384a3.ngrok-free.app/v1/daily/webhook
 # Verify: "Created webhook <uuid> (state: ACTIVE)"
 ```
 **Current webhook status:** ✅ ACTIVE (webhook ID: dad5ad16-ceca-488e-8fc5-dae8650b51d0)
 ---
 ## Test 1: Database Configuration
 **Check room platform:**
 ```bash
 docker-compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT id, name, platform, recording_type FROM room WHERE name = 'test2';"
 ```
 **Expected:**
 ```
 id: 552640fd-16f2-4162-9526-8cf40cd2357e
 name: test2
 platform: whereby  # DB value (overridden by env var DAILY_MIGRATION_ROOM_IDS)
 recording_type: cloud
 ```
 **Clear old meetings:**
 ```bash
 docker-compose exec -T postgres psql -U reflector -d reflector -c \
  "UPDATE meeting SET is_active = false WHERE room_id = '552640fd-16f2-4162-9526-8cf40cd2357e';"
 ```
 ---
 ## Test 2: Meeting Creation with Auto-Recording
 **Create meeting:**
 ```bash
 curl -s -X POST http://localhost:1250/v1/rooms/test2/meeting \
  -H "Content-Type: application/json" \
  -d '{"allow_duplicated":false}' | python3 -m json.tool
 ```
 **Expected Response:**
 ```json
 {
  "room_name": "test2-YYYYMMDDHHMMSS",  // Includes "test2" prefix!
  "room_url": "https://monadical.daily.co/test2-...?t=<JWT_TOKEN>",  // Has token!
  "platform": "daily",
  "recording_type": "cloud"  // DB value (Whereby-specific)
 }
 ```
 **Decode token to verify auto-recording:**
 ```bash
 # Extract token from room_url, decode JWT payload
 echo "<token>" | python3 -c "
 import sys, json, base64
 token = sys.stdin.read().strip()
 payload = token.split('.')[1] + '=' * (-len(token.split('.')[1]) % 4)
 print(json.dumps(json.loads(base64.urlsafe_b64decode(payload)), indent=2))
 "
 ```
 **Expected token payload:**
 ```json
 {
  "r": "test2-YYYYMMDDHHMMSS",  // Room name
  "sr": true,  // start_recording: true ✅
  "d": "...",  // Domain ID
  "iat": 1234567890
 }
 ```
 ---
 ## Test 3: Daily.co API Verification
 **Check room configuration:**
 ```bash
 ROOM_NAME="<from previous step>"
 curl -s -X GET "https://api.daily.co/v1/rooms/$ROOM_NAME" \
  -H "Authorization: Bearer $DAILY_API_KEY" | python3 -m json.tool
 ```
 **Expected config:**
 ```json
 {
  "config": {
    "enable_recording": "raw-tracks",  // ✅
    "recordings_bucket": {
      "bucket_name": "reflector-dailyco-local",
      "bucket_region": "us-east-1",
      "assume_role_arn": "arn:aws:iam::950402358378:role/DailyCo"
    }
  }
 }
 ```
 ---
 ## Test 4: Browser UI Test (Playwright MCP)
 **Using Claude Code MCP tools:**
 **Load room:**
 ```
 Use: mcp__playwright__browser_navigate
 Input: {"url": "http://localhost:3000/test2"}
 Then wait 12 seconds for iframe to load
 ```
 **Verify Daily.co iframe loaded:**
 ```
 Use: mcp__playwright__browser_snapshot
 Expected in snapshot:
 - iframe element with src containing "monadical.daily.co"
 - Daily.co pre-call UI visible
 ```
 **Take screenshot:**
 ```
 Use: mcp__playwright__browser_take_screenshot
 Input: {"filename": "test2-before-join.png"}
 Expected: Daily.co pre-call UI with "Join" button visible
 ```
 **Join meeting:**
 ```
 Note: Daily.co iframe interaction requires clicking inside iframe.
 Use: mcp__playwright__browser_click
 Input: {"element": "Join button in Daily.co iframe", "ref": "<ref-from-snapshot>"}
 Then wait 5 seconds for call to connect
 ```
 **Verify in-call:**
 ```
 Use: mcp__playwright__browser_take_screenshot
 Input: {"filename": "test2-in-call.png"}
 Expected: "Waiting for others to join" or participant video visible
 ```
 **Leave meeting:**
 ```
 Use: mcp__playwright__browser_click
 Input: {"element": "Leave button in Daily.co iframe", "ref": "<ref-from-snapshot>"}
 ```
 ---
 **Alternative: JavaScript snippets (for manual testing):**
 ```javascript
 await page.goto('http://localhost:3000/test2');
 await new Promise(f => setTimeout(f, 12000));  // Wait for load
 // Verify iframe
 const iframes = document.querySelectorAll('iframe');
 // Expected: 1 iframe with src containing "monadical.daily.co"
 // Screenshot
 await page.screenshot({ path: 'test2-before-join.png' });
 // Join
 await page.locator('iframe').contentFrame().getByRole('button', { name: 'Join' }).click();
 await new Promise(f => setTimeout(f, 5000));
 // In-call screenshot
 await page.screenshot({ path: 'test2-in-call.png' });
 // Leave
 await page.locator('iframe').contentFrame().getByRole('button', { name: 'Leave' }).click();
 ```
 ---
 ## Test 5: Webhook Verification
 **Check server logs for webhooks:**
 ```bash
 docker-compose logs --since 15m server 2>&1 | grep -i "participant joined\|recording started"
 ```
 **Expected logs:**
 ```
 [info] Participant joined | meeting_id=... | num_clients=1 | recording_type=cloud | recording_trigger=automatic-2nd-participant
 [info] Recording started | meeting_id=... | recording_id=... | platform=daily
 ```
 **Check Daily.co webhook delivery logs:**
 ```bash
 curl -s -X GET "https://api.daily.co/v1/logs/webhooks?limit=20" \
  -H "Authorization: Bearer $DAILY_API_KEY" | python3 -c "
 import sys, json
 logs = json.load(sys.stdin)
 for log in logs[:10]:
    req = json.loads(log['request'])
    room = req.get('payload', {}).get('room') or req.get('payload', {}).get('room_name', 'N/A')
    print(f\"{req['type']:30s} | room: {room:30s} | status: {log['status']}\")
 "
 ```
 **Expected output:**
 ```
 participant.joined             | room: test2-YYYYMMDDHHMMSS       | status: 200
 recording.started              | room: test2-YYYYMMDDHHMMSS       | status: 200
 participant.left               | room: test2-YYYYMMDDHHMMSS       | status: 200
 recording.ready-to-download    | room: test2-YYYYMMDDHHMMSS       | status: 200
 ```
 **Check database updated:**
 ```bash
 docker-compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT room_name, num_clients FROM meeting WHERE room_name LIKE 'test2-%' ORDER BY end_date DESC LIMIT 1;"
 ```
 **Expected:**
 ```
 room_name: test2-YYYYMMDDHHMMSS
 num_clients: 0  // After participant left
 ```
 ---
 ## Test 6: Recording in S3
 **List recent recordings:**
 ```bash
 curl -s -X GET "https://api.daily.co/v1/recordings" \
  -H "Authorization: Bearer $DAILY_API_KEY" | python3 -c "
 import sys, json
 data = json.load(sys.stdin)
 for rec in data.get('data', [])[:5]:
    if 'test2-' in rec.get('room_name', ''):
        print(f\"Room: {rec['room_name']}\")
        print(f\"Status: {rec['status']}\")
        print(f\"Duration: {rec.get('duration', 0)}s\")
        print(f\"S3 key: {rec.get('s3key', 'N/A')}\")
        print(f\"Tracks: {len(rec.get('tracks', []))} files\")
        for track in rec.get('tracks', []):
            print(f\"  - {track['type']}: {track['s3Key'].split('/')[-1]} ({track['size']} bytes)\")
        print()
 "
 ```
 **Expected output:**
 ```
 Room: test2-20251009192341
 Status: finished
 Duration: ~30-120s
 S3 key: monadical/test2-20251009192341/1760037914930
 Tracks: 2 files
  - audio: 1760037914930-<uuid>-cam-audio-1760037915265 (~400 KB)
  - video: 1760037914930-<uuid>-cam-video-1760037915269 (~10-30 MB)
 ```
 **Verify S3 path structure:**
 - `monadical/` - Daily.co subdomain
 - `test2-20251009192341/` - Reflector room name + timestamp
 - `<timestamp>-<participant-uuid>-<media-type>-<track-start>.webm` - Individual track files
 ---
 ## Test 7: Database Check - Recording and Transcript
 **Check recording created:**
 ```bash
 docker-compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT id, bucket_name, object_key, status, meeting_id, recorded_at
   FROM recording
   ORDER BY recorded_at DESC LIMIT 1;"
 ```
 **Expected:**
 ```
 id: <recording-id-from-webhook>
 bucket_name: reflector-dailyco-local
 object_key: monadical/test2-<timestamp>/<recording-timestamp>-<uuid>-cam-audio-<track-start>.webm
 status: completed
 meeting_id: <meeting-id>
 recorded_at: <recent-timestamp>
 ```
 **Check transcript created:**
 ```bash
 docker compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT id, title, status, duration, recording_id, meeting_id, room_id
   FROM transcript
   ORDER BY created_at DESC LIMIT 1;"
 ```
 **Expected (REAL transcription):**
 ```
 id: <transcript-id>
 title: <AI-generated title based on actual conversation content>
 status: uploaded  (audio file processed and available)
 duration: <actual meeting duration in seconds>
 recording_id: <same-as-recording-id-above>
 meeting_id: <meeting-id>
 room_id: 552640fd-16f2-4162-9526-8cf40cd2357e
 ```
 **Note:** Title and content will reflect the ACTUAL conversation, not mock data. Processing time depends on recording length and GPU backend availability (Modal).
 **Verify audio file exists:**
 ```bash
 ls -lh data/<transcript-id>/upload.webm
 ```
 **Expected:**
 ```
 -rw-r--r-- 1 user staff ~100-200K Oct 10 18:48 upload.webm
 ```
 **Check transcript topics (REAL transcription):**
 ```bash
 TRANSCRIPT_ID=$(docker compose exec -T postgres psql -U reflector -d reflector -t -c \
  "SELECT id FROM transcript ORDER BY created_at DESC LIMIT 1;" | tr -d ' \n')
 docker compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT
     jsonb_array_length(topics) as num_topics,
     jsonb_array_length(participants) as num_participants,
     short_summary,
     title
   FROM transcript
   WHERE id = '$TRANSCRIPT_ID';"
 ```
 **Expected (REAL data):**
 ```
 num_topics: <varies based on conversation>
 num_participants: <actual number of participants who spoke>
 short_summary: <AI-generated summary of actual conversation>
 title: <AI-generated title based on content>
 ```
 **Check topics contain actual transcription:**
 ```bash
 docker compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT topics->0->'title', topics->0->'summary', topics->0->'transcript'
   FROM transcript
   ORDER BY created_at DESC LIMIT 1;" | head -20
 ```
 **Expected output:** Will contain the ACTUAL transcribed conversation from the Daily.co meeting, not mock data.
 **Check participants:**
 ```bash
 docker compose exec -T postgres psql -U reflector -d reflector -t -A -c \
  "SELECT participants FROM transcript ORDER BY created_at DESC LIMIT 1;" \
  | python3 -c "import sys, json; data=json.loads(sys.stdin.read()); print(json.dumps(data, indent=2))"
 ```
 **Expected (REAL diarization):**
 ```json
 [
  {
    "id": "<uuid>",
    "speaker": 0,
    "name": "Speaker 1"
  },
  {
    "id": "<uuid>",
    "speaker": 1,
    "name": "Speaker 2"
  }
 ]
 ```
 **Note:** Speaker names will be generic ("Speaker 1", "Speaker 2", etc.) as determined by the diarization backend. Number of participants depends on how many actually spoke during the meeting.
 **Check word-level data:**
 ```bash
 docker compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT jsonb_array_length(topics->0->'words') as num_words_first_topic
   FROM transcript
   ORDER BY created_at DESC LIMIT 1;"
 ```
 **Expected:**
 ```
 num_words_first_topic: <varies based on actual conversation length and topic chunking>
 ```
 **Verify speaker diarization in words:**
 ```bash
 docker compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT
     topics->0->'words'->0->>'text' as first_word,
     topics->0->'words'->0->>'speaker' as speaker,
     topics->0->'words'->0->>'start' as start_time,
     topics->0->'words'->0->>'end' as end_time
   FROM transcript
   ORDER BY created_at DESC LIMIT 1;"
 ```
 **Expected (REAL transcription):**
 ```
 first_word: <actual first word from transcription>
 speaker: 0, 1, 2, ... (actual speaker ID from diarization)
 start_time: <actual timestamp in seconds>
 end_time: <actual end timestamp>
 ```
 **Note:** All timestamps and speaker IDs are from real transcription/diarization, synchronized across tracks.
 ---
 ## Test 8: Recording Type Verification
 **Check what Daily.co received:**
 ```bash
 curl -s -X GET "https://api.daily.co/v1/rooms/test2-<timestamp>" \
  -H "Authorization: Bearer $DAILY_API_KEY" | python3 -m json.tool | grep "enable_recording"
 ```
 **Expected:**
 ```json
 "enable_recording": "raw-tracks"
 ```
 **NOT:** `"enable_recording": "cloud"` (that would be wrong - we want raw tracks)
 ---
 ## Troubleshooting
 ### Issue: No webhooks received
 **Check webhook state:**
 ```bash
 curl -s -X GET "https://api.daily.co/v1/webhooks" \
  -H "Authorization: Bearer $DAILY_API_KEY" | python3 -m json.tool
 ```
 **If state is FAILED:**
 ```bash
 cd server
 uv run python scripts/recreate_daily_webhook.py https://<ngrok-url>/v1/daily/webhook
 ```
 ### Issue: Webhooks return 422
 **Check server logs:**
 ```bash
 docker-compose logs --tail=50 server | grep "Failed to parse webhook event"
 ```
 **Common cause:** Event structure mismatch. Daily.co events use:
 ```json
 {
  "version": "1.0.0",
  "type": "participant.joined",
  "payload": {...},  // NOT "data"
  "event_ts": 123.456  // NOT "ts"
 }
 ```
 ### Issue: Recording not starting
 1. **Check token has `sr: true`:**
   - Decode JWT token from room_url query param
   - Should contain `"sr": true`
 2. **Check Daily.co room config:**
   - `enable_recording` must be set (not false)
   - For raw-tracks: must be exactly `"raw-tracks"`
 3. **Check participant actually joined:**
   - Logs should show "Participant joined"
   - The participant must click the "Join" button; loading the pre-call screen alone does not count as joining
 ### Issue: Recording in S3 but wrong format
 **Daily.co recording types:**
 - `"cloud"` → Single MP4 file (`download_link` in webhook)
 - `"raw-tracks"` → Multiple WebM files (`tracks` array in webhook)
 - `"raw-tracks-audio-only"` → Only audio WebM files
 **Current implementation:** Always uses `"raw-tracks"` (better for transcription)
 ---
 ## Quick Validation Commands
 **Chained commands to verify the room, meeting creation, and Daily.co recording config:**
 ```bash
 # 1. Check room exists
 docker-compose exec -T postgres psql -U reflector -d reflector -c \
  "SELECT name, platform FROM room WHERE name = 'test2';" && \
 # 2. Create meeting
 MEETING=$(curl -s -X POST http://localhost:1250/v1/rooms/test2/meeting \
  -H "Content-Type: application/json" -d '{"allow_duplicated":false}') && \
 echo "$MEETING" | python3 -c "import sys,json; m=json.load(sys.stdin); print(f'Room: {m[\"room_name\"]}\nURL: {m[\"room_url\"][:80]}...')" && \
 # 3. Check Daily.co config
 ROOM_NAME=$(echo "$MEETING" | python3 -c "import sys,json; print(json.load(sys.stdin)['room_name'])") && \
 curl -s -X GET "https://api.daily.co/v1/rooms/$ROOM_NAME" \
  -H "Authorization: Bearer $DAILY_API_KEY" | python3 -c "import sys,json; print(f'Recording: {json.load(sys.stdin)[\"config\"][\"enable_recording\"]}')"
 ```
 **Expected output:**
 ```
 name: test2, platform: whereby
 Room: test2-20251009192341
 URL: https://monadical.daily.co/test2-20251009192341?t=eyJhbGc...
 Recording: raw-tracks
 ```
 ---
 ## Success Criteria Checklist
 - [x] Room name includes Reflector room prefix (`test2-...`)
 - [x] Meeting URL contains JWT token (`?t=...`)
 - [x] Token has `sr: true` (auto-recording enabled)
 - [x] Daily.co room config: `enable_recording: "raw-tracks"`
 - [x] Browser loads Daily.co interface (not Whereby)
 - [x] Recording auto-starts when participant joins
 - [x] Webhooks received: participant.joined, recording.started, participant.left, recording.ready-to-download
 - [x] Recording status: `finished`
 - [x] S3 contains 2 files: audio (.webm) and video (.webm)
 - [x] S3 path: `monadical/test2-{timestamp}/{recording-start-ts}-{participant-uuid}-cam-{audio|video}-{track-start-ts}`
 - [x] Database `num_clients` increments/decrements correctly
 - [x] **Database recording entry created** with correct S3 path and status `completed`
 - [ ] **Database transcript entry created** with status `uploaded`
 - [ ] **Audio file downloaded** to `data/{transcript_id}/upload.webm`
 - [ ] **Transcript has REAL data**: AI-generated title based on conversation
 - [ ] **Transcript has topics** generated from actual content
 - [ ] **Transcript has participants** with proper speaker diarization
 - [ ] **Topics contain word-level data** with accurate timestamps and speaker IDs
 - [ ] **Total duration** matches actual meeting length
 - [ ] **MP3 and waveform files generated** by file processing pipeline
 - [ ] **Frontend transcript page loads** without "Failed to load audio" error
 - [ ] **Audio player functional** with working playback and waveform visualization
 - [ ] **Multitrack processing completed** without errors in worker logs
 - [ ] **Modal GPU backends accessible** (transcription and diarization)
--- a/server/Dockerfile
+++ b/server/Dockerfile
@@ -6,7 +6,7 @@ ENV PYTHONUNBUFFERED=1 \
 # builder install base dependencies
 WORKDIR /tmp
-RUN apt-get update && apt-get install -y curl && apt-get clean
+RUN apt-get update && apt-get install -y curl ffmpeg && apt-get clean
 ADD https://astral.sh/uv/install.sh /uv-installer.sh
 RUN sh /uv-installer.sh && rm /uv-installer.sh
 ENV PATH="/root/.local/bin/:$PATH"
--- a/server/README.md
+++ b/server/README.md
@@ -1,29 +1,3 @@
 ## API Key Management
 ### Finding Your User ID
 ```bash
 # Get your OAuth sub (user ID) - requires authentication
 curl -H "Authorization: Bearer <your_jwt>" http://localhost:1250/v1/me
 # Returns: {"sub": "your-oauth-sub-here", "email": "...", ...}
 ```
 ### Creating API Keys
 ```bash
 curl -X POST http://localhost:1250/v1/user/api-keys \
  -H "Authorization: Bearer <your_jwt>" \
  -H "Content-Type: application/json" \
  -d '{"name": "My API Key"}'
 ```
 ### Using API Keys
 ```bash
 # Use X-API-Key header instead of Authorization
 curl -H "X-API-Key: <your_api_key>" http://localhost:1250/v1/transcripts
 ```
 ## AWS S3/SQS usage clarification
 Whereby.com uploads recordings directly to our S3 bucket when meetings end.
@@ -53,36 +27,6 @@ response = sqs.receive_message(QueueUrl=queue_url, ...)
 uv run /app/requeue_uploaded_file.py TRANSCRIPT_ID
 ```
 ## Hatchet Setup (Fresh DB)
 After resetting the Hatchet database:
 ### Option A: Automatic (CLI)
 ```bash
 # Get default tenant ID and create token in one command
 TENANT_ID=$(docker compose exec -T postgres psql -U reflector -d hatchet -t -c \
  "SELECT id FROM \"Tenant\" WHERE slug = 'default';" | tr -d ' \n') && \
 TOKEN=$(docker compose exec -T hatchet /hatchet-admin token create \
  --config /config --tenant-id "$TENANT_ID" 2>/dev/null | tr -d '\n') && \
 echo "HATCHET_CLIENT_TOKEN=$TOKEN"
 ```
 Copy the output to `server/.env`.
 ### Option B: Manual (UI)
 1. Create API token at http://localhost:8889 → Settings → API Tokens
 2. Update `server/.env`: `HATCHET_CLIENT_TOKEN=<new-token>`
 ### Then restart workers
 ```bash
 docker compose restart server hatchet-worker
 ```
 Workflows register automatically when hatchet-worker starts.
 ## Pipeline Management
 ### Continue stuck pipeline from final summaries (identify_participants) step:
--- a/server/docker/init-hatchet-db.sql
+++ b/server/docker/init-hatchet-db.sql
@@ -1,2 +0,0 @@
 -- Create hatchet database for Hatchet workflow engine
 CREATE DATABASE hatchet;
--- a/server/docs/video-platforms/README.md
+++ b/server/docs/video-platforms/README.md
@@ -1,236 +0,0 @@
 # Reflector Architecture: Whereby + Daily.co Recording Storage
 ## System Overview
 ```mermaid
 graph TB
    subgraph "Actors"
        APP[Our App<br/>Reflector]
        WHEREBY[Whereby Service<br/>External]
        DAILY[Daily.co Service<br/>External]
    end
    subgraph "AWS S3 Buckets"
        TRANSCRIPT_BUCKET[Transcript Bucket<br/>reflector-transcripts<br/>Output: Processed MP3s]
        WHEREBY_BUCKET[Whereby Bucket<br/>reflector-whereby-recordings<br/>Input: Raw MP4s]
        DAILY_BUCKET[Daily.co Bucket<br/>reflector-dailyco-recordings<br/>Input: Raw WebM tracks]
    end
    subgraph "AWS Infrastructure"
        SQS[SQS Queue<br/>Whereby notifications]
    end
    subgraph "Database"
        DB[(PostgreSQL<br/>Recordings, Transcripts, Meetings)]
    end
    APP -->|Write processed| TRANSCRIPT_BUCKET
    APP -->|Read/Delete| WHEREBY_BUCKET
    APP -->|Read/Delete| DAILY_BUCKET
    APP -->|Poll| SQS
    APP -->|Store metadata| DB
    WHEREBY -->|Write recordings| WHEREBY_BUCKET
    WHEREBY_BUCKET -->|S3 Event| SQS
    WHEREBY -->|Participant webhooks<br/>room.client.joined/left| APP
    DAILY -->|Write recordings| DAILY_BUCKET
    DAILY -->|Recording webhook<br/>recording.ready-to-download| APP
 ```
 **Note on Webhook vs S3 Event for Recording Processing:**
 - **Whereby**: Uses S3 Events → SQS for recording availability (S3 as source of truth, no race conditions)
 - **Daily.co**: Uses webhooks for recording availability (more immediate, built-in reliability)
 - **Both**: Use webhooks for participant tracking (real-time updates)
 ## Credentials & Permissions
 ```mermaid
 graph LR
    subgraph "Master Credentials"
        MASTER[TRANSCRIPT_STORAGE_AWS_*<br/>Access Key ID + Secret]
    end
    subgraph "Whereby Upload Credentials"
        WHEREBY_CREDS[AWS_WHEREBY_ACCESS_KEY_*<br/>Access Key ID + Secret]
    end
    subgraph "Daily.co Upload Role"
        DAILY_ROLE[DAILY_STORAGE_AWS_ROLE_ARN<br/>IAM Role ARN]
    end
    subgraph "Our App Uses"
        MASTER -->|Read/Write/Delete| TRANSCRIPT_BUCKET[Transcript Bucket]
        MASTER -->|Read/Delete| WHEREBY_BUCKET[Whereby Bucket]
        MASTER -->|Read/Delete| DAILY_BUCKET[Daily.co Bucket]
        MASTER -->|Poll/Delete| SQS[SQS Queue]
    end
    subgraph "We Give To Services"
        WHEREBY_CREDS -->|Passed in API call| WHEREBY_SERVICE[Whereby Service]
        WHEREBY_SERVICE -->|Write Only| WHEREBY_BUCKET
        DAILY_ROLE -->|Passed in API call| DAILY_SERVICE[Daily.co Service]
        DAILY_SERVICE -->|Assume Role| DAILY_ROLE
        DAILY_SERVICE -->|Write Only| DAILY_BUCKET
    end
 ```
 # Video Platform Recording Integration
 This document explains how Reflector receives and identifies multitrack audio recordings from different video platforms.
 ## Platform Comparison
 | Platform | Delivery Method | Track Identification |
 |----------|----------------|---------------------|
 | **Daily.co** | Webhook | Explicit track list in payload |
 | **Whereby** | SQS (S3 notifications) | Single file per notification |
 ---
 ## Daily.co
 **Note:** Primary discovery via polling (`poll_daily_recordings`), webhooks as backup.
 Daily.co also sends **webhooks** to notify Reflector when recordings are ready; the steps below describe that webhook path.
 ### How It Works
 1. **Daily.co sends webhook** when recording is ready
   - Event type: `recording.ready-to-download`
   - Endpoint: `/v1/daily/webhook` (`reflector/views/daily.py:46-102`)
 2. **Webhook payload explicitly includes track list**:
 ```json
 {
  "recording_id": "7443ee0a-dab1-40eb-b316-33d6c0d5ff88",
  "room_name": "daily-20251020193458",
  "tracks": [
    {
      "type": "audio",
      "s3Key": "monadical/daily-20251020193458/1760988935484-52f7f48b-fbab-431f-9a50-87b9abfc8255-cam-audio-1760988935922",
      "size": 831843
    },
    {
      "type": "audio",
      "s3Key": "monadical/daily-20251020193458/1760988935484-a37c35e3-6f8e-4274-a482-e9d0f102a732-cam-audio-1760988943823",
      "size": 408438
    },
    {
      "type": "video",
      "s3Key": "monadical/daily-20251020193458/...-video.webm",
      "size": 30000000
    }
  ]
 }
 ```
 3. **System extracts audio tracks** (`daily.py:211`):
 ```python
 track_keys = [t.s3Key for t in tracks if t.type == "audio"]
 ```
 4. **Triggers multitrack processing** (`daily.py:213-218`):
 ```python
 process_multitrack_recording.delay(
    bucket_name=bucket_name,  # reflector-dailyco-local
    room_name=room_name,      # daily-20251020193458
    recording_id=recording_id, # 7443ee0a-dab1-40eb-b316-33d6c0d5ff88
    track_keys=track_keys      # Only audio s3Keys
 )
 ```
 ### Key Advantage: No Ambiguity
 Even though multiple meetings may share the same S3 bucket/folder (`monadical/`), **there's no ambiguity** because:
 - Each webhook payload contains the exact `s3Key` list for that specific `recording_id`
 - No need to scan folders or guess which files belong together
 - Each track's s3Key includes the room timestamp subfolder (e.g., `daily-20251020193458/`)
 The room name includes timestamp (`daily-20251020193458`) to keep recordings organized, but **the webhook's explicit track list is what prevents mixing files from different meetings**.
 ### Track Timeline Extraction
 Daily.co provides timing information in two places:
 **1. PyAV WebM Metadata (current approach)**:
 ```python
 # Read from WebM container stream metadata
 stream.start_time = 8.130s  # Meeting-relative timing
 ```
 **2. Filename Timestamps (alternative approach, commit 3bae9076)**:
 ```
 Filename format: {recording_start_ts}-{uuid}-cam-audio-{track_start_ts}.webm
 Example: 1760988935484-52f7f48b-fbab-431f-9a50-87b9abfc8255-cam-audio-1760988935922.webm
 Parse timestamps:
 - recording_start_ts: 1760988935484 (Unix ms)
 - track_start_ts: 1760988935922 (Unix ms)
 - offset: (1760988935922 - 1760988935484) / 1000 = 0.438s
 ```
 **Time Difference (PyAV vs Filename)**:
 ```
 Track 0:
  Filename offset: 438ms
  PyAV metadata:   229ms
  Difference:      209ms
 Track 1:
  Filename offset: 8339ms
  PyAV metadata:   8130ms
  Difference:      209ms
 ```
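 **Consistent 209ms delta**: the track start encoded in the filename is always 209ms later than the stream start time in the WebM metadata, which suggests a fixed network/encoding delay between the actual audio stream start (metadata) and file upload initiation (filename).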
 **Current implementation uses PyAV metadata** because:
 - More accurate (represents when audio actually started)
 - Padding BEFORE transcription produces correct Whisper timestamps automatically
 - No manual offset adjustment needed during transcript merge
 ### Why Re-encoding During Padding
 Padding coincidentally involves re-encoding, which is important for Daily.co + Whisper:
 **Problem:** Daily.co skips frames in recordings when microphone is muted or paused
 - WebM containers have gaps where audio frames should be
 - Whisper doesn't understand these gaps and produces incorrect timestamps
 - Example: 5s of audio with 2s muted → file has frames only for 3s, Whisper thinks duration is 3s
 **Solution:** Re-encoding via PyAV filter graph (`adelay` + `aresample`)
 - Restores missing frames as silence
 - Produces continuous audio stream without gaps
 - Whisper now sees correct duration and produces accurate timestamps
 **Why combined with padding:**
 - Already re-encoding for padding (adding initial silence)
 - More performant to do both operations in single PyAV pipeline
 - Padded values needed for mixdown anyway (creating final MP3)
 Implementation: `main_multitrack_pipeline.py:_apply_audio_padding_streaming()`
 ---
 ## Whereby (SQS-based)
 Whereby uses **AWS SQS** (via S3 notifications) to notify Reflector when files are uploaded.
 ### How It Works
 1. **Whereby uploads recording** to S3
 2. **S3 sends notification** to SQS queue (one notification per file)
 3. **Reflector polls SQS queue** (`worker/process.py:process_messages()`)
 4. **System processes single file** (`worker/process.py:process_recording()`)
 ### Key Difference from Daily.co
 **Whereby (SQS):** System receives S3 notification "file X was created" - only knows about one file at a time, would need to scan folder to find related files
 **Daily.co (Webhook):** Daily explicitly tells system which files belong together in the webhook payload
 ---
--- a/server/env.example
+++ b/server/env.example
@@ -79,22 +79,19 @@ DIARIZATION_URL=https://monadical-sas--reflector-diarizer-web.modal.run
 ## Whereby
 #WHEREBY_API_KEY=your-whereby-api-key
 #WHEREBY_WEBHOOK_SECRET=your-whereby-webhook-secret
-#WHEREBY_STORAGE_AWS_ACCESS_KEY_ID=your-aws-key
+#AWS_WHEREBY_ACCESS_KEY_ID=your-aws-key
-#WHEREBY_STORAGE_AWS_SECRET_ACCESS_KEY=your-aws-secret
+#AWS_WHEREBY_ACCESS_KEY_SECRET=your-aws-secret
 #AWS_PROCESS_RECORDING_QUEUE_URL=https://sqs.us-west-2.amazonaws.com/...
 ## Daily.co
 #DAILY_API_KEY=your-daily-api-key
 #DAILY_WEBHOOK_SECRET=your-daily-webhook-secret
 #DAILY_SUBDOMAIN=your-subdomain
-#DAILY_WEBHOOK_UUID=  # Auto-populated by recreate_daily_webhook.py script
+#AWS_DAILY_S3_BUCKET=your-daily-bucket
-#DAILYCO_STORAGE_AWS_ROLE_ARN=...  # IAM role ARN for Daily.co S3 access
+#AWS_DAILY_S3_REGION=us-west-2
-#DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco
+#AWS_DAILY_ROLE_ARN=arn:aws:iam::ACCOUNT:role/DailyRecording
 #DAILYCO_STORAGE_AWS_REGION=us-west-2
-## Whereby (optional separate bucket)
+## Platform Selection
-#WHEREBY_STORAGE_AWS_BUCKET_NAME=reflector-whereby
+#DAILY_MIGRATION_ENABLED=false           # Enable Daily.co support
-#WHEREBY_STORAGE_AWS_REGION=us-east-1
+#DAILY_MIGRATION_ROOM_IDS=[]            # Specific rooms to use Daily
 ## Platform Configuration
 #DEFAULT_VIDEO_PLATFORM=whereby          # Default platform for new rooms
--- a/server/migrations/versions/05f8688d6895_add_action_items.py
+++ b/server/migrations/versions/05f8688d6895_add_action_items.py
@@ -1,26 +0,0 @@
 """add_action_items
 Revision ID: 05f8688d6895
 Revises: bbafedfa510c
 Create Date: 2025-12-12 11:57:50.209658
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "05f8688d6895"
 down_revision: Union[str, None] = "bbafedfa510c"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Add a nullable JSON ``action_items`` column to the ``transcript`` table."""
    action_items_column = sa.Column("action_items", sa.JSON(), nullable=True)
    op.add_column("transcript", action_items_column)
 def downgrade() -> None:
    """Remove the ``action_items`` column from the ``transcript`` table."""
    table_name, column_name = "transcript", "action_items"
    op.drop_column(table_name, column_name)
--- a/server/migrations/versions/0f943fede0e0_add_workflow_run_id_to_transcript.py
+++ b/server/migrations/versions/0f943fede0e0_add_workflow_run_id_to_transcript.py
@@ -1,28 +0,0 @@
 """add workflow_run_id to transcript
 Revision ID: 0f943fede0e0
 Revises: 05f8688d6895
 Create Date: 2025-12-16 01:54:13.855106
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "0f943fede0e0"
 down_revision: Union[str, None] = "05f8688d6895"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Add a nullable string ``workflow_run_id`` column to ``transcript``."""
    workflow_run_id = sa.Column("workflow_run_id", sa.String(), nullable=True)
    with op.batch_alter_table("transcript", schema=None) as transcript_table:
        transcript_table.add_column(workflow_run_id)
 def downgrade() -> None:
    """Drop the ``workflow_run_id`` column from ``transcript``."""
    column_name = "workflow_run_id"
    with op.batch_alter_table("transcript", schema=None) as transcript_table:
        transcript_table.drop_column(column_name)
--- a/server/migrations/versions/1e49625677e4_add_platform_support.py
+++ b/server/migrations/versions/1e49625677e4_add_platform_support.py
@@ -1,7 +1,7 @@
 """add_platform_support
 Revision ID: 1e49625677e4
-Revises: 9e3f7b2a4c8e
+Revises: dc035ff72fd5
 Create Date: 2025-10-08 13:17:29.943612
 """
@@ -13,7 +13,7 @@ from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "1e49625677e4"
-down_revision: Union[str, None] = "9e3f7b2a4c8e"
+down_revision: Union[str, None] = "dc035ff72fd5"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
@@ -25,8 +25,8 @@ def upgrade() -> None:
            sa.Column(
                "platform",
                sa.String(),
-                nullable=True,
+                nullable=False,
-                server_default=None,
+                server_default="whereby",
            )
        )
--- a/server/migrations/versions/2b92a1b03caa_add_daily_participant_session_table_.py
+++ b/server/migrations/versions/2b92a1b03caa_add_daily_participant_session_table_.py
@@ -1,79 +0,0 @@
 """add daily participant session table with immutable left_at
 Revision ID: 2b92a1b03caa
 Revises: f8294b31f022
 Create Date: 2025-11-13 20:29:30.486577
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "2b92a1b03caa"
 down_revision: Union[str, None] = "f8294b31f022"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create ``daily_participant_session`` with its indexes and a guard trigger.

    Runs three steps in order: create the table, add two indexes, then install
    a PL/pgSQL function plus a row-level BEFORE UPDATE trigger that calls it.
    """
    # Create the session table. Both foreign keys use ON DELETE CASCADE, so
    # rows are removed when the referenced meeting or room row is deleted.
    op.create_table(
        "daily_participant_session",
        sa.Column("id", sa.String(), nullable=False),
        sa.Column("meeting_id", sa.String(), nullable=False),
        sa.Column("room_id", sa.String(), nullable=False),
        sa.Column("session_id", sa.String(), nullable=False),
        sa.Column("user_id", sa.String(), nullable=True),
        sa.Column("user_name", sa.String(), nullable=False),
        sa.Column("joined_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("left_at", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(["meeting_id"], ["meeting.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["room_id"], ["room.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    # Non-unique indexes: a composite one on (meeting_id, left_at) and a
    # single-column one on room_id.
    with op.batch_alter_table("daily_participant_session", schema=None) as batch_op:
        batch_op.create_index(
            "idx_daily_session_meeting_left", ["meeting_id", "left_at"], unique=False
        )
        batch_op.create_index("idx_daily_session_room", ["room_id"], unique=False)
    # Create trigger function to prevent left_at from being updated once set.
    # NOTE(review): the function raises whenever OLD.left_at is non-NULL and
    # never inspects NEW, so once left_at is set *any* UPDATE of that row is
    # rejected, not only changes to left_at - confirm this is intended.
    op.execute("""
        CREATE OR REPLACE FUNCTION prevent_left_at_update()
        RETURNS TRIGGER AS $$
        BEGIN
            IF OLD.left_at IS NOT NULL THEN
                RAISE EXCEPTION 'left_at is immutable once set';
            END IF;
            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;
    """)
    # Attach the function as a per-row BEFORE UPDATE trigger on the new table.
    op.execute("""
        CREATE TRIGGER prevent_left_at_update_trigger
        BEFORE UPDATE ON daily_participant_session
        FOR EACH ROW
        EXECUTE FUNCTION prevent_left_at_update();
    """)
 def downgrade() -> None:
    """Undo the upgrade: remove the trigger, its function, the indexes, then the table."""
    table_name = "daily_participant_session"
    # Drop the trigger first, then the function it executes.
    teardown_statements = (
        "DROP TRIGGER IF EXISTS prevent_left_at_update_trigger ON daily_participant_session;",
        "DROP FUNCTION IF EXISTS prevent_left_at_update();",
    )
    for statement in teardown_statements:
        op.execute(statement)
    # Indexes are removed explicitly before the table itself is dropped.
    with op.batch_alter_table(table_name, schema=None) as session_table:
        for index_name in ("idx_daily_session_room", "idx_daily_session_meeting_left"):
            session_table.drop_index(index_name)
    op.drop_table(table_name)
--- a/server/migrations/versions/5d6b9df9b045_make_room_platform_non_nullable_with_.py
+++ b/server/migrations/versions/5d6b9df9b045_make_room_platform_non_nullable_with_.py
@@ -1,30 +0,0 @@
 """Make room platform non-nullable with dynamic default
 Revision ID: 5d6b9df9b045
 Revises: 2b92a1b03caa
 Create Date: 2025-11-21 13:22:25.756584
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "5d6b9df9b045"
 down_revision: Union[str, None] = "2b92a1b03caa"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Backfill missing ``room.platform`` values with 'whereby', then forbid NULLs."""
    # The backfill runs first so no NULL rows remain when NOT NULL is applied.
    backfill_sql = "UPDATE room SET platform = 'whereby' WHERE platform IS NULL"
    op.execute(backfill_sql)
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.alter_column("platform", existing_type=sa.String(), nullable=False)
 def downgrade() -> None:
    """Allow NULL in ``room.platform`` again; backfilled values are left as-is."""
    platform_type = sa.String()
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.alter_column("platform", existing_type=platform_type, nullable=True)
--- a/server/migrations/versions/9e3f7b2a4c8e_add_user_api_keys.py
+++ b/server/migrations/versions/9e3f7b2a4c8e_add_user_api_keys.py
@@ -1,38 +0,0 @@
 """add user api keys
 Revision ID: 9e3f7b2a4c8e
 Revises: dc035ff72fd5
 Create Date: 2025-10-17 00:00:00.000000
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "9e3f7b2a4c8e"
 down_revision: Union[str, None] = "dc035ff72fd5"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create the ``user_api_key`` table and its two indexes."""
    table_items = [
        sa.Column("id", sa.String(), nullable=False),
        sa.Column("user_id", sa.String(), nullable=False),
        sa.Column("key_hash", sa.String(), nullable=False),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table("user_api_key", *table_items)
    # (index name, indexed columns, unique?) - the key hash index is unique.
    index_specs = (
        ("idx_user_api_key_hash", ["key_hash"], True),
        ("idx_user_api_key_user_id", ["user_id"], False),
    )
    with op.batch_alter_table("user_api_key", schema=None) as key_table:
        for index_name, index_columns, is_unique in index_specs:
            key_table.create_index(index_name, index_columns, unique=is_unique)
 def downgrade() -> None:
    """Drop the ``user_api_key`` table."""
    op.drop_table("user_api_key")
--- a/server/migrations/versions/bbafedfa510c_add_user_table.py
+++ b/server/migrations/versions/bbafedfa510c_add_user_table.py
@@ -1,38 +0,0 @@
 """add user table
 Revision ID: bbafedfa510c
 Revises: 5d6b9df9b045
 Create Date: 2025-11-19 21:06:30.543262
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "bbafedfa510c"
 down_revision: Union[str, None] = "5d6b9df9b045"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create the ``user`` table and its two indexes."""
    table_items = [
        sa.Column("id", sa.String(), nullable=False),
        sa.Column("email", sa.String(), nullable=False),
        sa.Column("authentik_uid", sa.String(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table("user", *table_items)
    # (index name, indexed columns, unique?) - authentik_uid is unique, email is not.
    index_specs = (
        ("idx_user_authentik_uid", ["authentik_uid"], True),
        ("idx_user_email", ["email"], False),
    )
    with op.batch_alter_table("user", schema=None) as user_table:
        for index_name, index_columns, is_unique in index_specs:
            user_table.create_index(index_name, index_columns, unique=is_unique)
 def downgrade() -> None:
    """Drop the ``user`` table."""
    op.drop_table("user")
--- a/server/migrations/versions/bd3a729bb379_add_use_hatchet_to_room.py
+++ b/server/migrations/versions/bd3a729bb379_add_use_hatchet_to_room.py
@@ -1,35 +0,0 @@
 """add use_hatchet to room
 Revision ID: bd3a729bb379
 Revises: 0f943fede0e0
 Create Date: 2025-12-16 16:34:03.594231
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "bd3a729bb379"
 down_revision: Union[str, None] = "0f943fede0e0"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Add the non-nullable boolean ``room.use_hatchet`` column with server default false."""
    # The server default lets the NOT NULL column be added to a table that already has rows.
    use_hatchet_column = sa.Column(
        "use_hatchet",
        sa.Boolean(),
        server_default=sa.text("false"),
        nullable=False,
    )
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.add_column(use_hatchet_column)
 def downgrade() -> None:
    """Remove the ``use_hatchet`` column from ``room``."""
    with op.batch_alter_table("room", schema=None) as room_table:
        room_table.drop_column("use_hatchet")
--- a/server/migrations/versions/f8294b31f022_add_track_keys.py
+++ b/server/migrations/versions/f8294b31f022_add_track_keys.py
@@ -1,28 +0,0 @@
 """add_track_keys
 Revision ID: f8294b31f022
 Revises: 1e49625677e4
 Create Date: 2025-10-27 18:52:17.589167
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 # revision identifiers, used by Alembic.
 revision: str = "f8294b31f022"
 down_revision: Union[str, None] = "1e49625677e4"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Add the nullable JSON ``track_keys`` column to ``recording``."""
    track_keys_column = sa.Column("track_keys", sa.JSON(), nullable=True)
    with op.batch_alter_table("recording", schema=None) as recording_table:
        recording_table.add_column(track_keys_column)
 def downgrade() -> None:
    """Remove the ``track_keys`` column from ``recording``."""
    with op.batch_alter_table("recording", schema=None) as recording_table:
        recording_table.drop_column("track_keys")
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -39,7 +39,6 @@ dependencies = [
    "pytest-env>=1.1.5",
    "webvtt-py>=0.5.0",
    "icalendar>=6.0.0",
    "hatchet-sdk>=0.47.0",
 ]
 [dependency-groups]
@@ -127,7 +126,6 @@ markers = [
 select = [
    "I",       # isort - import sorting
    "F401",    # unused imports
    "E402",    # module level import not at top of file
    "PLC0415", # import-outside-top-level - detect inline imports
 ]
--- a/server/reflector/app.py
+++ b/server/reflector/app.py
@@ -27,7 +27,6 @@ from reflector.views.transcripts_upload import router as transcripts_upload_rout
 from reflector.views.transcripts_webrtc import router as transcripts_webrtc_router
 from reflector.views.transcripts_websocket import router as transcripts_websocket_router
 from reflector.views.user import router as user_router
 from reflector.views.user_api_keys import router as user_api_keys_router
 from reflector.views.user_websocket import router as user_ws_router
 from reflector.views.whereby import router as whereby_router
 from reflector.views.zulip import router as zulip_router
@@ -93,7 +92,6 @@ app.include_router(transcripts_websocket_router, prefix="/v1")
 app.include_router(transcripts_webrtc_router, prefix="/v1")
 app.include_router(transcripts_process_router, prefix="/v1")
 app.include_router(user_router, prefix="/v1")
 app.include_router(user_api_keys_router, prefix="/v1")
 app.include_router(user_ws_router, prefix="/v1")
 app.include_router(zulip_router, prefix="/v1")
 app.include_router(whereby_router, prefix="/v1")
--- a/server/reflector/asynctask.py
+++ b/server/reflector/asynctask.py
@@ -1,19 +1,13 @@
 import asyncio
 import functools
 from uuid import uuid4
 from celery import current_task
 from reflector.db import get_database
 from reflector.llm import llm_session_id
 def asynctask(f):
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        async def run_with_db():
            task_id = current_task.request.id if current_task else None
            llm_session_id.set(task_id or f"random-{uuid4().hex}")
            database = get_database()
            await database.connect()
            try:
--- a/server/reflector/auth/auth_jwt.py
+++ b/server/reflector/auth/auth_jwt.py
@@ -1,18 +1,14 @@
-from typing import Annotated, List, Optional
+from typing import Annotated, Optional
 from fastapi import Depends, HTTPException
-from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
+from fastapi.security import OAuth2PasswordBearer
 from jose import JWTError, jwt
 from pydantic import BaseModel
 from reflector.db.user_api_keys import user_api_keys_controller
 from reflector.db.users import user_controller
 from reflector.logger import logger
 from reflector.settings import settings
 from reflector.utils import generate_uuid4
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token", auto_error=False)
 api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
 jwt_public_key = open(f"reflector/auth/jwt/keys/{settings.AUTH_JWT_PUBLIC_KEY}").read()
 jwt_algorithm = settings.AUTH_JWT_ALGORITHM
@@ -30,7 +26,7 @@ class JWTException(Exception):
 class UserInfo(BaseModel):
    sub: str
-    email: Optional[str] = None
+    email: str
    def __getitem__(self, key):
        return getattr(self, key)
@@ -62,65 +58,34 @@ def authenticated(token: Annotated[str, Depends(oauth2_scheme)]):
    return None
-async def _authenticate_user(
+def current_user(
-    jwt_token: Optional[str],
+    token: Annotated[Optional[str], Depends(oauth2_scheme)],
-    api_key: Optional[str],
+    jwtauth: JWTAuth = Depends(),
-    jwtauth: JWTAuth,
+):
-) -> UserInfo | None:
+    if token is None:
-    user_infos: List[UserInfo] = []
+        raise HTTPException(status_code=401, detail="Not authenticated")
    if api_key:
        user_api_key = await user_api_keys_controller.verify_key(api_key)
        if user_api_key:
            user_infos.append(UserInfo(sub=user_api_key.user_id, email=None))
    if jwt_token:
    try:
-            payload = jwtauth.verify_token(jwt_token)
+        payload = jwtauth.verify_token(token)
-            authentik_uid = payload["sub"]
+        sub = payload["sub"]
        email = payload["email"]
-
+        return UserInfo(sub=sub, email=email)
            user = await user_controller.get_by_authentik_uid(authentik_uid)
            if not user:
                logger.info(
                    f"Creating new user on first login: {authentik_uid} ({email})"
                )
                user = await user_controller.create_or_update(
                    id=generate_uuid4(),
                    authentik_uid=authentik_uid,
                    email=email,
                )
            user_infos.append(UserInfo(sub=user.id, email=email))
    except JWTError as e:
        logger.error(f"JWT error: {e}")
        raise HTTPException(status_code=401, detail="Invalid authentication")
-    if len(user_infos) == 0:
+
 def current_user_optional(
    token: Annotated[Optional[str], Depends(oauth2_scheme)],
    jwtauth: JWTAuth = Depends(),
 ):
    # we accept no token, but if one is provided, it must be a valid one.
    if token is None:
        return None
-
+    try:
-    if len(set([x.sub for x in user_infos])) > 1:
+        payload = jwtauth.verify_token(token)
-        raise JWTException(
+        sub = payload["sub"]
-            status_code=401,
+        email = payload["email"]
-            detail="Invalid authentication: more than one user provided",
+        return UserInfo(sub=sub, email=email)
-        )
+    except JWTError as e:
-
+        logger.error(f"JWT error: {e}")
-    return user_infos[0]
+        raise HTTPException(status_code=401, detail="Invalid authentication")
 async def current_user(
    jwt_token: Annotated[Optional[str], Depends(oauth2_scheme)],
    api_key: Annotated[Optional[str], Depends(api_key_header)],
    jwtauth: JWTAuth = Depends(),
 ):
    """
    FastAPI dependency that requires an authenticated user.
    Delegates to _authenticate_user with the bearer token and the API key header.
    Raises:
        HTTPException: 401 "Not authenticated" when no user could be resolved
    """
    authenticated_user = await _authenticate_user(jwt_token, api_key, jwtauth)
    if authenticated_user is not None:
        return authenticated_user
    raise HTTPException(status_code=401, detail="Not authenticated")
 async def current_user_optional(
    jwt_token: Annotated[Optional[str], Depends(oauth2_scheme)],
    api_key: Annotated[Optional[str], Depends(api_key_header)],
    jwtauth: JWTAuth = Depends(),
 ):
    """
    FastAPI dependency for endpoints where authentication is optional.
    Returns whatever _authenticate_user yields for the given bearer token and
    API key header; unlike current_user, a None result is passed through
    instead of being turned into a 401.
    """
    return await _authenticate_user(jwt_token, api_key, jwtauth)
--- a/server/reflector/dailyco_api/README.md
+++ b/server/reflector/dailyco_api/README.md
@@ -1,6 +0,0 @@
 Everything related to Daily.co API interaction:
 - webhook event shapes
 - REST API client
 No existing REST API client was found in the wild; the official library is about working with video calls as a bot.
--- a/server/reflector/dailyco_api/__init__.py
+++ b/server/reflector/dailyco_api/__init__.py
@@ -1,110 +0,0 @@
 """
 Daily.co API Module
 """
 # Client
 from .client import DailyApiClient, DailyApiError
 # Request models
 from .requests import (
    CreateMeetingTokenRequest,
    CreateRoomRequest,
    CreateWebhookRequest,
    MeetingTokenProperties,
    RecordingsBucketConfig,
    RoomProperties,
    UpdateWebhookRequest,
 )
 # Response models
 from .responses import (
    FinishedRecordingResponse,
    MeetingParticipant,
    MeetingParticipantsResponse,
    MeetingResponse,
    MeetingTokenResponse,
    RecordingResponse,
    RecordingS3Info,
    RoomPresenceParticipant,
    RoomPresenceResponse,
    RoomResponse,
    WebhookResponse,
 )
 # Webhook utilities
 from .webhook_utils import (
    extract_room_name,
    parse_participant_joined,
    parse_participant_left,
    parse_recording_error,
    parse_recording_ready,
    parse_recording_started,
    parse_webhook_payload,
    verify_webhook_signature,
 )
 # Webhook models
 from .webhooks import (
    DailyTrack,
    DailyWebhookEvent,
    DailyWebhookEventUnion,
    ParticipantJoinedEvent,
    ParticipantJoinedPayload,
    ParticipantLeftEvent,
    ParticipantLeftPayload,
    RecordingErrorEvent,
    RecordingErrorPayload,
    RecordingReadyEvent,
    RecordingReadyToDownloadPayload,
    RecordingStartedEvent,
    RecordingStartedPayload,
 )
 __all__ = [
    # Client
    "DailyApiClient",
    "DailyApiError",
    # Requests
    "CreateRoomRequest",
    "RoomProperties",
    "RecordingsBucketConfig",
    "CreateMeetingTokenRequest",
    "MeetingTokenProperties",
    "CreateWebhookRequest",
    "UpdateWebhookRequest",
    # Responses
    "RoomResponse",
    "RoomPresenceResponse",
    "RoomPresenceParticipant",
    "MeetingParticipantsResponse",
    "MeetingParticipant",
    "MeetingResponse",
    "RecordingResponse",
    "FinishedRecordingResponse",
    "RecordingS3Info",
    "MeetingTokenResponse",
    "WebhookResponse",
    # Webhooks
    "DailyWebhookEvent",
    "DailyWebhookEventUnion",
    "DailyTrack",
    "ParticipantJoinedEvent",
    "ParticipantJoinedPayload",
    "ParticipantLeftEvent",
    "ParticipantLeftPayload",
    "RecordingStartedEvent",
    "RecordingStartedPayload",
    "RecordingReadyEvent",
    "RecordingReadyToDownloadPayload",
    "RecordingErrorEvent",
    "RecordingErrorPayload",
    # Webhook utilities
    "verify_webhook_signature",
    "extract_room_name",
    "parse_webhook_payload",
    "parse_participant_joined",
    "parse_participant_left",
    "parse_recording_started",
    "parse_recording_ready",
    "parse_recording_error",
 ]
--- a/server/reflector/dailyco_api/client.py
+++ b/server/reflector/dailyco_api/client.py
@@ -1,573 +0,0 @@
 """
 Daily.co API Client
 Complete async client for Daily.co REST API with Pydantic models.
 Reference: https://docs.daily.co/reference/rest-api
 """
 from http import HTTPStatus
 from typing import Any
 import httpx
 import structlog
 from reflector.utils.string import NonEmptyString
 from .requests import (
    CreateMeetingTokenRequest,
    CreateRoomRequest,
    CreateWebhookRequest,
    UpdateWebhookRequest,
 )
 from .responses import (
    MeetingParticipantsResponse,
    MeetingResponse,
    MeetingTokenResponse,
    RecordingResponse,
    RoomPresenceResponse,
    RoomResponse,
    WebhookResponse,
 )
 logger = structlog.get_logger(__name__)
 class DailyApiError(Exception):
    """Error raised for a failed Daily.co API call; keeps the request/response details."""
    def __init__(self, operation: str, response: httpx.Response):
        # Operation label supplied by the caller, plus the raw response for inspection.
        self.operation = operation
        self.response = response
        self.status_code = response.status_code
        self.response_body = response.text
        self.url = str(response.url)
        # The body that was sent, decoded to text; None when the request had no content.
        sent_payload = response.request.content
        self.request_body = sent_payload.decode() if sent_payload else None
        message = f"Daily.co API error: {operation} failed with status {self.status_code}: {response.text}"
        super().__init__(message)
 class DailyApiClient:
    """
    Complete async client for Daily.co REST API.
    Usage:
        # Direct usage
        client = DailyApiClient(api_key="your_api_key")
        room = await client.create_room(CreateRoomRequest(name="my-room"))
        await client.close()  # Clean up when done
        # Context manager (recommended)
        async with DailyApiClient(api_key="your_api_key") as client:
            room = await client.create_room(CreateRoomRequest(name="my-room"))
    """
    BASE_URL = "https://api.daily.co/v1"
    DEFAULT_TIMEOUT = 10.0
    def __init__(
        self,
        api_key: NonEmptyString,
        webhook_secret: NonEmptyString | None = None,
        timeout: float = DEFAULT_TIMEOUT,
        base_url: NonEmptyString | None = None,
    ):
        """
        Set up the client configuration; no HTTP connection is opened here.
        Args:
            api_key: Daily.co API key, sent as a Bearer token
            webhook_secret: Base64-encoded HMAC secret for webhook verification.
                Must match the 'hmac' value provided when creating webhooks.
                Generate with: base64.b64encode(os.urandom(32)).decode()
            timeout: Default request timeout in seconds
            base_url: Override base URL (for testing); BASE_URL is used when omitted
        """
        self.api_key = api_key
        self.webhook_secret = webhook_secret
        self.timeout = timeout
        # Fall back to the class-level endpoint when no (or an empty) override is given.
        self.base_url = base_url if base_url else self.BASE_URL
        bearer_value = f"Bearer {api_key}"
        self.headers = {
            "Authorization": bearer_value,
            "Content-Type": "application/json",
        }
        # The httpx client is created lazily by _get_client().
        self._client: httpx.AsyncClient | None = None
    async def __aenter__(self):
        """Enter the async context manager; returns this client instance."""
        return self
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the async context manager, releasing the HTTP client via close()."""
        await self.close()
    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared httpx.AsyncClient, creating it with self.timeout on first use."""
        existing = self._client
        if existing is not None:
            return existing
        self._client = httpx.AsyncClient(timeout=self.timeout)
        return self._client
    async def close(self):
        """Close the underlying HTTP client if one was created; a no-op otherwise."""
        if self._client is None:
            return
        await self._client.aclose()
        # Reset so a later _get_client() call builds a fresh client.
        self._client = None
    async def _handle_response(
        self, response: httpx.Response, operation: str
    ) -> dict[str, Any]:
        """
        Turn an HTTP response into parsed JSON, logging and raising on failure.
        Args:
            response: HTTP response
            operation: Operation name used in the log entry and the error
                (e.g., "create_room")
        Returns:
            Parsed JSON body of the response
        Raises:
            DailyApiError: If the status code is 400 or above
        """
        if response.status_code < 400:
            return response.json()
        # Failure path: record the request/response details before raising.
        sent_payload = response.request.content
        logger.error(
            f"Daily.co API error: {operation}",
            status_code=response.status_code,
            response_body=response.text,
            request_body=sent_payload.decode() if sent_payload else None,
            url=str(response.url),
        )
        raise DailyApiError(operation, response)
    # ============================================================================
    # ROOMS
    # ============================================================================
    async def create_room(self, request: CreateRoomRequest) -> RoomResponse:
        """
        Create a new Daily.co room.
        Reference: https://docs.daily.co/reference/rest-api/rooms/create-room
        Args:
            request: Room creation request with name, privacy, and properties;
                fields that are None are left out of the JSON body
        Returns:
            Created room data including URL and ID
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/rooms"
        api_response = await http.post(
            endpoint,
            headers=self.headers,
            json=request.model_dump(exclude_none=True),
        )
        room_data = await self._handle_response(api_response, "create_room")
        return RoomResponse(**room_data)
    async def get_room(self, room_name: NonEmptyString) -> RoomResponse:
        """
        Fetch the configuration of a room.
        Args:
            room_name: Daily.co room name
        Returns:
            Room configuration data
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/rooms/{room_name}"
        api_response = await http.get(endpoint, headers=self.headers)
        room_data = await self._handle_response(api_response, "get_room")
        return RoomResponse(**room_data)
    async def get_room_presence(
        self, room_name: NonEmptyString
    ) -> RoomPresenceResponse:
        """
        Get current participants in a room (real-time presence).
        Reference: https://docs.daily.co/reference/rest-api/rooms/get-room-presence
        Args:
            room_name: Daily.co room name
        Returns:
            List of currently present participants with join time and duration
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/rooms/{room_name}/presence"
        api_response = await http.get(endpoint, headers=self.headers)
        presence_data = await self._handle_response(api_response, "get_room_presence")
        return RoomPresenceResponse(**presence_data)
    async def delete_room(self, room_name: NonEmptyString) -> None:
        """
        Delete a room (idempotent - succeeds even if room doesn't exist).
        Reference: https://docs.daily.co/reference/rest-api/rooms/delete-room
        Args:
            room_name: Daily.co room name
        Raises:
            DailyApiError: If the API responds with a status code of 400 or
                above, except 404
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/rooms/{room_name}"
        api_response = await http.delete(endpoint, headers=self.headers)
        if api_response.status_code != HTTPStatus.NOT_FOUND:
            await self._handle_response(api_response, "delete_room")
            return
        # Idempotent delete - a 404 means the room is already gone.
        logger.debug("Room not found (already deleted)", room_name=room_name)
    # ============================================================================
    # MEETINGS
    # ============================================================================
    async def get_meeting(self, meeting_id: NonEmptyString) -> MeetingResponse:
        """
        Get full meeting information including participants.
        Reference: https://docs.daily.co/reference/rest-api/meetings/get-meeting-information
        Args:
            meeting_id: Daily.co meeting/session ID
        Returns:
            Meeting metadata including room, duration, participants, and status
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/meetings/{meeting_id}"
        api_response = await http.get(endpoint, headers=self.headers)
        meeting_data = await self._handle_response(api_response, "get_meeting")
        return MeetingResponse(**meeting_data)
    async def get_meeting_participants(
        self,
        meeting_id: NonEmptyString,
        limit: int | None = None,
        joined_after: NonEmptyString | None = None,
        joined_before: NonEmptyString | None = None,
    ) -> MeetingParticipantsResponse:
        """
        Get historical participant data from a completed meeting (paginated).
        Reference: https://docs.daily.co/reference/rest-api/meetings/get-meeting-participants
        Args:
            meeting_id: Daily.co meeting/session ID
            limit: Maximum number of participant records to return
            joined_after: Return participants who joined after this participant_id
            joined_before: Return participants who joined before this participant_id
        Returns:
            List of participants with join times and duration
        Raises:
            DailyApiError: If the API responds with a status code of 400 or
                above (including the 404 sent when no more participants remain)
        Note:
            For pagination, use joined_after with the last participant_id from previous response.
            Returns 404 when no more participants remain.
        """
        # Only arguments that were actually supplied are sent as query parameters.
        candidates = {
            "limit": limit,
            "joined_after": joined_after,
            "joined_before": joined_before,
        }
        params = {key: value for key, value in candidates.items() if value is not None}
        http = await self._get_client()
        endpoint = f"{self.base_url}/meetings/{meeting_id}/participants"
        api_response = await http.get(endpoint, headers=self.headers, params=params)
        participants_data = await self._handle_response(
            api_response, "get_meeting_participants"
        )
        return MeetingParticipantsResponse(**participants_data)
    # ============================================================================
    # RECORDINGS
    # ============================================================================
    async def get_recording(self, recording_id: NonEmptyString) -> RecordingResponse:
        """
        Get recording metadata and status.
        Reference: https://docs.daily.co/reference/rest-api/recordings/get-recording-information
        Args:
            recording_id: ID of the recording to look up
        Returns:
            The recording as a RecordingResponse
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/recordings/{recording_id}"
        api_response = await http.get(endpoint, headers=self.headers)
        recording_data = await self._handle_response(api_response, "get_recording")
        return RecordingResponse(**recording_data)
    async def list_recordings(
        self,
        room_name: NonEmptyString | None = None,
        starting_after: str | None = None,
        ending_before: str | None = None,
        limit: int = 100,
    ) -> list[RecordingResponse]:
        """
        List recordings with optional filters.
        Reference: https://docs.daily.co/reference/rest-api/recordings
        Args:
            room_name: Filter by room name
            starting_after: Pagination cursor - recording ID to start after
            ending_before: Pagination cursor - recording ID to end before
            limit: Max results per page (default 100, max 100)
        Returns:
            One RecordingResponse per entry in the response's "data" list
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
            httpx.HTTPStatusError: If the body is not a dict containing "data"
        Note: starting_after/ending_before are pagination cursors (recording IDs),
        NOT time filters. API returns recordings in reverse chronological order.
        """
        http = await self._get_client()
        query = {"limit": limit}
        # Optional filters are only sent when they carry a non-empty value.
        optional_filters = (
            ("room_name", room_name),
            ("starting_after", starting_after),
            ("ending_before", ending_before),
        )
        for key, value in optional_filters:
            if value:
                query[key] = value
        api_response = await http.get(
            f"{self.base_url}/recordings",
            headers=self.headers,
            params=query,
        )
        data = await self._handle_response(api_response, "list_recordings")
        well_formed = isinstance(data, dict) and "data" in data
        if not well_formed:
            logger.error(
                "Daily.co API returned unexpected format for list_recordings",
                data_type=type(data).__name__,
                data_keys=list(data.keys()) if isinstance(data, dict) else None,
                data_sample=str(data)[:500],
                room_name=room_name,
                operation="list_recordings",
            )
            raise httpx.HTTPStatusError(
                message=f"Unexpected response format from list_recordings: {type(data).__name__}",
                request=api_response.request,
                response=api_response,
            )
        return [RecordingResponse(**entry) for entry in data["data"]]
    # ============================================================================
    # MEETING TOKENS
    # ============================================================================
    async def create_meeting_token(
        self, request: CreateMeetingTokenRequest
    ) -> MeetingTokenResponse:
        """
        Create a meeting token for participant authentication.
        Reference: https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token
        Args:
            request: Token properties including room name, user_id, permissions;
                fields that are None are left out of the JSON body
        Returns:
            JWT meeting token
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/meeting-tokens"
        api_response = await http.post(
            endpoint,
            headers=self.headers,
            json=request.model_dump(exclude_none=True),
        )
        token_data = await self._handle_response(api_response, "create_meeting_token")
        return MeetingTokenResponse(**token_data)
    # ============================================================================
    # WEBHOOKS
    # ============================================================================
    async def list_webhooks(self) -> list[WebhookResponse]:
        """
        List all configured webhooks for this account.
        Reference: https://docs.daily.co/reference/rest-api/webhooks
        Returns:
            List of webhook configurations; empty when the response has an
            unrecognised shape (a warning is logged in that case)
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        api_response = await http.get(
            f"{self.base_url}/webhooks",
            headers=self.headers,
        )
        data = await self._handle_response(api_response, "list_webhooks")
        if isinstance(data, list):
            # Daily.co returns array directly (not paginated)
            entries = data
        elif isinstance(data, dict) and "data" in data:
            # Future-proof: handle potential pagination envelope
            entries = data["data"]
        else:
            logger.warning("Unexpected webhook list response format", data=data)
            return []
        return [WebhookResponse(**entry) for entry in entries]
    async def create_webhook(self, request: CreateWebhookRequest) -> WebhookResponse:
        """
        Create a new webhook subscription.
        Reference: https://docs.daily.co/reference/rest-api/webhooks
        Args:
            request: Webhook configuration with URL, event types, and HMAC secret;
                fields that are None are left out of the JSON body
        Returns:
            Created webhook with UUID and state
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/webhooks"
        api_response = await http.post(
            endpoint,
            headers=self.headers,
            json=request.model_dump(exclude_none=True),
        )
        webhook_data = await self._handle_response(api_response, "create_webhook")
        return WebhookResponse(**webhook_data)
    async def update_webhook(
        self, webhook_uuid: NonEmptyString, request: UpdateWebhookRequest
    ) -> WebhookResponse:
        """
        Update webhook configuration via PATCH.
        Note: Daily.co may not support PATCH for all fields.
        Common pattern is delete + recreate.
        Reference: https://docs.daily.co/reference/rest-api/webhooks
        Args:
            webhook_uuid: Webhook UUID to update
            request: Updated webhook configuration; fields that are None are
                left out of the JSON body
        Returns:
            Updated webhook configuration
        Raises:
            DailyApiError: If the API responds with a status code of 400 or above
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/webhooks/{webhook_uuid}"
        api_response = await http.patch(
            endpoint,
            headers=self.headers,
            json=request.model_dump(exclude_none=True),
        )
        webhook_data = await self._handle_response(api_response, "update_webhook")
        return WebhookResponse(**webhook_data)
    async def delete_webhook(self, webhook_uuid: NonEmptyString) -> None:
        """
        Delete a webhook.
        Reference: https://docs.daily.co/reference/rest-api/webhooks
        Args:
            webhook_uuid: Webhook UUID to delete
        Raises:
            DailyApiError: If the API responds with a status code of 400 or
                above (unlike delete_room, a 404 is not tolerated here)
        """
        http = await self._get_client()
        endpoint = f"{self.base_url}/webhooks/{webhook_uuid}"
        api_response = await http.delete(endpoint, headers=self.headers)
        await self._handle_response(api_response, "delete_webhook")
    # ============================================================================
    # HELPER METHODS
    # ============================================================================
    async def find_webhook_by_url(self, url: NonEmptyString) -> WebhookResponse | None:
        """
        Look up a webhook by exact URL match.
        Args:
            url: Webhook endpoint URL to search for
        Returns:
            The first webhook from list_webhooks() whose URL equals ``url``,
            or None when there is no match
        """
        registered = await self.list_webhooks()
        return next((hook for hook in registered if hook.url == url), None)
    async def find_webhooks_by_pattern(
        self, pattern: NonEmptyString
    ) -> list[WebhookResponse]:
        """
        Find webhooks whose URL contains a substring (e.g., 'ngrok').
        Args:
            pattern: String to match in webhook URLs (plain substring test)
        Returns:
            List of matching webhooks, in the order list_webhooks() returned them
        """
        matches: list[WebhookResponse] = []
        for hook in await self.list_webhooks():
            if pattern in hook.url:
                matches.append(hook)
        return matches
--- a/server/reflector/dailyco_api/requests.py
+++ b/server/reflector/dailyco_api/requests.py
@@ -1,162 +0,0 @@
 """
 Daily.co API Request Models
 Reference: https://docs.daily.co/reference/rest-api
 """
 from typing import List, Literal
 from pydantic import BaseModel, Field
 from reflector.utils.string import NonEmptyString
 class RecordingsBucketConfig(BaseModel):
    """
    S3 bucket configuration for raw-tracks recordings.
    Used as the type of `RoomProperties.recordings_bucket`. The bucket name,
    region and role ARN are required; `allow_api_access` defaults to True.
    Reference: https://docs.daily.co/reference/rest-api/rooms/create-room
    """
    bucket_name: NonEmptyString = Field(description="S3 bucket name")
    bucket_region: NonEmptyString = Field(description="AWS region (e.g., 'us-east-1')")
    assume_role_arn: NonEmptyString = Field(
        description="AWS IAM role ARN that Daily.co will assume to write recordings"
    )
    allow_api_access: bool = Field(
        default=True,
        description="Whether to allow API access to recording metadata",
    )
 class RoomProperties(BaseModel):
    """
    Per-room settings carried in `CreateRoomRequest.properties`.
    Every field is optional: the boolean toggles have explicit defaults,
    while `enable_recording`, `exp` and `recordings_bucket` default to None.
    """
    enable_recording: Literal["cloud", "local", "raw-tracks"] | None = Field(
        description="Recording mode: 'cloud' for mixed, 'local' for local recording, 'raw-tracks' for multitrack, None to disable",
        default=None,
    )
    enable_chat: bool = Field(
        description="Enable in-meeting chat",
        default=True,
    )
    enable_screenshare: bool = Field(
        description="Enable screen sharing",
        default=True,
    )
    enable_knocking: bool = Field(
        description="Enable knocking for private rooms (allows participants to request access)",
        default=False,
    )
    start_video_off: bool = Field(
        description="Start with video off for all participants",
        default=False,
    )
    start_audio_off: bool = Field(
        description="Start with audio muted for all participants",
        default=False,
    )
    exp: int | None = Field(
        description="Room expiration timestamp (Unix epoch seconds)",
        default=None,
    )
    recordings_bucket: RecordingsBucketConfig | None = Field(
        description="S3 bucket configuration for raw-tracks recordings",
        default=None,
    )
 class CreateRoomRequest(BaseModel):
    """
    Request to create a new Daily.co room.
    Only `name` is required. `privacy` defaults to "public" and `properties`
    defaults to a fresh `RoomProperties()` built per instance via
    `default_factory`.
    Reference: https://docs.daily.co/reference/rest-api/rooms/create-room
    """
    name: NonEmptyString = Field(description="Room name (must be unique within domain)")
    privacy: Literal["public", "private"] = Field(
        default="public", description="Room privacy setting"
    )
    properties: RoomProperties = Field(
        default_factory=RoomProperties, description="Room configuration properties"
    )
 class MeetingTokenProperties(BaseModel):
    """
    Settings embedded in a meeting token (see `CreateMeetingTokenRequest`).
    `room_name` is the only required field; the optional identifiers and
    timestamps default to None and the boolean flags have explicit defaults.
    Reference: https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token
    """
    room_name: NonEmptyString = Field(description="Room name this token is valid for")
    user_id: NonEmptyString | None = Field(
        default=None,
        description="User identifier to associate with token",
    )
    is_owner: bool = Field(
        description="Grant owner privileges to token holder",
        default=False,
    )
    start_cloud_recording: bool = Field(
        description="Automatically start cloud recording on join",
        default=False,
    )
    enable_recording_ui: bool = Field(
        description="Show recording controls in UI",
        default=True,
    )
    eject_at_token_exp: bool = Field(
        description="Eject participant when token expires",
        default=False,
    )
    nbf: int | None = Field(
        default=None,
        description="Not-before timestamp (Unix epoch seconds)",
    )
    exp: int | None = Field(
        default=None,
        description="Expiration timestamp (Unix epoch seconds)",
    )
 class CreateMeetingTokenRequest(BaseModel):
    """
    Request to create a meeting token for participant authentication.
    Thin envelope: the token settings live in the required `properties`
    field (a `MeetingTokenProperties`).
    Reference: https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token
    """
    properties: MeetingTokenProperties = Field(description="Token properties")
 class CreateWebhookRequest(BaseModel):
    """
    Body for registering a new webhook subscription.
    `eventTypes` is restricted to the five event names listed below, so any
    other name fails validation. Field names are camelCase to match the
    Daily.co API.
    Reference: https://docs.daily.co/reference/rest-api/webhooks
    """
    url: NonEmptyString = Field(description="Webhook endpoint URL (must be HTTPS)")
    eventTypes: list[
        Literal[
            "participant.joined",
            "participant.left",
            "recording.started",
            "recording.ready-to-download",
            "recording.error",
        ]
    ] = Field(
        description="Array of event types to subscribe to (only events we handle)"
    )
    hmac: NonEmptyString = Field(
        description="Base64-encoded HMAC secret for webhook signature verification"
    )
    basicAuth: NonEmptyString | None = Field(
        default=None,
        description="Optional basic auth credentials for webhook endpoint",
    )
 class UpdateWebhookRequest(BaseModel):
    """
    Partial-update body for an existing webhook; every field defaults to None.
    Note: Daily.co API may not support PATCH for webhooks.
    Common pattern is to delete and recreate.
    Reference: https://docs.daily.co/reference/rest-api/webhooks
    """
    url: NonEmptyString | None = Field(
        default=None,
        description="New webhook endpoint URL",
    )
    eventTypes: list[NonEmptyString] | None = Field(
        default=None,
        description="New array of event types",
    )
    hmac: NonEmptyString | None = Field(
        default=None,
        description="New HMAC secret",
    )
    basicAuth: NonEmptyString | None = Field(
        default=None,
        description="New basic auth credentials",
    )
--- a/server/reflector/dailyco_api/responses.py
+++ b/server/reflector/dailyco_api/responses.py
@@ -1,217 +0,0 @@
 """
 Daily.co API Response Models
 """
 from typing import Any, Dict, List, Literal
 from pydantic import BaseModel, Field
 from reflector.dailyco_api.webhooks import DailyTrack
 from reflector.utils.string import NonEmptyString
 # not documented in daily; we fill it according to observations
 RecordingStatus = Literal["in-progress", "finished"]
 class RoomResponse(BaseModel):
    """
    Room object returned by Daily.co when a room is created or fetched.
    `config` is kept as a free-form mapping and defaults to an empty dict.
    Reference: https://docs.daily.co/reference/rest-api/rooms/create-room
    """
    id: NonEmptyString = Field(
        description="Unique room identifier (UUID)",
    )
    name: NonEmptyString = Field(
        description="Room name used in URLs",
    )
    api_created: bool = Field(
        description="Whether room was created via API",
    )
    privacy: Literal["public", "private"] = Field(
        description="Room privacy setting",
    )
    url: NonEmptyString = Field(
        description="Full room URL",
    )
    created_at: NonEmptyString = Field(
        description="ISO 8601 creation timestamp",
    )
    config: dict[NonEmptyString, Any] = Field(
        description="Room configuration properties",
        default_factory=dict,
    )
 class RoomPresenceParticipant(BaseModel):
    """
    One participant entry in a room-presence listing.
    Field names are camelCase here, unlike `MeetingParticipant`, which uses
    snake_case.
    Reference: https://docs.daily.co/reference/rest-api/rooms/get-room-presence
    """
    room: NonEmptyString = Field(description="Room name")
    id: NonEmptyString = Field(description="Participant session ID")
    userId: NonEmptyString | None = Field(
        default=None,
        description="User ID if provided",
    )
    userName: NonEmptyString | None = Field(
        default=None,
        description="User display name",
    )
    joinTime: NonEmptyString = Field(description="ISO 8601 join timestamp")
    duration: int = Field(description="Duration in room (seconds)")
 class RoomPresenceResponse(BaseModel):
    """
    Envelope returned by the room presence endpoint: a participant count plus
    the participant entries (empty list when `data` is absent).
    Reference: https://docs.daily.co/reference/rest-api/rooms/get-room-presence
    """
    total_count: int = Field(
        description="Total number of participants currently in room"
    )
    data: list[RoomPresenceParticipant] = Field(
        description="Array of participant presence data",
        default_factory=list,
    )
 class MeetingParticipant(BaseModel):
    """
    A participant's record within a past or ongoing meeting session.
    `user_id` and `user_name` default to None; the other fields are required.
    Reference: https://docs.daily.co/reference/rest-api/meetings/get-meeting-participants
    """
    user_id: NonEmptyString | None = Field(
        default=None,
        description="User identifier",
    )
    participant_id: NonEmptyString = Field(description="Participant session identifier")
    user_name: NonEmptyString | None = Field(
        default=None,
        description="User display name",
    )
    join_time: int = Field(description="Join timestamp (Unix epoch seconds)")
    duration: int = Field(description="Duration in meeting (seconds)")
 class MeetingParticipantsResponse(BaseModel):
    """
    Envelope returned by the meeting participants endpoint; `data` falls back
    to an empty list when the field is absent.
    Reference: https://docs.daily.co/reference/rest-api/meetings/get-meeting-participants
    """
    data: list[MeetingParticipant] = Field(
        description="Array of participant data",
        default_factory=list,
    )
 class MeetingResponse(BaseModel):
    """
    Meeting session details returned by the meeting information endpoint.
    `participants` falls back to an empty list when absent; all other fields
    are required.
    Reference: https://docs.daily.co/reference/rest-api/meetings/get-meeting-information
    """
    id: NonEmptyString = Field(description="Meeting session identifier (UUID)")
    room: NonEmptyString = Field(description="Room name where meeting occurred")
    start_time: int = Field(
        description="Meeting start Unix timestamp (~15s granularity)"
    )
    duration: int = Field(description="Total meeting duration in seconds")
    ongoing: bool = Field(description="Whether meeting is currently active")
    max_participants: int = Field(description="Peak concurrent participant count")
    participants: list[MeetingParticipant] = Field(
        description="Array of participant session data",
        default_factory=list,
    )
 class RecordingS3Info(BaseModel):
    """
    S3 bucket information for a recording.
    Used as the type of `RecordingResponse.s3`. Bucket name and region are
    required; `endpoint` defaults to None.
    Reference: https://docs.daily.co/reference/rest-api/recordings
    """
    bucket_name: NonEmptyString
    bucket_region: NonEmptyString
    endpoint: NonEmptyString | None = None
 class RecordingResponse(BaseModel):
    """
    Recording object as returned by the recordings endpoint (network layer).
    `duration` can be None while Daily is still processing the recording.
    Call `to_finished()` to obtain a `FinishedRecordingResponse`, which
    guarantees a duration and a "finished" status.
    Reference: https://docs.daily.co/reference/rest-api/recordings
    """
    id: NonEmptyString = Field(description="Recording identifier")
    room_name: NonEmptyString = Field(description="Room where recording occurred")
    start_ts: int = Field(description="Recording start timestamp (Unix epoch seconds)")
    status: RecordingStatus = Field(
        description="Recording status ('in-progress' or 'finished')"
    )
    max_participants: int | None = Field(
        default=None,
        description="Maximum participants during recording (may be missing)",
    )
    duration: int | None = Field(
        default=None,
        description="Recording duration in seconds (None if still processing)",
    )
    share_token: NonEmptyString | None = Field(
        default=None,
        description="Token for sharing recording",
    )
    s3: RecordingS3Info | None = Field(
        default=None,
        description="S3 bucket information",
    )
    tracks: list[DailyTrack] = Field(
        description="Track list for raw-tracks recordings (always array, never null)",
        default_factory=list,
    )
    # The camelCase name is Daily.co's own choice for this field, not a typo.
    mtgSessionId: NonEmptyString | None = Field(
        default=None,
        description="Meeting session identifier (may be missing)",
    )
    def to_finished(self) -> "FinishedRecordingResponse | None":
        """Return a FinishedRecordingResponse copy, or None unless status is "finished" and duration is set."""
        is_ready = self.status == "finished" and self.duration is not None
        if not is_ready:
            return None
        # Re-validate the dumped fields against the stricter subclass.
        return FinishedRecordingResponse(**self.model_dump())
 class FinishedRecordingResponse(RecordingResponse):
    """
    Recording with confirmed duration - ready for processing.
    This model guarantees duration is present and status is finished.
    It narrows two inherited fields: `status` accepts only "finished" and
    `duration` becomes a required int. Instances are normally produced by
    `RecordingResponse.to_finished()`.
    """
    status: Literal["finished"] = Field(
        description="Recording status (always 'finished')"
    )
    duration: int = Field(description="Recording duration in seconds")
 class MeetingTokenResponse(BaseModel):
    """
    Response from meeting token creation.
    Carries a single required field, `token`.
    Reference: https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token
    """
    token: NonEmptyString = Field(
        description="JWT meeting token for participant authentication"
    )
 class WebhookResponse(BaseModel):
    """
    Webhook object returned by Daily.co on creation or retrieval.
    Field names are camelCase to match the API. `state` accepts only
    "ACTIVE" or "FAILED".
    Reference: https://docs.daily.co/reference/rest-api/webhooks
    """
    uuid: NonEmptyString = Field(description="Unique webhook identifier")
    url: NonEmptyString = Field(description="Webhook endpoint URL")
    hmac: NonEmptyString | None = Field(
        default=None,
        description="Base64-encoded HMAC secret for signature verification",
    )
    basicAuth: NonEmptyString | None = Field(
        default=None,
        description="Basic auth credentials if configured",
    )
    eventTypes: list[NonEmptyString] = Field(
        description="Array of event types (e.g., ['recording.started', 'participant.joined'])",
        default_factory=list,
    )
    state: Literal["ACTIVE", "FAILED"] = Field(
        description="Webhook state - FAILED after 3+ consecutive failures"
    )
    failedCount: int = Field(
        description="Number of consecutive failures",
        default=0,
    )
    lastMomentPushed: NonEmptyString | None = Field(
        default=None,
        description="ISO 8601 timestamp of last successful push",
    )
    domainId: NonEmptyString = Field(description="Daily.co domain/account identifier")
    createdAt: NonEmptyString = Field(description="ISO 8601 creation timestamp")
    updatedAt: NonEmptyString = Field(description="ISO 8601 last update timestamp")
--- a/server/reflector/dailyco_api/webhook_utils.py
+++ b/server/reflector/dailyco_api/webhook_utils.py
@@ -1,228 +0,0 @@
 """
 Daily.co Webhook Utilities
 Utilities for verifying and parsing Daily.co webhook events.
 Reference: https://docs.daily.co/reference/rest-api/webhooks
 """
 import base64
 import hmac
 from hashlib import sha256
 import structlog
 from .webhooks import (
    DailyWebhookEvent,
    ParticipantJoinedPayload,
    ParticipantLeftPayload,
    RecordingErrorPayload,
    RecordingReadyToDownloadPayload,
    RecordingStartedPayload,
 )
 logger = structlog.get_logger(__name__)
 def verify_webhook_signature(
    body: bytes,
    signature: str,
    timestamp: str,
    webhook_secret: str,
 ) -> bool:
    """
    Verify Daily.co webhook signature using HMAC-SHA256.
    Daily.co signature verification:
    1. Base64-decode the webhook secret
    2. Create signed content: timestamp + '.' + body
    3. Compute HMAC-SHA256(secret, signed_content)
    4. Base64-encode the result
    5. Compare with provided signature using constant-time comparison
    The comparison is done on bytes, so a signature header containing
    non-ASCII characters is treated as an ordinary mismatch (returns False)
    rather than as a verification error.
    Reference: https://docs.daily.co/reference/rest-api/webhooks
    Args:
        body: Raw request body bytes
        signature: X-Webhook-Signature header value
        timestamp: X-Webhook-Timestamp header value
        webhook_secret: Base64-encoded HMAC secret
    Returns:
        True if signature is valid, False otherwise (including when any of
        signature/timestamp/secret is empty or the secret cannot be decoded)
    Example:
        >>> body = b'{"version":"1.0.0","type":"participant.joined",...}'
        >>> signature = "abc123..."
        >>> timestamp = "1234567890"
        >>> secret = "your-base64-secret"
        >>> is_valid = verify_webhook_signature(body, signature, timestamp, secret)
    """
    if not signature or not timestamp or not webhook_secret:
        logger.warning(
            "Missing required data for webhook verification",
            has_signature=bool(signature),
            has_timestamp=bool(timestamp),
            has_secret=bool(webhook_secret),
        )
        return False
    try:
        secret_bytes = base64.b64decode(webhook_secret)
        signed_content = timestamp.encode() + b"." + body
        expected = hmac.new(secret_bytes, signed_content, sha256).digest()
        expected_b64 = base64.b64encode(expected)
        # Constant-time comparison to prevent timing attacks. Both sides are
        # bytes because compare_digest raises TypeError for str arguments
        # that contain non-ASCII characters.
        return hmac.compare_digest(expected_b64, signature.encode())
    # binascii.Error (bad base64) and Unicode errors are ValueError subclasses.
    except (ValueError, TypeError) as e:
        logger.error(
            "Webhook signature verification failed",
            error=str(e),
            error_type=type(e).__name__,
        )
        return False
 def extract_room_name(event: DailyWebhookEvent) -> str | None:
    """
    Pull the `room_name` entry out of a webhook event's raw payload dict.
    Args:
        event: Parsed webhook event
    Returns:
        The payload's "room_name" value when it is a str; None when the key
        is missing or holds any other type
    Example:
        >>> event = DailyWebhookEvent(**webhook_payload)
        >>> room_name = extract_room_name(event)
    """
    candidate = event.payload.get("room_name")
    # Only a real string counts; any other payload value is reported as None.
    if isinstance(candidate, str):
        return candidate
    return None
 def parse_participant_joined(event: DailyWebhookEvent) -> ParticipantJoinedPayload:
    """
    Validate an event's raw payload dict as a ParticipantJoinedPayload.
    The event type is not checked here; the caller is expected to pass a
    "participant.joined" event.
    Args:
        event: Webhook event with type "participant.joined"
    Returns:
        Parsed participant joined payload
    Raises:
        pydantic.ValidationError: If payload doesn't match expected schema
    """
    raw_fields = event.payload
    return ParticipantJoinedPayload(**raw_fields)
 def parse_participant_left(event: DailyWebhookEvent) -> ParticipantLeftPayload:
    """
    Validate an event's raw payload dict as a ParticipantLeftPayload.
    The event type is not checked here; the caller is expected to pass a
    "participant.left" event.
    Args:
        event: Webhook event with type "participant.left"
    Returns:
        Parsed participant left payload
    Raises:
        pydantic.ValidationError: If payload doesn't match expected schema
    """
    raw_fields = event.payload
    return ParticipantLeftPayload(**raw_fields)
 def parse_recording_started(event: DailyWebhookEvent) -> RecordingStartedPayload:
    """
    Validate an event's raw payload dict as a RecordingStartedPayload.
    The event type is not checked here; the caller is expected to pass a
    "recording.started" event.
    Args:
        event: Webhook event with type "recording.started"
    Returns:
        Parsed recording started payload
    Raises:
        pydantic.ValidationError: If payload doesn't match expected schema
    """
    raw_fields = event.payload
    return RecordingStartedPayload(**raw_fields)
 def parse_recording_ready(
    event: DailyWebhookEvent,
 ) -> RecordingReadyToDownloadPayload:
    """
    Validate an event's raw payload dict as a RecordingReadyToDownloadPayload.
    This event is sent when raw-tracks recordings are complete and uploaded to S3.
    The payload includes a 'tracks' array with individual audio/video files.
    The event type is not checked here; the caller is expected to pass a
    "recording.ready-to-download" event.
    Args:
        event: Webhook event with type "recording.ready-to-download"
    Returns:
        Parsed recording ready payload with tracks array
    Raises:
        pydantic.ValidationError: If payload doesn't match expected schema
    Example:
        >>> event = DailyWebhookEvent(**webhook_payload)
        >>> if event.type == "recording.ready-to-download":
        ...     payload = parse_recording_ready(event)
        ...     audio_tracks = [t for t in payload.tracks if t.type == "audio"]
    """
    raw_fields = event.payload
    return RecordingReadyToDownloadPayload(**raw_fields)
 def parse_recording_error(event: DailyWebhookEvent) -> RecordingErrorPayload:
    """
    Validate an event's raw payload dict as a RecordingErrorPayload.
    The event type is not checked here; the caller is expected to pass a
    "recording.error" event.
    Args:
        event: Webhook event with type "recording.error"
    Returns:
        Parsed recording error payload
    Raises:
        pydantic.ValidationError: If payload doesn't match expected schema
    """
    raw_fields = event.payload
    return RecordingErrorPayload(**raw_fields)
 # Dispatch table used by parse_webhook_payload: maps each event type string
 # to the function that turns that event's raw payload into its typed model.
 WEBHOOK_PARSERS = {
    "participant.joined": parse_participant_joined,
    "participant.left": parse_participant_left,
    "recording.started": parse_recording_started,
    "recording.ready-to-download": parse_recording_ready,
    "recording.error": parse_recording_error,
 }
 def parse_webhook_payload(event: DailyWebhookEvent):
    """
    Convert an event's payload into the typed model registered for its type.
    The parser is looked up in WEBHOOK_PARSERS by `event.type`. When no
    parser is registered, a warning is logged and the untouched payload dict
    is handed back.
    Args:
        event: Webhook event
    Returns:
        Typed payload model based on event type, or raw dict if unknown
    Example:
        >>> event = DailyWebhookEvent(**webhook_payload)
        >>> payload = parse_webhook_payload(event)
        >>> if isinstance(payload, ParticipantJoinedPayload):
        ...     print(f"User {payload.user_name} joined")
    """
    handler = WEBHOOK_PARSERS.get(event.type)
    if not handler:
        logger.warning("Unknown webhook event type", event_type=event.type)
        return event.payload
    return handler(event)
--- a/server/reflector/dailyco_api/webhooks.py
+++ b/server/reflector/dailyco_api/webhooks.py
@@ -1,271 +0,0 @@
 """
 Daily.co Webhook Event Models
 Reference: https://docs.daily.co/reference/rest-api/webhooks
 """
 from typing import Annotated, Any, Dict, Literal, Union
 from pydantic import BaseModel, Field, field_validator
 from reflector.utils.string import NonEmptyString
 def normalize_timestamp_to_int(v):
    """
    Normalize float timestamps to int by truncating decimal part.
    Daily.co sometimes sends timestamps as floats (e.g., 1708972279.96).
    Pydantic expects int for fields typed as `int`.
    Intended for use as a "before" field validator, so it must not raise
    unexpected exception types.
    Args:
        v: Raw incoming value (any type)
    Returns:
        int(v) for finite floats (truncation toward zero). Every other value,
        including None, ints, strings and non-finite floats (NaN, +/-inf), is
        returned unchanged so the field's own validation decides its fate.
    """
    if isinstance(v, float):
        try:
            return int(v)
        except (ValueError, OverflowError):
            # int(nan) raises ValueError and int(+/-inf) raises OverflowError.
            # Pass the value through instead of crashing; the int field's
            # validation is expected to reject it.
            return v
    return v
 # Closed set of event type strings accepted by DailyWebhookEvent.type;
 # any other value fails validation of that field.
 WebhookEventType = Literal[
    "participant.joined",
    "participant.left",
    "recording.started",
    "recording.ready-to-download",
    "recording.error",
 ]
 class DailyTrack(BaseModel):
    """
    Individual audio or video track from a multitrack recording.
    Used as the element type of the `tracks` list on
    RecordingReadyToDownloadPayload. All three fields are required.
    Reference: https://docs.daily.co/reference/rest-api/recordings
    """
    type: Literal["audio", "video"]
    s3Key: NonEmptyString = Field(description="S3 object key for the track file")
    size: int = Field(description="File size in bytes")
 class DailyWebhookEvent(BaseModel):
    """
    Base structure for all Daily.co webhook events.
    All events share five common fields documented below.
    `payload` is kept as an untyped dict here; the per-event models
    (ParticipantJoinedEvent, etc.) validate it against a typed payload class.
    Reference: https://docs.daily.co/reference/rest-api/webhooks
    """
    version: NonEmptyString = Field(
        description="Represents the version of the event. This uses semantic versioning to inform a consumer if the payload has introduced any breaking changes"
    )
    type: WebhookEventType = Field(
        description="Represents the type of the event described in the payload"
    )
    id: NonEmptyString = Field(
        description="An identifier representing this specific event"
    )
    payload: Dict[NonEmptyString, Any] = Field(
        description="An object representing the event, whose fields are described in the corresponding payload class"
    )
    event_ts: int = Field(
        description="Documenting when the webhook itself was sent. This timestamp is different than the time of the event the webhook describes. For example, a recording.started event will contain a start_ts timestamp of when the actual recording started, and a slightly later event_ts timestamp indicating when the webhook event was sent"
    )
    # Runs before int validation so float timestamps are truncated to int
    # instead of being rejected.
    _normalize_event_ts = field_validator("event_ts", mode="before")(
        normalize_timestamp_to_int
    )
 class ParticipantJoinedPayload(BaseModel):
    """
    Typed payload of a participant.joined webhook event.
    `room_name` and `user_name` default to None; `joined_at` accepts float
    timestamps, which are truncated to int before validation.
    Reference: https://docs.daily.co/reference/rest-api/webhooks/events/participant-joined
    """
    room_name: NonEmptyString | None = Field(
        default=None,
        description="Daily.co room name",
    )
    session_id: NonEmptyString = Field(description="Daily.co session identifier")
    user_id: NonEmptyString = Field(description="User identifier (may be encoded)")
    user_name: NonEmptyString | None = Field(
        default=None,
        description="User display name",
    )
    joined_at: int = Field(description="Join timestamp in Unix epoch seconds")
    # Truncate float timestamps before the int check runs.
    _normalize_joined_at = field_validator("joined_at", mode="before")(
        normalize_timestamp_to_int
    )
 class ParticipantLeftPayload(BaseModel):
    """
    Typed payload of a participant.left webhook event.
    Same fields as the joined payload plus an optional `duration`.
    `joined_at` accepts float timestamps, which are truncated to int before
    validation.
    Reference: https://docs.daily.co/reference/rest-api/webhooks/events/participant-left
    """
    room_name: NonEmptyString | None = Field(
        default=None,
        description="Daily.co room name",
    )
    session_id: NonEmptyString = Field(description="Daily.co session identifier")
    user_id: NonEmptyString = Field(description="User identifier (may be encoded)")
    user_name: NonEmptyString | None = Field(
        default=None,
        description="User display name",
    )
    joined_at: int = Field(description="Join timestamp in Unix epoch seconds")
    duration: int | None = Field(
        default=None,
        description="Duration of participation in seconds",
    )
    # Truncate float timestamps before the int check runs.
    _normalize_joined_at = field_validator("joined_at", mode="before")(
        normalize_timestamp_to_int
    )
 class RecordingStartedPayload(BaseModel):
    """
    Typed payload of a recording.started webhook event.
    Only `recording_id` is required. `start_ts` accepts float timestamps,
    which are truncated to int before validation.
    Reference: https://docs.daily.co/reference/rest-api/webhooks/events/recording-started
    """
    room_name: NonEmptyString | None = Field(
        default=None,
        description="Daily.co room name",
    )
    recording_id: NonEmptyString = Field(description="Recording identifier")
    start_ts: int | None = Field(
        default=None,
        description="Recording start timestamp",
    )
    # Truncate float timestamps before the int check runs.
    _normalize_start_ts = field_validator("start_ts", mode="before")(
        normalize_timestamp_to_int
    )
 class RecordingReadyToDownloadPayload(BaseModel):
    """
    Typed payload of a recording.ready-to-download webhook event.
    This is sent when raw-tracks recordings are complete and uploaded to S3.
    `share_token` and `tracks` default to None; every other field is
    required. `start_ts` accepts float timestamps, which are truncated to int
    before validation.
    Reference: https://docs.daily.co/reference/rest-api/webhooks/events/recording-ready-to-download
    """
    type: Literal["cloud", "raw-tracks"] = Field(
        description="The type of recording that was generated"
    )
    recording_id: NonEmptyString = Field(
        description="An ID identifying the recording that was generated"
    )
    room_name: NonEmptyString = Field(
        description="The name of the room where the recording was made"
    )
    start_ts: int = Field(
        description="The Unix epoch time in seconds representing when the recording started"
    )
    status: Literal["finished"] = Field(
        description="The status of the given recording (always 'finished' in ready-to-download webhook, see RecordingStatus in responses.py for full API statuses)"
    )
    max_participants: int = Field(
        description="The number of participants on the call that were recorded"
    )
    duration: int = Field(description="The duration in seconds of the call")
    s3_key: NonEmptyString = Field(
        description="The location of the recording in the provided S3 bucket"
    )
    share_token: NonEmptyString | None = Field(
        default=None,
        description="undocumented documented secret field",
    )
    tracks: list[DailyTrack] | None = Field(
        default=None,
        description="If the recording is a raw-tracks recording, a tracks field will be provided. If role permissions have been removed, the tracks field may be null",
    )
    # Truncate float timestamps before the int check runs.
    _normalize_start_ts = field_validator("start_ts", mode="before")(
        normalize_timestamp_to_int
    )
 class RecordingErrorPayload(BaseModel):
    """
    Payload for recording.error webhook event.
    All fields are required. `timestamp` accepts float values, which are
    truncated to int before validation.
    Reference: https://docs.daily.co/reference/rest-api/webhooks/events/recording-error
    """
    # Both spellings are accepted on purpose (see the field description);
    # do not "fix" the first literal.
    action: Literal["clourd-recording-err", "cloud-recording-error"] = Field(
        description="A string describing the event that was emitted (both variants are documented)"
    )
    error_msg: NonEmptyString = Field(description="The error message returned")
    instance_id: NonEmptyString = Field(
        description="The recording instance ID that was passed into the start recording command"
    )
    room_name: NonEmptyString = Field(
        description="The name of the room where the recording was made"
    )
    timestamp: int = Field(
        description="The Unix epoch time in seconds representing when the error was emitted"
    )
    # Truncate float timestamps before the int check runs.
    _normalize_timestamp = field_validator("timestamp", mode="before")(
        normalize_timestamp_to_int
    )
 class ParticipantJoinedEvent(BaseModel):
    """
    Fully typed participant.joined webhook event.
    Same envelope fields as DailyWebhookEvent, but `type` is pinned to one
    literal and `payload` is validated as ParticipantJoinedPayload.
    Member of DailyWebhookEventUnion, discriminated on `type`.
    """
    version: NonEmptyString
    type: Literal["participant.joined"]
    id: NonEmptyString
    payload: ParticipantJoinedPayload
    event_ts: int
    # Truncate float timestamps before the int check runs.
    _normalize_event_ts = field_validator("event_ts", mode="before")(
        normalize_timestamp_to_int
    )
 class ParticipantLeftEvent(BaseModel):
    """
    Fully typed participant.left webhook event.
    Same envelope fields as DailyWebhookEvent, but `type` is pinned to one
    literal and `payload` is validated as ParticipantLeftPayload.
    Member of DailyWebhookEventUnion, discriminated on `type`.
    """
    version: NonEmptyString
    type: Literal["participant.left"]
    id: NonEmptyString
    payload: ParticipantLeftPayload
    event_ts: int
    # Truncate float timestamps before the int check runs.
    _normalize_event_ts = field_validator("event_ts", mode="before")(
        normalize_timestamp_to_int
    )
 class RecordingStartedEvent(BaseModel):
    """
    Fully typed recording.started webhook event.
    Same envelope fields as DailyWebhookEvent, but `type` is pinned to one
    literal and `payload` is validated as RecordingStartedPayload.
    Member of DailyWebhookEventUnion, discriminated on `type`.
    """
    version: NonEmptyString
    type: Literal["recording.started"]
    id: NonEmptyString
    payload: RecordingStartedPayload
    event_ts: int
    # Truncate float timestamps before the int check runs.
    _normalize_event_ts = field_validator("event_ts", mode="before")(
        normalize_timestamp_to_int
    )
 class RecordingReadyEvent(BaseModel):
    """
    Fully typed recording.ready-to-download webhook event.
    Same envelope fields as DailyWebhookEvent, but `type` is pinned to one
    literal and `payload` is validated as RecordingReadyToDownloadPayload.
    Member of DailyWebhookEventUnion, discriminated on `type`.
    """
    version: NonEmptyString
    type: Literal["recording.ready-to-download"]
    id: NonEmptyString
    payload: RecordingReadyToDownloadPayload
    event_ts: int
    # Truncate float timestamps before the int check runs.
    _normalize_event_ts = field_validator("event_ts", mode="before")(
        normalize_timestamp_to_int
    )
 class RecordingErrorEvent(BaseModel):
    """
    Fully typed recording.error webhook event.
    Same envelope fields as DailyWebhookEvent, but `type` is pinned to one
    literal and `payload` is validated as RecordingErrorPayload.
    Member of DailyWebhookEventUnion, discriminated on `type`.
    """
    version: NonEmptyString
    type: Literal["recording.error"]
    id: NonEmptyString
    payload: RecordingErrorPayload
    event_ts: int
    # Truncate float timestamps before the int check runs.
    _normalize_event_ts = field_validator("event_ts", mode="before")(
        normalize_timestamp_to_int
    )
 # Discriminated union of the five typed event models. The `type` field
 # selects which member model an incoming event is validated against.
 DailyWebhookEventUnion = Annotated[
    Union[
        ParticipantJoinedEvent,
        ParticipantLeftEvent,
        RecordingStartedEvent,
        RecordingReadyEvent,
        RecordingErrorEvent,
    ],
    Field(discriminator="type"),
 ]
--- a/server/reflector/db/init.py
+++ b/server/reflector/db/init.py
@@ -25,13 +25,10 @@ def get_database() -> databases.Database:
 # import models
 import reflector.db.calendar_events  # noqa
 import reflector.db.daily_participant_sessions  # noqa
 import reflector.db.meetings  # noqa
 import reflector.db.recordings  # noqa
 import reflector.db.rooms  # noqa
 import reflector.db.transcripts  # noqa
 import reflector.db.user_api_keys  # noqa
 import reflector.db.users  # noqa
 kwargs = {}
 if "postgres" not in settings.DATABASE_URL:
--- a/server/reflector/db/daily_participant_sessions.py
+++ b/server/reflector/db/daily_participant_sessions.py
@@ -1,229 +0,0 @@
 """Daily.co participant session tracking.
 Stores webhook data for participant.joined and participant.left events to provide
 historical session information (Daily.co API only returns current participants).
 """
 from datetime import datetime
 import sqlalchemy as sa
 from pydantic import BaseModel
 from sqlalchemy.dialects.postgresql import insert
 from reflector.db import get_database, metadata
 from reflector.utils.string import NonEmptyString
 # Table "daily_participant_session": one row per participant session.
 daily_participant_sessions = sa.Table(
    "daily_participant_session",
    metadata,
    sa.Column("id", sa.String, primary_key=True),
    # Rows are removed by the database when the referenced meeting is deleted.
    sa.Column(
        "meeting_id",
        sa.String,
        sa.ForeignKey("meeting.id", ondelete="CASCADE"),
        nullable=False,
    ),
    # Rows are removed by the database when the referenced room is deleted.
    sa.Column(
        "room_id",
        sa.String,
        sa.ForeignKey("room.id", ondelete="CASCADE"),
        nullable=False,
    ),
    sa.Column("session_id", sa.String, nullable=False),
    sa.Column("user_id", sa.String, nullable=True),
    sa.Column("user_name", sa.String, nullable=False),
    sa.Column("joined_at", sa.DateTime(timezone=True), nullable=False),
    # NULL left_at is what the controller queries treat as "still open".
    sa.Column("left_at", sa.DateTime(timezone=True), nullable=True),
    # Composite index matching the (meeting_id, left_at IS NULL) lookups.
    sa.Index("idx_daily_session_meeting_left", "meeting_id", "left_at"),
    sa.Index("idx_daily_session_room", "room_id"),
 )
 class DailyParticipantSession(BaseModel):
    """Daily.co participant session record.
    Tracks when a participant joined and left a meeting. Populated from webhooks:
    - participant.joined: Creates record with left_at=None
    - participant.left: Updates record with left_at
    ID format: {meeting_id}:{user_id}:{joined_at_ms}
    - Ensures idempotency (duplicate webhooks don't create duplicates)
    - Allows same user to rejoin (different joined_at = different session)
    Duration is calculated as: left_at - joined_at (not stored)
    """
    # Primary key of the row; the conflict target of the upsert queries.
    id: NonEmptyString
    meeting_id: NonEmptyString
    room_id: NonEmptyString
    session_id: NonEmptyString  # Daily.co's session_id (identifies room session)
    # Optional: the backing column is nullable.
    user_id: NonEmptyString | None = None
    user_name: str
    joined_at: datetime
    # None while the session is still open (no left timestamp recorded yet).
    left_at: datetime | None = None
 class DailyParticipantSessionController:
    """Controller for Daily.co participant session persistence.
    Every method issues its query through get_database() against the
    daily_participant_sessions table.
    """
    async def get_by_id(self, id: str) -> DailyParticipantSession | None:
        """Return the session whose primary key equals ``id``, or None if no row matches."""
        query = daily_participant_sessions.select().where(
            daily_participant_sessions.c.id == id
        )
        result = await get_database().fetch_one(query)
        return DailyParticipantSession(**result) if result else None
    async def get_open_session(
        self, meeting_id: NonEmptyString, session_id: NonEmptyString
    ) -> DailyParticipantSession | None:
        """Get the open (left_at IS NULL) row for a Daily session_id in a meeting.
        Matching is done on the ``session_id`` column, not on ``user_id``.
        Returns None when there is no open row.
        Raises:
            ValueError: if more than one open row matches.
        """
        query = daily_participant_sessions.select().where(
            sa.and_(
                daily_participant_sessions.c.meeting_id == meeting_id,
                daily_participant_sessions.c.session_id == session_id,
                daily_participant_sessions.c.left_at.is_(None),
            )
        )
        results = await get_database().fetch_all(query)
        # More than one open row is treated as corrupt state rather than guessed at.
        if len(results) > 1:
            raise ValueError(
                f"Multiple open sessions for daily session {session_id} in meeting {meeting_id}: "
                f"found {len(results)} sessions"
            )
        return DailyParticipantSession(**results[0]) if results else None
    async def upsert_joined(self, session: DailyParticipantSession) -> None:
        """Insert or update when participant.joined webhook arrives.
        Idempotent: a duplicate webhook with the same ID only rewrites user_name.
        Out-of-order: if a row with this ID already exists (e.g. written by the
        left webhook), only user_name is updated, so its left_at is preserved.
        """
        query = insert(daily_participant_sessions).values(**session.model_dump())
        # On id conflict nothing but user_name is touched.
        query = query.on_conflict_do_update(
            index_elements=["id"],
            set_={"user_name": session.user_name},
        )
        await get_database().execute(query)
    async def upsert_left(self, session: DailyParticipantSession) -> None:
        """Update session when participant.left webhook arrives.
        Finds the open row for this Daily session_id in this meeting and sets left_at.
        Works around Daily.co webhook timestamp inconsistency (joined_at differs by ~4ms between webhooks).
        Handles three cases:
        1. Normal flow: open session exists → updates left_at
        2. Out-of-order: left arrives first → creates new record with left data
        3. Duplicate: left arrives again → no open row is found, so the insert
           path runs; it is a no-op only if a row with the same id already exists.
           NOTE(review): the original comment says a DB trigger prevents left_at
           modification; that trigger is not visible in this module — confirm.
        Raises:
            ValueError: if left_at is None or is not strictly after joined_at
                (also propagated from get_open_session on multiple open rows).
        """
        if session.left_at is None:
            raise ValueError("left_at is required for upsert_left")
        if session.left_at <= session.joined_at:
            raise ValueError(
                f"left_at ({session.left_at}) must be after joined_at ({session.joined_at})"
            )
        # Find existing open session (works around timestamp mismatch in webhooks)
        existing = await self.get_open_session(session.meeting_id, session.session_id)
        if existing:
            # Update existing open session
            query = (
                daily_participant_sessions.update()
                .where(daily_participant_sessions.c.id == existing.id)
                .values(left_at=session.left_at)
            )
            await get_database().execute(query)
        else:
            # Out-of-order or first webhook: insert new record
            query = insert(daily_participant_sessions).values(**session.model_dump())
            query = query.on_conflict_do_nothing(index_elements=["id"])
            await get_database().execute(query)
    async def get_by_meeting(self, meeting_id: str) -> list[DailyParticipantSession]:
        """Get all participant sessions for a meeting (active and ended)."""
        query = daily_participant_sessions.select().where(
            daily_participant_sessions.c.meeting_id == meeting_id
        )
        results = await get_database().fetch_all(query)
        return [DailyParticipantSession(**result) for result in results]
    async def get_active_by_meeting(
        self, meeting_id: str
    ) -> list[DailyParticipantSession]:
        """Get only active (left_at IS NULL) participant sessions for a meeting."""
        query = daily_participant_sessions.select().where(
            sa.and_(
                daily_participant_sessions.c.meeting_id == meeting_id,
                daily_participant_sessions.c.left_at.is_(None),
            )
        )
        results = await get_database().fetch_all(query)
        return [DailyParticipantSession(**result) for result in results]
    async def get_all_sessions_for_meeting(
        self, meeting_id: NonEmptyString
    ) -> dict[NonEmptyString, DailyParticipantSession]:
        """Return all sessions of a meeting as a dict keyed by Daily session_id.
        Same query as get_by_meeting. If several rows share a session_id, the
        row fetched last overwrites the earlier ones in the returned dict.
        """
        query = daily_participant_sessions.select().where(
            daily_participant_sessions.c.meeting_id == meeting_id
        )
        results = await get_database().fetch_all(query)
        # TODO DailySessionId custom type
        return {row["session_id"]: DailyParticipantSession(**row) for row in results}
    async def batch_upsert_sessions(
        self, sessions: list[DailyParticipantSession]
    ) -> None:
        """Upsert multiple sessions in a single query.
        Uses ON CONFLICT (id) for idempotency. On conflict, user_name is replaced
        with the incoming value (it may change during a meeting) and left_at is
        only filled in when the stored value is NULL.
        Does nothing when ``sessions`` is empty.
        """
        if not sessions:
            return
        values = [session.model_dump() for session in sessions]
        query = insert(daily_participant_sessions).values(values)
        query = query.on_conflict_do_update(
            index_elements=["id"],
            set_={
                # Preserve existing left_at to prevent race conditions
                "left_at": sa.func.coalesce(
                    daily_participant_sessions.c.left_at,
                    query.excluded.left_at,
                ),
                "user_name": query.excluded.user_name,
            },
        )
        await get_database().execute(query)
    async def batch_close_sessions(
        self, session_ids: list[NonEmptyString], left_at: datetime
    ) -> None:
        """Mark multiple sessions as left in a single query.
        ``session_ids`` are matched against the row primary key (``id`` column),
        not against the Daily ``session_id`` column.
        Only updates rows where left_at is NULL (protects already-closed sessions);
        rows that already have a different left_at are left unchanged.
        Does nothing when ``session_ids`` is empty.
        """
        if not session_ids:
            return
        query = (
            daily_participant_sessions.update()
            .where(
                sa.and_(
                    daily_participant_sessions.c.id.in_(session_ids),
                    daily_participant_sessions.c.left_at.is_(None),
                )
            )
            .values(left_at=left_at)
        )
        await get_database().execute(query)
 # Module-level controller instance created at import time.
 daily_participant_sessions_controller = DailyParticipantSessionController()
--- a/server/reflector/db/meetings.py
+++ b/server/reflector/db/meetings.py
@@ -7,9 +7,8 @@ from sqlalchemy.dialects.postgresql import JSONB
 from reflector.db import get_database, metadata
 from reflector.db.rooms import Room
-from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
+from reflector.platform_types import Platform
 from reflector.utils import generate_uuid4
 from reflector.utils.string import assert_equal
 meetings = sa.Table(
    "meeting",
@@ -61,7 +60,7 @@ meetings = sa.Table(
        "platform",
        sa.String,
        nullable=False,
-        server_default=assert_equal(WHEREBY_PLATFORM, "whereby"),
+        server_default="whereby",
    ),
    sa.Index("idx_meeting_room_id", "room_id"),
    sa.Index("idx_meeting_calendar_event", "calendar_event_id"),
@@ -109,7 +108,7 @@ class Meeting(BaseModel):
    is_active: bool = True
    calendar_event_id: str | None = None
    calendar_metadata: dict[str, Any] | None = None
-    platform: Platform = WHEREBY_PLATFORM
+    platform: Platform = "whereby"
 class MeetingController:
@@ -124,6 +123,7 @@ class MeetingController:
        room: Room,
        calendar_event_id: str | None = None,
        calendar_metadata: dict[str, Any] | None = None,
        platform: Platform = "whereby",
    ):
        meeting = Meeting(
            id=id,
@@ -139,19 +139,15 @@ class MeetingController:
            recording_trigger=room.recording_trigger,
            calendar_event_id=calendar_event_id,
            calendar_metadata=calendar_metadata,
-            platform=room.platform,
+            platform=platform,
        )
        query = meetings.insert().values(**meeting.model_dump())
        await get_database().execute(query)
        return meeting
-    async def get_all_active(self, platform: str | None = None) -> list[Meeting]:
+    async def get_all_active(self) -> list[Meeting]:
-        conditions = [meetings.c.is_active]
+        query = meetings.select().where(meetings.c.is_active)
-        if platform is not None:
+        return await get_database().fetch_all(query)
            conditions.append(meetings.c.platform == platform)
        query = meetings.select().where(sa.and_(*conditions))
        results = await get_database().fetch_all(query)
        return [Meeting(**result) for result in results]
    async def get_by_room_name(
        self,
@@ -161,14 +157,16 @@ class MeetingController:
        Get a meeting by room name.
        For backward compatibility, returns the most recent meeting.
        """
        end_date = getattr(meetings.c, "end_date")
        query = (
            meetings.select()
            .where(meetings.c.room_name == room_name)
-            .order_by(meetings.c.end_date.desc())
+            .order_by(end_date.desc())
        )
        result = await get_database().fetch_one(query)
        if not result:
            return None
        return Meeting(**result)
    async def get_active(self, room: Room, current_time: datetime) -> Meeting | None:
@@ -191,6 +189,7 @@ class MeetingController:
        result = await get_database().fetch_one(query)
        if not result:
            return None
        return Meeting(**result)
    async def get_all_active_for_room(
@@ -230,27 +229,17 @@ class MeetingController:
            return None
        return Meeting(**result)
-    async def get_by_id(
+    async def get_by_id(self, meeting_id: str, **kwargs) -> Meeting | None:
        self, meeting_id: str, room: Room | None = None
    ) -> Meeting | None:
        query = meetings.select().where(meetings.c.id == meeting_id)
        if room:
            query = query.where(meetings.c.room_id == room.id)
        result = await get_database().fetch_one(query)
        if not result:
            return None
        return Meeting(**result)
-    async def get_by_calendar_event(
+    async def get_by_calendar_event(self, calendar_event_id: str) -> Meeting | None:
        self, calendar_event_id: str, room: Room
    ) -> Meeting | None:
        query = meetings.select().where(
            meetings.c.calendar_event_id == calendar_event_id
        )
        if room:
            query = query.where(meetings.c.room_id == room.id)
        result = await get_database().fetch_one(query)
        if not result:
            return None
@@ -260,7 +249,7 @@ class MeetingController:
        query = meetings.update().where(meetings.c.id == meeting_id).values(**kwargs)
        await get_database().execute(query)
-    async def increment_num_clients(self, meeting_id: str) -> None:
+    async def increment_num_clients(self, meeting_id: str):
        """Atomically increment participant count."""
        query = (
            meetings.update()
@@ -269,7 +258,7 @@ class MeetingController:
        )
        await get_database().execute(query)
-    async def decrement_num_clients(self, meeting_id: str) -> None:
+    async def decrement_num_clients(self, meeting_id: str):
        """Atomically decrement participant count (min 0)."""
        query = (
            meetings.update()
--- a/server/reflector/db/recordings.py
+++ b/server/reflector/db/recordings.py
@@ -3,7 +3,6 @@ from typing import Literal
 import sqlalchemy as sa
 from pydantic import BaseModel, Field
 from sqlalchemy import or_
 from reflector.db import get_database, metadata
 from reflector.utils import generate_uuid4
@@ -22,7 +21,6 @@ recordings = sa.Table(
        server_default="pending",
    ),
    sa.Column("meeting_id", sa.String),
    sa.Column("track_keys", sa.JSON, nullable=True),
    sa.Index("idx_recording_meeting_id", "meeting_id"),
 )
@@ -30,20 +28,10 @@ recordings = sa.Table(
 class Recording(BaseModel):
    id: str = Field(default_factory=generate_uuid4)
    bucket_name: str
    # for single-track
    object_key: str
    recorded_at: datetime
    status: Literal["pending", "processing", "completed", "failed"] = "pending"
    meeting_id: str | None = None
    # for multitrack reprocessing
    # track_keys can be empty list [] if recording finished but no audio was captured (silence/muted)
    # None means not a multitrack recording, [] means multitrack with no tracks
    track_keys: list[str] | None = None
    @property
    def is_multitrack(self) -> bool:
        """True if recording has separate audio tracks (1+ tracks counts as multitrack)."""
        return self.track_keys is not None and len(self.track_keys) > 0
 class RecordingController:
@@ -52,14 +40,12 @@ class RecordingController:
        await get_database().execute(query)
        return recording
-    async def get_by_id(self, id: str) -> Recording | None:
+    async def get_by_id(self, id: str) -> Recording:
        query = recordings.select().where(recordings.c.id == id)
        result = await get_database().fetch_one(query)
        return Recording(**result) if result else None
-    async def get_by_object_key(
+    async def get_by_object_key(self, bucket_name: str, object_key: str) -> Recording:
        self, bucket_name: str, object_key: str
    ) -> Recording | None:
        query = recordings.select().where(
            recordings.c.bucket_name == bucket_name,
            recordings.c.object_key == object_key,
@@ -71,44 +57,5 @@ class RecordingController:
        query = recordings.delete().where(recordings.c.id == id)
        await get_database().execute(query)
    # no check for existence
    async def get_by_ids(self, recording_ids: list[str]) -> list[Recording]:
        if not recording_ids:
            return []
        query = recordings.select().where(recordings.c.id.in_(recording_ids))
        results = await get_database().fetch_all(query)
        return [Recording(**row) for row in results]
    async def get_multitrack_needing_reprocessing(
        self, bucket_name: str
    ) -> list[Recording]:
        """
        Get multitrack recordings that need reprocessing:
        - Have track_keys (multitrack)
        - Either have no transcript OR transcript has error status
        This is more efficient than fetching all recordings and filtering in Python.
        """
        from reflector.db.transcripts import (
            transcripts,  # noqa: PLC0415 cyclic import
        )
        query = (
            recordings.select()
            .outerjoin(transcripts, recordings.c.id == transcripts.c.recording_id)
            .where(
                recordings.c.bucket_name == bucket_name,
                recordings.c.track_keys.isnot(None),
                or_(
                    transcripts.c.id.is_(None),
                    transcripts.c.status == "error",
                ),
            )
        )
        results = await get_database().fetch_all(query)
        recordings_list = [Recording(**row) for row in results]
        return [r for r in recordings_list if r.is_multitrack]
 recordings_controller = RecordingController()
--- a/server/reflector/db/rooms.py
+++ b/server/reflector/db/rooms.py
@@ -1,7 +1,7 @@
 import secrets
 from datetime import datetime, timezone
 from sqlite3 import IntegrityError
-from typing import Literal
+from typing import Literal, Optional
 import sqlalchemy
 from fastapi import HTTPException
@@ -9,8 +9,7 @@ from pydantic import BaseModel, Field
 from sqlalchemy.sql import false, or_
 from reflector.db import get_database, metadata
-from reflector.schemas.platform import Platform
+from reflector.platform_types import Platform
 from reflector.settings import settings
 from reflector.utils import generate_uuid4
 rooms = sqlalchemy.Table(
@@ -56,12 +55,7 @@ rooms = sqlalchemy.Table(
        "platform",
        sqlalchemy.String,
        nullable=False,
-    ),
+        server_default="whereby",
    sqlalchemy.Column(
        "use_hatchet",
        sqlalchemy.Boolean,
        nullable=False,
        server_default=false(),
    ),
    sqlalchemy.Index("idx_room_is_shared", "is_shared"),
    sqlalchemy.Index("idx_room_ics_enabled", "ics_enabled"),
@@ -90,8 +84,7 @@ class Room(BaseModel):
    ics_enabled: bool = False
    ics_last_sync: datetime | None = None
    ics_last_etag: str | None = None
-    platform: Platform = Field(default_factory=lambda: settings.DEFAULT_VIDEO_PLATFORM)
+    platform: Platform = "whereby"
    use_hatchet: bool = False
 class RoomController:
@@ -145,7 +138,7 @@ class RoomController:
        ics_url: str | None = None,
        ics_fetch_interval: int = 300,
        ics_enabled: bool = False,
-        platform: Platform = settings.DEFAULT_VIDEO_PLATFORM,
+        platform: Optional[Platform] = None,
    ):
        """
        Add a new room
@@ -153,26 +146,24 @@ class RoomController:
        if webhook_url and not webhook_secret:
            webhook_secret = secrets.token_urlsafe(32)
-        room_data = {
+        room = Room(
-            "name": name,
+            name=name,
-            "user_id": user_id,
+            user_id=user_id,
-            "zulip_auto_post": zulip_auto_post,
+            zulip_auto_post=zulip_auto_post,
-            "zulip_stream": zulip_stream,
+            zulip_stream=zulip_stream,
-            "zulip_topic": zulip_topic,
+            zulip_topic=zulip_topic,
-            "is_locked": is_locked,
+            is_locked=is_locked,
-            "room_mode": room_mode,
+            room_mode=room_mode,
-            "recording_type": recording_type,
+            recording_type=recording_type,
-            "recording_trigger": recording_trigger,
+            recording_trigger=recording_trigger,
-            "is_shared": is_shared,
+            is_shared=is_shared,
-            "webhook_url": webhook_url,
+            webhook_url=webhook_url,
-            "webhook_secret": webhook_secret,
+            webhook_secret=webhook_secret,
-            "ics_url": ics_url,
+            ics_url=ics_url,
-            "ics_fetch_interval": ics_fetch_interval,
+            ics_fetch_interval=ics_fetch_interval,
-            "ics_enabled": ics_enabled,
+            ics_enabled=ics_enabled,
-            "platform": platform,
+            platform=platform or "whereby",
-        }
+        )
        room = Room(**room_data)
        query = rooms.insert().values(**room.model_dump())
        try:
            await get_database().execute(query)
--- a/server/reflector/db/search.py
+++ b/server/reflector/db/search.py
@@ -135,8 +135,6 @@ class SearchParameters(BaseModel):
    user_id: str | None = None
    room_id: str | None = None
    source_kind: SourceKind | None = None
    from_datetime: datetime | None = None
    to_datetime: datetime | None = None
 class SearchResultDB(BaseModel):
@@ -404,14 +402,6 @@ class SearchController:
            base_query = base_query.where(
                transcripts.c.source_kind == params.source_kind
            )
        if params.from_datetime:
            base_query = base_query.where(
                transcripts.c.created_at >= params.from_datetime
            )
        if params.to_datetime:
            base_query = base_query.where(
                transcripts.c.created_at <= params.to_datetime
            )
        if params.query_text is not None:
            order_by = sqlalchemy.desc(sqlalchemy.text("rank"))
--- a/server/reflector/db/transcripts.py
+++ b/server/reflector/db/transcripts.py
@@ -21,7 +21,7 @@ from reflector.db.utils import is_postgresql
 from reflector.logger import logger
 from reflector.processors.types import Word as ProcessorWord
 from reflector.settings import settings
-from reflector.storage import get_transcripts_storage
+from reflector.storage import get_recordings_storage, get_transcripts_storage
 from reflector.utils import generate_uuid4
 from reflector.utils.webvtt import topics_to_webvtt
@@ -44,7 +44,6 @@ transcripts = sqlalchemy.Table(
    sqlalchemy.Column("title", sqlalchemy.String),
    sqlalchemy.Column("short_summary", sqlalchemy.String),
    sqlalchemy.Column("long_summary", sqlalchemy.String),
    sqlalchemy.Column("action_items", sqlalchemy.JSON),
    sqlalchemy.Column("topics", sqlalchemy.JSON),
    sqlalchemy.Column("events", sqlalchemy.JSON),
    sqlalchemy.Column("participants", sqlalchemy.JSON),
@@ -84,8 +83,6 @@ transcripts = sqlalchemy.Table(
    sqlalchemy.Column("audio_deleted", sqlalchemy.Boolean),
    sqlalchemy.Column("room_id", sqlalchemy.String),
    sqlalchemy.Column("webvtt", sqlalchemy.Text),
    # Hatchet workflow run ID for resumption of failed workflows
    sqlalchemy.Column("workflow_run_id", sqlalchemy.String),
    sqlalchemy.Index("idx_transcript_recording_id", "recording_id"),
    sqlalchemy.Index("idx_transcript_user_id", "user_id"),
    sqlalchemy.Index("idx_transcript_created_at", "created_at"),
@@ -167,10 +164,6 @@ class TranscriptFinalLongSummary(BaseModel):
    long_summary: str
 class TranscriptActionItems(BaseModel):
    action_items: dict
 class TranscriptFinalTitle(BaseModel):
    title: str
@@ -193,7 +186,6 @@ class TranscriptParticipant(BaseModel):
    id: str = Field(default_factory=generate_uuid4)
    speaker: int | None
    name: str
    user_id: str | None = None
 class Transcript(BaseModel):
@@ -211,7 +203,6 @@ class Transcript(BaseModel):
    locked: bool = False
    short_summary: str | None = None
    long_summary: str | None = None
    action_items: dict | None = None
    topics: list[TranscriptTopic] = []
    events: list[TranscriptEvent] = []
    participants: list[TranscriptParticipant] | None = []
@@ -225,7 +216,6 @@ class Transcript(BaseModel):
    zulip_message_id: int | None = None
    audio_deleted: bool | None = None
    webvtt: str | None = None
    workflow_run_id: str | None = None  # Hatchet workflow run ID for resumption
    @field_serializer("created_at", when_used="json")
    def serialize_datetime(self, dt: datetime) -> str:
@@ -377,12 +367,7 @@ class TranscriptController:
        room_id: str | None = None,
        search_term: str | None = None,
        return_query: bool = False,
-        exclude_columns: list[str] = [
+        exclude_columns: list[str] = ["topics", "events", "participants"],
            "topics",
            "events",
            "participants",
            "action_items",
        ],
    ) -> list[Transcript]:
        """
        Get all transcripts
@@ -638,9 +623,7 @@ class TranscriptController:
                )
                if recording:
                    try:
-                        await get_transcripts_storage().delete_file(
+                        await get_recordings_storage().delete_file(recording.object_key)
                            recording.object_key, bucket=recording.bucket_name
                        )
                    except Exception as e:
                        logger.warning(
                            "Failed to delete recording object from S3",
@@ -742,13 +725,11 @@ class TranscriptController:
        """
        Download audio from storage
        """
-        storage = get_transcripts_storage()
+        transcript.audio_mp3_filename.write_bytes(
-        try:
+            await get_transcripts_storage().get_file(
-            with open(transcript.audio_mp3_filename, "wb") as f:
+                transcript.storage_audio_path,
-                await storage.stream_to_fileobj(transcript.storage_audio_path, f)
+            )
-        except Exception:
+        )
            transcript.audio_mp3_filename.unlink(missing_ok=True)
            raise
    async def upsert_participant(
        self,
--- a/server/reflector/db/user_api_keys.py
+++ b/server/reflector/db/user_api_keys.py
@@ -1,91 +0,0 @@
 import hmac
 import secrets
 from datetime import datetime, timezone
 from hashlib import sha256
 import sqlalchemy
 from pydantic import BaseModel, Field
 from reflector.db import get_database, metadata
 from reflector.settings import settings
 from reflector.utils import generate_uuid4
 from reflector.utils.string import NonEmptyString
 # Table "user_api_key": stores a hash of each API key, never the plaintext.
 user_api_keys = sqlalchemy.Table(
    "user_api_key",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.String, primary_key=True),
    sqlalchemy.Column("user_id", sqlalchemy.String, nullable=False),
    sqlalchemy.Column("key_hash", sqlalchemy.String, nullable=False),
    sqlalchemy.Column("name", sqlalchemy.String, nullable=True),
    sqlalchemy.Column("created_at", sqlalchemy.DateTime(timezone=True), nullable=False),
    # Unique: a given key hash can exist at most once; also serves verify_key lookups.
    sqlalchemy.Index("idx_user_api_key_hash", "key_hash", unique=True),
    sqlalchemy.Index("idx_user_api_key_user_id", "user_id"),
 )
 # Pydantic view of one row of the user_api_key table.
 class UserApiKey(BaseModel):
    # Generated with generate_uuid4 when not supplied.
    id: NonEmptyString = Field(default_factory=generate_uuid4)
    user_id: NonEmptyString
    # Output of UserApiKeyController.hash_key for the plaintext key.
    key_hash: NonEmptyString
    name: NonEmptyString | None = None
    # Defaults to the current time in UTC at model creation.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
 class UserApiKeyController:
    """Creates, verifies, lists and deletes rows of the user_api_key table."""
    @staticmethod
    def generate_key() -> NonEmptyString:
        """Return a new URL-safe random token derived from 48 random bytes."""
        token = secrets.token_urlsafe(48)
        return token
    @staticmethod
    def hash_key(key: NonEmptyString) -> str:
        """Return the hex HMAC-SHA256 digest of ``key``, keyed with settings.SECRET_KEY."""
        mac = hmac.new(settings.SECRET_KEY.encode(), key.encode(), digestmod=sha256)
        return mac.hexdigest()
    @classmethod
    async def create_key(
        cls,
        user_id: NonEmptyString,
        name: NonEmptyString | None = None,
    ) -> tuple[UserApiKey, NonEmptyString]:
        """Generate a key for ``user_id`` and persist its hash.
        Returns the stored record together with the plaintext key; only the
        hash is written to the database.
        """
        secret = cls.generate_key()
        record = UserApiKey(user_id=user_id, key_hash=cls.hash_key(secret), name=name)
        statement = user_api_keys.insert().values(**record.model_dump())
        await get_database().execute(statement)
        return record, secret
    @classmethod
    async def verify_key(cls, plaintext_key: NonEmptyString) -> UserApiKey | None:
        """Return the record whose stored hash matches ``plaintext_key``, else None."""
        digest = cls.hash_key(plaintext_key)
        lookup = user_api_keys.select().where(user_api_keys.c.key_hash == digest)
        row = await get_database().fetch_one(lookup)
        if not row:
            return None
        return UserApiKey(**row)
    @staticmethod
    async def list_by_user_id(user_id: NonEmptyString) -> list[UserApiKey]:
        """Return every key record of ``user_id``, newest created_at first."""
        newest_first = user_api_keys.c.created_at.desc()
        statement = (
            user_api_keys.select()
            .where(user_api_keys.c.user_id == user_id)
            .order_by(newest_first)
        )
        rows = await get_database().fetch_all(statement)
        return [UserApiKey(**row) for row in rows]
    @staticmethod
    async def delete_key(key_id: NonEmptyString, user_id: NonEmptyString) -> bool:
        """Delete the key ``key_id`` if it belongs to ``user_id``.
        Returns True when the driver reports no row count (None) or a positive
        one, False otherwise.
        """
        owned_by_user = sqlalchemy.and_(
            user_api_keys.c.id == key_id,
            user_api_keys.c.user_id == user_id,
        )
        outcome = await get_database().execute(user_api_keys.delete().where(owned_by_user))
        # asyncpg returns None for DELETE, consider it success if no exception
        if outcome is None:
            return True
        return outcome > 0
 # Module-level controller instance created at import time.
 user_api_keys_controller = UserApiKeyController()
--- a/server/reflector/db/users.py
+++ b/server/reflector/db/users.py
@@ -1,98 +0,0 @@
 """User table for storing Authentik user information."""
 from datetime import datetime, timezone
 import sqlalchemy
 from pydantic import BaseModel, Field
 from reflector.db import get_database, metadata
 from reflector.utils import generate_uuid4
 from reflector.utils.string import NonEmptyString
 # Table "user": one row per user, identified externally by authentik_uid.
 users = sqlalchemy.Table(
    "user",
    metadata,
    sqlalchemy.Column("id", sqlalchemy.String, primary_key=True),
    sqlalchemy.Column("email", sqlalchemy.String, nullable=False),
    sqlalchemy.Column("authentik_uid", sqlalchemy.String, nullable=False),
    sqlalchemy.Column("created_at", sqlalchemy.DateTime(timezone=True), nullable=False),
    sqlalchemy.Column("updated_at", sqlalchemy.DateTime(timezone=True), nullable=False),
    # authentik_uid is unique; email is indexed but may repeat.
    sqlalchemy.Index("idx_user_authentik_uid", "authentik_uid", unique=True),
    sqlalchemy.Index("idx_user_email", "email", unique=False),
 )
 # Pydantic view of one row of the user table.
 class User(BaseModel):
    # Generated with generate_uuid4 when not supplied.
    id: NonEmptyString = Field(default_factory=generate_uuid4)
    email: NonEmptyString
    authentik_uid: NonEmptyString
    # Both timestamps default to the current time in UTC at model creation.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
 class UserController:
    """Persistence helpers for rows of the ``user`` table.
    All methods are static and issue their queries through get_database().
    """
    @staticmethod
    async def get_by_id(user_id: NonEmptyString) -> User | None:
        """Return the user whose primary key is ``user_id``, or None if absent."""
        query = users.select().where(users.c.id == user_id)
        result = await get_database().fetch_one(query)
        return User(**result) if result else None
    @staticmethod
    async def get_by_authentik_uid(authentik_uid: NonEmptyString) -> User | None:
        """Return the user with the given ``authentik_uid``, or None if absent."""
        query = users.select().where(users.c.authentik_uid == authentik_uid)
        result = await get_database().fetch_one(query)
        return User(**result) if result else None
    @staticmethod
    async def get_by_email(email: NonEmptyString) -> User | None:
        """Return one user with the given ``email``, or None if absent.
        The email index is not unique, so several rows may match; only the
        first row returned by the database is used.
        """
        query = users.select().where(users.c.email == email)
        result = await get_database().fetch_one(query)
        return User(**result) if result else None
    @staticmethod
    async def create_or_update(
        id: NonEmptyString, authentik_uid: NonEmptyString, email: NonEmptyString
    ) -> User:
        """Create the user for ``authentik_uid`` or refresh its email.
        If a row with this ``authentik_uid`` exists, its email and updated_at
        are rewritten and the existing id and created_at are kept (the ``id``
        argument is ignored). Otherwise a new row is inserted with ``id``.
        Returns the resulting User.
        """
        # NOTE(review): lookup and write are separate statements; with the
        # unique index on authentik_uid, two concurrent first-time calls could
        # presumably make the losing insert fail — confirm whether callers care.
        existing = await UserController.get_by_authentik_uid(authentik_uid)
        now = datetime.now(timezone.utc)
        if existing:
            query = (
                users.update()
                .where(users.c.authentik_uid == authentik_uid)
                .values(email=email, updated_at=now)
            )
            await get_database().execute(query)
            return User(
                id=existing.id,
                authentik_uid=authentik_uid,
                email=email,
                created_at=existing.created_at,
                updated_at=now,
            )
        else:
            user = User(
                id=id,
                authentik_uid=authentik_uid,
                email=email,
                created_at=now,
                updated_at=now,
            )
            query = users.insert().values(**user.model_dump())
            await get_database().execute(query)
            return user
    @staticmethod
    async def list_all() -> list[User]:
        """Return every user, newest created_at first."""
        query = users.select().order_by(users.c.created_at.desc())
        results = await get_database().fetch_all(query)
        return [User(**r) for r in results]
    @staticmethod
    async def get_by_ids(user_ids: list[NonEmptyString]) -> dict[str, User]:
        """Return the users whose id is in ``user_ids``, keyed by id.
        Ids with no matching row are simply missing from the result. An empty
        ``user_ids`` returns an empty dict without querying the database.
        """
        # Skip the round trip (and an empty IN () clause) when nothing is asked for.
        if not user_ids:
            return {}
        query = users.select().where(users.c.id.in_(user_ids))
        rows = await get_database().fetch_all(query)
        # Rows are read as mappings, matching how they are unpacked into User.
        return {row["id"]: User(**row) for row in rows}
 # Module-level controller instance created at import time.
 user_controller = UserController()
--- a/server/reflector/hatchet/init.py
+++ b/server/reflector/hatchet/init.py
@@ -1,5 +0,0 @@
 """Hatchet workflow orchestration for Reflector."""
 from reflector.hatchet.client import HatchetClientManager
 __all__ = ["HatchetClientManager"]
--- a/server/reflector/hatchet/broadcast.py
+++ b/server/reflector/hatchet/broadcast.py
@@ -1,98 +0,0 @@
 """WebSocket broadcasting helpers for Hatchet workflows.
 DUPLICATION NOTE: To be kept when Celery is deprecated. Currently dupes Celery logic.
 Provides WebSocket broadcasting for Hatchet that matches Celery's @broadcast_to_sockets
 decorator behavior. Events are broadcast to transcript rooms and user rooms.
 """
 from typing import Any
 import structlog
 from reflector.db.transcripts import Transcript, TranscriptEvent, transcripts_controller
 from reflector.utils.string import NonEmptyString
 from reflector.ws_manager import get_ws_manager
 # Event names that broadcast_event forwards to the owning user's room (as
 # "TRANSCRIPT_<name>") in addition to the transcript room (matches Celery behavior)
 USER_ROOM_EVENTS = {"STATUS", "FINAL_TITLE", "DURATION"}
 async def broadcast_event(
    transcript_id: NonEmptyString,
    event: TranscriptEvent,
    logger: structlog.BoundLogger,
 ) -> None:
    """Push a TranscriptEvent to the transcript's WebSocket room.
    Events whose name is in USER_ROOM_EVENTS are additionally forwarded to the
    room of the transcript's user, renamed to ``TRANSCRIPT_<name>``.
    Best-effort: any exception raised while sending is logged as a warning and
    swallowed, so workflow execution is not interrupted.
    """
    event_name = event.event
    logger.info(
        "Broadcasting event",
        transcript_id=transcript_id,
        event_type=event_name,
    )
    try:
        sockets = get_ws_manager()
        await sockets.send_json(
            room_id=f"ts:{transcript_id}",
            message=event.model_dump(mode="json"),
        )
        logger.info(
            "Event sent to transcript room",
            transcript_id=transcript_id,
            event_type=event_name,
        )
        if event_name not in USER_ROOM_EVENTS:
            return
        # The user room is only known through the transcript row.
        record = await transcripts_controller.get_by_id(transcript_id)
        if not record or not record.user_id:
            return
        await sockets.send_json(
            room_id=f"user:{record.user_id}",
            message={
                "event": f"TRANSCRIPT_{event_name}",
                "data": {"id": transcript_id, **event.data},
            },
        )
    except Exception as exc:
        logger.warning(
            "Failed to broadcast event",
            error=str(exc),
            transcript_id=transcript_id,
            event_type=event_name,
        )
 async def set_status_and_broadcast(
    transcript_id: NonEmptyString,
    status: str,
    logger: structlog.BoundLogger,
 ) -> None:
    """Persist a new transcript status, then broadcast the resulting event.
    Delegates to transcripts_controller.set_status; nothing is broadcast when
    that call returns a falsy value.
    """
    status_event = await transcripts_controller.set_status(transcript_id, status)
    if not status_event:
        return
    await broadcast_event(transcript_id, status_event, logger=logger)
 async def append_event_and_broadcast(
    transcript_id: NonEmptyString,
    transcript: Transcript,
    event_name: str,
    data: Any,
    logger: structlog.BoundLogger,
 ) -> TranscriptEvent:
    """Record an event on the transcript and broadcast it over WebSocket.
    Delegates persistence to transcripts_controller.append_event and then hands
    the stored event to broadcast_event.
    Returns:
        The TranscriptEvent returned by the controller.
    """
    stored = await transcripts_controller.append_event(
        transcript=transcript, event=event_name, data=data
    )
    await broadcast_event(transcript_id, stored, logger=logger)
    return stored
--- a/server/reflector/hatchet/client.py
+++ b/server/reflector/hatchet/client.py
@@ -1,111 +0,0 @@
 """Hatchet Python client wrapper.
 Uses singleton pattern because:
 1. Hatchet client maintains persistent gRPC connections for workflow registration
 2. Creating multiple clients would cause registration conflicts and resource leaks
 3. The SDK is designed for a single client instance per process
 4. Tests use `HatchetClientManager.reset()` to isolate state between tests
 """
 import logging
 import threading
 from hatchet_sdk import ClientConfig, Hatchet
 from hatchet_sdk.clients.rest.models import V1TaskStatus
 from reflector.logger import logger
 from reflector.settings import settings
 class HatchetClientManager:
    """Singleton manager for Hatchet client connections.
    See module docstring for rationale. For test isolation, use `reset()`.
    """
    # Lazily created, class-wide Hatchet client; None until the first get_client().
    _instance: Hatchet | None = None
    # Serializes creation and reset of _instance.
    _lock = threading.Lock()
    @classmethod
    def get_client(cls) -> Hatchet:
        """Get or create the Hatchet client (thread-safe singleton).
        Raises:
            ValueError: if settings.HATCHET_CLIENT_TOKEN is empty when the
                client has to be created.
        """
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have created it
                # between the first check and acquiring the lock.
                if cls._instance is None:
                    if not settings.HATCHET_CLIENT_TOKEN:
                        raise ValueError("HATCHET_CLIENT_TOKEN must be set")
                    # Pass root logger to Hatchet so workflow logs appear in dashboard
                    root_logger = logging.getLogger()
                    cls._instance = Hatchet(
                        debug=settings.HATCHET_DEBUG,
                        config=ClientConfig(logger=root_logger),
                    )
        return cls._instance
    @classmethod
    async def start_workflow(
        cls,
        workflow_name: str,
        input_data: dict,
        additional_metadata: dict | None = None,
    ) -> str:
        """Start a workflow and return the workflow run ID.
        Args:
            workflow_name: Name of the workflow to trigger.
            input_data: Input data for the workflow run.
            additional_metadata: Optional metadata for filtering in dashboard
                (e.g., transcript_id, recording_id).
        """
        client = cls.get_client()
        result = await client.runs.aio_create(
            workflow_name,
            input_data,
            additional_metadata=additional_metadata,
        )
        return result.run.metadata.id
    @classmethod
    async def get_workflow_run_status(cls, workflow_run_id: str) -> V1TaskStatus:
        """Return the current status of the given workflow run."""
        client = cls.get_client()
        return await client.runs.aio_get_status(workflow_run_id)
    @classmethod
    async def cancel_workflow(cls, workflow_run_id: str) -> None:
        """Request cancellation of the given workflow run and log it."""
        client = cls.get_client()
        await client.runs.aio_cancel(workflow_run_id)
        logger.info("[Hatchet] Cancelled workflow", workflow_run_id=workflow_run_id)
    @classmethod
    async def replay_workflow(cls, workflow_run_id: str) -> None:
        """Request a replay of the given workflow run and log it."""
        client = cls.get_client()
        await client.runs.aio_replay(workflow_run_id)
        logger.info("[Hatchet] Replaying workflow", workflow_run_id=workflow_run_id)
    @classmethod
    async def can_replay(cls, workflow_run_id: str) -> bool:
        """Check if workflow can be replayed (status is FAILED or CANCELLED).
        Returns False, after logging a warning, when the status lookup itself
        raises, so callers can treat "unknown" as "not replayable".
        """
        replayable = (V1TaskStatus.FAILED, V1TaskStatus.CANCELLED)
        try:
            status = await cls.get_workflow_run_status(workflow_run_id)
            return status in replayable
        except Exception as e:
            logger.warning(
                "[Hatchet] Failed to check replay status",
                workflow_run_id=workflow_run_id,
                error=str(e),
            )
            return False
    @classmethod
    async def get_workflow_status(cls, workflow_run_id: str) -> dict:
        """Get the full workflow run details as dict."""
        client = cls.get_client()
        run = await client.runs.aio_get(workflow_run_id)
        return run.to_dict()
    @classmethod
    def reset(cls) -> None:
        """Reset the client instance (for testing)."""
        with cls._lock:
            cls._instance = None
--- a/server/reflector/hatchet/run_workers.py
+++ b/server/reflector/hatchet/run_workers.py
@@ -1,63 +0,0 @@
 """
 Run Hatchet workers for the diarization pipeline.
 Runs as a separate process, just like Celery workers.
 Usage:
    uv run -m reflector.hatchet.run_workers
    # Or via docker:
    docker compose exec server uv run -m reflector.hatchet.run_workers
 """
 import signal
 import sys
 from reflector.logger import logger
 from reflector.settings import settings
 def main() -> None:
    """Validate Hatchet settings, register the workflows and run the worker.
    Exits with status 1 when Hatchet is disabled or no client token is set.
    SIGINT and SIGTERM are handled by logging and calling ``sys.exit(0)``.
    """
    preconditions = (
        (settings.HATCHET_ENABLED, "HATCHET_ENABLED is False, not starting workers"),
        (settings.HATCHET_CLIENT_TOKEN, "HATCHET_CLIENT_TOKEN is not set"),
    )
    for satisfied, complaint in preconditions:
        if not satisfied:
            logger.error(complaint)
            sys.exit(1)
    logger.info("Starting Hatchet workers", debug=settings.HATCHET_DEBUG)
    # Import here (not top-level) - workflow modules call HatchetClientManager.get_client()
    # at module level because Hatchet SDK decorators (@workflow.task) bind at import time.
    # Can't use lazy init: decorators need the client object when function is defined.
    from reflector.hatchet.client import HatchetClientManager  # noqa: PLC0415
    from reflector.hatchet.workflows import (  # noqa: PLC0415
        diarization_pipeline,
        track_workflow,
    )
    client = HatchetClientManager.get_client()
    registered = [diarization_pipeline, track_workflow]
    worker = client.worker("reflector-diarization-worker", workflows=registered)
    def _on_signal(signum: int, frame) -> None:
        logger.info("Received shutdown signal, stopping workers...")
        # Worker cleanup happens automatically on exit
        sys.exit(0)
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, _on_signal)
    logger.info("Starting Hatchet worker polling...")
    worker.start()
 # Script entry point, e.g. `uv run -m reflector.hatchet.run_workers`.
 if __name__ == "__main__":
    main()
--- a/server/reflector/hatchet/workflows/init.py
+++ b/server/reflector/hatchet/workflows/init.py
@@ -1,14 +0,0 @@
 """Hatchet workflow definitions."""
 from reflector.hatchet.workflows.diarization_pipeline import (
    PipelineInput,
    diarization_pipeline,
 )
 from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
 __all__ = [
    "diarization_pipeline",
    "track_workflow",
    "PipelineInput",
    "TrackInput",
 ]
--- a/server/reflector/hatchet/workflows/diarization_pipeline.py
+++ b/server/reflector/hatchet/workflows/diarization_pipeline.py
@@ -1,961 +0,0 @@
 """
 Hatchet main workflow: DiarizationPipeline
 Multitrack diarization pipeline for Daily.co recordings.
 Orchestrates the full processing flow from recording metadata to final transcript.
 Note: This file uses deferred imports (inside functions/tasks) intentionally.
 Hatchet workers run in forked processes; fresh imports per task ensure DB connections
 are not shared across forks, avoiding connection pooling issues.
 """
 import asyncio
 import functools
 import tempfile
 from contextlib import asynccontextmanager
 from datetime import timedelta
 from pathlib import Path
 from typing import Callable
 import httpx
 from hatchet_sdk import Context
 from pydantic import BaseModel
 from reflector.dailyco_api.client import DailyApiClient
 from reflector.hatchet.broadcast import (
    append_event_and_broadcast,
    set_status_and_broadcast,
 )
 from reflector.hatchet.client import HatchetClientManager
 from reflector.hatchet.workflows.models import (
    ConsentResult,
    FinalizeResult,
    MixdownResult,
    PaddedTrackInfo,
    ParticipantsResult,
    ProcessTracksResult,
    RecordingResult,
    SummaryResult,
    TitleResult,
    TopicsResult,
    WaveformResult,
    WebhookResult,
    ZulipResult,
 )
 from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
 from reflector.logger import logger
 from reflector.pipelines import topic_processing
 from reflector.processors import AudioFileWriterProcessor
 from reflector.processors.types import (
    TitleSummary,
    TitleSummaryWithId,
    Word,
 )
 from reflector.processors.types import (
    Transcript as TranscriptType,
 )
 from reflector.settings import settings
 from reflector.storage.storage_aws import AwsStorage
 from reflector.utils.audio_constants import (
    PRESIGNED_URL_EXPIRATION_SECONDS,
    WAVEFORM_SEGMENTS,
 )
 from reflector.utils.audio_mixdown import (
    detect_sample_rate_from_tracks,
    mixdown_tracks_pyav,
 )
 from reflector.utils.audio_waveform import get_audio_waveform
 from reflector.utils.daily import (
    filter_cam_audio_tracks,
    parse_daily_recording_filename,
 )
 from reflector.utils.string import NonEmptyString, assert_non_none_and_non_empty
 from reflector.zulip import post_transcript_notification
 class PipelineInput(BaseModel):
    """Input to trigger the diarization pipeline.
    Used as the ``input_validator`` of the DiarizationPipeline workflow; every
    task in this module receives an instance as its ``input`` argument.
    """
    # Recording id passed to DailyApiClient.get_recording.
    recording_id: NonEmptyString
    tracks: list[dict]  # List of {"s3_key": str}
    # Bucket forwarded to each per-track child workflow.
    bucket_name: NonEmptyString
    # Transcript row that the tasks read and update.
    transcript_id: NonEmptyString
    # Optional room id; not read by the tasks visible in this module section.
    room_id: NonEmptyString | None = None
 # Resolved at import time: the workflow object has to exist before the
 # @diarization_pipeline.task decorators below are evaluated.
 hatchet = HatchetClientManager.get_client()
 diarization_pipeline = hatchet.workflow(
    name="DiarizationPipeline", input_validator=PipelineInput
 )
@asynccontextmanager
 async def fresh_db_connection():
    """Context manager for database connections in Hatchet workers.
    Creates a new ``databases.Database`` for ``settings.DATABASE_URL``, installs
    it in ``reflector.db._database_context`` for the duration of the block, and
    on exit disconnects it and resets the context variable to ``None``.
    TECH DEBT: Made to make connection fork-aware without changing db code too much.
    The real fix would be making the db module fork-aware instead of bypassing it.
    Current pattern is acceptable given Hatchet's process model.
    Yields:
        The connected ``databases.Database`` instance.
    """
    import databases  # noqa: PLC0415
    from reflector.db import _database_context  # noqa: PLC0415
    # Discard whatever database object was registered before this block.
    _database_context.set(None)
    db = databases.Database(settings.DATABASE_URL)
    _database_context.set(db)
    # NOTE(review): connect() runs before the try, so if it raises the context
    # variable keeps pointing at the unconnected db - confirm this is acceptable.
    await db.connect()
    try:
        yield db
    finally:
        # Always tear down, even when the body raised.
        await db.disconnect()
        _database_context.set(None)
 async def set_workflow_error_status(transcript_id: NonEmptyString) -> bool:
    """Mark the transcript as 'error' after a workflow failure.
    Opens its own database connection, so it can be called from an error path.
    Returns:
        True when the status was written, False when writing it failed.
        A failure is logged as CRITICAL because the transcript may then be
        stuck in its previous status.
    """
    try:
        async with fresh_db_connection():
            await set_status_and_broadcast(transcript_id, "error", logger=logger)
    except Exception as exc:
        logger.critical(
            "[Hatchet] CRITICAL: Failed to set error status - transcript may be stuck in 'processing'",
            transcript_id=transcript_id,
            error=str(exc),
            exc_info=True,
        )
        return False
    else:
        return True
 def _spawn_storage():
    """Build a new AwsStorage from the TRANSCRIPT_STORAGE_AWS_* settings."""
    storage_options = {
        "aws_bucket_name": settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
        "aws_region": settings.TRANSCRIPT_STORAGE_AWS_REGION,
        "aws_access_key_id": settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
    }
    return AwsStorage(**storage_options)
 def with_error_handling(step_name: str, set_error_status: bool = True) -> Callable:
    """Build a decorator that gives pipeline tasks uniform failure handling.
    The wrapped task's exception is logged, the transcript is optionally
    flagged as 'error', and the exception is then re-raised unchanged.
    Args:
        step_name: Step label used in the error log message.
        set_error_status: When True, set the transcript status to 'error'
            before re-raising.
    """
    def _decorate(task_fn: Callable) -> Callable:
        @functools.wraps(task_fn)
        async def _guarded(input: PipelineInput, ctx: Context):
            try:
                outcome = await task_fn(input, ctx)
            except Exception as exc:
                logger.error(
                    f"[Hatchet] {step_name} failed",
                    transcript_id=input.transcript_id,
                    error=str(exc),
                    exc_info=True,
                )
                if set_error_status:
                    await set_workflow_error_status(input.transcript_id)
                raise
            return outcome
        return _guarded
    return _decorate
@diarization_pipeline.task(execution_timeout=timedelta(seconds=60), retries=3)
@with_error_handling("get_recording")
 async def get_recording(input: PipelineInput, ctx: Context) -> RecordingResult:
    """Fetch recording metadata from Daily.co API."""
    ctx.log(f"get_recording: recording_id={input.recording_id}")
    # Set transcript status to "processing" at workflow start (broadcasts to WebSocket)
    async with fresh_db_connection():
        from reflector.db.transcripts import transcripts_controller  # noqa: PLC0415
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        if transcript:
            await set_status_and_broadcast(
                input.transcript_id, "processing", logger=logger
            )
            ctx.log(f"Set transcript status to processing: {input.transcript_id}")
    if not settings.DAILY_API_KEY:
        raise ValueError("DAILY_API_KEY not configured")
    async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
        recording = await client.get_recording(input.recording_id)
    ctx.log(
        f"get_recording complete: room={recording.room_name}, duration={recording.duration}s"
    )
    return RecordingResult(
        id=recording.id,
        mtg_session_id=recording.mtgSessionId,
        duration=recording.duration,
    )
@diarization_pipeline.task(
    parents=[get_recording], execution_timeout=timedelta(seconds=60), retries=3
 )
@with_error_handling("get_participants")
 async def get_participants(input: PipelineInput, ctx: Context) -> ParticipantsResult:
    """Fetch participant list from Daily.co API and update transcript in database."""
    ctx.log(f"get_participants: transcript_id={input.transcript_id}")
    recording = ctx.task_output(get_recording)
    mtg_session_id = recording.mtg_session_id
    async with fresh_db_connection():
        from reflector.db.transcripts import (  # noqa: PLC0415
            TranscriptParticipant,
            transcripts_controller,
        )
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        if transcript:
            # Note: title NOT cleared - preserves existing titles
            await transcripts_controller.update(
                transcript,
                {
                    "events": [],
                    "topics": [],
                    "participants": [],
                },
            )
        mtg_session_id = assert_non_none_and_non_empty(
            mtg_session_id, "mtg_session_id is required"
        )
        daily_api_key = assert_non_none_and_non_empty(
            settings.DAILY_API_KEY, "DAILY_API_KEY is required"
        )
        async with DailyApiClient(api_key=daily_api_key) as client:
            participants = await client.get_meeting_participants(mtg_session_id)
        id_to_name = {}
        id_to_user_id = {}
        for p in participants.data:
            if p.user_name:
                id_to_name[p.participant_id] = p.user_name
            if p.user_id:
                id_to_user_id[p.participant_id] = p.user_id
        track_keys = [t["s3_key"] for t in input.tracks]
        cam_audio_keys = filter_cam_audio_tracks(track_keys)
        participants_list = []
        for idx, key in enumerate(cam_audio_keys):
            try:
                parsed = parse_daily_recording_filename(key)
                participant_id = parsed.participant_id
            except ValueError as e:
                logger.error(
                    "Failed to parse Daily recording filename",
                    error=str(e),
                    key=key,
                )
                continue
            default_name = f"Speaker {idx}"
            name = id_to_name.get(participant_id, default_name)
            user_id = id_to_user_id.get(participant_id)
            participant = TranscriptParticipant(
                id=participant_id, speaker=idx, name=name, user_id=user_id
            )
            await transcripts_controller.upsert_participant(transcript, participant)
            participants_list.append(
                {
                    "participant_id": participant_id,
                    "user_name": name,
                    "speaker": idx,
                }
            )
        ctx.log(f"get_participants complete: {len(participants_list)} participants")
    return ParticipantsResult(
        participants=participants_list,
        num_tracks=len(input.tracks),
        source_language=transcript.source_language if transcript else "en",
        target_language=transcript.target_language if transcript else "en",
    )
@diarization_pipeline.task(
    parents=[get_participants], execution_timeout=timedelta(seconds=600), retries=3
 )
@with_error_handling("process_tracks")
 async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksResult:
    """Spawn child workflows for each track (dynamic fan-out)."""
    ctx.log(f"process_tracks: spawning {len(input.tracks)} track workflows")
    participants_result = ctx.task_output(get_participants)
    source_language = participants_result.source_language
    child_coroutines = [
        track_workflow.aio_run(
            TrackInput(
                track_index=i,
                s3_key=track["s3_key"],
                bucket_name=input.bucket_name,
                transcript_id=input.transcript_id,
                language=source_language,
            )
        )
        for i, track in enumerate(input.tracks)
    ]
    results = await asyncio.gather(*child_coroutines)
    target_language = participants_result.target_language
    track_words = []
    padded_tracks = []
    created_padded_files = set()
    for result in results:
        transcribe_result = result.get("transcribe_track", {})
        track_words.append(transcribe_result.get("words", []))
        pad_result = result.get("pad_track", {})
        padded_key = pad_result.get("padded_key")
        bucket_name = pad_result.get("bucket_name")
        # Store S3 key info (not presigned URL) - consumer tasks presign on demand
        if padded_key:
            padded_tracks.append(
                PaddedTrackInfo(key=padded_key, bucket_name=bucket_name)
            )
        track_index = pad_result.get("track_index")
        if pad_result.get("size", 0) > 0 and track_index is not None:
            storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{track_index}.webm"
            created_padded_files.add(storage_path)
    all_words = [word for words in track_words for word in words]
    all_words.sort(key=lambda w: w.get("start", 0))
    ctx.log(
        f"process_tracks complete: {len(all_words)} words from {len(input.tracks)} tracks"
    )
    return ProcessTracksResult(
        all_words=all_words,
        padded_tracks=padded_tracks,
        word_count=len(all_words),
        num_tracks=len(input.tracks),
        target_language=target_language,
        created_padded_files=list(created_padded_files),
    )
@diarization_pipeline.task(
    parents=[process_tracks], execution_timeout=timedelta(seconds=300), retries=3
 )
@with_error_handling("mixdown_tracks")
 async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
    """Mix all padded tracks into single audio file using PyAV (same as Celery)."""
    ctx.log("mixdown_tracks: mixing padded tracks into single audio file")
    track_result = ctx.task_output(process_tracks)
    padded_tracks = track_result.padded_tracks
    # TODO think of NonEmpty type to avoid those checks, e.g. sized.NonEmpty from https://github.com/antonagestam/phantom-types/
    if not padded_tracks:
        raise ValueError("No padded tracks to mixdown")
    storage = _spawn_storage()
    # Presign URLs on demand (avoids stale URLs on workflow replay)
    padded_urls = []
    for track_info in padded_tracks:
        if track_info.key:
            url = await storage.get_file_url(
                track_info.key,
                operation="get_object",
                expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
                bucket=track_info.bucket_name,
            )
            padded_urls.append(url)
    valid_urls = [url for url in padded_urls if url]
    if not valid_urls:
        raise ValueError("No valid padded tracks to mixdown")
    target_sample_rate = detect_sample_rate_from_tracks(valid_urls, logger=logger)
    if not target_sample_rate:
        logger.error("Mixdown failed - no decodable audio frames found")
        raise ValueError("No decodable audio frames in any track")
    output_path = tempfile.mktemp(suffix=".mp3")
    duration_ms_callback_capture_container = [0.0]
    async def capture_duration(d):
        duration_ms_callback_capture_container[0] = d
    writer = AudioFileWriterProcessor(path=output_path, on_duration=capture_duration)
    await mixdown_tracks_pyav(
        valid_urls,
        writer,
        target_sample_rate,
        offsets_seconds=None,
        logger=logger,
    )
    await writer.flush()
    file_size = Path(output_path).stat().st_size
    storage_path = f"{input.transcript_id}/audio.mp3"
    with open(output_path, "rb") as mixed_file:
        await storage.put_file(storage_path, mixed_file)
    Path(output_path).unlink(missing_ok=True)
    async with fresh_db_connection():
        from reflector.db.transcripts import transcripts_controller  # noqa: PLC0415
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        if transcript:
            await transcripts_controller.update(
                transcript, {"audio_location": "storage"}
            )
    ctx.log(f"mixdown_tracks complete: uploaded {file_size} bytes to {storage_path}")
    return MixdownResult(
        audio_key=storage_path,
        duration=duration_ms_callback_capture_container[0],
        tracks_mixed=len(valid_urls),
    )
@diarization_pipeline.task(
    parents=[mixdown_tracks], execution_timeout=timedelta(seconds=120), retries=3
 )
@with_error_handling("generate_waveform")
 async def generate_waveform(input: PipelineInput, ctx: Context) -> WaveformResult:
    """Generate audio waveform visualization using AudioWaveformProcessor (matches Celery)."""
    ctx.log(f"generate_waveform: transcript_id={input.transcript_id}")
    from reflector.db.transcripts import (  # noqa: PLC0415
        TranscriptWaveform,
        transcripts_controller,
    )
    mixdown_result = ctx.task_output(mixdown_tracks)
    audio_key = mixdown_result.audio_key
    storage = _spawn_storage()
    audio_url = await storage.get_file_url(
        audio_key,
        operation="get_object",
        expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
    )
    # Download MP3 to temp file (AudioWaveformProcessor needs local file)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_path = temp_file.name
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(audio_url, timeout=120)
            response.raise_for_status()
            with open(temp_path, "wb") as f:
                f.write(response.content)
        waveform = get_audio_waveform(
            path=Path(temp_path), segments_count=WAVEFORM_SEGMENTS
        )
        async with fresh_db_connection():
            transcript = await transcripts_controller.get_by_id(input.transcript_id)
            if transcript:
                waveform_data = TranscriptWaveform(waveform=waveform)
                await append_event_and_broadcast(
                    input.transcript_id,
                    transcript,
                    "WAVEFORM",
                    waveform_data,
                    logger=logger,
                )
    finally:
        Path(temp_path).unlink(missing_ok=True)
    ctx.log("generate_waveform complete")
    return WaveformResult(waveform_generated=True)
@diarization_pipeline.task(
    parents=[mixdown_tracks], execution_timeout=timedelta(seconds=300), retries=3
 )
@with_error_handling("detect_topics")
 async def detect_topics(input: PipelineInput, ctx: Context) -> TopicsResult:
    """Detect topics using LLM and save to database (matches Celery on_topic callback).
    Rebuilds Word objects from the process_tracks output, runs
    topic_processing.detect_topics on them and, for every topic reported through
    the callback, upserts it on the transcript and broadcasts a TOPIC event.
    Returns:
        TopicsResult holding the ``model_dump()`` of every detected topic.
    """
    ctx.log("detect_topics: analyzing transcript for topics")
    # NOTE(review): the declared parent is mixdown_tracks, but the output read
    # here is process_tracks' - confirm ctx.task_output exposes outputs of
    # tasks that are not direct parents.
    track_result = ctx.task_output(process_tracks)
    words = track_result.all_words
    target_language = track_result.target_language
    from reflector.db.transcripts import (  # noqa: PLC0415
        TranscriptTopic,
        transcripts_controller,
    )
    word_objects = [Word(**w) for w in words]
    transcript_type = TranscriptType(words=word_objects)
    empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
    # The connection stays open for the whole detection run because the
    # callback below writes to the database as topics arrive.
    async with fresh_db_connection():
        # NOTE(review): transcript is not checked for None before the callback
        # passes it to the controller - confirm the row always exists here.
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        async def on_topic_callback(data):
            # Persist one detected topic and notify WebSocket listeners.
            topic = TranscriptTopic(
                title=data.title,
                summary=data.summary,
                timestamp=data.timestamp,
                transcript=data.transcript.text,
                words=data.transcript.words,
            )
            if isinstance(
                data, TitleSummaryWithId
            ):  # Celery parity: main_live_pipeline.py
                topic.id = data.id
            await transcripts_controller.upsert_topic(transcript, topic)
            await append_event_and_broadcast(
                input.transcript_id, transcript, "TOPIC", topic, logger=logger
            )
        topics = await topic_processing.detect_topics(
            transcript_type,
            target_language,
            on_topic_callback=on_topic_callback,
            empty_pipeline=empty_pipeline,
        )
    topics_list = [t.model_dump() for t in topics]
    ctx.log(f"detect_topics complete: found {len(topics_list)} topics")
    return TopicsResult(topics=topics_list)
@diarization_pipeline.task(
    parents=[detect_topics], execution_timeout=timedelta(seconds=120), retries=3
 )
@with_error_handling("generate_title")
 async def generate_title(input: PipelineInput, ctx: Context) -> TitleResult:
    """Generate meeting title using LLM and save to database (matches Celery on_title callback).
    Rebuilds TitleSummary objects from the detect_topics output and runs
    topic_processing.generate_title. The callback stores the title on the
    transcript only when it has none yet, and always broadcasts a FINAL_TITLE
    event.
    Returns:
        TitleResult with the generated title, or ``None`` as title when the
        callback was never invoked.
    """
    ctx.log("generate_title: generating title from topics")
    topics_result = ctx.task_output(detect_topics)
    topics = topics_result.topics
    from reflector.db.transcripts import (  # noqa: PLC0415
        TranscriptFinalTitle,
        transcripts_controller,
    )
    topic_objects = [TitleSummary(**t) for t in topics]
    empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
    # Filled in by the callback; stays None if no title is produced.
    title_result = None
    async with fresh_db_connection():
        # NOTE(review): transcript may be None here, in which case the callback's
        # transcript.title access raises - confirm the row always exists.
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        async def on_title_callback(data):
            nonlocal title_result
            title_result = data.title
            final_title = TranscriptFinalTitle(title=data.title)
            # Do not overwrite a title that is already set on the transcript.
            if not transcript.title:
                await transcripts_controller.update(
                    transcript,
                    {"title": final_title.title},
                )
            await append_event_and_broadcast(
                input.transcript_id,
                transcript,
                "FINAL_TITLE",
                final_title,
                logger=logger,
            )
        await topic_processing.generate_title(
            topic_objects,
            on_title_callback=on_title_callback,
            empty_pipeline=empty_pipeline,
            logger=logger,
        )
    ctx.log(f"generate_title complete: '{title_result}'")
    return TitleResult(title=title_result)
@diarization_pipeline.task(
    parents=[detect_topics], execution_timeout=timedelta(seconds=300), retries=3
)
@with_error_handling("generate_summary")
async def generate_summary(input: PipelineInput, ctx: Context) -> SummaryResult:
    """Generate meeting summary using LLM and save to database (matches Celery callbacks).

    Reads the topics produced by ``detect_topics`` and runs
    ``topic_processing.generate_summaries``. Each callback stores its value
    on the transcript and broadcasts the matching event
    (``FINAL_LONG_SUMMARY``, ``FINAL_SHORT_SUMMARY``, ``ACTION_ITEMS``).

    Args:
        input: Pipeline input; ``input.transcript_id`` selects the transcript.
        ctx: Hatchet task context (logging and parent task outputs).

    Returns:
        SummaryResult with the long summary, short summary and action items;
        a field stays ``None`` when its callback was never invoked.

    Raises:
        ValueError: If the transcript does not exist in the database.
    """
    ctx.log("generate_summary: generating long and short summaries")
    topics_result = ctx.task_output(detect_topics)
    topics = topics_result.topics
    from reflector.db.transcripts import (  # noqa: PLC0415
        TranscriptActionItems,
        TranscriptFinalLongSummary,
        TranscriptFinalShortSummary,
        transcripts_controller,
    )
    # Topics travel between tasks as plain dicts; rebuild the model objects.
    topic_objects = [TitleSummary(**t) for t in topics]
    empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
    summary_result = None
    short_summary_result = None
    action_items_result = None
    async with fresh_db_connection():
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        # Fail fast with a clear message (same check as finalize) rather than
        # handing a None transcript to the summarizer and the DB controller.
        if transcript is None:
            raise ValueError(f"Transcript {input.transcript_id} not found in database")
        async def on_long_summary_callback(data):
            nonlocal summary_result
            summary_result = data.long_summary
            final_long_summary = TranscriptFinalLongSummary(
                long_summary=data.long_summary
            )
            await transcripts_controller.update(
                transcript,
                {"long_summary": final_long_summary.long_summary},
            )
            await append_event_and_broadcast(
                input.transcript_id,
                transcript,
                "FINAL_LONG_SUMMARY",
                final_long_summary,
                logger=logger,
            )
        async def on_short_summary_callback(data):
            nonlocal short_summary_result
            short_summary_result = data.short_summary
            final_short_summary = TranscriptFinalShortSummary(
                short_summary=data.short_summary
            )
            await transcripts_controller.update(
                transcript,
                {"short_summary": final_short_summary.short_summary},
            )
            await append_event_and_broadcast(
                input.transcript_id,
                transcript,
                "FINAL_SHORT_SUMMARY",
                final_short_summary,
                logger=logger,
            )
        async def on_action_items_callback(data):
            nonlocal action_items_result
            action_items_result = data.action_items
            action_items = TranscriptActionItems(action_items=data.action_items)
            await transcripts_controller.update(
                transcript,
                {"action_items": action_items.action_items},
            )
            await append_event_and_broadcast(
                input.transcript_id,
                transcript,
                "ACTION_ITEMS",
                action_items,
                logger=logger,
            )
        await topic_processing.generate_summaries(
            topic_objects,
            transcript,
            on_long_summary_callback=on_long_summary_callback,
            on_short_summary_callback=on_short_summary_callback,
            on_action_items_callback=on_action_items_callback,
            empty_pipeline=empty_pipeline,
            logger=logger,
        )
    ctx.log("generate_summary complete")
    return SummaryResult(
        summary=summary_result,
        short_summary=short_summary_result,
        action_items=action_items_result,
    )
@diarization_pipeline.task(
    parents=[generate_waveform, generate_title, generate_summary],
    execution_timeout=timedelta(seconds=60),
    retries=3,
)
@with_error_handling("finalize")
async def finalize(input: PipelineInput, ctx: Context) -> FinalizeResult:
    """Wrap up the pipeline: emit TRANSCRIPT, store the duration, mark the transcript 'ended'.

    Mirrors Celery's on_transcript + set_status behavior. Title and summaries
    are not touched here; their own task callbacks have already saved them.

    Steps, in order:
      1. Best-effort deletion of the temporary padded track files.
      2. Broadcast the merged transcript text (``TRANSCRIPT`` event).
      3. Persist the duration and clear ``workflow_run_id``.
      4. Broadcast ``DURATION`` and set status to ``ended``.

    Raises:
        ValueError: If the transcript does not exist in the database.
    """
    ctx.log("finalize: saving transcript and setting status to 'ended'")
    mixdown_output = ctx.task_output(mixdown_tracks)
    tracks_output = ctx.task_output(process_tracks)
    total_duration = mixdown_output.duration
    raw_words = tracks_output.all_words
    # Temporary padded S3 files are removed only now (deferred until finalize
    # for semantic parity with Celery).
    padded_paths = tracks_output.created_padded_files
    if padded_paths:
        ctx.log(f"Cleaning up {len(padded_paths)} temporary S3 files")
        storage = _spawn_storage()
        outcomes = await asyncio.gather(
            *(storage.delete_file(path) for path in padded_paths),
            return_exceptions=True,
        )
        for storage_path, outcome in zip(padded_paths, outcomes):
            if not isinstance(outcome, Exception):
                continue
            # A failed delete is only logged; it must not fail the task.
            logger.warning(
                "[Hatchet] Failed to cleanup temporary padded track",
                storage_path=storage_path,
                error=str(outcome),
            )
    async with fresh_db_connection():
        from reflector.db.transcripts import (  # noqa: PLC0415
            TranscriptDuration,
            TranscriptText,
            transcripts_controller,
        )
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        if transcript is None:
            raise ValueError(f"Transcript {input.transcript_id} not found in database")
        merged = TranscriptType(
            words=[Word(**w) for w in raw_words],
            translation=None,
        )
        text_event = TranscriptText(text=merged.text, translation=merged.translation)
        await append_event_and_broadcast(
            input.transcript_id,
            transcript,
            "TRANSCRIPT",
            text_event,
            logger=logger,
        )
        # Workflow finished successfully: store the duration and drop the
        # run id, since there is nothing left to resume.
        field_updates = {
            "duration": total_duration,
            "workflow_run_id": None,
        }
        await transcripts_controller.update(transcript, field_updates)
        duration_event = TranscriptDuration(duration=total_duration)
        await append_event_and_broadcast(
            input.transcript_id, transcript, "DURATION", duration_event, logger=logger
        )
        await set_status_and_broadcast(input.transcript_id, "ended", logger=logger)
        ctx.log(
            f"finalize complete: transcript {input.transcript_id} status set to 'ended'"
        )
    return FinalizeResult(status="COMPLETED")
@diarization_pipeline.task(
    parents=[finalize], execution_timeout=timedelta(seconds=60), retries=3
 )
@with_error_handling("cleanup_consent", set_error_status=False)
 async def cleanup_consent(input: PipelineInput, ctx: Context) -> ConsentResult:
    """Check consent and delete audio files if any participant denied.

    Runs after ``finalize``. When the transcript belongs to a meeting for
    which ``meeting_consent_controller.has_any_denial`` reports a denial,
    the raw track files listed in ``input.tracks`` and the processed audio
    (when ``transcript.audio_location == "storage"``) are deleted.

    Deletion is best effort: each failure is collected and logged, and the
    transcript is flagged ``audio_deleted`` only when no deletion failed.

    Returns:
        An empty ConsentResult on every path (not found, consent approved,
        deleted, or deleted with errors).
    """
    ctx.log(f"cleanup_consent: transcript_id={input.transcript_id}")
    async with fresh_db_connection():
        from reflector.db.meetings import (  # noqa: PLC0415
            meeting_consent_controller,
            meetings_controller,
        )
        from reflector.db.recordings import recordings_controller  # noqa: PLC0415
        from reflector.db.transcripts import transcripts_controller  # noqa: PLC0415
        from reflector.storage import get_transcripts_storage  # noqa: PLC0415
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        if not transcript:
            ctx.log("cleanup_consent: transcript not found")
            return ConsentResult()
        # Consent counts as denied only when the transcript is linked to an
        # existing meeting and that meeting has at least one denial.
        consent_denied = False
        if transcript.meeting_id:
            meeting = await meetings_controller.get_by_id(transcript.meeting_id)
            if meeting:
                consent_denied = await meeting_consent_controller.has_any_denial(
                    meeting.id
                )
        if not consent_denied:
            ctx.log("cleanup_consent: consent approved, keeping all files")
            return ConsentResult()
        ctx.log("cleanup_consent: consent denied, deleting audio files")
        # The workflow input is the source of truth for which raw tracks to delete.
        input_track_keys = set(t["s3_key"] for t in input.tracks)
        # Detect if recording.track_keys was manually modified after workflow started
        # (diagnostic only: a mismatch is logged, deletion still uses the input keys).
        if transcript.recording_id:
            recording = await recordings_controller.get_by_id(transcript.recording_id)
            if recording and recording.track_keys:
                db_track_keys = set(filter_cam_audio_tracks(recording.track_keys))
                if input_track_keys != db_track_keys:
                    added = db_track_keys - input_track_keys
                    removed = input_track_keys - db_track_keys
                    logger.warning(
                        "[Hatchet] Track keys mismatch: DB changed since workflow start",
                        transcript_id=input.transcript_id,
                        recording_id=transcript.recording_id,
                        input_count=len(input_track_keys),
                        db_count=len(db_track_keys),
                        added_in_db=list(added) if added else None,
                        removed_from_db=list(removed) if removed else None,
                    )
                    ctx.log(
                        f"WARNING: track_keys mismatch - "
                        f"input has {len(input_track_keys)}, DB has {len(db_track_keys)}. "
                        f"Using input tracks for deletion."
                    )
        # Collect failures instead of raising so one bad key does not stop
        # the remaining deletions.
        deletion_errors = []
        # NOTE(review): when input.bucket_name is empty the raw tracks are not
        # deleted, yet audio_deleted can still be set below — confirm intended.
        if input_track_keys and input.bucket_name:
            master_storage = get_transcripts_storage()
            for key in input_track_keys:
                try:
                    await master_storage.delete_file(key, bucket=input.bucket_name)
                    ctx.log(f"Deleted recording file: {input.bucket_name}/{key}")
                except Exception as e:
                    error_msg = f"Failed to delete {key}: {e}"
                    logger.error(error_msg, exc_info=True)
                    deletion_errors.append(error_msg)
        # Processed audio is removed only when it lives in transcript storage.
        if transcript.audio_location == "storage":
            storage = get_transcripts_storage()
            try:
                await storage.delete_file(transcript.storage_audio_path)
                ctx.log(f"Deleted processed audio: {transcript.storage_audio_path}")
            except Exception as e:
                error_msg = f"Failed to delete processed audio: {e}"
                logger.error(error_msg, exc_info=True)
                deletion_errors.append(error_msg)
        if deletion_errors:
            # Leave audio_deleted unset so the transcript is not reported as
            # cleaned while some files may still exist.
            logger.warning(
                "[Hatchet] cleanup_consent completed with errors",
                transcript_id=input.transcript_id,
                error_count=len(deletion_errors),
                errors=deletion_errors,
            )
            ctx.log(f"cleanup_consent completed with {len(deletion_errors)} errors")
        else:
            await transcripts_controller.update(transcript, {"audio_deleted": True})
            ctx.log("cleanup_consent: all audio deleted successfully")
    return ConsentResult()
@diarization_pipeline.task(
    parents=[cleanup_consent], execution_timeout=timedelta(seconds=60), retries=5
)
@with_error_handling("post_zulip", set_error_status=False)
async def post_zulip(input: PipelineInput, ctx: Context) -> ZulipResult:
    """Announce the processed transcript on Zulip.

    Skipped (``skipped=True``) when ``settings.ZULIP_REALM`` is not set.
    When the transcript cannot be found, nothing is posted and the returned
    ``zulip_message_id`` is ``None``.
    """
    ctx.log(f"post_zulip: transcript_id={input.transcript_id}")
    zulip_configured = bool(settings.ZULIP_REALM)
    if not zulip_configured:
        ctx.log("post_zulip skipped (Zulip not configured)")
        return ZulipResult(zulip_message_id=None, skipped=True)
    # Stays None unless a notification is actually posted.
    message_id = None
    async with fresh_db_connection():
        from reflector.db.transcripts import transcripts_controller  # noqa: PLC0415
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        if transcript:
            message_id = await post_transcript_notification(transcript)
            ctx.log(f"post_zulip complete: zulip_message_id={message_id}")
    return ZulipResult(zulip_message_id=message_id)
@diarization_pipeline.task(
    parents=[post_zulip], execution_timeout=timedelta(seconds=120), retries=30
)
@with_error_handling("send_webhook", set_error_status=False)
async def send_webhook(input: PipelineInput, ctx: Context) -> WebhookResult:
    """POST a ``transcript.completed`` webhook to the room's configured URL.

    The call is skipped (``webhook_sent=False, skipped=True``) when the input
    has no ``room_id``, or when the room, its ``webhook_url`` or the
    transcript is missing. A non-2xx response raises via
    ``raise_for_status``.
    """
    ctx.log(f"send_webhook: transcript_id={input.transcript_id}")
    if not input.room_id:
        ctx.log("send_webhook skipped (no room_id)")
        return WebhookResult(webhook_sent=False, skipped=True)
    async with fresh_db_connection():
        from reflector.db.rooms import rooms_controller  # noqa: PLC0415
        from reflector.db.transcripts import transcripts_controller  # noqa: PLC0415
        room = await rooms_controller.get_by_id(input.room_id)
        transcript = await transcripts_controller.get_by_id(input.transcript_id)
        # Nothing to deliver without a target URL and a transcript.
        if not (room and room.webhook_url and transcript):
            return WebhookResult(webhook_sent=False, skipped=True)
        payload = {
            "event": "transcript.completed",
            "transcript_id": input.transcript_id,
            "title": transcript.title,
            "duration": transcript.duration,
        }
        async with httpx.AsyncClient() as http_client:
            response = await http_client.post(
                room.webhook_url, json=payload, timeout=30
            )
            response.raise_for_status()
        status_code = response.status_code
        ctx.log(f"send_webhook complete: status_code={status_code}")
        return WebhookResult(webhook_sent=True, response_code=response.status_code)
--- a/server/reflector/hatchet/workflows/models.py
+++ b/server/reflector/hatchet/workflows/models.py
@@ -1,124 +0,0 @@
 """
 Pydantic models for Hatchet workflow task return types.
 Provides static typing for all task outputs, enabling type checking
 and better IDE support.
 """
 from typing import Any
 from pydantic import BaseModel
 from reflector.utils.string import NonEmptyString
 class PadTrackResult(BaseModel):
    """Result from pad_track task.

    When pad_track finds that no padding is needed it returns the original
    track key and bucket with size=0; otherwise it returns the key of the
    uploaded padded file with bucket_name=None.
    """
    padded_key: NonEmptyString  # S3 key (not presigned URL) - presign on demand to avoid stale URLs on replay
    bucket_name: (
        NonEmptyString | None
    )  # None means use default transcript storage bucket
    size: int  # Size in bytes of the padded file; 0 when no padding was applied
    track_index: int  # Index of the track this result belongs to
 class TranscribeTrackResult(BaseModel):
    """Result from transcribe_track task."""
    # One dict per word (word.model_dump()) with "speaker" set to the track index.
    words: list[dict[str, Any]]
    track_index: int  # Index of the transcribed track
 class RecordingResult(BaseModel):
    """Result from get_recording task."""
    id: NonEmptyString | None  # Recording identifier; None is allowed
    # NOTE(review): presumably the meeting session id — confirm against get_recording.
    mtg_session_id: NonEmptyString | None
    duration: float  # NOTE(review): unit not visible here — presumably seconds; confirm
 class ParticipantsResult(BaseModel):
    """Result from get_participants task."""
    # Participants as plain dicts; NOTE(review): dict schema is defined by get_participants — confirm there.
    participants: list[dict[str, Any]]
    num_tracks: int  # Number of tracks
    source_language: NonEmptyString  # Must be a non-empty string
    target_language: NonEmptyString  # Must be a non-empty string
 class PaddedTrackInfo(BaseModel):
    """Info for a padded track - S3 key + bucket for on-demand presigning."""
    key: NonEmptyString  # S3 object key of the track (not a presigned URL)
    bucket_name: NonEmptyString | None  # None = use default storage bucket
 class ProcessTracksResult(BaseModel):
    """Result from process_tracks task."""
    # Word dicts; finalize rebuilds Word objects from them via Word(**w).
    all_words: list[dict[str, Any]]
    padded_tracks: list[PaddedTrackInfo]  # S3 keys, not presigned URLs
    word_count: int
    num_tracks: int
    target_language: NonEmptyString
    # Temporary padded files; finalize deletes each of these paths from storage.
    created_padded_files: list[NonEmptyString]
 class MixdownResult(BaseModel):
    """Result from mixdown_tracks task."""
    audio_key: NonEmptyString  # NOTE(review): presumably the storage key of the mixed audio — confirm
    duration: float  # finalize stores this value as the transcript's "duration"
    tracks_mixed: int  # NOTE(review): presumably the number of tracks in the mix — confirm
 class WaveformResult(BaseModel):
    """Result from generate_waveform task."""
    waveform_generated: bool  # Flag reported by generate_waveform
 class TopicsResult(BaseModel):
    """Result from detect_topics task."""
    # Topics serialized with model_dump(); generate_title and generate_summary
    # rebuild them with TitleSummary(**t).
    topics: list[dict[str, Any]]
 class TitleResult(BaseModel):
    """Result from generate_title task."""
    title: str | None  # None when the title callback was never invoked
 class SummaryResult(BaseModel):
    """Result from generate_summary task.

    Each field stays None when the corresponding callback in
    generate_summary was never invoked.
    """
    summary: str | None  # Long summary
    short_summary: str | None
    action_items: dict | None = None  # Optional; defaults to None when omitted
 class FinalizeResult(BaseModel):
    """Result from finalize task."""
    status: NonEmptyString  # finalize returns "COMPLETED" on success
 class ConsentResult(BaseModel):
    """Result from cleanup_consent task.

    Carries no fields: cleanup_consent returns an empty instance on every
    path, so the outcome is only visible through logs and the transcript.
    """
 class ZulipResult(BaseModel):
    """Result from post_zulip task."""
    zulip_message_id: int | None = None  # None when skipped or when the transcript was not found
    skipped: bool = False  # True when Zulip is not configured (settings.ZULIP_REALM unset)
 class WebhookResult(BaseModel):
    """Result from send_webhook task."""
    webhook_sent: bool  # True only after the POST succeeded (raise_for_status passed)
    skipped: bool = False  # True when there was no room_id, room, webhook_url or transcript
    response_code: int | None = None  # HTTP status code of the webhook response, when sent
--- a/server/reflector/hatchet/workflows/track_processing.py
+++ b/server/reflector/hatchet/workflows/track_processing.py
@@ -1,222 +0,0 @@
 """
 Hatchet child workflow: TrackProcessing
 Handles individual audio track processing: padding and transcription.
 Spawned dynamically by the main diarization pipeline for each track.
 Architecture note: This is a separate workflow (not inline tasks in DiarizationPipeline)
 because Hatchet workflow DAGs are defined statically, but the number of tracks varies
 at runtime. Child workflow spawning via `aio_run()` + `asyncio.gather()` is the
 standard pattern for dynamic fan-out. See `process_tracks` in diarization_pipeline.py.
 Note: This file uses deferred imports (inside tasks) intentionally.
 Hatchet workers run in forked processes; fresh imports per task ensure
 storage/DB connections are not shared across forks.
 """
 import tempfile
 from datetime import timedelta
 from pathlib import Path
 import av
 from hatchet_sdk import Context
 from pydantic import BaseModel
 from reflector.hatchet.client import HatchetClientManager
 from reflector.hatchet.workflows.models import PadTrackResult, TranscribeTrackResult
 from reflector.logger import logger
 from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
 from reflector.utils.audio_padding import (
    apply_audio_padding_to_file,
    extract_stream_start_time_from_container,
 )
 class TrackInput(BaseModel):
    """Input for individual track processing."""
    track_index: int  # Track position; also written as the "speaker" of every transcribed word
    s3_key: str  # S3 key of the source track, presigned on demand by pad_track
    bucket_name: str  # Bucket that holds the source track
    transcript_id: str  # Used to build the storage path of the padded file
    language: str = "en"  # Language passed to the transcription processor
 # Module-level Hatchet client and the "TrackProcessing" child workflow; the
 # tasks below register themselves on it and receive a validated TrackInput.
 hatchet = HatchetClientManager.get_client()
 track_workflow = hatchet.workflow(name="TrackProcessing", input_validator=TrackInput)
@track_workflow.task(execution_timeout=timedelta(seconds=300), retries=3)
async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
    """Pad single audio track with silence for alignment.

    Extracts stream.start_time from WebM container metadata and applies
    silence padding using PyAV filter graph (adelay).

    Args:
        input: Track to process (source S3 key/bucket, index, transcript id).
        ctx: Hatchet task context, used for task logging.

    Returns:
        PadTrackResult pointing at the audio to transcribe: the original key
        and bucket with ``size=0`` when no padding is needed, otherwise the
        key of the uploaded padded file with ``bucket_name=None``.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    ctx.log(f"pad_track: track {input.track_index}, s3_key={input.s3_key}")
    logger.info(
        "[Hatchet] pad_track",
        track_index=input.track_index,
        s3_key=input.s3_key,
        transcript_id=input.transcript_id,
    )
    try:
        # Create fresh storage instance to avoid aioboto3 fork issues
        from reflector.settings import settings  # noqa: PLC0415
        from reflector.storage.storage_aws import AwsStorage  # noqa: PLC0415
        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )
        source_url = await storage.get_file_url(
            input.s3_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=input.bucket_name,
        )
        with av.open(source_url) as in_container:
            start_time_seconds = extract_stream_start_time_from_container(
                in_container, input.track_index, logger=logger
            )
            # If no padding needed, return original S3 key
            if start_time_seconds <= 0:
                logger.info(
                    f"Track {input.track_index} requires no padding",
                    track_index=input.track_index,
                )
                return PadTrackResult(
                    padded_key=input.s3_key,
                    bucket_name=input.bucket_name,
                    size=0,
                    track_index=input.track_index,
                )
            # delete=False: only the path is needed here; the file is written
            # by the padding helper and removed in the finally block below.
            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:
                temp_path = temp_file.name
            try:
                apply_audio_padding_to_file(
                    in_container,
                    temp_path,
                    start_time_seconds,
                    input.track_index,
                    logger=logger,
                )
                file_size = Path(temp_path).stat().st_size
                storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
                # Plain literals: these messages have no placeholders, so the
                # f-prefix was dropped (Ruff F541).
                logger.info(
                    "About to upload padded track",
                    key=storage_path,
                    size=file_size,
                )
                with open(temp_path, "rb") as padded_file:
                    await storage.put_file(storage_path, padded_file)
                logger.info(
                    "Uploaded padded track to S3",
                    key=storage_path,
                    size=file_size,
                )
            finally:
                # Always remove the local temp file, even when padding or upload fails.
                Path(temp_path).unlink(missing_ok=True)
        ctx.log(f"pad_track complete: track {input.track_index} -> {storage_path}")
        logger.info(
            "[Hatchet] pad_track complete",
            track_index=input.track_index,
            padded_key=storage_path,
        )
        # Return S3 key (not presigned URL) - consumer tasks presign on demand
        # This avoids stale URLs when workflow is replayed
        return PadTrackResult(
            padded_key=storage_path,
            bucket_name=None,  # None = use default transcript storage bucket
            size=file_size,
            track_index=input.track_index,
        )
    except Exception as e:
        logger.error("[Hatchet] pad_track failed", error=str(e), exc_info=True)
        raise
@track_workflow.task(
    parents=[pad_track], execution_timeout=timedelta(seconds=600), retries=3
)
async def transcribe_track(input: TrackInput, ctx: Context) -> TranscribeTrackResult:
    """Transcribe the (possibly padded) track produced by pad_track.

    Presigns a URL for the key returned by ``pad_track``, runs
    ``transcribe_file_with_processor`` on it and returns every word as a
    dict tagged with ``speaker = input.track_index``.

    Raises:
        ValueError: If ``pad_track`` returned no ``padded_key``.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    ctx.log(f"transcribe_track: track {input.track_index}, language={input.language}")
    logger.info(
        "[Hatchet] transcribe_track",
        track_index=input.track_index,
        language=input.language,
    )
    try:
        pad_output = ctx.task_output(pad_track)
        audio_key = pad_output.padded_key
        audio_bucket = pad_output.bucket_name
        if not audio_key:
            raise ValueError("Missing padded_key from pad_track")
        # The URL is presigned here, at use time, so a replayed workflow
        # never works with an expired link.
        from reflector.settings import settings  # noqa: PLC0415
        from reflector.storage.storage_aws import AwsStorage  # noqa: PLC0415
        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )
        audio_url = await storage.get_file_url(
            audio_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=audio_bucket,
        )
        from reflector.pipelines.transcription_helpers import (  # noqa: PLC0415
            transcribe_file_with_processor,
        )
        transcript = await transcribe_file_with_processor(audio_url, input.language)
        # Each track is one speaker: stamp the track index on every word.
        words = [
            {**word.model_dump(), "speaker": input.track_index}
            for word in transcript.words
        ]
        ctx.log(
            f"transcribe_track complete: track {input.track_index}, {len(words)} words"
        )
        logger.info(
            "[Hatchet] transcribe_track complete",
            track_index=input.track_index,
            word_count=len(words),
        )
        return TranscribeTrackResult(words=words, track_index=input.track_index)
    except Exception as e:
        logger.error("[Hatchet] transcribe_track failed", error=str(e), exc_info=True)
        raise
--- a/server/reflector/llm.py
+++ b/server/reflector/llm.py
@@ -1,32 +1,13 @@
-import logging
+from typing import Type, TypeVar
 from contextvars import ContextVar
 from typing import Generic, Type, TypeVar
 from uuid import uuid4
 from llama_index.core import Settings
 from llama_index.core.output_parsers import PydanticOutputParser
 from llama_index.core.program import LLMTextCompletionProgram
 from llama_index.core.response_synthesizers import TreeSummarize
 from llama_index.core.workflow import (
    Context,
    Event,
    StartEvent,
    StopEvent,
    Workflow,
    step,
 )
 from llama_index.llms.openai_like import OpenAILike
-from pydantic import BaseModel, ValidationError
+from pydantic import BaseModel
 from workflows.errors import WorkflowTimeoutError
 from reflector.utils.retry import retry
 T = TypeVar("T", bound=BaseModel)
 OutputT = TypeVar("OutputT", bound=BaseModel)
 # Session ID for LiteLLM request grouping - set per processing run
 llm_session_id: ContextVar[str | None] = ContextVar("llm_session_id", default=None)
 logger = logging.getLogger(__name__)
 STRUCTURED_RESPONSE_PROMPT_TEMPLATE = """
 Based on the following analysis, provide the information in the requested JSON format:
@@ -38,158 +19,6 @@ Analysis:
 """
 class LLMParseError(Exception):
    """Signals that the LLM output still failed to parse once all attempts were used.

    Attributes:
        output_cls: Model class the output was expected to match.
        error_msg: Description of the last parse/validation failure.
        attempts: Number of attempts that were made.
    """
    def __init__(self, output_cls: Type[BaseModel], error_msg: str, attempts: int):
        message = (
            f"Failed to parse {output_cls.__name__} after {attempts} attempts: {error_msg}"
        )
        super().__init__(message)
        self.output_cls = output_cls
        self.error_msg = error_msg
        self.attempts = attempts
 class ExtractionDone(Event):
    """Event emitted when LLM JSON formatting completes.

    Produced by StructuredOutputWorkflow.extract and consumed by
    StructuredOutputWorkflow.validate.
    """
    output: str  # Raw text of the LLM completion, not yet parsed or validated
 class ValidationErrorEvent(Event):
    """Event emitted when validation fails.

    Produced by StructuredOutputWorkflow.validate and routed back to
    StructuredOutputWorkflow.extract to trigger another attempt.
    """
    error: str  # Formatted error text that is fed back to the LLM
    wrong_output: str  # The raw LLM output that failed to parse
 class StructuredOutputWorkflow(Workflow, Generic[OutputT]):
    """Workflow for structured output extraction with validation retry.
    This workflow handles parse/validation retries only. Network error retries
    are handled internally by Settings.llm (OpenAILike max_retries=3).
    The caller should NOT wrap this workflow in additional retry logic.

    Flow: ``extract`` produces raw JSON text, ``validate`` parses it; a parse
    failure loops back to ``extract`` with the error as feedback.

    The final result is a dict: ``{"success": parsed}`` on success, or
    ``{"error": last_error, "attempts": n}`` once the attempts are used up.
    """
    def __init__(
        self,
        output_cls: Type[OutputT],
        max_retries: int = 3,
        **kwargs,
    ):
        """Create the workflow.

        Args:
            output_cls: Pydantic model class the LLM output must parse into.
            max_retries: Maximum number of extract attempts before giving up.
            **kwargs: Forwarded unchanged to ``Workflow.__init__``.
        """
        super().__init__(**kwargs)
        self.output_cls: Type[OutputT] = output_cls
        self.max_retries = max_retries
        self.output_parser = PydanticOutputParser(output_cls)
    @step
    async def extract(
        self, ctx: Context, ev: StartEvent | ValidationErrorEvent
    ) -> StopEvent | ExtractionDone:
        """Extract structured data from text using two-step LLM process.
        Step 1 (first call only): TreeSummarize generates text analysis
        Step 2 (every call): Settings.llm.acomplete formats analysis as JSON

        Returns ExtractionDone with the raw LLM text, or StopEvent carrying
        an error result when the attempt budget is exhausted.
        """
        # "retries" counts extract invocations; it is bumped before the limit
        # check, so validate sees a 1-based attempt number.
        current_retries = await ctx.store.get("retries", default=0)
        await ctx.store.set("retries", current_retries + 1)
        if current_retries >= self.max_retries:
            last_error = await ctx.store.get("last_error", default=None)
            logger.error(
                f"Max retries ({self.max_retries}) reached for {self.output_cls.__name__}"
            )
            return StopEvent(result={"error": last_error, "attempts": current_retries})
        if isinstance(ev, StartEvent):
            # First call: run TreeSummarize to get analysis, store in context
            prompt = ev.get("prompt")
            texts = ev.get("texts")
            tone_name = ev.get("tone_name")
            if not prompt or not isinstance(texts, list):
                raise ValueError(
                    "StartEvent must contain 'prompt' (str) and 'texts' (list)"
                )
            summarizer = TreeSummarize(verbose=False)
            analysis = await summarizer.aget_response(
                prompt, texts, tone_name=tone_name
            )
            await ctx.store.set("analysis", str(analysis))
            reflection = ""
        else:
            # Retry: reuse analysis from context
            analysis = await ctx.store.get("analysis")
            if not analysis:
                raise RuntimeError("Internal error: analysis not found in context")
            # Cap the echoed output so the retry prompt stays bounded.
            wrong_output = ev.wrong_output
            if len(wrong_output) > 2000:
                wrong_output = wrong_output[:2000] + "... [truncated]"
            reflection = (
                f"\n\nYour previous response could not be parsed:\n{wrong_output}\n\n"
                f"Error:\n{ev.error}\n\n"
                "Please try again. Return ONLY valid JSON matching the schema above, "
                "with no markdown formatting or extra text."
            )
        # Step 2: Format analysis as JSON using LLM completion
        format_instructions = self.output_parser.format(
            "Please structure the above information in the following JSON format:"
        )
        # On a retry the reflection is appended after the format instructions.
        json_prompt = STRUCTURED_RESPONSE_PROMPT_TEMPLATE.format(
            analysis=analysis,
            format_instructions=format_instructions + reflection,
        )
        # Network retries handled by OpenAILike (max_retries=3)
        response = await Settings.llm.acomplete(json_prompt)
        return ExtractionDone(output=response.text)
    @step
    async def validate(
        self, ctx: Context, ev: ExtractionDone
    ) -> StopEvent | ValidationErrorEvent:
        """Validate extracted output against Pydantic schema.

        Returns StopEvent with ``{"success": parsed}`` when parsing works,
        otherwise a ValidationErrorEvent that sends control back to extract.
        """
        raw_output = ev.output
        retries = await ctx.store.get("retries", default=0)
        try:
            parsed = self.output_parser.parse(raw_output)
            # Only log when success needed more than one attempt.
            if retries > 1:
                logger.info(
                    f"LLM parse succeeded on attempt {retries}/{self.max_retries} "
                    f"for {self.output_cls.__name__}"
                )
            return StopEvent(result={"success": parsed})
        except (ValidationError, ValueError) as e:
            error_msg = self._format_error(e, raw_output)
            # Remembered so extract can report it if the budget runs out.
            await ctx.store.set("last_error", error_msg)
            logger.error(
                f"LLM parse error (attempt {retries}/{self.max_retries}): "
                f"{type(e).__name__}: {e}\nRaw response: {raw_output[:500]}"
            )
            return ValidationErrorEvent(
                error=error_msg,
                wrong_output=raw_output,
            )
    def _format_error(self, error: Exception, raw_output: str) -> str:
        """Format error for LLM feedback.

        Pydantic validation errors become one "- <msg> in field '<path>'"
        line each; any other error becomes "Parse error: <text>".
        ``raw_output`` is accepted but not used.
        """
        if isinstance(error, ValidationError):
            error_messages = []
            for err in error.errors():
                field = ".".join(str(loc) for loc in err["loc"])
                error_messages.append(f"- {err['msg']} in field '{field}'")
            return "Schema validation errors:\n" + "\n".join(error_messages)
        else:
            return f"Parse error: {str(error)}"
 class LLM:
    def __init__(self, settings, temperature: float = 0.4, max_tokens: int = 2048):
        self.settings_obj = settings
@@ -200,12 +29,11 @@ class LLM:
        self.temperature = temperature
        self.max_tokens = max_tokens
        # Configure llamaindex Settings
        self._configure_llamaindex()
    def _configure_llamaindex(self):
        """Configure llamaindex Settings with OpenAILike LLM"""
        session_id = llm_session_id.get() or f"fallback-{uuid4().hex}"
        Settings.llm = OpenAILike(
            model=self.model_name,
            api_base=self.url,
@@ -215,7 +43,6 @@ class LLM:
            is_function_calling_model=False,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            additional_kwargs={"extra_body": {"litellm_session_id": session_id}},
        )
    async def get_response(
@@ -232,38 +59,25 @@ class LLM:
        texts: list[str],
        output_cls: Type[T],
        tone_name: str | None = None,
        timeout: int | None = None,
    ) -> T:
-        """Get structured output from LLM with validation retry via Workflow."""
+        """Get structured output from LLM for non-function-calling models"""
-        if timeout is None:
+        summarizer = TreeSummarize(verbose=True)
-            timeout = self.settings_obj.LLM_STRUCTURED_RESPONSE_TIMEOUT
+        response = await summarizer.aget_response(prompt, texts, tone_name=tone_name)
-        async def run_workflow():
+        output_parser = PydanticOutputParser(output_cls)
-            workflow = StructuredOutputWorkflow(
+
-                output_cls=output_cls,
+        program = LLMTextCompletionProgram.from_defaults(
-                max_retries=self.settings_obj.LLM_PARSE_MAX_RETRIES + 1,
+            output_parser=output_parser,
-                timeout=timeout,
+            prompt_template_str=STRUCTURED_RESPONSE_PROMPT_TEMPLATE,
            verbose=False,
        )
-            result = await workflow.run(
+        format_instructions = output_parser.format(
-                prompt=prompt,
+            "Please structure the above information in the following JSON format:"
                texts=texts,
                tone_name=tone_name,
        )
-            if "error" in result:
+        output = await program.acall(
-                error_msg = result["error"] or "Max retries exceeded"
+            analysis=str(response), format_instructions=format_instructions
                raise LLMParseError(
                    output_cls=output_cls,
                    error_msg=error_msg,
                    attempts=result.get("attempts", 0),
        )
-            return result["success"]
+        return output
        return await retry(run_workflow)(
            retry_attempts=3,
            retry_backoff_interval=1.0,
            retry_backoff_max=30.0,
            retry_ignore_exc_types=(WorkflowTimeoutError,),
        )
--- a/server/reflector/pipelines/MULTITRACK_FIX_SUMMARY.md
+++ b/server/reflector/pipelines/MULTITRACK_FIX_SUMMARY.md
@@ -0,0 +1,84 @@
 # Multitrack Pipeline Fix Summary
 ## Problem
 Whisper timestamps were incorrect because Whisper ignores leading silence in audio files, and Daily.co tracks can contain an arbitrary amount of silence before speech starts.
 ## Solution
 **Pad tracks BEFORE transcription using stream metadata `start_time`**
 This makes Whisper timestamps automatically correct relative to recording start.
 ## Key Changes in `main_multitrack_pipeline_fixed.py`
 ### 1. Added `pad_track_for_transcription()` method (lines 55-172)
 ```python
 async def pad_track_for_transcription(
    self,
    track_data: bytes,
    track_idx: int,
    storage,
 ) -> tuple[bytes, str]:
 ```
 - Extracts stream metadata `start_time` using PyAV
 - Creates PyAV filter graph with `adelay` filter to add padding
 - Stores padded track to S3 and returns URL
 - Uses same audio processing library (PyAV) already in the pipeline
 ### 2. Modified `process()` method
 #### REMOVED (lines 255-302):
 - Entire filename parsing for offsets - NOT NEEDED ANYMORE
 - The complex regex parsing of Daily.co filenames
 - Offset adjustment after transcription
 #### ADDED (lines 371-382):
 - Padding step BEFORE transcription:
 ```python
 # PAD TRACKS BEFORE TRANSCRIPTION - THIS IS THE KEY FIX!
 padded_track_urls: list[str] = []
 for idx, data in enumerate(track_datas):
    if not data:
        padded_track_urls.append("")
        continue
    _, padded_url = await self.pad_track_for_transcription(
        data, idx, storage
    )
    padded_track_urls.append(padded_url)
 ```
 #### MODIFIED (lines 385-435):
 - Transcribe PADDED tracks instead of raw tracks
 - Removed all timestamp offset adjustment code
 - Only the speaker ID is set; the timestamps are already correct.
 ```python
 # NO OFFSET ADJUSTMENT NEEDED!
 # Timestamps are already correct because we transcribed padded tracks
 # Just set speaker ID
 for w in t.words:
    w.speaker = idx
 ```
 ## Why This Works
 1. **Stream metadata is authoritative**: Daily.co sets `start_time` in the WebM container
 2. **PyAV respects metadata**: `audio_stream.start_time * audio_stream.time_base` gives the stream's start time in seconds
 3. **Padding before transcription**: Whisper sees continuous audio from time 0
 4. **Automatic alignment**: A word at 51s in the padded track is at 51s in the recording
 ## Testing
 Process the test recording (daily-20251020193458) and verify:
 - Participant 0 words appear at ~2s
 - Participant 1 words appear at ~51s
 - No word interleaving
 - Correct chronological order
 ## Files
 - **Original**: `main_multitrack_pipeline.py`
 - **Fixed**: `main_multitrack_pipeline_fixed.py`
 - **Test data**: `/Users/firfi/work/clients/monadical/reflector/1760988935484-*.webm`
--- a/server/reflector/pipelines/init.py
+++ b/server/reflector/pipelines/init.py
@@ -1 +0,0 @@
 """Pipeline modules for audio processing."""
--- a/server/reflector/pipelines/main_file_pipeline.py
+++ b/server/reflector/pipelines/main_file_pipeline.py
@@ -23,18 +23,23 @@ from reflector.db.transcripts import (
    transcripts_controller,
 )
 from reflector.logger import logger
 from reflector.pipelines import topic_processing
 from reflector.pipelines.main_live_pipeline import (
    PipelineMainBase,
    broadcast_to_sockets,
    task_cleanup_consent,
    task_pipeline_post_to_zulip,
 )
-from reflector.pipelines.transcription_helpers import transcribe_file_with_processor
+from reflector.processors import (
-from reflector.processors import AudioFileWriterProcessor
+    AudioFileWriterProcessor,
    TranscriptFinalSummaryProcessor,
    TranscriptFinalTitleProcessor,
    TranscriptTopicDetectorProcessor,
 )
 from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
 from reflector.processors.file_diarization import FileDiarizationInput
 from reflector.processors.file_diarization_auto import FileDiarizationAutoProcessor
 from reflector.processors.file_transcript import FileTranscriptInput
 from reflector.processors.file_transcript_auto import FileTranscriptAutoProcessor
 from reflector.processors.transcript_diarization_assembler import (
    TranscriptDiarizationAssemblerInput,
    TranscriptDiarizationAssemblerProcessor,
@@ -51,6 +56,19 @@ from reflector.storage import get_transcripts_storage
 from reflector.worker.webhook import send_transcript_webhook
 class EmptyPipeline:
    """No-op pipeline handed to processors that need a pipeline reference.

    It stores a logger, answers every preference lookup with the caller's
    default, and discards emitted events.
    """
    def __init__(self, logger: structlog.BoundLogger):
        # The only state kept is the logger, exposed as ``self.logger``.
        self.logger = logger
    def get_pref(self, k, d=None):
        """Return ``d`` unchanged; ``k`` is never looked up anywhere."""
        return d
    async def emit(self, event):
        """Accept ``event`` and do nothing with it."""
        return None
 class PipelineMainFile(PipelineMainBase):
    """
    Optimized file processing pipeline.
@@ -63,7 +81,7 @@ class PipelineMainFile(PipelineMainBase):
    def __init__(self, transcript_id: str):
        super().__init__(transcript_id=transcript_id)
        self.logger = logger.bind(transcript_id=self.transcript_id)
-        self.empty_pipeline = topic_processing.EmptyPipeline(logger=self.logger)
+        self.empty_pipeline = EmptyPipeline(logger=self.logger)
    def _handle_gather_exceptions(self, results: list, operation: str) -> None:
        """Handle exceptions from asyncio.gather with return_exceptions=True"""
@@ -97,8 +115,13 @@ class PipelineMainFile(PipelineMainBase):
                },
            )
        # Extract audio and write to transcript location
        audio_path = await self.extract_and_write_audio(file_path, transcript)
        # Upload for processing
        audio_url = await self.upload_audio(audio_path, transcript)
        # Run parallel processing
        await self.run_parallel_processing(
            audio_path,
            audio_url,
@@ -192,6 +215,7 @@ class PipelineMainFile(PipelineMainBase):
        transcript_result = results[0]
        diarization_result = results[1]
        # Handle errors - raise any exception that occurred
        self._handle_gather_exceptions(results, "parallel processing")
        for result in results:
            if isinstance(result, Exception):
@@ -206,6 +230,7 @@ class PipelineMainFile(PipelineMainBase):
            transcript=transcript_result, diarization=diarization_result or []
        )
        # Store result for retrieval
        diarized_transcript: Transcript | None = None
        async def capture_result(transcript):
@@ -237,7 +262,24 @@ class PipelineMainFile(PipelineMainBase):
    async def transcribe_file(self, audio_url: str, language: str) -> TranscriptType:
        """Transcribe complete file"""
-        return await transcribe_file_with_processor(audio_url, language)
+        processor = FileTranscriptAutoProcessor()
        input_data = FileTranscriptInput(audio_url=audio_url, language=language)
        # Store result for retrieval
        result: TranscriptType | None = None
        async def capture_result(transcript):
            nonlocal result
            result = transcript
        processor.on(capture_result)
        await processor.push(input_data)
        await processor.flush()
        if not result:
            raise ValueError("No transcript captured")
        return result
    async def diarize_file(self, audio_url: str) -> list[DiarizationSegment] | None:
        """Get diarization for file"""
@@ -280,32 +322,63 @@ class PipelineMainFile(PipelineMainBase):
    async def detect_topics(
        self, transcript: TranscriptType, target_language: str
    ) -> list[TitleSummary]:
-        return await topic_processing.detect_topics(
+        """Detect topics from complete transcript"""
-            transcript,
+        chunk_size = 300
-            target_language,
+        topics: list[TitleSummary] = []
-            on_topic_callback=self.on_topic,
+
-            empty_pipeline=self.empty_pipeline,
+        async def on_topic(topic: TitleSummary):
            topics.append(topic)
            return await self.on_topic(topic)
        topic_detector = TranscriptTopicDetectorProcessor(callback=on_topic)
        topic_detector.set_pipeline(self.empty_pipeline)
        for i in range(0, len(transcript.words), chunk_size):
            chunk_words = transcript.words[i : i + chunk_size]
            if not chunk_words:
                continue
            chunk_transcript = TranscriptType(
                words=chunk_words, translation=transcript.translation
            )
            await topic_detector.push(chunk_transcript)
        await topic_detector.flush()
        return topics
    async def generate_title(self, topics: list[TitleSummary]):
-        return await topic_processing.generate_title(
+        """Generate title from topics"""
-            topics,
+        if not topics:
-            on_title_callback=self.on_title,
+            self.logger.warning("No topics for title generation")
-            empty_pipeline=self.empty_pipeline,
+            return
-            logger=self.logger,
+
-        )
+        processor = TranscriptFinalTitleProcessor(callback=self.on_title)
        processor.set_pipeline(self.empty_pipeline)
        for topic in topics:
            await processor.push(topic)
        await processor.flush()
    async def generate_summaries(self, topics: list[TitleSummary]):
        """Generate long and short summaries from topics"""
        if not topics:
            self.logger.warning("No topics for summary generation")
            return
        transcript = await self.get_transcript()
-        return await topic_processing.generate_summaries(
+        processor = TranscriptFinalSummaryProcessor(
-            topics,
+            transcript=transcript,
-            transcript,
+            callback=self.on_long_summary,
-            on_long_summary_callback=self.on_long_summary,
+            on_short_summary=self.on_short_summary,
            on_short_summary_callback=self.on_short_summary,
            on_action_items_callback=self.on_action_items,
            empty_pipeline=self.empty_pipeline,
            logger=self.logger,
        )
        processor.set_pipeline(self.empty_pipeline)
        for topic in topics:
            await processor.push(topic)
        await processor.flush()
@shared_task
@@ -334,6 +407,7 @@ async def task_send_webhook_if_needed(*, transcript_id: str):
@asynctask
 async def task_pipeline_file_process(*, transcript_id: str):
    """Celery task for file pipeline processing"""
    transcript = await transcripts_controller.get_by_id(transcript_id)
    if not transcript:
        raise Exception(f"Transcript {transcript_id} not found")
@@ -342,6 +416,7 @@ async def task_pipeline_file_process(*, transcript_id: str):
    try:
        await pipeline.set_status(transcript_id, "processing")
        # Find the file to process
        audio_file = next(transcript.data_path.glob("upload.*"), None)
        if not audio_file:
            audio_file = next(transcript.data_path.glob("audio.*"), None)
@@ -351,12 +426,7 @@ async def task_pipeline_file_process(*, transcript_id: str):
        await pipeline.process(audio_file)
-    except Exception as e:
+    except Exception:
        logger.error(
            f"File pipeline failed for transcript {transcript_id}: {type(e).__name__}: {str(e)}",
            exc_info=True,
            transcript_id=transcript_id,
        )
        await pipeline.set_status(transcript_id, "error")
        raise
--- a/server/reflector/pipelines/main_live_pipeline.py
+++ b/server/reflector/pipelines/main_live_pipeline.py
@@ -17,6 +17,7 @@ from contextlib import asynccontextmanager
 from typing import Generic
 import av
 import boto3
 from celery import chord, current_task, group, shared_task
 from pydantic import BaseModel
 from structlog import BoundLogger as Logger
@@ -27,7 +28,6 @@ from reflector.db.recordings import recordings_controller
 from reflector.db.rooms import rooms_controller
 from reflector.db.transcripts import (
    Transcript,
    TranscriptActionItems,
    TranscriptDuration,
    TranscriptFinalLongSummary,
    TranscriptFinalShortSummary,
@@ -307,23 +307,6 @@ class PipelineMainBase(PipelineRunner[PipelineMessage], Generic[PipelineMessage]
                data=final_short_summary,
            )
    @broadcast_to_sockets
    async def on_action_items(self, data):
        """Save action items on the transcript and record an ``ACTION_ITEMS`` event.

        Args:
            data: Object exposing an ``action_items`` attribute.

        Returns:
            Whatever ``transcripts_controller.append_event`` returns.
        """
        payload = TranscriptActionItems(action_items=data.action_items)
        # Update and event append happen inside the same transaction block.
        async with self.transaction():
            current = await self.get_transcript()
            changes = {
                "action_items": payload.action_items,
            }
            await transcripts_controller.update(current, changes)
            return await transcripts_controller.append_event(
                transcript=current,
                event="ACTION_ITEMS",
                data=payload,
            )
    @broadcast_to_sockets
    async def on_duration(self, data):
        async with self.transaction():
@@ -483,7 +466,6 @@ class PipelineMainFinalSummaries(PipelineMainFromTopics):
                transcript=self._transcript,
                callback=self.on_long_summary,
                on_short_summary=self.on_short_summary,
                on_action_items=self.on_action_items,
            ),
        ]
@@ -602,7 +584,6 @@ async def cleanup_consent(transcript: Transcript, logger: Logger):
    consent_denied = False
    recording = None
    meeting = None
    try:
        if transcript.recording_id:
            recording = await recordings_controller.get_by_id(transcript.recording_id)
@@ -613,8 +594,8 @@ async def cleanup_consent(transcript: Transcript, logger: Logger):
                        meeting.id
                    )
    except Exception as e:
-        logger.error(f"Failed to fetch consent: {e}", exc_info=e)
+        logger.error(f"Failed to get fetch consent: {e}", exc_info=e)
-        raise
+        consent_denied = True
    if not consent_denied:
        logger.info("Consent approved, keeping all files")
@@ -622,24 +603,25 @@ async def cleanup_consent(transcript: Transcript, logger: Logger):
    logger.info("Consent denied, cleaning up all related audio files")
-    deletion_errors = []
+    if recording and recording.bucket_name and recording.object_key:
-    if recording and recording.bucket_name:
+        s3_whereby = boto3.client(
-        keys_to_delete = []
+            "s3",
-        if recording.track_keys:
+            aws_access_key_id=settings.AWS_WHEREBY_ACCESS_KEY_ID,
-            keys_to_delete = recording.track_keys
+            aws_secret_access_key=settings.AWS_WHEREBY_ACCESS_KEY_SECRET,
-        elif recording.object_key:
+        )
            keys_to_delete = [recording.object_key]
        master_storage = get_transcripts_storage()
        for key in keys_to_delete:
        try:
-                await master_storage.delete_file(key, bucket=recording.bucket_name)
+            s3_whereby.delete_object(
-                logger.info(f"Deleted recording file: {recording.bucket_name}/{key}")
+                Bucket=recording.bucket_name, Key=recording.object_key
            )
            logger.info(
                f"Deleted original Whereby recording: {recording.bucket_name}/{recording.object_key}"
            )
        except Exception as e:
-                error_msg = f"Failed to delete {key}: {e}"
+            logger.error(f"Failed to delete Whereby recording: {e}", exc_info=e)
                logger.error(error_msg, exc_info=e)
                deletion_errors.append(error_msg)
    # non-transactional, files marked for deletion not actually deleted is possible
    await transcripts_controller.update(transcript, {"audio_deleted": True})
    # 2. Delete processed audio from transcript storage S3 bucket
    if transcript.audio_location == "storage":
        storage = get_transcripts_storage()
        try:
@@ -648,28 +630,18 @@ async def cleanup_consent(transcript: Transcript, logger: Logger):
                f"Deleted processed audio from storage: {transcript.storage_audio_path}"
            )
        except Exception as e:
-            error_msg = f"Failed to delete processed audio: {e}"
+            logger.error(f"Failed to delete processed audio: {e}", exc_info=e)
            logger.error(error_msg, exc_info=e)
            deletion_errors.append(error_msg)
    # 3. Delete local audio files
    try:
        if hasattr(transcript, "audio_mp3_filename") and transcript.audio_mp3_filename:
            transcript.audio_mp3_filename.unlink(missing_ok=True)
        if hasattr(transcript, "audio_wav_filename") and transcript.audio_wav_filename:
            transcript.audio_wav_filename.unlink(missing_ok=True)
    except Exception as e:
-        error_msg = f"Failed to delete local audio files: {e}"
+        logger.error(f"Failed to delete local audio files: {e}", exc_info=e)
        logger.error(error_msg, exc_info=e)
        deletion_errors.append(error_msg)
-    if deletion_errors:
+    logger.info("Consent cleanup done")
        logger.warning(
            f"Consent cleanup completed with {len(deletion_errors)} errors",
            errors=deletion_errors,
        )
    else:
        await transcripts_controller.update(transcript, {"audio_deleted": True})
        logger.info("Consent cleanup done - all audio deleted")
@get_transcript
--- a/server/reflector/pipelines/main_multitrack_pipeline.backup.py
+++ b/server/reflector/pipelines/main_multitrack_pipeline.backup.py
@@ -0,0 +1,510 @@
 import asyncio
 import io
 from fractions import Fraction
 import av
 import boto3
 import structlog
 from av.audio.resampler import AudioResampler
 from celery import chain, shared_task
 from reflector.asynctask import asynctask
 from reflector.db.transcripts import (
    TranscriptStatus,
    TranscriptText,
    transcripts_controller,
 )
 from reflector.logger import logger
 from reflector.pipelines.main_file_pipeline import task_send_webhook_if_needed
 from reflector.pipelines.main_live_pipeline import (
    PipelineMainBase,
    task_cleanup_consent,
    task_pipeline_post_to_zulip,
 )
 from reflector.processors import (
    AudioFileWriterProcessor,
    TranscriptFinalSummaryProcessor,
    TranscriptFinalTitleProcessor,
    TranscriptTopicDetectorProcessor,
 )
 from reflector.processors.file_transcript import FileTranscriptInput
 from reflector.processors.file_transcript_auto import FileTranscriptAutoProcessor
 from reflector.processors.types import TitleSummary
 from reflector.processors.types import (
    Transcript as TranscriptType,
 )
 from reflector.settings import settings
 from reflector.storage import get_transcripts_storage
 class EmptyPipeline:
    """No-op pipeline object: keeps a logger, has no preferences, drops events."""
    def __init__(self, logger: structlog.BoundLogger):
        # The only state kept is the logger, exposed as ``self.logger``.
        self.logger = logger
    def get_pref(self, k, d=None):
        """Return ``d`` unchanged; ``k`` is never looked up anywhere."""
        return d
    async def emit(self, event):
        """Accept ``event`` and do nothing with it."""
        return None
 class PipelineMainMultitrack(PipelineMainBase):
    """Process multiple participant tracks for a transcript without mixing audio."""
    def __init__(self, transcript_id: str):
        """Set up the pipeline for one transcript.

        Args:
            transcript_id: Identifier passed to the base class and bound to
                this pipeline's logger.
        """
        super().__init__(transcript_id=transcript_id)
        # Bind the id (as stored by the base initializer) onto the logger.
        bound_logger = logger.bind(transcript_id=self.transcript_id)
        self.logger = bound_logger
        # Placeholder pipeline that shares this pipeline's logger.
        self.empty_pipeline = EmptyPipeline(logger=self.logger)
    async def mixdown_tracks(
        self,
        track_datas: list[bytes],
        writer: AudioFileWriterProcessor,
        offsets_seconds: list[float] | None = None,
    ) -> None:
        """
        Minimal multi-track mixdown using a PyAV filter graph (amix), no resampling.

        Every non-empty track is decoded, converted to s32/stereo, optionally
        delayed by its offset (relative to the smallest offset) and fed into
        ``amix``. Mixed frames are pushed to ``writer``. The target sample rate
        is taken from the first decodable frame; frames with a different rate
        are skipped rather than resampled.

        Args:
            track_datas: Encoded audio bytes, one entry per track. Empty
                entries are ignored.
            writer: Processor receiving each mixed frame through ``push``.
            offsets_seconds: Optional start offset per entry of
                ``track_datas`` (same indexing). Entries missing from a
                too-short list are treated as 0.0.

        Returns:
            None. The method returns early (after logging a warning) when no
            track yields a decodable audio frame.
        """
        # Discover target sample rate from first decodable frame
        target_sample_rate: int | None = None
        for data in track_datas:
            if not data:
                continue
            try:
                container = av.open(io.BytesIO(data))
                try:
                    for frame in container.decode(audio=0):
                        target_sample_rate = frame.sample_rate
                        break
                finally:
                    container.close()
            except Exception:
                # Track cannot be opened/decoded: try the next one.
                continue
            if target_sample_rate:
                break
        if not target_sample_rate:
            self.logger.warning("Mixdown skipped - no decodable audio frames found")
            return
        # Build PyAV filter graph:
        # N abuffer (s32/stereo)
        #   -> optional adelay per input (for alignment)
        #   -> amix (s32)
        #   -> aformat(s32)
        #   -> sink
        graph = av.filter.Graph()
        inputs = []
        valid_track_datas = [d for d in track_datas if d]
        # Align offsets list with the filtered inputs (skip empties). A list
        # shorter than track_datas falls back to 0.0 for the missing entries
        # instead of raising IndexError.
        input_offsets_seconds = None
        if offsets_seconds is not None:
            input_offsets_seconds = [
                offsets_seconds[i] if i < len(offsets_seconds) else 0.0
                for i, d in enumerate(track_datas)
                if d
            ]
        for idx, data in enumerate(valid_track_datas):
            args = (
                f"time_base=1/{target_sample_rate}:"
                f"sample_rate={target_sample_rate}:"
                f"sample_fmt=s32:"
                f"channel_layout=stereo"
            )
            in_ctx = graph.add("abuffer", args=args, name=f"in{idx}")
            inputs.append(in_ctx)
        if not inputs:
            self.logger.warning("Mixdown skipped - no valid inputs for graph")
            return
        mixer = graph.add("amix", args=f"inputs={len(inputs)}:normalize=0", name="mix")
        fmt = graph.add(
            "aformat",
            args=(
                f"sample_fmts=s32:channel_layouts=stereo:sample_rates={target_sample_rate}"
            ),
            name="fmt",
        )
        sink = graph.add("abuffersink", name="out")
        # Optional per-input delay before mixing
        delays_ms: list[int] = []
        if input_offsets_seconds is not None:
            base = min(input_offsets_seconds) if input_offsets_seconds else 0.0
            delays_ms = [
                max(0, int(round((o - base) * 1000))) for o in input_offsets_seconds
            ]
        else:
            delays_ms = [0 for _ in inputs]
        for idx, in_ctx in enumerate(inputs):
            delay_ms = delays_ms[idx] if idx < len(delays_ms) else 0
            if delay_ms > 0:
                # adelay requires one value per channel; use same for stereo
                adelay = graph.add(
                    "adelay",
                    args=f"delays={delay_ms}|{delay_ms}:all=1",
                    name=f"delay{idx}",
                )
                in_ctx.link_to(adelay)
                adelay.link_to(mixer, 0, idx)
            else:
                in_ctx.link_to(mixer, 0, idx)
        mixer.link_to(fmt)
        fmt.link_to(sink)
        graph.configure()

        async def drain_sink() -> None:
            # Forward every mixed frame the sink can deliver right now.
            while True:
                try:
                    mixed = sink.pull()
                except Exception:
                    # NOTE(review): presumably PyAV signalling "no frame
                    # available yet" or end of stream - confirm, and narrow
                    # the exception type if so.
                    break
                mixed.sample_rate = target_sample_rate
                mixed.time_base = Fraction(1, target_sample_rate)
                await writer.push(mixed)

        # Containers stay index-aligned with `inputs`: a track that fails to
        # open keeps a None placeholder so that decoder i always feeds graph
        # input i (and therefore gets the delay computed for that track).
        containers: list = []
        try:
            # Open containers for decoding
            for i, d in enumerate(valid_track_datas):
                try:
                    containers.append(av.open(io.BytesIO(d)))
                except Exception as e:
                    self.logger.warning(
                        "Mixdown: failed to open container", input=i, error=str(e)
                    )
                    containers.append(None)
            decoders = [
                c.decode(audio=0) if c is not None else None for c in containers
            ]
            # Inputs without a decoder never receive frames; they only get
            # the EOF marker pushed to every input below.
            active = [dec is not None for dec in decoders]
            # Per-input resamplers to enforce s32/stereo at the same rate (no resample of rate)
            resamplers = [
                AudioResampler(format="s32", layout="stereo", rate=target_sample_rate)
                for _ in decoders
            ]
            # Round-robin feed frames into graph, pull mixed frames as they become available
            while any(active):
                for i, dec in enumerate(decoders):
                    if not active[i]:
                        continue
                    try:
                        frame = next(dec)
                    except StopIteration:
                        active[i] = False
                        continue
                    # Enforce same sample rate; convert format/layout to s32/stereo (no resample)
                    if frame.sample_rate != target_sample_rate:
                        # Skip frames with differing rate
                        continue
                    out_frames = resamplers[i].resample(frame) or []
                    for rf in out_frames:
                        rf.sample_rate = target_sample_rate
                        rf.time_base = Fraction(1, target_sample_rate)
                        inputs[i].push(rf)
                    # Drain available mixed frames
                    await drain_sink()
            # Signal EOF to inputs and drain remaining
            for in_ctx in inputs:
                in_ctx.push(None)
            await drain_sink()
        finally:
            for c in containers:
                if c is not None:
                    c.close()
    async def set_status(self, transcript_id: str, status: TranscriptStatus):
        """Set the transcript's status while holding ``self.lock_transaction()``.

        Args:
            transcript_id: Identifier of the transcript to update.
            status: New status value handed to the controller.

        Returns:
            Whatever ``transcripts_controller.set_status`` returns.
        """
        async with self.lock_transaction():
            outcome = await transcripts_controller.set_status(transcript_id, status)
            return outcome
    async def process(self, bucket_name: str, track_keys: list[str]):
        transcript = await self.get_transcript()
        s3 = boto3.client(
            "s3",
            region_name=settings.RECORDING_STORAGE_AWS_REGION,
            aws_access_key_id=settings.RECORDING_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.RECORDING_STORAGE_AWS_SECRET_ACCESS_KEY,
        )
        storage = get_transcripts_storage()
        # Pre-download bytes for all tracks for mixing and transcription
        track_datas: list[bytes] = []
        for key in track_keys:
            try:
                obj = s3.get_object(Bucket=bucket_name, Key=key)
                track_datas.append(obj["Body"].read())
            except Exception as e:
                self.logger.warning(
                    "Skipping track - cannot read S3 object", key=key, error=str(e)
                )
                track_datas.append(b"")
        # Extract offsets from Daily.co filename timestamps
        # Format: {rec_start_ts}-{uuid}-{media_type}-{track_start_ts}.{ext}
        # Example: 1760988935484-uuid-cam-audio-1760988935922
        import re
        offsets_seconds: list[float] = []
        recording_start_ts: int | None = None
        for key in track_keys:
            # Parse Daily.co raw-tracks filename pattern
            match = re.search(r"(\d+)-([0-9a-f-]{36})-(cam-audio)-(\d+)", key)
            if not match:
                self.logger.warning(
                    "Track key doesn't match Daily.co pattern, using 0.0 offset",
                    key=key,
                )
                offsets_seconds.append(0.0)
                continue
            rec_start_ts = int(match.group(1))
            track_start_ts = int(match.group(4))
            # Validate all tracks belong to same recording
            if recording_start_ts is None:
                recording_start_ts = rec_start_ts
            elif rec_start_ts != recording_start_ts:
                self.logger.error(
                    "Track belongs to different recording",
                    key=key,
                    expected_start=recording_start_ts,
                    got_start=rec_start_ts,
                )
                offsets_seconds.append(0.0)
                continue
            # Calculate offset in seconds
            offset_ms = track_start_ts - rec_start_ts
            offset_s = offset_ms / 1000.0
            self.logger.info(
                "Parsed track offset from filename",
                key=key,
                recording_start=rec_start_ts,
                track_start=track_start_ts,
                offset_seconds=offset_s,
            )
            offsets_seconds.append(max(0.0, offset_s))
        # Mixdown all available tracks into transcript.audio_mp3_filename, preserving sample rate
        try:
            mp3_writer = AudioFileWriterProcessor(
                path=str(transcript.audio_mp3_filename)
            )
            await self.mixdown_tracks(track_datas, mp3_writer, offsets_seconds)
            await mp3_writer.flush()
        except Exception as e:
            self.logger.error("Mixdown failed", error=str(e))
        speaker_transcripts: list[TranscriptType] = []
        for idx, key in enumerate(track_keys):
            ext = ".mp4"
            try:
                obj = s3.get_object(Bucket=bucket_name, Key=key)
                data = obj["Body"].read()
            except Exception as e:
                self.logger.error(
                    "Skipping track - cannot read S3 object", key=key, error=str(e)
                )
                continue
            storage_path = f"file_pipeline/{transcript.id}/tracks/track_{idx}{ext}"
            try:
                await storage.put_file(storage_path, data)
                audio_url = await storage.get_file_url(storage_path)
            except Exception as e:
                self.logger.error(
                    "Skipping track - cannot upload to storage", key=key, error=str(e)
                )
                continue
            try:
                t = await self.transcribe_file(audio_url, transcript.source_language)
            except Exception as e:
                self.logger.error(
                    "Transcription via default backend failed, trying local whisper",
                    key=key,
                    url=audio_url,
                    error=str(e),
                )
                try:
                    fallback = FileTranscriptAutoProcessor(name="whisper")
                    result = None
                    async def capture_result(r):
                        nonlocal result
                        result = r
                    fallback.on(capture_result)
                    await fallback.push(
                        FileTranscriptInput(
                            audio_url=audio_url, language=transcript.source_language
                        )
                    )
                    await fallback.flush()
                    if not result:
                        raise Exception("No transcript captured in fallback")
                    t = result
                except Exception as e2:
                    self.logger.error(
                        "Skipping track - transcription failed after fallback",
                        key=key,
                        url=audio_url,
                        error=str(e2),
                    )
                    continue
            if not t.words:
                continue
            # Shift word timestamps by the track's offset so all are relative to 00:00
            track_offset = offsets_seconds[idx] if idx < len(offsets_seconds) else 0.0
            for w in t.words:
                try:
                    if hasattr(w, "start") and w.start is not None:
                        w.start = float(w.start) + track_offset
                    if hasattr(w, "end") and w.end is not None:
                        w.end = float(w.end) + track_offset
                except Exception:
                    pass
                w.speaker = idx
            speaker_transcripts.append(t)
        if not speaker_transcripts:
            raise Exception("No valid track transcriptions")
        merged_words = []
        for t in speaker_transcripts:
            merged_words.extend(t.words)
        merged_words.sort(key=lambda w: w.start)
        merged_transcript = TranscriptType(words=merged_words, translation=None)
        await transcripts_controller.append_event(
            transcript,
            event="TRANSCRIPT",
            data=TranscriptText(
                text=merged_transcript.text, translation=merged_transcript.translation
            ),
        )
        topics = await self.detect_topics(merged_transcript, transcript.target_language)
        await asyncio.gather(
            self.generate_title(topics),
            self.generate_summaries(topics),
            return_exceptions=False,
        )
        await self.set_status(transcript.id, "ended")
    async def transcribe_file(self, audio_url: str, language: str) -> TranscriptType:
        """Transcribe one audio file and return what the processor emits.

        A ``FileTranscriptAutoProcessor`` is given a single
        ``FileTranscriptInput`` built from ``audio_url`` and ``language``.
        Whatever it passes to the registered callback is kept; if the callback
        fires more than once, the last value wins.

        Raises:
            ValueError: if nothing truthy was captured after the flush.
        """
        # One-element slot that the callback below writes into.
        captured: list[TranscriptType | None] = [None]

        async def remember(emitted):
            captured[0] = emitted

        transcriber = FileTranscriptAutoProcessor()
        request = FileTranscriptInput(audio_url=audio_url, language=language)
        transcriber.on(remember)
        await transcriber.push(request)
        await transcriber.flush()
        outcome = captured[0]
        if outcome:
            return outcome
        raise ValueError("No transcript captured")
    async def detect_topics(
        self, transcript: TranscriptType, target_language: str
    ) -> list[TitleSummary]:
        """Run topic detection over the transcript in fixed-size word windows.

        The transcript's words are sliced into consecutive windows of 300.
        Each window is wrapped in its own ``TranscriptType`` (carrying the
        source transcript's ``translation``) and pushed to a
        ``TranscriptTopicDetectorProcessor``. Every topic the detector reports
        is collected and also forwarded to ``self.on_topic``.

        ``target_language`` is accepted but not read by this method.

        Returns:
            The collected topics, in the order the detector reported them.
        """
        words_per_window = 300
        collected: list[TitleSummary] = []

        async def relay_topic(topic: TitleSummary):
            # Record locally first, then hand over to the pipeline's handler.
            collected.append(topic)
            return await self.on_topic(topic)

        detector = TranscriptTopicDetectorProcessor(callback=relay_topic)
        detector.set_pipeline(self.empty_pipeline)
        window_starts = range(0, len(transcript.words), words_per_window)
        for begin in window_starts:
            window = transcript.words[begin : begin + words_per_window]
            if window:
                piece = TranscriptType(
                    words=window, translation=transcript.translation
                )
                await detector.push(piece)
        # Flush so the detector can emit any topic it is still holding.
        await detector.flush()
        return collected
    async def generate_title(self, topics: list[TitleSummary]):
        """Feed the detected topics to the final-title processor.

        With an empty ``topics`` list this only logs a warning. Otherwise a
        ``TranscriptFinalTitleProcessor`` reporting to ``self.on_title``
        receives every topic in order and is then flushed.
        """
        if topics:
            titler = TranscriptFinalTitleProcessor(callback=self.on_title)
            titler.set_pipeline(self.empty_pipeline)
            for entry in topics:
                await titler.push(entry)
            await titler.flush()
        else:
            self.logger.warning("No topics for title generation")
    async def generate_summaries(self, topics: list[TitleSummary]):
        """Feed the detected topics to the final-summary processor.

        With an empty ``topics`` list this only logs a warning. Otherwise the
        current transcript is loaded via ``self.get_transcript()`` and a
        ``TranscriptFinalSummaryProcessor`` is built around it. That processor
        reports to ``self.on_long_summary`` and ``self.on_short_summary``,
        receives every topic in order, and is then flushed.
        """
        if topics:
            current = await self.get_transcript()
            summarizer = TranscriptFinalSummaryProcessor(
                transcript=current,
                callback=self.on_long_summary,
                on_short_summary=self.on_short_summary,
            )
            summarizer.set_pipeline(self.empty_pipeline)
            for entry in topics:
                await summarizer.push(entry)
            await summarizer.flush()
        else:
            self.logger.warning("No topics for summary generation")
@shared_task
@asynctask
async def task_pipeline_multitrack_process(
    *, transcript_id: str, bucket_name: str, track_keys: list[str]
):
    """Task entry point: run the multitrack pipeline for one transcript.

    Sets the transcript status to "processing" and runs
    ``PipelineMainMultitrack.process`` on the given bucket and track keys.
    If either step raises, the status is set to "error" and the exception is
    re-raised. On success the follow-up chain is queued: consent cleanup,
    Zulip post, then webhook delivery.
    """
    runner = PipelineMainMultitrack(transcript_id=transcript_id)
    try:
        await runner.set_status(transcript_id, "processing")
        await runner.process(bucket_name, track_keys)
    except Exception:
        await runner.set_status(transcript_id, "error")
        raise
    # Follow-up steps run in this order; each gets only the transcript id.
    follow_up_tasks = (
        task_cleanup_consent,
        task_pipeline_post_to_zulip,
        task_send_webhook_if_needed,
    )
    signatures = [step.si(transcript_id=transcript_id) for step in follow_up_tasks]
    chain(*signatures).delay()
--- a/server/reflector/pipelines/main_multitrack_pipeline.py
+++ b/server/reflector/pipelines/main_multitrack_pipeline.py
@@ -1,21 +1,20 @@
 import asyncio
-import tempfile
+import io
-from pathlib import Path
+from fractions import Fraction
 import av
 import boto3
 import structlog
 from av.audio.resampler import AudioResampler
 from celery import chain, shared_task
 from reflector.asynctask import asynctask
 from reflector.dailyco_api import MeetingParticipantsResponse
 from reflector.db.transcripts import (
    Transcript,
    TranscriptParticipant,
    TranscriptStatus,
    TranscriptWaveform,
    transcripts_controller,
 )
 from reflector.logger import logger
 from reflector.pipelines import topic_processing
 from reflector.pipelines.main_file_pipeline import task_send_webhook_if_needed
 from reflector.pipelines.main_live_pipeline import (
    PipelineMainBase,
@@ -23,192 +22,348 @@ from reflector.pipelines.main_live_pipeline import (
    task_cleanup_consent,
    task_pipeline_post_to_zulip,
 )
-from reflector.pipelines.transcription_helpers import transcribe_file_with_processor
+from reflector.processors import (
-from reflector.processors import AudioFileWriterProcessor
+    AudioFileWriterProcessor,
    TranscriptFinalSummaryProcessor,
    TranscriptFinalTitleProcessor,
    TranscriptTopicDetectorProcessor,
 )
 from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
 from reflector.processors.file_transcript import FileTranscriptInput
 from reflector.processors.file_transcript_auto import FileTranscriptAutoProcessor
 from reflector.processors.types import TitleSummary
-from reflector.processors.types import Transcript as TranscriptType
+from reflector.processors.types import (
-from reflector.storage import Storage, get_transcripts_storage
+    Transcript as TranscriptType,
 from reflector.utils.audio_constants import PRESIGNED_URL_EXPIRATION_SECONDS
 from reflector.utils.audio_mixdown import (
    detect_sample_rate_from_tracks,
    mixdown_tracks_pyav,
 )
-from reflector.utils.audio_padding import (
+from reflector.settings import settings
-    apply_audio_padding_to_file,
+from reflector.storage import get_transcripts_storage
-    extract_stream_start_time_from_container,
+
-)
+
-from reflector.utils.daily import (
+class EmptyPipeline:
-    filter_cam_audio_tracks,
+    def __init__(self, logger: structlog.BoundLogger):
-    parse_daily_recording_filename,
+        self.logger = logger
-)
+
-from reflector.utils.string import NonEmptyString
+    def get_pref(self, k, d=None):
-from reflector.video_platforms.factory import create_platform_client
+        return d
    async def emit(self, event):
        pass
 class PipelineMainMultitrack(PipelineMainBase):
    """Process multiple participant tracks for a transcript without mixing audio."""
    def __init__(self, transcript_id: str):
        super().__init__(transcript_id=transcript_id)
        self.logger = logger.bind(transcript_id=self.transcript_id)
-        self.empty_pipeline = topic_processing.EmptyPipeline(logger=self.logger)
+        self.empty_pipeline = EmptyPipeline(logger=self.logger)
    async def pad_track_for_transcription(
        self,
-        track_url: NonEmptyString,
+        track_data: bytes,
        track_idx: int,
-        storage: Storage,
+        storage,
-    ) -> NonEmptyString:
+    ) -> tuple[bytes, str]:
        """
        Pad a single track with silence based on stream metadata start_time.
-        Downloads from S3 presigned URL, processes via PyAV using tempfile, uploads to S3.
+        This ensures Whisper timestamps will be relative to recording start.
-        Returns presigned URL of padded track (or original URL if no padding needed).
+        Uses ffmpeg subprocess approach proven to work with python-raw-tracks-align.
-        Memory usage:
+        Returns: (padded_data, storage_url)
        - Pattern: fixed_overhead(2-5MB) for PyAV codec/filters
        - PyAV streams input efficiently (no full download, verified)
        - Output written to tempfile (disk-based, not memory)
        - Upload streams from file handle (boto3 chunks, typically 5-10MB)
        Daily.co raw-tracks timing - Two approaches:
            CURRENT APPROACH (PyAV metadata):
            The WebM stream.start_time field encodes MEETING-RELATIVE timing:
            - t=0: When Daily.co recording started (first participant joined)
            - start_time=8.13s: This participant's track began 8.13s after recording started
            - Purpose: Enables track alignment without external manifest files
            This is NOT:
            - Stream-internal offset (first packet timestamp relative to stream start)
            - Absolute/wall-clock time
            - Recording duration
            ALTERNATIVE APPROACH (filename parsing):
            Daily.co filenames contain Unix timestamps (milliseconds):
            Format: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}.webm
            Example: 1760988935484-52f7f48b-fbab-431f-9a50-87b9abfc8255-cam-audio-1760988935922.webm
            Can calculate offset: (track_start_ts - recording_start_ts) / 1000
            - Track 0: (1760988935922 - 1760988935484) / 1000 = 0.438s
            - Track 1: (1760988943823 - 1760988935484) / 1000 = 8.339s
            TIME DIFFERENCE: PyAV metadata vs filename timestamps differ by ~209ms:
            - Track 0: filename=438ms, metadata=229ms (diff: 209ms)
            - Track 1: filename=8339ms, metadata=8130ms (diff: 209ms)
            Consistent delta suggests network/encoding delay. PyAV metadata is ground truth
            (represents when audio stream actually started vs when file upload initiated).
            Example with 2 participants:
                Track A: start_time=0.2s → Joined 200ms after recording began
                Track B: start_time=8.1s → Joined 8.1 seconds later
                After padding:
                    Track A: [0.2s silence] + [speech...]
                    Track B: [8.1s silence] + [speech...]
                Whisper transcription timestamps are now synchronized:
                    Track A word at 5.0s → happened at meeting t=5.0s
                    Track B word at 10.0s → happened at meeting t=10.0s
                Merging just sorts by timestamp - no offset calculation needed.
        Padding coincidentally involves re-encoding. It's important when we work with Daily.co + Whisper.
        This is because Daily.co returns recordings with skipped frames e.g. when microphone muted.
        Daily.co doesn't understand those frames and ignores them, causing timestamp issues in transcription.
        Re-encoding restores those frames. We do padding and re-encoding together just because it's convenient and more performant:
        we need padded values for mix mp3 anyways
        """
        import json
        import math
        import subprocess
        import tempfile
        if not track_data:
            return b"", ""
        transcript = await self.get_transcript()
        # Create temp files for ffmpeg processing
        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as input_file:
            input_file.write(track_data)
            input_file_path = input_file.name
        output_file_path = input_file_path.replace(".webm", "_padded.webm")
        try:
-            # PyAV streams input from S3 URL efficiently (2-5MB fixed overhead for codec/filters)
+            # Get stream metadata using ffprobe
-            with av.open(track_url) as in_container:
+            ffprobe_cmd = [
-                start_time_seconds = extract_stream_start_time_from_container(
+                "ffprobe",
-                    in_container, track_idx, logger=self.logger
+                "-v",
                "error",
                "-show_entries",
                "stream=start_time",
                "-of",
                "json",
                input_file_path,
            ]
            result = subprocess.run(
                ffprobe_cmd, capture_output=True, text=True, check=True
            )
            metadata = json.loads(result.stdout)
            # Extract start_time from stream metadata
            start_time_seconds = 0.0
            if metadata.get("streams") and len(metadata["streams"]) > 0:
                start_time_str = metadata["streams"][0].get("start_time", "0")
                start_time_seconds = float(start_time_str)
            self.logger.info(
                f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s",
                track_idx=track_idx,
            )
            # If no padding needed, use original
            if start_time_seconds <= 0:
-                    self.logger.info(
+                storage_path = f"file_pipeline/{transcript.id}/tracks/original_track_{track_idx}.webm"
-                        f"Track {track_idx} requires no padding (start_time={start_time_seconds}s)",
+                await storage.put_file(storage_path, track_data)
-                        track_idx=track_idx,
+                url = await storage.get_file_url(storage_path)
-                    )
+                return track_data, url
                    return track_url
-                # Use tempfile instead of BytesIO for better memory efficiency
+            # Calculate delay in milliseconds
-                # Reduces peak memory usage during encoding/upload
+            delay_ms = math.floor(start_time_seconds * 1000)
-                with tempfile.NamedTemporaryFile(
+
-                    suffix=".webm", delete=False
+            # Run ffmpeg to pad the audio while maintaining WebM/Opus format for Modal compatibility
-                ) as temp_file:
+            # ffmpeg quirk: aresample needs to come before adelay in the filter chain
-                    temp_path = temp_file.name
+            ffmpeg_cmd = [
                "ffmpeg",
                "-hide_banner",
                "-loglevel",
                "error",
                "-y",  # overwrite output
                "-i",
                input_file_path,
                "-af",
                f"aresample=async=1,adelay={delay_ms}:all=true",
                "-c:a",
                "libopus",  # Keep Opus codec for Modal compatibility
                "-b:a",
                "128k",  # Standard bitrate for Opus
                output_file_path,
            ]
            self.logger.info(
                f"Padding track {track_idx} with {delay_ms}ms delay using ffmpeg",
                track_idx=track_idx,
                delay_ms=delay_ms,
                command=" ".join(ffmpeg_cmd),
            )
            result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
            if result.returncode != 0:
                self.logger.error(
                    f"ffmpeg padding failed for track {track_idx}",
                    track_idx=track_idx,
                    stderr=result.stderr,
                    returncode=result.returncode,
                )
                raise Exception(f"ffmpeg padding failed: {result.stderr}")
            # Read the padded output
            with open(output_file_path, "rb") as f:
                padded_data = f.read()
            # Store padded track
            storage_path = (
                f"file_pipeline/{transcript.id}/tracks/padded_track_{track_idx}.webm"
            )
            await storage.put_file(storage_path, padded_data)
            padded_url = await storage.get_file_url(storage_path)
            self.logger.info(
                f"Successfully padded track {track_idx} with {start_time_seconds:.3f}s offset, stored at {storage_path}",
                track_idx=track_idx,
                delay_ms=delay_ms,
                padded_url=padded_url,
                padded_size=len(padded_data),
            )
            return padded_data, padded_url
        finally:
            # Clean up temp files
            import os
            try:
-                    apply_audio_padding_to_file(
+                os.unlink(input_file_path)
-                        in_container,
+            except:
-                        temp_path,
+                pass
-                        start_time_seconds,
+            try:
-                        track_idx,
+                os.unlink(output_file_path)
-                        logger=self.logger,
+            except:
-                    )
+                pass
                    storage_path = (
                        f"file_pipeline/{transcript.id}/tracks/padded_{track_idx}.webm"
                    )
                    # Upload using file handle for streaming
                    with open(temp_path, "rb") as padded_file:
                        await storage.put_file(storage_path, padded_file)
                finally:
                    Path(temp_path).unlink(missing_ok=True)
                padded_url = await storage.get_file_url(
                    storage_path,
                    operation="get_object",
                    expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
                )
                self.logger.info(
                    f"Successfully padded track {track_idx}",
                    track_idx=track_idx,
                    start_time_seconds=start_time_seconds,
                    padded_url=padded_url,
                )
                return padded_url
        except Exception as e:
            self.logger.error(
                f"Failed to process track {track_idx}",
                track_idx=track_idx,
                url=track_url,
                error=str(e),
                exc_info=True,
            )
            raise Exception(
                f"Track {track_idx} padding failed - transcript would have incorrect timestamps"
            ) from e
    async def mixdown_tracks(
        self,
-        track_urls: list[str],
+        track_datas: list[bytes],
        writer: AudioFileWriterProcessor,
        offsets_seconds: list[float] | None = None,
    ) -> None:
-        """Multi-track mixdown using PyAV filter graph (amix), reading from S3 presigned URLs."""
+        """
-        target_sample_rate = detect_sample_rate_from_tracks(
+        Minimal multi-track mixdown using a PyAV filter graph (amix), no resampling.
-            track_urls, logger=self.logger
+        """
        )
        if not target_sample_rate:
            self.logger.error("Mixdown failed - no decodable audio frames found")
            raise Exception("Mixdown failed: No decodable audio frames in any track")
-        await mixdown_tracks_pyav(
+        # Discover target sample rate from first decodable frame
-            track_urls,
+        target_sample_rate: int | None = None
-            writer,
+        for data in track_datas:
-            target_sample_rate,
+            if not data:
-            offsets_seconds=offsets_seconds,
+                continue
-            logger=self.logger,
+            try:
                container = av.open(io.BytesIO(data))
                try:
                    for frame in container.decode(audio=0):
                        target_sample_rate = frame.sample_rate
                        break
                finally:
                    container.close()
            except Exception:
                continue
            if target_sample_rate:
                break
        if not target_sample_rate:
            self.logger.warning("Mixdown skipped - no decodable audio frames found")
            return
        # Build PyAV filter graph:
        # N abuffer (s32/stereo)
        #   -> optional adelay per input (for alignment)
        #   -> amix (s32)
        #   -> aformat(s16)
        #   -> sink
        graph = av.filter.Graph()
        inputs = []
        valid_track_datas = [d for d in track_datas if d]
        # Align offsets list with the filtered inputs (skip empties)
        input_offsets_seconds = None
        if offsets_seconds is not None:
            input_offsets_seconds = [
                offsets_seconds[i] for i, d in enumerate(track_datas) if d
            ]
        for idx, data in enumerate(valid_track_datas):
            args = (
                f"time_base=1/{target_sample_rate}:"
                f"sample_rate={target_sample_rate}:"
                f"sample_fmt=s32:"
                f"channel_layout=stereo"
            )
            in_ctx = graph.add("abuffer", args=args, name=f"in{idx}")
            inputs.append(in_ctx)
        if not inputs:
            self.logger.warning("Mixdown skipped - no valid inputs for graph")
            return
        mixer = graph.add("amix", args=f"inputs={len(inputs)}:normalize=0", name="mix")
        fmt = graph.add(
            "aformat",
            args=(
                f"sample_fmts=s32:channel_layouts=stereo:sample_rates={target_sample_rate}"
            ),
            name="fmt",
        )
        sink = graph.add("abuffersink", name="out")
        # Optional per-input delay before mixing
        delays_ms: list[int] = []
        if input_offsets_seconds is not None:
            base = min(input_offsets_seconds) if input_offsets_seconds else 0.0
            delays_ms = [
                max(0, int(round((o - base) * 1000))) for o in input_offsets_seconds
            ]
        else:
            delays_ms = [0 for _ in inputs]
        for idx, in_ctx in enumerate(inputs):
            delay_ms = delays_ms[idx] if idx < len(delays_ms) else 0
            if delay_ms > 0:
                # adelay requires one value per channel; use same for stereo
                adelay = graph.add(
                    "adelay",
                    args=f"delays={delay_ms}|{delay_ms}:all=1",
                    name=f"delay{idx}",
                )
                in_ctx.link_to(adelay)
                adelay.link_to(mixer, 0, idx)
            else:
                in_ctx.link_to(mixer, 0, idx)
        mixer.link_to(fmt)
        fmt.link_to(sink)
        graph.configure()
        # Open containers for decoding
        containers = []
        for i, d in enumerate(valid_track_datas):
            try:
                c = av.open(io.BytesIO(d))
                containers.append(c)
            except Exception as e:
                self.logger.warning(
                    "Mixdown: failed to open container", input=i, error=str(e)
                )
                containers.append(None)
        # Filter out Nones for decoders
        containers = [c for c in containers if c is not None]
        decoders = [c.decode(audio=0) for c in containers]
        active = [True] * len(decoders)
        # Per-input resamplers to enforce s32/stereo at the same rate (no resample of rate)
        resamplers = [
            AudioResampler(format="s32", layout="stereo", rate=target_sample_rate)
            for _ in decoders
        ]
        try:
            # Round-robin feed frames into graph, pull mixed frames as they become available
            while any(active):
                for i, (dec, is_active) in enumerate(zip(decoders, active)):
                    if not is_active:
                        continue
                    try:
                        frame = next(dec)
                    except StopIteration:
                        active[i] = False
                        continue
                    # Enforce same sample rate; convert format/layout to s16/stereo (no resample)
                    if frame.sample_rate != target_sample_rate:
                        # Skip frames with differing rate
                        continue
                    out_frames = resamplers[i].resample(frame) or []
                    for rf in out_frames:
                        rf.sample_rate = target_sample_rate
                        rf.time_base = Fraction(1, target_sample_rate)
                        inputs[i].push(rf)
                    # Drain available mixed frames
                    while True:
                        try:
                            mixed = sink.pull()
                        except Exception:
                            break
                        mixed.sample_rate = target_sample_rate
                        mixed.time_base = Fraction(1, target_sample_rate)
                        await writer.push(mixed)
            # Signal EOF to inputs and drain remaining
            for in_ctx in inputs:
                in_ctx.push(None)
            while True:
                try:
                    mixed = sink.pull()
                except Exception:
                    break
                mixed.sample_rate = target_sample_rate
                mixed.time_base = Fraction(1, target_sample_rate)
                await writer.push(mixed)
        finally:
            for c in containers:
                c.close()
    @broadcast_to_sockets
    async def set_status(self, transcript_id: str, status: TranscriptStatus):
@@ -223,163 +378,87 @@ class PipelineMainMultitrack(PipelineMainBase):
                transcript=transcript, event="WAVEFORM", data=waveform
            )
    async def update_participants_from_daily(
        self, transcript: Transcript, track_keys: list[str]
    ) -> None:
        """Update transcript participants with user_id and names from Daily.co API."""
        if not transcript.recording_id:
            return
        try:
            async with create_platform_client("daily") as daily_client:
                id_to_name = {}
                id_to_user_id = {}
                try:
                    rec_details = await daily_client.get_recording(
                        transcript.recording_id
                    )
                    mtg_session_id = rec_details.mtgSessionId
                    if mtg_session_id:
                        try:
                            payload: MeetingParticipantsResponse = (
                                await daily_client.get_meeting_participants(
                                    mtg_session_id
                                )
                            )
                            for p in payload.data:
                                pid = p.participant_id
                                name = p.user_name
                                user_id = p.user_id
                                if name:
                                    id_to_name[pid] = name
                                if user_id:
                                    id_to_user_id[pid] = user_id
                        except Exception as e:
                            self.logger.warning(
                                "Failed to fetch Daily meeting participants",
                                error=str(e),
                                mtg_session_id=mtg_session_id,
                                exc_info=True,
                            )
                    else:
                        self.logger.warning(
                            "No mtgSessionId found for recording; participant names may be generic",
                            recording_id=transcript.recording_id,
                        )
                except Exception as e:
                    self.logger.warning(
                        "Failed to fetch Daily recording details",
                        error=str(e),
                        recording_id=transcript.recording_id,
                        exc_info=True,
                    )
                    return
                cam_audio_keys = filter_cam_audio_tracks(track_keys)
                for idx, key in enumerate(cam_audio_keys):
                    try:
                        parsed = parse_daily_recording_filename(key)
                        participant_id = parsed.participant_id
                    except ValueError as e:
                        self.logger.error(
                            "Failed to parse Daily recording filename",
                            error=str(e),
                            key=key,
                            exc_info=True,
                        )
                        continue
                    default_name = f"Speaker {idx}"
                    name = id_to_name.get(participant_id, default_name)
                    user_id = id_to_user_id.get(participant_id)
                    participant = TranscriptParticipant(
                        id=participant_id, speaker=idx, name=name, user_id=user_id
                    )
                    await transcripts_controller.upsert_participant(
                        transcript, participant
                    )
        except Exception as e:
            self.logger.warning(
                "Failed to map participant names", error=str(e), exc_info=True
            )
    async def process(self, bucket_name: str, track_keys: list[str]):
        transcript = await self.get_transcript()
-        async with self.transaction():
+
-            await transcripts_controller.update(
+        s3 = boto3.client(
-                transcript,
+            "s3",
-                {
+            region_name=settings.RECORDING_STORAGE_AWS_REGION,
-                    "events": [],
+            aws_access_key_id=settings.RECORDING_STORAGE_AWS_ACCESS_KEY_ID,
-                    "topics": [],
+            aws_secret_access_key=settings.RECORDING_STORAGE_AWS_SECRET_ACCESS_KEY,
                    "participants": [],
                },
        )
-        await self.update_participants_from_daily(transcript, track_keys)
+        storage = get_transcripts_storage()
-        source_storage = get_transcripts_storage()
+        # Pre-download bytes for all tracks for mixing and transcription
-        transcript_storage = source_storage
+        track_datas: list[bytes] = []
        track_urls: list[str] = []
        for key in track_keys:
-            url = await source_storage.get_file_url(
+            try:
-                key,
+                obj = s3.get_object(Bucket=bucket_name, Key=key)
-                operation="get_object",
+                track_datas.append(obj["Body"].read())
-                expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
+            except Exception as e:
-                bucket=bucket_name,
+                self.logger.warning(
-            )
+                    "Skipping track - cannot read S3 object", key=key, error=str(e)
            track_urls.append(url)
            self.logger.info(
                f"Generated presigned URL for track from {bucket_name}",
                key=key,
                )
                track_datas.append(b"")
-        created_padded_files = set()
+        # PAD TRACKS FIRST - this creates full-length tracks with correct timeline
        padded_track_datas: list[bytes] = []
        padded_track_urls: list[str] = []
-        for idx, url in enumerate(track_urls):
+        for idx, data in enumerate(track_datas):
-            padded_url = await self.pad_track_for_transcription(
+            if not data:
-                url, idx, transcript_storage
+                padded_track_datas.append(b"")
-            )
+                padded_track_urls.append("")
-            padded_track_urls.append(padded_url)
+                continue
            if padded_url != url:
                storage_path = f"file_pipeline/{transcript.id}/tracks/padded_{idx}.webm"
                created_padded_files.add(storage_path)
            self.logger.info(f"Track {idx} processed, padded URL: {padded_url}")
            padded_data, padded_url = await self.pad_track_for_transcription(
                data, idx, storage
            )
            padded_track_datas.append(padded_data)
            padded_track_urls.append(padded_url)
            self.logger.info(f"Padded track {idx} for transcription: {padded_url}")
        # Mixdown PADDED tracks (already aligned with timeline) into transcript.audio_mp3_filename
        try:
            # Ensure data directory exists
            transcript.data_path.mkdir(parents=True, exist_ok=True)
            mp3_writer = AudioFileWriterProcessor(
                path=str(transcript.audio_mp3_filename),
                on_duration=self.on_duration,
            )
-        await self.mixdown_tracks(padded_track_urls, mp3_writer, offsets_seconds=None)
+            # Use PADDED tracks with NO additional offsets (already aligned by padding)
            await self.mixdown_tracks(
                padded_track_datas, mp3_writer, offsets_seconds=None
            )
            await mp3_writer.flush()
-        if not transcript.audio_mp3_filename.exists():
+            # Upload the mixed audio to S3 for web playback
-            raise Exception(
+            if transcript.audio_mp3_filename.exists():
-                "Mixdown failed - no MP3 file generated. Cannot proceed without playable audio."
+                mp3_data = transcript.audio_mp3_filename.read_bytes()
            )
                storage_path = f"{transcript.id}/audio.mp3"
-        # Use file handle streaming to avoid loading entire MP3 into memory
+                await storage.put_file(storage_path, mp3_data)
-        mp3_size = transcript.audio_mp3_filename.stat().st_size
+                mp3_url = await storage.get_file_url(storage_path)
        with open(transcript.audio_mp3_filename, "rb") as mp3_file:
            await transcript_storage.put_file(storage_path, mp3_file)
        mp3_url = await transcript_storage.get_file_url(storage_path)
-        await transcripts_controller.update(transcript, {"audio_location": "storage"})
+                # Update transcript to indicate audio is in storage
                await transcripts_controller.update(
                    transcript, {"audio_location": "storage"}
                )
                self.logger.info(
                    f"Uploaded mixed audio to storage",
                    storage_path=storage_path,
-            size=mp3_size,
+                    size=len(mp3_data),
                    url=mp3_url,
                )
            else:
                self.logger.warning("Mixdown file does not exist after processing")
        except Exception as e:
            self.logger.error("Mixdown failed", error=str(e), exc_info=True)
        # Generate waveform from the mixed audio file
        if transcript.audio_mp3_filename.exists():
            try:
                self.logger.info("Generating waveform from mixed audio")
                waveform_processor = AudioWaveformProcessor(
                    audio_path=transcript.audio_mp3_filename,
@@ -389,18 +468,60 @@ class PipelineMainMultitrack(PipelineMainBase):
                waveform_processor.set_pipeline(self.empty_pipeline)
                await waveform_processor.flush()
                self.logger.info("Waveform generated successfully")
            except Exception as e:
                self.logger.error(
                    "Waveform generation failed", error=str(e), exc_info=True
                )
        # Transcribe PADDED tracks - timestamps will be automatically correct!
        speaker_transcripts: list[TranscriptType] = []
        for idx, padded_url in enumerate(padded_track_urls):
            if not padded_url:
                continue
            try:
                # Transcribe the PADDED track
                t = await self.transcribe_file(padded_url, transcript.source_language)
            except Exception as e:
                self.logger.error(
                    "Transcription via default backend failed, trying local whisper",
                    track_idx=idx,
                    url=padded_url,
                    error=str(e),
                )
                try:
                    fallback = FileTranscriptAutoProcessor(name="whisper")
                    result = None
                    async def capture_result(r):
                        nonlocal result
                        result = r
                    fallback.on(capture_result)
                    await fallback.push(
                        FileTranscriptInput(
                            audio_url=padded_url, language=transcript.source_language
                        )
                    )
                    await fallback.flush()
                    if not result:
                        raise Exception("No transcript captured in fallback")
                    t = result
                except Exception as e2:
                    self.logger.error(
                        "Skipping track - transcription failed after fallback",
                        track_idx=idx,
                        url=padded_url,
                        error=str(e2),
                    )
                    continue
            if not t.words:
-                self.logger.debug(f"no words in track {idx}")
+                continue
                # not skipping, it may be silence or indistinguishable mumbling
            # NO OFFSET ADJUSTMENT NEEDED!
            # Timestamps are already correct because we transcribed padded tracks
            # Just set speaker ID
            for w in t.words:
                w.speaker = idx
@@ -410,33 +531,10 @@ class PipelineMainMultitrack(PipelineMainBase):
                track_idx=idx,
            )
        valid_track_count = len([url for url in padded_track_urls if url])
        if valid_track_count > 0 and len(speaker_transcripts) != valid_track_count:
            raise Exception(
                f"Only {len(speaker_transcripts)}/{valid_track_count} tracks transcribed successfully. "
                f"All tracks must succeed to avoid incomplete transcripts."
            )
        if not speaker_transcripts:
            raise Exception("No valid track transcriptions")
-        self.logger.info(f"Cleaning up {len(created_padded_files)} temporary S3 files")
+        # Merge all words and sort by timestamp
        cleanup_tasks = []
        for storage_path in created_padded_files:
            cleanup_tasks.append(transcript_storage.delete_file(storage_path))
        if cleanup_tasks:
            cleanup_results = await asyncio.gather(
                *cleanup_tasks, return_exceptions=True
            )
            for storage_path, result in zip(created_padded_files, cleanup_results):
                if isinstance(result, Exception):
                    self.logger.warning(
                        "Failed to cleanup temporary padded track",
                        storage_path=storage_path,
                        error=str(result),
                    )
        merged_words = []
        for t in speaker_transcripts:
            merged_words.extend(t.words)
@@ -446,6 +544,7 @@ class PipelineMainMultitrack(PipelineMainBase):
        merged_transcript = TranscriptType(words=merged_words, translation=None)
        # Emit TRANSCRIPT event through the shared handler (persists and broadcasts)
        await self.on_transcript(merged_transcript)
        topics = await self.detect_topics(merged_transcript, transcript.target_language)
@@ -458,37 +557,80 @@ class PipelineMainMultitrack(PipelineMainBase):
        await self.set_status(transcript.id, "ended")
    async def transcribe_file(self, audio_url: str, language: str) -> TranscriptType:
-        return await transcribe_file_with_processor(audio_url, language)
+        processor = FileTranscriptAutoProcessor()
        input_data = FileTranscriptInput(audio_url=audio_url, language=language)
        result: TranscriptType | None = None
        async def capture_result(transcript):
            nonlocal result
            result = transcript
        processor.on(capture_result)
        await processor.push(input_data)
        await processor.flush()
        if not result:
            raise ValueError("No transcript captured")
        return result
    async def detect_topics(
        self, transcript: TranscriptType, target_language: str
    ) -> list[TitleSummary]:
-        return await topic_processing.detect_topics(
+        chunk_size = 300
-            transcript,
+        topics: list[TitleSummary] = []
-            target_language,
+
-            on_topic_callback=self.on_topic,
+        async def on_topic(topic: TitleSummary):
-            empty_pipeline=self.empty_pipeline,
+            topics.append(topic)
            return await self.on_topic(topic)
        topic_detector = TranscriptTopicDetectorProcessor(callback=on_topic)
        topic_detector.set_pipeline(self.empty_pipeline)
        for i in range(0, len(transcript.words), chunk_size):
            chunk_words = transcript.words[i : i + chunk_size]
            if not chunk_words:
                continue
            chunk_transcript = TranscriptType(
                words=chunk_words, translation=transcript.translation
            )
            await topic_detector.push(chunk_transcript)
        await topic_detector.flush()
        return topics
    async def generate_title(self, topics: list[TitleSummary]):
-        return await topic_processing.generate_title(
+        if not topics:
-            topics,
+            self.logger.warning("No topics for title generation")
-            on_title_callback=self.on_title,
+            return
-            empty_pipeline=self.empty_pipeline,
+
-            logger=self.logger,
+        processor = TranscriptFinalTitleProcessor(callback=self.on_title)
-        )
+        processor.set_pipeline(self.empty_pipeline)
        for topic in topics:
            await processor.push(topic)
        await processor.flush()
    async def generate_summaries(self, topics: list[TitleSummary]):
        if not topics:
            self.logger.warning("No topics for summary generation")
            return
        transcript = await self.get_transcript()
-        return await topic_processing.generate_summaries(
+        processor = TranscriptFinalSummaryProcessor(
-            topics,
+            transcript=transcript,
-            transcript,
+            callback=self.on_long_summary,
-            on_long_summary_callback=self.on_long_summary,
+            on_short_summary=self.on_short_summary,
            on_short_summary_callback=self.on_short_summary,
            on_action_items_callback=self.on_action_items,
            empty_pipeline=self.empty_pipeline,
            logger=self.logger,
        )
        processor.set_pipeline(self.empty_pipeline)
        for topic in topics:
            await processor.push(topic)
        await processor.flush()
@shared_task
--- a/server/reflector/pipelines/main_multitrack_pipeline_fixed.py
+++ b/server/reflector/pipelines/main_multitrack_pipeline_fixed.py
@@ -0,0 +1,629 @@
 import asyncio
 import io
 from fractions import Fraction
 import av
 import boto3
 import structlog
 from av.audio.resampler import AudioResampler
 from celery import chain, shared_task
 from reflector.asynctask import asynctask
 from reflector.db.transcripts import (
    TranscriptStatus,
    TranscriptText,
    transcripts_controller,
 )
 from reflector.logger import logger
 from reflector.pipelines.main_file_pipeline import task_send_webhook_if_needed
 from reflector.pipelines.main_live_pipeline import (
    PipelineMainBase,
    task_cleanup_consent,
    task_pipeline_post_to_zulip,
 )
 from reflector.processors import (
    AudioFileWriterProcessor,
    TranscriptFinalSummaryProcessor,
    TranscriptFinalTitleProcessor,
    TranscriptTopicDetectorProcessor,
 )
 from reflector.processors.file_transcript import FileTranscriptInput
 from reflector.processors.file_transcript_auto import FileTranscriptAutoProcessor
 from reflector.processors.types import TitleSummary
 from reflector.processors.types import (
    Transcript as TranscriptType,
 )
 from reflector.settings import settings
 from reflector.storage import get_transcripts_storage
 class EmptyPipeline:
    """Stand-in pipeline object for processors that are driven directly.
    It only carries a logger, answers every preference lookup with the
    caller's default, and discards emitted events.
    """
    def __init__(self, logger: structlog.BoundLogger):
        self.logger = logger
    def get_pref(self, k, d=None):
        """Return the fallback ``d``; no preference is stored for any ``k``."""
        fallback = d
        return fallback
    async def emit(self, event):
        """Accept ``event`` and drop it."""
        return None
 class PipelineMainMultitrack(PipelineMainBase):
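    """Process multiple participant tracks for a transcript.
    Tracks are mixed down into the transcript's MP3 file and transcribed
    one by one so that each word carries the index of its source track.
    """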
    def __init__(self, transcript_id: str):
        """Initialise the base pipeline, then attach a transcript-bound logger
        and the no-op pipeline object given to processors."""
        super().__init__(transcript_id=transcript_id)
        bound_logger = logger.bind(transcript_id=self.transcript_id)
        self.logger = bound_logger
        self.empty_pipeline = EmptyPipeline(logger=self.logger)
    async def pad_track_for_transcription(
        self,
        track_data: bytes,
        track_idx: int,
        storage,
    ) -> tuple[bytes, str]:
        """
        Pad a single track with leading silence based on stream metadata start_time.
        This ensures Whisper timestamps will be relative to recording start.
        Args:
            track_data: Encoded bytes of one recorded track (may be empty).
            track_idx: Index of the track; used in log fields and storage paths.
            storage: Object providing async ``put_file(path, data)`` and
                ``get_file_url(path)``.
        Returns: (padded_data, storage_url). Empty input yields ``(b"", "")``.
            A track whose start_time is <= 0 is stored and returned unchanged.
        """
        if not track_data:
            return b"", ""
        transcript = await self.get_transcript()
        # Get stream metadata start_time using PyAV
        container = av.open(io.BytesIO(track_data))
        try:
            audio_stream = container.streams.audio[0]
            # Extract start_time from stream metadata
            if (
                audio_stream.start_time is not None
                and audio_stream.time_base is not None
            ):
                start_time_seconds = float(
                    audio_stream.start_time * audio_stream.time_base
                )
            else:
                start_time_seconds = 0.0
            sample_rate = audio_stream.sample_rate
        finally:
            container.close()
        self.logger.info(
            f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s, sample_rate={sample_rate}",
            track_idx=track_idx,
        )
        # If no padding needed, use original
        if start_time_seconds <= 0:
            storage_path = (
                f"file_pipeline/{transcript.id}/tracks/original_track_{track_idx}.webm"
            )
            await storage.put_file(storage_path, track_data)
            url = await storage.get_file_url(storage_path)
            return track_data, url
        # Create PyAV filter graph for padding: abuffer -> adelay -> abuffersink
        graph = av.filter.Graph()
        # Input buffer
        in_args = (
            f"time_base=1/{sample_rate}:"
            f"sample_rate={sample_rate}:"
            f"sample_fmt=s16:"
            f"channel_layout=stereo"
        )
        input_buffer = graph.add("abuffer", args=in_args, name="in")
        # Add delay filter for padding (same delay on both stereo channels)
        delay_ms = int(start_time_seconds * 1000)
        delay_filter = graph.add(
            "adelay", args=f"delays={delay_ms}|{delay_ms}:all=1", name="delay"
        )
        # Output sink
        sink = graph.add("abuffersink", name="out")
        # Link filters
        input_buffer.link_to(delay_filter)
        delay_filter.link_to(sink)
        graph.configure()
        # Process audio through filter
        output_bytes = io.BytesIO()
        output_container = av.open(output_bytes, "w", format="webm")
        # Everything after opening the output container runs under try/finally
        # so the container is closed even if stream setup, re-opening the
        # input, or building the resampler fails.
        try:
            output_stream = output_container.add_stream("libopus", rate=sample_rate)
            output_stream.channels = 2
            # Reopen input for processing
            input_container = av.open(io.BytesIO(track_data))
            try:
                resampler = AudioResampler(
                    format="s16", layout="stereo", rate=sample_rate
                )
                # Process frames
                for frame in input_container.decode(audio=0):
                    # Resample to match filter requirements
                    resampled_frames = resampler.resample(frame)
                    for resampled_frame in resampled_frames:
                        # NOTE(review): pts is copied from the decoded frame while
                        # time_base is overwritten with 1/sample_rate - confirm the
                        # two agree for the recordings in use.
                        resampled_frame.pts = frame.pts
                        resampled_frame.time_base = Fraction(1, sample_rate)
                        input_buffer.push(resampled_frame)
                        # Pull from filter and encode until no frame is ready
                        while True:
                            try:
                                out_frame = sink.pull()
                                out_frame.pts = out_frame.pts if out_frame.pts else 0
                                out_frame.time_base = Fraction(1, sample_rate)
                                for packet in output_stream.encode(out_frame):
                                    output_container.mux(packet)
                            except av.BlockingIOError:
                                break
                # Flush the filter graph
                input_buffer.push(None)
                while True:
                    try:
                        out_frame = sink.pull()
                        for packet in output_stream.encode(out_frame):
                            output_container.mux(packet)
                    except (av.BlockingIOError, av.EOFError):
                        break
                # Flush encoder
                for packet in output_stream.encode(None):
                    output_container.mux(packet)
            finally:
                input_container.close()
        finally:
            # Closing the output container must precede getvalue() below
            output_container.close()
        padded_data = output_bytes.getvalue()
        # Store padded track
        storage_path = (
            f"file_pipeline/{transcript.id}/tracks/padded_track_{track_idx}.webm"
        )
        await storage.put_file(storage_path, padded_data)
        padded_url = await storage.get_file_url(storage_path)
        self.logger.info(
            f"Padded track {track_idx} with {start_time_seconds:.3f}s offset, stored at {storage_path}",
            track_idx=track_idx,
            delay_ms=delay_ms,
            padded_url=padded_url,
        )
        return padded_data, padded_url
    async def mixdown_tracks(
        self,
        track_datas: list[bytes],
        writer: AudioFileWriterProcessor,
        offsets_seconds: list[float] | None = None,
    ) -> None:
        """
        Minimal multi-track mixdown using a PyAV filter graph (amix), no rate resampling.
        Args:
            track_datas: Encoded bytes per track; empty entries are ignored.
            writer: Receives every mixed frame through ``await writer.push(frame)``.
            offsets_seconds: Optional start offset per entry of ``track_datas``.
                Each track is delayed by its offset minus the smallest offset
                among the non-empty tracks. ``None`` means no delays.
        Returns early (after a warning) when no audio frame can be decoded or
        when no track container can be opened.
        """
        # Discover target sample rate from first decodable frame
        target_sample_rate: int | None = None
        for data in track_datas:
            if not data:
                continue
            try:
                container = av.open(io.BytesIO(data))
                try:
                    for frame in container.decode(audio=0):
                        target_sample_rate = frame.sample_rate
                        break
                finally:
                    container.close()
            except Exception:
                continue
            if target_sample_rate:
                break
        if not target_sample_rate:
            self.logger.warning("Mixdown skipped - no decodable audio frames found")
            return
        valid_track_datas = [d for d in track_datas if d]
        # Delay per non-empty track, relative to the earliest non-empty track
        if offsets_seconds is not None:
            valid_offsets = [
                offsets_seconds[i] for i, d in enumerate(track_datas) if d
            ]
            base = min(valid_offsets) if valid_offsets else 0.0
            valid_delays_ms = [
                max(0, int(round((o - base) * 1000))) for o in valid_offsets
            ]
        else:
            valid_delays_ms = [0 for _ in valid_track_datas]
        # Open containers BEFORE building the graph. Graph inputs, delays,
        # decoders and resamplers are then all indexed by the same list, so a
        # track that fails to open cannot shift later tracks onto the wrong input.
        containers = []
        delays_ms: list[int] = []
        for i, d in enumerate(valid_track_datas):
            try:
                c = av.open(io.BytesIO(d))
            except Exception as e:
                self.logger.warning(
                    "Mixdown: failed to open container", input=i, error=str(e)
                )
                continue
            containers.append(c)
            delays_ms.append(valid_delays_ms[i])
        try:
            if not containers:
                self.logger.warning("Mixdown skipped - no valid inputs for graph")
                return
            # Build PyAV filter graph:
            # N abuffer (s32/stereo)
            #   -> optional adelay per input (for alignment)
            #   -> amix (s32)
            #   -> aformat(s32)
            #   -> sink
            graph = av.filter.Graph()
            inputs = []
            for idx in range(len(containers)):
                args = (
                    f"time_base=1/{target_sample_rate}:"
                    f"sample_rate={target_sample_rate}:"
                    f"sample_fmt=s32:"
                    f"channel_layout=stereo"
                )
                inputs.append(graph.add("abuffer", args=args, name=f"in{idx}"))
            mixer = graph.add(
                "amix", args=f"inputs={len(inputs)}:normalize=0", name="mix"
            )
            fmt = graph.add(
                "aformat",
                args=(
                    f"sample_fmts=s32:channel_layouts=stereo:sample_rates={target_sample_rate}"
                ),
                name="fmt",
            )
            sink = graph.add("abuffersink", name="out")
            # Optional per-input delay before mixing
            for idx, in_ctx in enumerate(inputs):
                delay_ms = delays_ms[idx]
                if delay_ms > 0:
                    # adelay requires one value per channel; use same for stereo
                    adelay = graph.add(
                        "adelay",
                        args=f"delays={delay_ms}|{delay_ms}:all=1",
                        name=f"delay{idx}",
                    )
                    in_ctx.link_to(adelay)
                    adelay.link_to(mixer, 0, idx)
                else:
                    in_ctx.link_to(mixer, 0, idx)
            mixer.link_to(fmt)
            fmt.link_to(sink)
            graph.configure()
            decoders = [c.decode(audio=0) for c in containers]
            active = [True] * len(decoders)
            # Per-input resamplers to enforce s32/stereo at the same rate (no resample of rate)
            resamplers = [
                AudioResampler(format="s32", layout="stereo", rate=target_sample_rate)
                for _ in decoders
            ]

            async def drain_sink() -> None:
                # Forward every mixed frame that is currently available.
                # Any error from sink.pull() ends this drain pass, exactly as
                # the original inline loops did.
                while True:
                    try:
                        mixed = sink.pull()
                    except Exception:
                        break
                    mixed.sample_rate = target_sample_rate
                    mixed.time_base = Fraction(1, target_sample_rate)
                    await writer.push(mixed)

            # Round-robin feed frames into graph, pull mixed frames as they become available
            while any(active):
                for i, (dec, is_active) in enumerate(zip(decoders, active)):
                    if not is_active:
                        continue
                    try:
                        frame = next(dec)
                    except StopIteration:
                        active[i] = False
                        continue
                    # Enforce same sample rate; convert format/layout to s32/stereo (no resample)
                    if frame.sample_rate != target_sample_rate:
                        # Skip frames with differing rate
                        continue
                    out_frames = resamplers[i].resample(frame) or []
                    for rf in out_frames:
                        rf.sample_rate = target_sample_rate
                        rf.time_base = Fraction(1, target_sample_rate)
                        inputs[i].push(rf)
                    await drain_sink()
            # Signal EOF to inputs and drain remaining
            for in_ctx in inputs:
                in_ctx.push(None)
            await drain_sink()
        finally:
            for c in containers:
                c.close()
    async def set_status(self, transcript_id: str, status: TranscriptStatus):
        """Set the transcript status while holding ``lock_transaction`` and
        return whatever ``transcripts_controller.set_status`` returns."""
        async with self.lock_transaction():
            outcome = await transcripts_controller.set_status(transcript_id, status)
            return outcome
    async def process(self, bucket_name: str, track_keys: list[str]) -> None:
        """Run the whole multitrack pipeline for this transcript.
        Steps, in order:
        1. Download every key in ``track_keys`` from ``bucket_name`` with a
           boto3 S3 client; an unreadable track is kept as ``b""``.
        2. Read each track's stream start time and mix all tracks into
           ``transcript.audio_mp3_filename`` (a mixdown failure is logged,
           not raised).
        3. Pad each track via ``pad_track_for_transcription`` and transcribe
           the padded URL, retrying with the "whisper" processor on failure.
        4. Tag words with their track index as speaker, merge and sort them,
           append a TRANSCRIPT event, detect topics, then generate title and
           summaries concurrently and set the status to "ended".
        Raises:
            Exception: when no track produced a transcript with words.
        """
        transcript = await self.get_transcript()
        s3 = boto3.client(
            "s3",
            region_name=settings.RECORDING_STORAGE_AWS_REGION,
            aws_access_key_id=settings.RECORDING_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.RECORDING_STORAGE_AWS_SECRET_ACCESS_KEY,
        )
        storage = get_transcripts_storage()
        # Pre-download bytes for all tracks for mixing and transcription
        track_datas: list[bytes] = []
        for key in track_keys:
            try:
                obj = s3.get_object(Bucket=bucket_name, Key=key)
                track_datas.append(obj["Body"].read())
            except Exception as e:
                self.logger.warning(
                    "Skipping track - cannot read S3 object", key=key, error=str(e)
                )
                # Keep a placeholder so list positions still match track_keys
                track_datas.append(b"")
        # REMOVED: Filename offset extraction - not needed anymore!
        # We use stream metadata start_time for padding instead
        # Get stream metadata start_times for mixing (still useful for mixdown)
        stream_start_times: list[float] = []
        for data in track_datas:
            if not data:
                stream_start_times.append(0.0)
                continue
            # NOTE(review): av.open is not guarded here, so an undecodable
            # track raises out of process() - confirm this is intended.
            container = av.open(io.BytesIO(data))
            try:
                audio_stream = container.streams.audio[0]
                if (
                    audio_stream.start_time is not None
                    and audio_stream.time_base is not None
                ):
                    start_time = float(audio_stream.start_time * audio_stream.time_base)
                else:
                    start_time = 0.0
                stream_start_times.append(start_time)
            finally:
                container.close()
        # Mixdown all available tracks into transcript.audio_mp3_filename, using stream metadata offsets
        try:
            mp3_writer = AudioFileWriterProcessor(
                path=str(transcript.audio_mp3_filename)
            )
            await self.mixdown_tracks(track_datas, mp3_writer, stream_start_times)
            await mp3_writer.flush()
        except Exception as e:
            # Best effort: transcription continues even without a mixed file
            self.logger.error("Mixdown failed", error=str(e))
        # Pad tracks before transcription so timestamps share one timeline
        padded_track_urls: list[str] = []
        for idx, data in enumerate(track_datas):
            if not data:
                # Empty URL marks a track that is skipped during transcription
                padded_track_urls.append("")
                continue
            _, padded_url = await self.pad_track_for_transcription(data, idx, storage)
            padded_track_urls.append(padded_url)
            self.logger.info(f"Padded track {idx} for transcription: {padded_url}")
        # Transcribe PADDED tracks - timestamps will be automatically correct!
        speaker_transcripts: list[TranscriptType] = []
        for idx, padded_url in enumerate(padded_track_urls):
            if not padded_url:
                continue
            try:
                # Transcribe the PADDED track
                t = await self.transcribe_file(padded_url, transcript.source_language)
            except Exception as e:
                self.logger.error(
                    "Transcription via default backend failed, trying local whisper",
                    track_idx=idx,
                    url=padded_url,
                    error=str(e),
                )
                try:
                    # Second attempt with the processor registered as "whisper"
                    fallback = FileTranscriptAutoProcessor(name="whisper")
                    result = None
                    async def capture_result(r):
                        nonlocal result
                        result = r
                    fallback.on(capture_result)
                    await fallback.push(
                        FileTranscriptInput(
                            audio_url=padded_url, language=transcript.source_language
                        )
                    )
                    await fallback.flush()
                    if not result:
                        raise Exception("No transcript captured in fallback")
                    t = result
                except Exception as e2:
                    self.logger.error(
                        "Skipping track - transcription failed after fallback",
                        track_idx=idx,
                        url=padded_url,
                        error=str(e2),
                    )
                    continue
            # A track without any words contributes nothing to the merge
            if not t.words:
                continue
            # NO OFFSET ADJUSTMENT NEEDED!
            # Timestamps are already correct because we transcribed padded tracks
            # Just set speaker ID
            for w in t.words:
                w.speaker = idx
            speaker_transcripts.append(t)
            self.logger.info(
                f"Track {idx} transcribed successfully with {len(t.words)} words",
                track_idx=idx,
            )
        if not speaker_transcripts:
            raise Exception("No valid track transcriptions")
        # Merge all words and sort by timestamp
        merged_words = []
        for t in speaker_transcripts:
            merged_words.extend(t.words)
        # Words without a usable start value sort as if they began at 0
        merged_words.sort(
            key=lambda w: w.start if hasattr(w, "start") and w.start is not None else 0
        )
        merged_transcript = TranscriptType(words=merged_words, translation=None)
        await transcripts_controller.append_event(
            transcript,
            event="TRANSCRIPT",
            data=TranscriptText(
                text=merged_transcript.text, translation=merged_transcript.translation
            ),
        )
        topics = await self.detect_topics(merged_transcript, transcript.target_language)
        # Title and summaries both depend only on topics, so run them together;
        # the first exception from either propagates to the caller
        await asyncio.gather(
            self.generate_title(topics),
            self.generate_summaries(topics),
            return_exceptions=False,
        )
        await self.set_status(transcript.id, "ended")
    async def transcribe_file(self, audio_url: str, language: str) -> TranscriptType:
        """Transcribe the audio at ``audio_url`` with the default
        ``FileTranscriptAutoProcessor`` and return the last transcript it emits.
        Raises:
            ValueError: if the processor emitted nothing (or a falsy transcript).
        """
        transcriber = FileTranscriptAutoProcessor()
        request = FileTranscriptInput(audio_url=audio_url, language=language)
        emitted: list = []
        async def remember(transcript):
            # Collect every emission; only the most recent one is used below
            emitted.append(transcript)
        transcriber.on(remember)
        await transcriber.push(request)
        await transcriber.flush()
        latest: TranscriptType | None = emitted[-1] if emitted else None
        if latest:
            return latest
        raise ValueError("No transcript captured")
    async def detect_topics(
        self, transcript: TranscriptType, target_language: str
    ) -> list[TitleSummary]:
        """Feed the transcript to the topic detector in 300-word chunks.
        Every detected topic is collected and also forwarded to
        ``self.on_topic``. ``target_language`` is accepted but not used here.
        Returns the collected topics in the order they were reported.
        """
        words_per_chunk = 300
        collected: list[TitleSummary] = []
        async def collect_and_forward(topic: TitleSummary):
            collected.append(topic)
            return await self.on_topic(topic)
        detector = TranscriptTopicDetectorProcessor(callback=collect_and_forward)
        detector.set_pipeline(self.empty_pipeline)
        all_words = transcript.words
        offset = 0
        total = len(all_words)
        while offset < total:
            window = all_words[offset : offset + words_per_chunk]
            offset += words_per_chunk
            if window:
                await detector.push(
                    TranscriptType(words=window, translation=transcript.translation)
                )
        await detector.flush()
        return collected
    async def generate_title(self, topics: list[TitleSummary]):
        """Push all topics through ``TranscriptFinalTitleProcessor``, which
        reports to ``self.on_title``. Logs a warning and does nothing when
        ``topics`` is empty."""
        if topics:
            title_processor = TranscriptFinalTitleProcessor(callback=self.on_title)
            title_processor.set_pipeline(self.empty_pipeline)
            for item in topics:
                await title_processor.push(item)
            await title_processor.flush()
            return
        self.logger.warning("No topics for title generation")
    async def generate_summaries(self, topics: list[TitleSummary]):
        """Push all topics through ``TranscriptFinalSummaryProcessor``.
        The long summary goes to ``self.on_long_summary`` and the short one to
        ``self.on_short_summary``. Logs a warning and does nothing when
        ``topics`` is empty.
        """
        if topics:
            summary_processor = TranscriptFinalSummaryProcessor(
                transcript=await self.get_transcript(),
                callback=self.on_long_summary,
                on_short_summary=self.on_short_summary,
            )
            summary_processor.set_pipeline(self.empty_pipeline)
            for item in topics:
                await summary_processor.push(item)
            await summary_processor.flush()
            return
        self.logger.warning("No topics for summary generation")
@shared_task
@asynctask
 async def task_pipeline_multitrack_process(
    *, transcript_id: str, bucket_name: str, track_keys: list[str]
 ):
    pipeline = PipelineMainMultitrack(transcript_id=transcript_id)
    try:
        await pipeline.set_status(transcript_id, "processing")
        await pipeline.process(bucket_name, track_keys)
    except Exception:
        await pipeline.set_status(transcript_id, "error")
        raise
    post_chain = chain(
        task_cleanup_consent.si(transcript_id=transcript_id),
        task_pipeline_post_to_zulip.si(transcript_id=transcript_id),
        task_send_webhook_if_needed.si(transcript_id=transcript_id),
    )
    post_chain.delay()
--- a/server/reflector/pipelines/topic_processing.py
+++ b/server/reflector/pipelines/topic_processing.py
@@ -1,113 +0,0 @@
 """
 Topic processing utilities
 ==========================
 Shared topic detection, title generation, and summarization logic
 used across file and multitrack pipelines.
 """
 from typing import Callable
 import structlog
 from reflector.db.transcripts import Transcript
 from reflector.processors import (
    TranscriptFinalSummaryProcessor,
    TranscriptFinalTitleProcessor,
    TranscriptTopicDetectorProcessor,
 )
 from reflector.processors.types import TitleSummary
 from reflector.processors.types import Transcript as TranscriptType
 class EmptyPipeline:
    """Minimal pipeline stand-in: holds a logger, stores no preferences, drops events."""

    def __init__(self, logger: structlog.BoundLogger):
        self.logger = logger

    def get_pref(self, k, d=None):
        # Nothing is stored, so the caller-supplied default is always returned.
        return d

    async def emit(self, event):
        # Emitted events are ignored.
        return None
 async def detect_topics(
    transcript: TranscriptType,
    target_language: str,
    *,
    on_topic_callback: Callable,
    empty_pipeline: EmptyPipeline,
 ) -> list[TitleSummary]:
    """Detect topics by feeding the transcript to the detector in word chunks.

    The words are sliced into windows of 300; each window is wrapped in a new
    ``TranscriptType`` (keeping the original ``translation``) and pushed to a
    ``TranscriptTopicDetectorProcessor`` attached to ``empty_pipeline``.

    Args:
        transcript: Transcript whose ``words`` are analysed.
        target_language: Accepted but not used by this function.
        on_topic_callback: Awaited with each detected topic after it has been
            collected.
        empty_pipeline: Pipeline object handed to the processor.

    Returns:
        The topics collected from the detector callback, in callback order.
    """
    words_per_chunk = 300
    collected: list[TitleSummary] = []

    async def collect_and_forward(topic: TitleSummary):
        collected.append(topic)
        return await on_topic_callback(topic)

    detector = TranscriptTopicDetectorProcessor(callback=collect_and_forward)
    detector.set_pipeline(empty_pipeline)

    for offset in range(0, len(transcript.words), words_per_chunk):
        window = transcript.words[offset : offset + words_per_chunk]
        if window:
            await detector.push(
                TranscriptType(words=window, translation=transcript.translation)
            )

    await detector.flush()
    return collected
 async def generate_title(
    topics: list[TitleSummary],
    *,
    on_title_callback: Callable,
    empty_pipeline: EmptyPipeline,
    logger: structlog.BoundLogger,
 ):
    """Push every topic through ``TranscriptFinalTitleProcessor``.

    ``on_title_callback`` is registered as the processor callback. When
    ``topics`` is empty a warning is logged on ``logger`` and nothing else
    happens.
    """
    if topics:
        title_processor = TranscriptFinalTitleProcessor(callback=on_title_callback)
        title_processor.set_pipeline(empty_pipeline)
        for item in topics:
            await title_processor.push(item)
        await title_processor.flush()
    else:
        logger.warning("No topics for title generation")
 async def generate_summaries(
    topics: list[TitleSummary],
    transcript: Transcript,
    *,
    on_long_summary_callback: Callable,
    on_short_summary_callback: Callable,
    on_action_items_callback: Callable,
    empty_pipeline: EmptyPipeline,
    logger: structlog.BoundLogger,
 ):
    """Push every topic through ``TranscriptFinalSummaryProcessor``.

    The processor is built with the stored ``transcript`` and the three
    callbacks (long summary as the main ``callback``, plus ``on_short_summary``
    and ``on_action_items``). When ``topics`` is empty a warning is logged on
    ``logger`` and nothing else happens.
    """
    if topics:
        summary_processor = TranscriptFinalSummaryProcessor(
            transcript=transcript,
            callback=on_long_summary_callback,
            on_short_summary=on_short_summary_callback,
            on_action_items=on_action_items_callback,
        )
        summary_processor.set_pipeline(empty_pipeline)
        for item in topics:
            await summary_processor.push(item)
        await summary_processor.flush()
    else:
        logger.warning("No topics for summary generation")
--- a/server/reflector/pipelines/transcription_helpers.py
+++ b/server/reflector/pipelines/transcription_helpers.py
@@ -1,34 +0,0 @@
 from reflector.processors.file_transcript import FileTranscriptInput
 from reflector.processors.file_transcript_auto import FileTranscriptAutoProcessor
 from reflector.processors.types import Transcript as TranscriptType
 async def transcribe_file_with_processor(
    audio_url: str,
    language: str,
    processor_name: str | None = None,
 ) -> TranscriptType:
    """Transcribe one audio URL with ``FileTranscriptAutoProcessor``.

    Args:
        audio_url: Location of the audio, passed through in ``FileTranscriptInput``.
        language: Language value passed through in ``FileTranscriptInput``.
        processor_name: When truthy, forwarded as ``name=`` to the processor;
            otherwise the processor is built with no arguments.

    Returns:
        The last transcript the processor delivered to its ``on`` callback.

    Raises:
        ValueError: If no (truthy) transcript was delivered by the time the
            processor has been flushed.
    """
    if processor_name:
        transcriber = FileTranscriptAutoProcessor(name=processor_name)
    else:
        transcriber = FileTranscriptAutoProcessor()
    request = FileTranscriptInput(audio_url=audio_url, language=language)

    # Single-slot holder written by the callback; later deliveries overwrite.
    slot: dict[str, TranscriptType | None] = {"value": None}

    async def remember(transcript):
        slot["value"] = transcript

    transcriber.on(remember)
    await transcriber.push(request)
    await transcriber.flush()

    captured = slot["value"]
    if captured:
        return captured

    processor_label = processor_name or "default"
    raise ValueError(
        f"No transcript captured from {processor_label} processor for audio: {audio_url}"
    )
--- a/server/reflector/platform_types.py
+++ b/server/reflector/platform_types.py
@@ -0,0 +1,9 @@
 """Platform type definitions.
 This module exists solely to define the Platform literal type without any imports,
 preventing circular import issues when used across the codebase.
 """
 from typing import Literal
 Platform = Literal["whereby", "daily"]
--- a/server/reflector/processors/file_transcript_modal.py
+++ b/server/reflector/processors/file_transcript_modal.py
@@ -56,16 +56,6 @@ class FileTranscriptModalProcessor(FileTranscriptProcessor):
                },
                follow_redirects=True,
            )
            if response.status_code != 200:
                error_body = response.text
                self.logger.error(
                    "Modal API error",
                    audio_url=data.audio_url,
                    status_code=response.status_code,
                    error_body=error_body,
                )
            response.raise_for_status()
            result = response.json()
--- a/server/reflector/processors/summary/summary_builder.py
+++ b/server/reflector/processors/summary/summary_builder.py
@@ -96,36 +96,6 @@ RECAP_PROMPT = dedent(
    """
 ).strip()
 # Prompt used by SummaryBuilder.identify_action_items; the structured answer is
 # parsed into ActionItemsResponse (decisions + next_steps).
 ACTION_ITEMS_PROMPT = dedent(
    """
    Identify action items from this meeting transcript. Your goal is to identify what was decided and what needs to happen next.
    Look for:
    1. **Decisions Made**: Any decisions, choices, or conclusions reached during the meeting. For each decision:
       - What was decided? (be specific)
       - Who made the decision or was involved? (use actual participant names)
       - Why was this decision made? (key factors, reasoning, or rationale)
    2. **Next Steps / Action Items**: Any tasks, follow-ups, or actions that were mentioned or assigned. For each action item:
       - What specific task needs to be done? (be concrete and actionable)
       - Who is responsible? (use actual participant names if mentioned, or "team" if unclear)
       - When is it due? (any deadlines, timeframes, or "by next meeting" type commitments)
       - What context is needed? (any additional details that help understand the task)
    Guidelines:
    - Be thorough and identify all action items, even if they seem minor
    - Include items that were agreed upon, assigned, or committed to
    - Include decisions even if they seem obvious or implicit
    - If someone says "I'll do X" or "We should do Y", that's an action item
    - If someone says "Let's go with option A", that's a decision
    - Use the exact participant names from the transcript
    - If no participant name is mentioned, you can leave assigned_to/decided_by as null
    Only return empty lists if the transcript contains NO decisions and NO action items whatsoever.
    """
 ).strip()
 STRUCTURED_RESPONSE_PROMPT_TEMPLATE = dedent(
    """
    Based on the following analysis, provide the information in the requested JSON format:
@@ -185,53 +155,6 @@ class SubjectsResponse(BaseModel):
    )
 class ActionItem(BaseModel):
    """A single action item from the meeting"""
    # Only `task` is required; every other field defaults to None.
    task: str = Field(description="The task or action item to be completed")
    assigned_to: str | None = Field(
        default=None, description="Person or team assigned to this task (name)"
    )
    # Filled in by SummaryBuilder._map_participant_names_to_ids when
    # `assigned_to` exactly matches a key of the name -> id mapping.
    assigned_to_participant_id: str | None = Field(
        default=None, description="Participant ID if assigned_to matches a participant"
    )
    # Free-form text, not a parsed date.
    deadline: str | None = Field(
        default=None, description="Deadline or timeframe mentioned for this task"
    )
    context: str | None = Field(
        default=None, description="Additional context or notes about this task"
    )
 class Decision(BaseModel):
    """A decision made during the meeting"""
    # Only `decision` is required; every other field defaults to None.
    decision: str = Field(description="What was decided")
    rationale: str | None = Field(
        default=None,
        description="Reasoning or key factors that influenced this decision",
    )
    decided_by: str | None = Field(
        default=None, description="Person or group who made the decision (name)"
    )
    # Filled in by SummaryBuilder._map_participant_names_to_ids when
    # `decided_by` exactly matches a key of the name -> id mapping.
    decided_by_participant_id: str | None = Field(
        default=None, description="Participant ID if decided_by matches a participant"
    )
 class ActionItemsResponse(BaseModel):
    """Pydantic model for identified action items"""
    # Both lists default to empty, so a response with neither key still validates.
    decisions: list[Decision] = Field(
        default_factory=list,
        description="List of decisions made during the meeting",
    )
    next_steps: list[ActionItem] = Field(
        default_factory=list,
        description="List of action items and next steps to be taken",
    )
 class SummaryBuilder:
    def __init__(self, llm: LLM, filename: str | None = None, logger=None) -> None:
        self.transcript: str | None = None
@@ -242,9 +165,6 @@ class SummaryBuilder:
        self.llm: LLM = llm
        self.model_name: str = llm.model_name
        self.logger = logger or structlog.get_logger()
        self.participant_instructions: str | None = None
        self.action_items: ActionItemsResponse | None = None
        self.participant_name_to_id: dict[str, str] = {}
        if filename:
            self.read_transcript_from_file(filename)
@@ -268,81 +188,17 @@ class SummaryBuilder:
        self.llm = llm
    async def _get_structured_response(
-        self,
+        self, prompt: str, output_cls: Type[T], tone_name: str | None = None
        prompt: str,
        output_cls: Type[T],
        tone_name: str | None = None,
        timeout: int | None = None,
    ) -> T:
        """Generic function to get structured output from LLM for non-function-calling models."""
        enhanced_prompt = self._enhance_prompt_with_participants(prompt)
        return await self.llm.get_structured_response(
-            enhanced_prompt,
+            prompt, [self.transcript], output_cls, tone_name=tone_name
            [self.transcript],
            output_cls,
            tone_name=tone_name,
            timeout=timeout,
        )
    async def _get_response(
        self, prompt: str, texts: list[str], tone_name: str | None = None
    ) -> str:
        """Get text response with automatic participant instructions injection."""
        enhanced_prompt = self._enhance_prompt_with_participants(prompt)
        return await self.llm.get_response(enhanced_prompt, texts, tone_name=tone_name)
    def _enhance_prompt_with_participants(self, prompt: str) -> str:
        """Add participant instructions to any prompt if participants are known."""
        if self.participant_instructions:
            self.logger.debug("Adding participant instructions to prompt")
            return f"{prompt}\n\n{self.participant_instructions}"
        return prompt
    # ----------------------------------------------------------------------------
    # Participants
    # ----------------------------------------------------------------------------
    def set_known_participants(
        self,
        participants: list[str],
        participant_name_to_id: dict[str, str] | None = None,
    ) -> None:
        """
        Set known participants directly, without asking the LLM to identify them.

        A "# Participants" markdown section listing the names is appended to
        ``self.transcript``, and ``self.participant_instructions`` is set to a
        block of text telling the model to refer to people by these names.
        If ``participants`` is empty, a warning is logged and nothing is changed.

        Args:
            participants: List of participant names
            participant_name_to_id: Optional mapping of participant names to their IDs;
                stored on the builder only when it is non-empty.
        """
        if not participants:
            self.logger.warning("No participants provided")
            return
        self.logger.info(
            "Using known participants",
            participants=participants,
        )
        # A falsy mapping (None or {}) leaves any previously stored mapping in place.
        if participant_name_to_id:
            self.participant_name_to_id = participant_name_to_id
        participants_md = self.format_list_md(participants)
        # NOTE(review): `+=` assumes self.transcript is already a str (it is
        # initialised to None in __init__) — confirm set_transcript() always runs first.
        self.transcript += f"\n\n# Participants\n\n{participants_md}"
        participants_list = ", ".join(participants)
        self.participant_instructions = dedent(
            f"""
            # IMPORTANT: Participant Names
            The following participants are identified in this conversation: {participants_list}
            You MUST use these specific participant names when referring to people in your response.
            Do NOT use generic terms like "a participant", "someone", "attendee", "Speaker 1", "Speaker 2", etc.
            Always refer to people by their actual names (e.g., "John suggested..." not "A participant suggested...").
            """
        ).strip()
    async def identify_participants(self) -> None:
        """
        From a transcript, try to identify the participants using TreeSummarize with structured output.
@@ -376,19 +232,6 @@ class SummaryBuilder:
            if unique_participants:
                participants_md = self.format_list_md(unique_participants)
                self.transcript += f"\n\n# Participants\n\n{participants_md}"
                # Set instructions that will be automatically added to all prompts
                participants_list = ", ".join(unique_participants)
                self.participant_instructions = dedent(
                    f"""
                    # IMPORTANT: Participant Names
                    The following participants are identified in this conversation: {participants_list}
                    You MUST use these specific participant names when referring to people in your response.
                    Do NOT use generic terms like "a participant", "someone", "attendee", "Speaker 1", "Speaker 2", etc.
                    Always refer to people by their actual names (e.g., "John suggested..." not "A participant suggested...").
                    """
                ).strip()
            else:
                self.logger.warning("No participants identified in the transcript")
@@ -475,13 +318,13 @@ class SummaryBuilder:
        for subject in self.subjects:
            detailed_prompt = DETAILED_SUBJECT_PROMPT_TEMPLATE.format(subject=subject)
-            detailed_response = await self._get_response(
+            detailed_response = await self.llm.get_response(
                detailed_prompt, [self.transcript], tone_name="Topic assistant"
            )
            paragraph_prompt = PARAGRAPH_SUMMARY_PROMPT
-            paragraph_response = await self._get_response(
+            paragraph_response = await self.llm.get_response(
                paragraph_prompt, [str(detailed_response)], tone_name="Topic summarizer"
            )
@@ -502,99 +345,13 @@ class SummaryBuilder:
        recap_prompt = RECAP_PROMPT
-        recap_response = await self._get_response(
+        recap_response = await self.llm.get_response(
            recap_prompt, [summaries_text], tone_name="Recap summarizer"
        )
        self.recap = str(recap_response)
        self.logger.info(f"Quick recap: {self.recap}")
    def _map_participant_names_to_ids(
        self, response: ActionItemsResponse
    ) -> ActionItemsResponse:
        """Return a copy of ``response`` with participant IDs filled in where names match.

        Each decision / next step is copied with ``model_copy()``; when its
        ``decided_by`` / ``assigned_to`` name is a key of
        ``self.participant_name_to_id``, the matching ``*_participant_id`` field
        is set on the copy. With no mapping available the input object itself is
        returned unchanged.
        """
        name_to_id = self.participant_name_to_id
        if not name_to_id:
            return response

        def with_participant_id(entry, name_field, id_field):
            # Work on a copy so the caller's objects are not modified.
            clone = entry.model_copy()
            name = getattr(entry, name_field)
            if name and name in name_to_id:
                setattr(clone, id_field, name_to_id[name])
            return clone

        return ActionItemsResponse(
            decisions=[
                with_participant_id(d, "decided_by", "decided_by_participant_id")
                for d in response.decisions
            ],
            next_steps=[
                with_participant_id(s, "assigned_to", "assigned_to_participant_id")
                for s in response.next_steps
            ],
        )
    async def identify_action_items(self) -> ActionItemsResponse | None:
        """Extract decisions and next steps from the transcript via a structured LLM call.

        The result (with participant IDs mapped in) is stored on
        ``self.action_items`` and returned. If there is no transcript, or if
        anything raises during the request or the logging that follows,
        ``self.action_items`` is set to ``None`` and ``None`` is returned; the
        exception is logged rather than propagated.
        """
        self.logger.info("--- identify action items using TreeSummarize")
        if not self.transcript:
            self.logger.warning(
                "No transcript available for action items identification"
            )
            self.action_items = None
            return None

        try:
            raw = await self._get_structured_response(
                ACTION_ITEMS_PROMPT,
                ActionItemsResponse,
                tone_name="Action item identifier",
                timeout=settings.LLM_STRUCTURED_RESPONSE_TIMEOUT,
            )
            found = self._map_participant_names_to_ids(raw)
            self.action_items = found

            decisions_count = len(found.decisions)
            next_steps_count = len(found.next_steps)
            self.logger.info(
                f"Identified {decisions_count} decisions and {next_steps_count} action items",
                decisions_count=decisions_count,
                next_steps_count=next_steps_count,
            )
            if found.decisions:
                self.logger.debug(
                    "Decisions identified",
                    decisions=[entry.decision for entry in found.decisions],
                )
            if found.next_steps:
                self.logger.debug(
                    "Action items identified",
                    tasks=[step.task for step in found.next_steps],
                )
            if not (found.decisions or found.next_steps):
                self.logger.warning(
                    "No action items identified from transcript",
                    transcript_length=len(self.transcript),
                )
            return found
        except Exception as exc:
            self.logger.error(
                f"Error identifying action items: {exc}",
                exc_info=True,
            )
            self.action_items = None
            return None
    async def generate_summary(self, only_subjects: bool = False) -> None:
        """
        Generate summary by extracting subjects, creating summaries for each, and generating a recap.
@@ -606,7 +363,6 @@ class SummaryBuilder:
        await self.generate_subject_summaries()
        await self.generate_recap()
        await self.identify_action_items()
    # ----------------------------------------------------------------------------
    # Markdown
@@ -709,6 +465,8 @@ if __name__ == "__main__":
        if args.summary:
            await sm.generate_summary()
        # Note: action items generation has been removed
        print("")
        print("-" * 80)
        print("")
--- a/server/reflector/processors/transcript_final_summary.py
+++ b/server/reflector/processors/transcript_final_summary.py
@@ -1,12 +1,7 @@
 from reflector.llm import LLM
 from reflector.processors.base import Processor
 from reflector.processors.summary.summary_builder import SummaryBuilder
-from reflector.processors.types import (
+from reflector.processors.types import FinalLongSummary, FinalShortSummary, TitleSummary
    ActionItems,
    FinalLongSummary,
    FinalShortSummary,
    TitleSummary,
 )
 from reflector.settings import settings
@@ -31,30 +26,7 @@ class TranscriptFinalSummaryProcessor(Processor):
    async def get_summary_builder(self, text) -> SummaryBuilder:
        builder = SummaryBuilder(self.llm, logger=self.logger)
        builder.set_transcript(text)
        if self.transcript and self.transcript.participants:
            participant_names = [p.name for p in self.transcript.participants if p.name]
            if participant_names:
                self.logger.info(
                    f"Using {len(participant_names)} known participants from transcript"
                )
                participant_name_to_id = {
                    p.name: p.id
                    for p in self.transcript.participants
                    if p.name and p.id
                }
                builder.set_known_participants(
                    participant_names, participant_name_to_id=participant_name_to_id
                )
            else:
                self.logger.info(
                    "Participants field exists but is empty, identifying participants"
                )
        await builder.identify_participants()
        else:
            self.logger.info("No participants stored, identifying participants")
            await builder.identify_participants()
        await builder.generate_summary()
        return builder
@@ -73,31 +45,22 @@ class TranscriptFinalSummaryProcessor(Processor):
            self.logger.warning("No summary to output")
            return
        # build the speakermap from the transcript
        speakermap = {}
        if self.transcript:
            speakermap = {
-                p.speaker: p.name
+                participant["speaker"]: participant["name"]
-                for p in (self.transcript.participants or [])
+                for participant in self.transcript.participants
                if p.speaker is not None and p.name
            }
            self.logger.info(
                f"Built speaker map with {len(speakermap)} participants",
                speakermap=speakermap,
            )
        # build the transcript as a single string
        # XXX: unsure whether the participants' names are replaced directly in speaker?
        text_transcript = []
        unique_speakers = set()
        for topic in self.chunks:
            for segment in topic.transcript.as_segments():
                name = speakermap.get(segment.speaker, f"Speaker {segment.speaker}")
                unique_speakers.add((segment.speaker, name))
                text_transcript.append(f"{name}: {segment.text}")
        self.logger.info(
            f"Built transcript with {len(unique_speakers)} unique speakers",
            speakers=list(unique_speakers),
        )
        text_transcript = "\n".join(text_transcript)
        last_chunk = self.chunks[-1]
@@ -118,9 +81,4 @@ class TranscriptFinalSummaryProcessor(Processor):
            )
            await self.emit(final_short_summary, name="short_summary")
        if self.builder and self.builder.action_items:
            action_items = self.builder.action_items.model_dump()
            action_items = ActionItems(action_items=action_items)
            await self.emit(action_items, name="action_items")
        await self.emit(final_long_summary)
--- a/server/reflector/processors/transcript_topic_detector.py
+++ b/server/reflector/processors/transcript_topic_detector.py
@@ -1,6 +1,6 @@
 from textwrap import dedent
-from pydantic import AliasChoices, BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 from reflector.llm import LLM
 from reflector.processors.base import Processor
@@ -34,13 +34,13 @@ TOPIC_PROMPT = dedent(
 class TopicResponse(BaseModel):
    """Structured response for topic detection"""
    model_config = ConfigDict(populate_by_name=True)
    title: str = Field(
-        description="A descriptive title for the topic being discussed",
+        description="A descriptive title for the topic being discussed", alias="Title"
        validation_alias=AliasChoices("title", "Title"),
    )
    summary: str = Field(
-        description="A concise 1-2 sentence summary of the discussion",
+        description="A concise 1-2 sentence summary of the discussion", alias="Summary"
        validation_alias=AliasChoices("summary", "Summary"),
    )
@@ -78,11 +78,7 @@ class TranscriptTopicDetectorProcessor(Processor):
        """
        prompt = TOPIC_PROMPT.format(text=text)
        response = await self.llm.get_structured_response(
-            prompt,
+            prompt, [text], TopicResponse, tone_name="Topic analyzer"
            [text],
            TopicResponse,
            tone_name="Topic analyzer",
            timeout=settings.LLM_STRUCTURED_RESPONSE_TIMEOUT,
        )
        return response
--- a/server/reflector/processors/types.py
+++ b/server/reflector/processors/types.py
@@ -1,7 +1,6 @@
 import io
 import re
 import tempfile
 from collections import defaultdict
 from pathlib import Path
 from typing import Annotated, TypedDict
@@ -17,17 +16,6 @@ class DiarizationSegment(TypedDict):
 PUNC_RE = re.compile(r"[.;:?!…]")
 SENTENCE_END_RE = re.compile(r"[.?!…]$")
 # Max segment length for words_to_segments() - breaks on any punctuation (. ; : ? ! …)
 # when segment exceeds this limit. Used for non-multitrack recordings.
 MAX_SEGMENT_CHARS = 120
 # Max segment length for words_to_segments_by_sentence() - only breaks on sentence-ending
 # punctuation (. ? ! …) when segment exceeds this limit. Higher threshold allows complete
 # sentences in multitrack recordings where speakers overlap.
 # similar number to server/reflector/processors/transcript_liner.py
 MAX_SENTENCE_SEGMENT_CHARS = 1000
 class AudioFile(BaseModel):
@@ -88,6 +76,7 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
    # but separate if the speaker changes, or if the punctuation is a . , ; : ? !
    segments = []
    current_segment = None
    MAX_SEGMENT_LENGTH = 120
    for word in words:
        if current_segment is None:
@@ -117,7 +106,7 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
        current_segment.end = word.end
        have_punc = PUNC_RE.search(word.text)
-        if have_punc and (len(current_segment.text) > MAX_SEGMENT_CHARS):
+        if have_punc and (len(current_segment.text) > MAX_SEGMENT_LENGTH):
            segments.append(current_segment)
            current_segment = None
@@ -127,70 +116,6 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
    return segments
 def words_to_segments_by_sentence(words: list[Word]) -> list[TranscriptSegment]:
    """Build per-speaker sentence segments from (possibly interleaved) words.

    Words are first bucketed by ``speaker`` (input order is kept inside each
    bucket). Within a bucket, word texts are concatenated until a word ends
    with sentence punctuation (``SENTENCE_END_RE``) or the accumulated text
    reaches ``MAX_SENTENCE_SEGMENT_CHARS``; at that point a segment is closed.
    Whatever text is left at the end of a bucket becomes a final segment.

    Unlike ``words_to_segments()``, a change of speaker does not by itself end
    a segment, which avoids many tiny segments when speakers overlap.

    Returns:
        All segments from all speakers, ordered by start time.
    """
    if not words:
        return []

    # speaker -> that speaker's words, in input order
    words_of: dict[int, list[Word]] = {}
    for item in words:
        words_of.setdefault(item.speaker, []).append(item)

    collected: list[TranscriptSegment] = []
    for speaker_id, bucket in words_of.items():
        buffer = ""
        began_at: float | None = None
        ended_at: float = 0.0

        for item in bucket:
            if began_at is None:
                began_at = item.start
            buffer = buffer + item.text
            ended_at = item.end

            hit_sentence_end = SENTENCE_END_RE.search(item.text.strip())
            hit_length_cap = len(buffer) >= MAX_SENTENCE_SEGMENT_CHARS
            if not (hit_sentence_end or hit_length_cap):
                continue

            collected.append(
                TranscriptSegment(
                    text=buffer,
                    start=began_at,
                    end=ended_at,
                    speaker=speaker_id,
                )
            )
            buffer, began_at = "", None

        # Trailing words that never reached a sentence end or the length cap.
        if buffer and began_at is not None:
            collected.append(
                TranscriptSegment(
                    text=buffer,
                    start=began_at,
                    end=ended_at,
                    speaker=speaker_id,
                )
            )

    return sorted(collected, key=lambda seg: seg.start)
 class Transcript(BaseModel):
    translation: str | None = None
    words: list[Word] = []
@@ -229,9 +154,7 @@ class Transcript(BaseModel):
            word.start += offset
            word.end += offset
-    def as_segments(self, is_multitrack: bool = False) -> list[TranscriptSegment]:
+    def as_segments(self) -> list[TranscriptSegment]:
        if is_multitrack:
            return words_to_segments_by_sentence(self.words)
        return words_to_segments(self.words)
@@ -264,10 +187,6 @@ class FinalShortSummary(BaseModel):
    duration: float
 class ActionItems(BaseModel):
    # Wrapper emitted under the name "action_items" by the final summary processor.
    action_items: dict  # JSON-serializable dict from ActionItemsResponse
 class FinalTitle(BaseModel):
    title: str
--- a/server/reflector/schemas/platform.py
+++ b/server/reflector/schemas/platform.py
@@ -1,5 +0,0 @@
 from typing import Literal
 Platform = Literal["whereby", "daily"]
 WHEREBY_PLATFORM: Platform = "whereby"
 DAILY_PLATFORM: Platform = "daily"
--- a/server/reflector/schemas/transcript_formats.py
+++ b/server/reflector/schemas/transcript_formats.py
@@ -1,17 +0,0 @@
 """Schema definitions for transcript format types and segments."""
 from typing import Literal
 from pydantic import BaseModel
 TranscriptFormat = Literal["text", "text-timestamped", "webvtt-named", "json"]
 class TranscriptSegment(BaseModel):
    """A single transcript segment with speaker and timing information."""
    # Numeric speaker index and its display name.
    speaker: int
    speaker_name: str
    text: str
    # Segment boundaries — presumably seconds from the start of the recording; TODO confirm.
    start: float
    end: float
--- a/server/reflector/services/transcript_process.py
+++ b/server/reflector/services/transcript_process.py
@@ -1,297 +0,0 @@
 """
 Transcript processing service - shared logic for HTTP endpoints and Celery tasks.
 This module provides result-based error handling that works in both contexts:
 - HTTP endpoint: converts errors to HTTPException
 - Celery task: converts errors to Exception
 """
 from dataclasses import dataclass
 from typing import Literal, Union, assert_never
 import celery
 from celery.result import AsyncResult
 from hatchet_sdk.clients.rest.exceptions import ApiException
 from hatchet_sdk.clients.rest.models import V1TaskStatus
 from reflector.db.recordings import recordings_controller
 from reflector.db.rooms import rooms_controller
 from reflector.db.transcripts import Transcript, transcripts_controller
 from reflector.hatchet.client import HatchetClientManager
 from reflector.logger import logger
 from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
 from reflector.pipelines.main_multitrack_pipeline import (
    task_pipeline_multitrack_process,
 )
 from reflector.settings import settings
 from reflector.utils.string import NonEmptyString
@dataclass
 class ProcessError:
    # Error result: `detail` is the non-empty message describing what went wrong.
    detail: NonEmptyString
@dataclass
 class FileProcessingConfig:
    # Parameters for "file" mode processing; `mode` is the literal tag that
    # distinguishes this config from MultitrackProcessingConfig.
    transcript_id: NonEmptyString
    mode: Literal["file"] = "file"
@dataclass
 class MultitrackProcessingConfig:
    # Parameters for "multitrack" mode processing. transcript_id, bucket_name and
    # track_keys share their names with the keyword arguments of
    # task_pipeline_multitrack_process.
    transcript_id: NonEmptyString
    bucket_name: NonEmptyString
    track_keys: list[str]
    # Optional identifiers; both default to None.
    recording_id: NonEmptyString | None = None
    room_id: NonEmptyString | None = None
    # Literal tag that distinguishes this config from FileProcessingConfig.
    mode: Literal["multitrack"] = "multitrack"
 ProcessingConfig = Union[FileProcessingConfig, MultitrackProcessingConfig]
 PrepareResult = Union[ProcessingConfig, ProcessError]
@dataclass
 class ValidationOk:
    """Successful validation result.

    Returned by validate_transcript_for_processing when no blocking condition
    is found, and taken as input by prepare_transcript_processing.
    """
    # A transcript currently doesn't always have a recording_id
    recording_id: NonEmptyString | None
    transcript_id: NonEmptyString
    room_id: NonEmptyString | None = None
@dataclass
 class ValidationLocked:
    """Validation failure: the transcript is locked.

    Returned by validate_transcript_for_processing when transcript.locked is truthy.
    """
    detail: NonEmptyString
@dataclass
 class ValidationNotReady:
    """Validation failure: the transcript is not ready for processing.

    Returned by validate_transcript_for_processing when the transcript status
    is "idle".
    """
    detail: NonEmptyString
@dataclass
 class ValidationAlreadyScheduled:
    """Validation failure: processing is already in flight.

    Returned by validate_transcript_for_processing when a Celery pipeline task
    is scheduled or active for the transcript, or when its Hatchet workflow run
    is RUNNING or QUEUED.
    """
    detail: NonEmptyString
 ValidationError = Union[
    ValidationNotReady, ValidationLocked, ValidationAlreadyScheduled
 ]
 ValidationResult = Union[ValidationOk, ValidationError]
@dataclass
 class DispatchOk:
    """DispatchResult member whose status is fixed to "ok".

    NOTE(review): not constructed anywhere in this module; presumably built by
    callers that wrap dispatch_transcript_processing — confirm.
    """
    status: Literal["ok"] = "ok"
@dataclass
 class DispatchAlreadyRunning:
    """DispatchResult member whose status is fixed to "already_running".

    NOTE(review): not constructed anywhere in this module; presumably built by
    callers that wrap dispatch_transcript_processing — confirm.
    """
    status: Literal["already_running"] = "already_running"
 DispatchResult = Union[
    DispatchOk, DispatchAlreadyRunning, ProcessError, ValidationError
 ]
 async def validate_transcript_for_processing(
    transcript: Transcript,
 ) -> ValidationResult:
    """Decide whether a transcript may be (re)processed right now.

    Checks run in order and the first one that fails wins:

    1. locked transcript -> ValidationLocked
    2. status "idle" -> ValidationNotReady
    3. a Celery file or multitrack pipeline task is scheduled or active for
       this transcript id -> ValidationAlreadyScheduled
    4. Hatchet is enabled and the recorded workflow run is RUNNING or QUEUED
       -> ValidationAlreadyScheduled

    Otherwise returns ValidationOk carrying the recording, transcript and
    room ids. An ApiException while querying Hatchet is treated as "not
    running" so processing is allowed.
    """
    if transcript.locked:
        return ValidationLocked(detail="Recording is locked")

    if transcript.status == "idle":
        return ValidationNotReady(detail="Recording is not ready for processing")

    # Celery: either pipeline task counts as "already running"
    celery_task_names = (
        "reflector.pipelines.main_file_pipeline.task_pipeline_file_process",
        "reflector.pipelines.main_multitrack_pipeline.task_pipeline_multitrack_process",
    )
    if any(
        task_is_scheduled_or_active(name, transcript_id=transcript.id)
        for name in celery_task_names
    ):
        return ValidationAlreadyScheduled(detail="already running")

    # Hatchet: only consulted when enabled and a previous run id is recorded
    run_id = transcript.workflow_run_id if settings.HATCHET_ENABLED else None
    if run_id:
        try:
            run_status = await HatchetClientManager.get_workflow_run_status(run_id)
        except ApiException:
            # Workflow might be gone (404) or API issue - allow processing
            pass
        else:
            # A running or queued workflow blocks new processing
            if run_status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
                return ValidationAlreadyScheduled(
                    detail="Hatchet workflow already running"
                )

    return ValidationOk(
        recording_id=transcript.recording_id,
        transcript_id=transcript.id,
        room_id=transcript.room_id,
    )
 async def prepare_transcript_processing(validation: ValidationOk) -> PrepareResult:
    """
    Choose between multitrack and single-file processing for a validated transcript.

    Multitrack is selected only when the transcript's recording exists and has
    a non-empty track key list plus a bucket name. A recording with an empty
    track key list, or with track keys but no bucket name, yields a
    ProcessError. Every other case (no recording id, recording not found,
    track_keys is None) falls back to file processing.
    """
    recording = None
    if validation.recording_id:
        recording = await recordings_controller.get_by_id(validation.recording_id)

    bucket = recording.bucket_name if recording else None
    keys = recording.track_keys if recording else None

    if keys is not None:
        # None means "no multitrack data"; an empty list is an invalid state
        if len(keys) == 0:
            return ProcessError(
                detail="No track keys found, must be either > 0 or None",
            )
        if not bucket:
            return ProcessError(
                detail="Bucket name must be specified",
            )

    if not keys:
        return FileProcessingConfig(
            transcript_id=validation.transcript_id,
        )

    return MultitrackProcessingConfig(
        bucket_name=bucket,  # type: ignore (validated above)
        track_keys=keys,
        transcript_id=validation.transcript_id,
        recording_id=validation.recording_id,
        room_id=validation.room_id,
    )
 async def dispatch_transcript_processing(
    config: ProcessingConfig, force: bool = False
 ) -> AsyncResult | None:
    """Dispatch transcript processing to appropriate backend (Hatchet or Celery).

    File configs always go to the Celery file pipeline. Multitrack configs go
    to Hatchet when settings.HATCHET_ENABLED is true or the config's room has
    use_hatchet=True, otherwise to the Celery multitrack pipeline.

    On the Hatchet path:
    - without force, a recorded workflow run is replayed when
      HatchetClientManager.can_replay allows it;
    - with force, the recorded run is cancelled and its id cleared first;
    - if the transcript (re-fetched) has a run that is RUNNING or QUEUED,
      nothing new is started;
    - otherwise a "DiarizationPipeline" workflow is started and its id is
      stored on the transcript.

    Args:
        config: Processing config, as produced by prepare_transcript_processing.
        force: Cancel any recorded Hatchet workflow instead of replaying it.
            Only read on the Hatchet path.

    Returns:
        AsyncResult for Celery tasks, None for Hatchet workflows (including
        the replay and skipped-dispatch cases).
    """
    if isinstance(config, MultitrackProcessingConfig):
        # Check if room has use_hatchet=True (overrides env vars)
        room_forces_hatchet = False
        if config.room_id:
            room = await rooms_controller.get_by_id(config.room_id)
            # A missing room is treated as "does not force Hatchet"
            room_forces_hatchet = room.use_hatchet if room else False
        # Start durable workflow if enabled (Hatchet)
        # or if room has use_hatchet=True
        use_hatchet = settings.HATCHET_ENABLED or room_forces_hatchet
        if room_forces_hatchet:
            logger.info(
                "Room forces Hatchet workflow",
                room_id=config.room_id,
                transcript_id=config.transcript_id,
            )
        if use_hatchet:
            # First check if we can replay (outside transaction since it's read-only)
            transcript = await transcripts_controller.get_by_id(config.transcript_id)
            if transcript and transcript.workflow_run_id and not force:
                can_replay = await HatchetClientManager.can_replay(
                    transcript.workflow_run_id
                )
                if can_replay:
                    await HatchetClientManager.replay_workflow(
                        transcript.workflow_run_id
                    )
                    logger.info(
                        "Replaying Hatchet workflow",
                        workflow_id=transcript.workflow_run_id,
                    )
                    # Replay reuses the existing run, so no new workflow is started
                    return None
            # Force: cancel old workflow if exists
            if force and transcript and transcript.workflow_run_id:
                await HatchetClientManager.cancel_workflow(transcript.workflow_run_id)
                logger.info(
                    "Cancelled old workflow (--force)",
                    workflow_id=transcript.workflow_run_id,
                )
                # Clear the stored run id so the concurrency check below does
                # not look at the run that was just cancelled.
                await transcripts_controller.update(
                    transcript, {"workflow_run_id": None}
                )
            # Re-fetch and check for concurrent dispatch (optimistic approach).
            # No database lock - worst case is duplicate dispatch, but Hatchet
            # workflows are idempotent so this is acceptable.
            transcript = await transcripts_controller.get_by_id(config.transcript_id)
            if transcript and transcript.workflow_run_id:
                # Another process started a workflow between validation and now
                try:
                    status = await HatchetClientManager.get_workflow_run_status(
                        transcript.workflow_run_id
                    )
                    if status in (V1TaskStatus.RUNNING, V1TaskStatus.QUEUED):
                        logger.info(
                            "Concurrent workflow detected, skipping dispatch",
                            workflow_id=transcript.workflow_run_id,
                        )
                        return None
                except ApiException:
                    # Workflow might be gone (404) or API issue - proceed with new workflow
                    pass
            workflow_id = await HatchetClientManager.start_workflow(
                workflow_name="DiarizationPipeline",
                input_data={
                    "recording_id": config.recording_id,
                    "tracks": [{"s3_key": k} for k in config.track_keys],
                    "bucket_name": config.bucket_name,
                    "transcript_id": config.transcript_id,
                    "room_id": config.room_id,
                },
                additional_metadata={
                    "transcript_id": config.transcript_id,
                    "recording_id": config.recording_id,
                    "daily_recording_id": config.recording_id,
                },
            )
            # The run id is only persisted when the transcript row was found
            if transcript:
                await transcripts_controller.update(
                    transcript, {"workflow_run_id": workflow_id}
                )
            logger.info("Hatchet workflow dispatched", workflow_id=workflow_id)
            return None
        # Celery pipeline (durable workflows disabled)
        return task_pipeline_multitrack_process.delay(
            transcript_id=config.transcript_id,
            bucket_name=config.bucket_name,
            track_keys=config.track_keys,
        )
    elif isinstance(config, FileProcessingConfig):
        return task_pipeline_file_process.delay(transcript_id=config.transcript_id)
    else:
        # Exhaustiveness guard for the ProcessingConfig union
        assert_never(config)
 def task_is_scheduled_or_active(task_name: str, **kwargs):
    """Report whether a Celery task with this name and kwargs is scheduled or active.

    Queries the current Celery app's workers through the inspect API and looks
    through both the scheduled (ETA/countdown) and the active task lists.

    Args:
        task_name: Fully qualified task name to look for.
        **kwargs: Keyword arguments the task must have been called with; they
            are compared for equality against the task's reported kwargs.

    Returns:
        True when at least one matching task is found, False otherwise
        (including when no worker replies, in which case inspect yields None).
    """
    inspect = celery.current_app.control.inspect()
    scheduled = inspect.scheduled() or {}
    active = inspect.active() or {}
    # Walk the two replies separately. Both are keyed by worker name, so a dict
    # union would replace a worker's scheduled list with its active list and
    # hide every scheduled task on that worker.
    for tasks_by_worker in (scheduled, active):
        for tasks in tasks_by_worker.values():
            for task in tasks:
                # Scheduled entries wrap the task description under "request"
                # (next to "eta"/"priority"); active entries are flat.
                request = task.get("request", task)
                if request.get("name") == task_name and request.get("kwargs") == kwargs:
                    return True
    return False
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -1,7 +1,7 @@
 from pydantic.types import PositiveInt
 from pydantic_settings import BaseSettings, SettingsConfigDict
-from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
+from reflector.platform_types import Platform
 from reflector.utils.string import NonEmptyString
@@ -48,17 +48,14 @@ class Settings(BaseSettings):
    TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
-    # Platform-specific recording storage (follows {PREFIX}_STORAGE_AWS_{CREDENTIAL} pattern)
+    # Recording storage
-    # Whereby storage configuration
+    RECORDING_STORAGE_BACKEND: str | None = None
    WHEREBY_STORAGE_AWS_BUCKET_NAME: str | None = None
    WHEREBY_STORAGE_AWS_REGION: str | None = None
    WHEREBY_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    WHEREBY_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
-    # Daily.co storage configuration
+    # Recording storage configuration for AWS
-    DAILYCO_STORAGE_AWS_BUCKET_NAME: str | None = None
+    RECORDING_STORAGE_AWS_BUCKET_NAME: str = "recording-bucket"
-    DAILYCO_STORAGE_AWS_REGION: str | None = None
+    RECORDING_STORAGE_AWS_REGION: str = "us-east-1"
-    DAILYCO_STORAGE_AWS_ROLE_ARN: str | None = None
+    RECORDING_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    RECORDING_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
    # Translate into the target language
    TRANSLATION_BACKEND: str = "passthrough"
@@ -74,13 +71,6 @@ class Settings(BaseSettings):
    LLM_API_KEY: str | None = None
    LLM_CONTEXT_WINDOW: int = 16000
    LLM_PARSE_MAX_RETRIES: int = (
        3  # Max retries for JSON/validation errors (total attempts = retries + 1)
    )
    LLM_STRUCTURED_RESPONSE_TIMEOUT: int = (
        300  # Timeout in seconds for structured responses (5 minutes)
    )
    # Diarization
    DIARIZATION_ENABLED: bool = True
    DIARIZATION_BACKEND: str = "modal"
@@ -135,6 +125,8 @@ class Settings(BaseSettings):
    WHEREBY_API_URL: str = "https://api.whereby.dev/v1"
    WHEREBY_API_KEY: NonEmptyString | None = None
    WHEREBY_WEBHOOK_SECRET: str | None = None
    AWS_WHEREBY_ACCESS_KEY_ID: str | None = None
    AWS_WHEREBY_ACCESS_KEY_SECRET: str | None = None
    AWS_PROCESS_RECORDING_QUEUE_URL: str | None = None
    SQS_POLLING_TIMEOUT_SECONDS: int = 60
@@ -142,30 +134,19 @@ class Settings(BaseSettings):
    DAILY_API_KEY: str | None = None
    DAILY_WEBHOOK_SECRET: str | None = None
    DAILY_SUBDOMAIN: str | None = None
-    DAILY_WEBHOOK_UUID: str | None = (
+    AWS_DAILY_S3_BUCKET: str | None = None
-        None  # Webhook UUID for this environment. Not used by production code
+    AWS_DAILY_S3_REGION: str = "us-west-2"
-    )
+    AWS_DAILY_ROLE_ARN: str | None = None
-    # Platform Configuration
+
-    DEFAULT_VIDEO_PLATFORM: Platform = WHEREBY_PLATFORM
+    # Platform Migration Feature Flags
    DAILY_MIGRATION_ENABLED: bool = False
    DAILY_MIGRATION_ROOM_IDS: list[str] = []
    DEFAULT_VIDEO_PLATFORM: Platform = "whereby"
    # Zulip integration
    ZULIP_REALM: str | None = None
    ZULIP_API_KEY: str | None = None
    ZULIP_BOT_EMAIL: str | None = None
    # Durable workflow orchestration
    # Provider: "hatchet" (or "none" to disable)
    DURABLE_WORKFLOW_PROVIDER: str = "none"
    # Hatchet workflow orchestration
    HATCHET_CLIENT_TOKEN: str | None = None
    HATCHET_CLIENT_TLS_STRATEGY: str = "none"  # none, tls, mtls
    HATCHET_DEBUG: bool = False
    @property
    def HATCHET_ENABLED(self) -> bool:
        """True if Hatchet is the active provider.

        Derived from DURABLE_WORKFLOW_PROVIDER: only the exact string "hatchet"
        enables it; any other value, including the default "none", does not.
        """
        return self.DURABLE_WORKFLOW_PROVIDER == "hatchet"
 settings = Settings()
--- a/server/reflector/storage/init.py
+++ b/server/reflector/storage/init.py
@@ -3,13 +3,6 @@ from reflector.settings import settings
 def get_transcripts_storage() -> Storage:
    """
    Get storage for processed transcript files (master credentials).
    Also use this for ALL our file operations with bucket override:
        master = get_transcripts_storage()
        master.delete_file(key, bucket=recording.bucket_name)
    """
    assert settings.TRANSCRIPT_STORAGE_BACKEND
    return Storage.get_instance(
        name=settings.TRANSCRIPT_STORAGE_BACKEND,
@@ -17,53 +10,8 @@ def get_transcripts_storage() -> Storage:
    )
-def get_whereby_storage() -> Storage:
+def get_recordings_storage() -> Storage:
    """
    Get storage config for Whereby (for passing to Whereby API).
    Usage:
        whereby_storage = get_whereby_storage()
        key_id, secret = whereby_storage.key_credentials
        whereby_api.create_meeting(
            bucket=whereby_storage.bucket_name,
            access_key_id=key_id,
            secret=secret,
        )
    Do NOT use for our file operations - use get_transcripts_storage() instead.
    """
    if not settings.WHEREBY_STORAGE_AWS_BUCKET_NAME:
        raise ValueError(
            "WHEREBY_STORAGE_AWS_BUCKET_NAME required for Whereby with AWS storage"
        )
    return Storage.get_instance(
-        name="aws",
+        name=settings.RECORDING_STORAGE_BACKEND,
-        settings_prefix="WHEREBY_STORAGE_",
+        settings_prefix="RECORDING_STORAGE_",
    )
 def get_dailyco_storage() -> Storage:
    """
    Build the storage object describing the Daily.co recording bucket.

    Its values are meant to be passed to the Daily API, e.g.:

        daily_storage = get_dailyco_storage()
        daily_api.create_meeting(
            bucket=daily_storage.bucket_name,
            region=daily_storage.region,
            role_arn=daily_storage.role_credential,
        )

    Do NOT use for our file operations - use get_transcripts_storage() instead.

    Raises:
        ValueError: if DAILYCO_STORAGE_AWS_BUCKET_NAME is not configured.
    """
    bucket_configured = bool(settings.DAILYCO_STORAGE_AWS_BUCKET_NAME)
    if bucket_configured:
        return Storage.get_instance(
            name="aws",
            settings_prefix="DAILYCO_STORAGE_",
        )
    # Fail fast if platform-specific config missing
    raise ValueError(
        "DAILYCO_STORAGE_AWS_BUCKET_NAME required for Daily.co with AWS storage"
    )
--- a/server/reflector/storage/base.py
+++ b/server/reflector/storage/base.py
@@ -1,23 +1,10 @@
 import importlib
 from typing import BinaryIO, Union
 from pydantic import BaseModel
 from reflector.settings import settings
 class StorageError(Exception):
    """Root of the storage exception hierarchy; adds nothing to Exception."""
 class StoragePermissionError(StorageError):
    """Storage operation rejected because of a permission problem."""
 class FileResult(BaseModel):
    filename: str
    url: str
@@ -49,113 +36,26 @@ class Storage:
        return cls._registry[name](**config)
-    # Credential properties for API passthrough
+    async def put_file(self, filename: str, data: bytes) -> FileResult:
-    @property
+        return await self._put_file(filename, data)
-    def bucket_name(self) -> str:
+
-        """Default bucket name for this storage instance."""
+    async def _put_file(self, filename: str, data: bytes) -> FileResult:
        raise NotImplementedError
-    @property
+    async def delete_file(self, filename: str):
-    def region(self) -> str:
+        return await self._delete_file(filename)
-        """AWS region for this storage instance."""
+
    async def _delete_file(self, filename: str):
        raise NotImplementedError
-    @property
+    async def get_file_url(self, filename: str) -> str:
-    def access_key_id(self) -> str | None:
+        return await self._get_file_url(filename)
        """AWS access key ID (None for role-based auth). Prefer key_credentials property."""
        return None
-    @property
+    async def _get_file_url(self, filename: str) -> str:
    def secret_access_key(self) -> str | None:
        """AWS secret access key (None for role-based auth). Prefer key_credentials property."""
        return None
    @property
    def role_arn(self) -> str | None:
        """AWS IAM role ARN for role-based auth (None for key-based auth). Prefer role_credential property.

        This base implementation always returns None.
        """
        return None
    @property
    def key_credentials(self) -> tuple[str, str]:
        """
        Get (access_key_id, secret_access_key) for key-based auth.
        Raises ValueError if storage uses IAM role instead.

        This base implementation only raises NotImplementedError; the ValueError
        contract is for overriding backends.
        """
        raise NotImplementedError
-    @property
+    async def get_file(self, filename: str):
-    def role_credential(self) -> str:
+        return await self._get_file(filename)
-        """
+
-        Get IAM role ARN for role-based auth.
+    async def _get_file(self, filename: str):
        Raises ValueError if storage uses access keys instead.
        """
        raise NotImplementedError
    async def put_file(
        self, filename: str, data: Union[bytes, BinaryIO], *, bucket: str | None = None
    ) -> FileResult:
        """Upload data. bucket: override instance default if provided.

        Public entry point; forwards its arguments unchanged to the backend
        hook _put_file and returns that hook's result.
        """
        return await self._put_file(filename, data, bucket=bucket)
    async def _put_file(
        self, filename: str, data: Union[bytes, BinaryIO], *, bucket: str | None = None
    ) -> FileResult:
        """Backend hook behind put_file; this base version only raises NotImplementedError."""
        raise NotImplementedError
    async def delete_file(self, filename: str, *, bucket: str | None = None):
        """Delete file. bucket: override instance default if provided.

        Public entry point; forwards to the backend hook _delete_file.
        """
        return await self._delete_file(filename, bucket=bucket)
    async def _delete_file(self, filename: str, *, bucket: str | None = None):
        """Backend hook behind delete_file; this base version only raises NotImplementedError."""
        raise NotImplementedError
    async def get_file_url(
        self,
        filename: str,
        operation: str = "get_object",
        expires_in: int = 3600,
        *,
        bucket: str | None = None,
    ) -> str:
        """Generate presigned URL. bucket: override instance default if provided.

        Public entry point; forwards filename, operation, expires_in and bucket
        to the backend hook _get_file_url. Defaults are operation "get_object"
        and expires_in 3600 (presumably seconds — confirm against the backend).
        """
        return await self._get_file_url(filename, operation, expires_in, bucket=bucket)
    async def _get_file_url(
        self,
        filename: str,
        operation: str = "get_object",
        expires_in: int = 3600,
        *,
        bucket: str | None = None,
    ) -> str:
        """Backend hook behind get_file_url; this base version only raises NotImplementedError."""
        raise NotImplementedError
    async def get_file(self, filename: str, *, bucket: str | None = None):
        """Download file. bucket: override instance default if provided.

        Public entry point; forwards to the backend hook _get_file and returns
        whatever that hook returns.
        """
        return await self._get_file(filename, bucket=bucket)
    async def _get_file(self, filename: str, *, bucket: str | None = None):
        """Backend hook behind get_file; this base version only raises NotImplementedError."""
        raise NotImplementedError
    async def list_objects(
        self, prefix: str = "", *, bucket: str | None = None
    ) -> list[str]:
        """List object keys. bucket: override instance default if provided.

        Public entry point; forwards to the backend hook _list_objects. The
        default prefix is the empty string.
        """
        return await self._list_objects(prefix, bucket=bucket)
    async def _list_objects(
        self, prefix: str = "", *, bucket: str | None = None
    ) -> list[str]:
        """Backend hook behind list_objects; this base version only raises NotImplementedError."""
        raise NotImplementedError
    async def stream_to_fileobj(
        self, filename: str, fileobj: BinaryIO, *, bucket: str | None = None
    ):
        """Stream file directly to file object without loading into memory.
        bucket: override instance default if provided.

        Public entry point; forwards to the backend hook _stream_to_fileobj.
        """
        return await self._stream_to_fileobj(filename, fileobj, bucket=bucket)
    async def _stream_to_fileobj(
        self, filename: str, fileobj: BinaryIO, *, bucket: str | None = None
    ):
        """Backend hook behind stream_to_fileobj; this base version only raises NotImplementedError."""
        raise NotImplementedError
--- a/server/reflector/storage/storage_aws.py
+++ b/server/reflector/storage/storage_aws.py
@@ -1,236 +1,79 @@
 from functools import wraps
 from typing import BinaryIO, Union
 import aioboto3
 from botocore.config import Config
 from botocore.exceptions import ClientError
 from reflector.logger import logger
-from reflector.storage.base import FileResult, Storage, StoragePermissionError
+from reflector.storage.base import FileResult, Storage
 def handle_s3_client_errors(operation_name: str):
    """Build a decorator that turns S3 permission failures into StoragePermissionError.

    The decorated coroutine method is awaited as-is. Only a ClientError whose
    error code is "AccessDenied" or "NoSuchBucket" is translated; the message
    says whether the bucket came from the ``bucket`` keyword argument or from
    the instance default. Any other ClientError is re-raised untouched.

    Args:
        operation_name: Human-readable operation name for error messages (e.g., "upload", "delete")
    """
    translated_codes = ("AccessDenied", "NoSuchBucket")

    def decorator(func):
        @wraps(func)
        async def wrapper(self, *args, **kwargs):
            # Only a keyword `bucket` counts as an override
            override = kwargs.get("bucket")
            try:
                return await func(self, *args, **kwargs)
            except ClientError as e:
                code = e.response.get("Error", {}).get("Code")
                if code not in translated_codes:
                    raise
                if override:
                    where = f"overridden bucket '{override}'"
                else:
                    where = f"default bucket '{self._bucket_name}'"
                raise StoragePermissionError(
                    f"S3 {operation_name} failed for {where}: {code}. "
                    f"Check TRANSCRIPT_STORAGE_AWS_* credentials have permission."
                ) from e

        return wrapper

    return decorator
 class AwsStorage(Storage):
    """AWS S3 storage with bucket override for multi-platform recording architecture.
    Master credentials access all buckets via optional bucket parameter in operations."""
    def __init__(
        self,
        aws_access_key_id: str,
        aws_secret_access_key: str,
        aws_bucket_name: str,
        aws_region: str,
        aws_access_key_id: str | None = None,
        aws_secret_access_key: str | None = None,
        aws_role_arn: str | None = None,
    ):
        if not aws_access_key_id:
            raise ValueError("Storage `aws_storage` require `aws_access_key_id`")
        if not aws_secret_access_key:
            raise ValueError("Storage `aws_storage` require `aws_secret_access_key`")
        if not aws_bucket_name:
            raise ValueError("Storage `aws_storage` require `aws_bucket_name`")
        if not aws_region:
            raise ValueError("Storage `aws_storage` require `aws_region`")
        if not aws_access_key_id and not aws_role_arn:
            raise ValueError(
                "Storage `aws_storage` require either `aws_access_key_id` or `aws_role_arn`"
            )
        if aws_role_arn and (aws_access_key_id or aws_secret_access_key):
            raise ValueError(
                "Storage `aws_storage` cannot use both `aws_role_arn` and access keys"
            )
        super().__init__()
-        self._bucket_name = aws_bucket_name
+        self.aws_bucket_name = aws_bucket_name
        self._region = aws_region
        self._access_key_id = aws_access_key_id
        self._secret_access_key = aws_secret_access_key
        self._role_arn = aws_role_arn
        self.aws_folder = ""
        if "/" in aws_bucket_name:
-            self._bucket_name, self.aws_folder = aws_bucket_name.split("/", 1)
+            self.aws_bucket_name, self.aws_folder = aws_bucket_name.split("/", 1)
        self.boto_config = Config(retries={"max_attempts": 3, "mode": "adaptive"})
        self.session = aioboto3.Session(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=aws_region,
        )
-        self.base_url = f"https://{self._bucket_name}.s3.amazonaws.com/"
+        self.base_url = f"https://{aws_bucket_name}.s3.amazonaws.com/"
-    # Implement credential properties
+    async def _put_file(self, filename: str, data: bytes) -> FileResult:
-    @property
+        bucket = self.aws_bucket_name
-    def bucket_name(self) -> str:
+        folder = self.aws_folder
-        return self._bucket_name
+        logger.info(f"Uploading {filename} to S3 {bucket}/{folder}")
-
+        s3filename = f"{folder}/{filename}" if folder else filename
-    @property
+        async with self.session.client("s3") as client:
-    def region(self) -> str:
+            await client.put_object(
-        return self._region
+                Bucket=bucket,
-
+                Key=s3filename,
-    @property
+                Body=data,
    def access_key_id(self) -> str | None:
        return self._access_key_id
    @property
    def secret_access_key(self) -> str | None:
        # Returns the secret access key exactly as it was passed to the constructor
        return self._secret_access_key
    @property
    def role_arn(self) -> str | None:
        # Returns the IAM role ARN exactly as it was passed to the constructor
        return self._role_arn
    @property
    def key_credentials(self) -> tuple[str, str]:
        """Return the (access_key_id, secret_access_key) pair for key-based auth.

        Raises:
            ValueError: if a role ARN is configured (role-based auth), or if
                either half of the key pair is missing.
        """
        if self._role_arn:
            raise ValueError(
                "Storage uses IAM role authentication. "
                "Use role_credential property instead of key_credentials."
            )
        key_id, secret = self._access_key_id, self._secret_access_key
        if not (key_id and secret):
            raise ValueError("Storage access key credentials not configured")
        return key_id, secret
-    @property
+    async def _get_file_url(self, filename: str) -> FileResult:
-    def role_credential(self) -> str:
+        bucket = self.aws_bucket_name
        """Get IAM role ARN for role-based auth."""
        if self._access_key_id or self._secret_access_key:
            raise ValueError(
                "Storage uses access key authentication. "
                "Use key_credentials property instead of role_credential."
            )
        if not self._role_arn:
            raise ValueError("Storage IAM role ARN not configured")
        return self._role_arn
    @handle_s3_client_errors("upload")
    async def _put_file(
        self, filename: str, data: Union[bytes, BinaryIO], *, bucket: str | None = None
    ) -> FileResult:
        actual_bucket = bucket or self._bucket_name
        folder = self.aws_folder
        s3filename = f"{folder}/{filename}" if folder else filename
-        logger.info(f"Uploading {filename} to S3 {actual_bucket}/{folder}")
+        async with self.session.client("s3") as client:
        async with self.session.client("s3", config=self.boto_config) as client:
            if isinstance(data, bytes):
                await client.put_object(Bucket=actual_bucket, Key=s3filename, Body=data)
            else:
                # boto3 reads file-like object in chunks
                # avoids creating extra memory copy vs bytes.getvalue() approach
                await client.upload_fileobj(data, Bucket=actual_bucket, Key=s3filename)
        url = await self._get_file_url(filename, bucket=bucket)
        return FileResult(filename=filename, url=url)
    @handle_s3_client_errors("presign")
    async def _get_file_url(
        self,
        filename: str,
        operation: str = "get_object",
        expires_in: int = 3600,
        *,
        bucket: str | None = None,
    ) -> str:
        actual_bucket = bucket or self._bucket_name
        folder = self.aws_folder
        s3filename = f"{folder}/{filename}" if folder else filename
        async with self.session.client("s3", config=self.boto_config) as client:
            presigned_url = await client.generate_presigned_url(
-                operation,
+                "get_object",
-                Params={"Bucket": actual_bucket, "Key": s3filename},
+                Params={"Bucket": bucket, "Key": s3filename},
-                ExpiresIn=expires_in,
+                ExpiresIn=3600,
            )
            return presigned_url
-    @handle_s3_client_errors("delete")
+    async def _delete_file(self, filename: str):
-    async def _delete_file(self, filename: str, *, bucket: str | None = None):
+        bucket = self.aws_bucket_name
        actual_bucket = bucket or self._bucket_name
        folder = self.aws_folder
-        logger.info(f"Deleting {filename} from S3 {actual_bucket}/{folder}")
+        logger.info(f"Deleting {filename} from S3 {bucket}/{folder}")
        s3filename = f"{folder}/{filename}" if folder else filename
-        async with self.session.client("s3", config=self.boto_config) as client:
+        async with self.session.client("s3") as client:
-            await client.delete_object(Bucket=actual_bucket, Key=s3filename)
+            await client.delete_object(Bucket=bucket, Key=s3filename)
-    @handle_s3_client_errors("download")
+    async def _get_file(self, filename: str):
-    async def _get_file(self, filename: str, *, bucket: str | None = None):
+        bucket = self.aws_bucket_name
        actual_bucket = bucket or self._bucket_name
        folder = self.aws_folder
-        logger.info(f"Downloading {filename} from S3 {actual_bucket}/{folder}")
+        logger.info(f"Downloading {filename} from S3 {bucket}/{folder}")
        s3filename = f"{folder}/{filename}" if folder else filename
-        async with self.session.client("s3", config=self.boto_config) as client:
+        async with self.session.client("s3") as client:
-            response = await client.get_object(Bucket=actual_bucket, Key=s3filename)
+            response = await client.get_object(Bucket=bucket, Key=s3filename)
            return await response["Body"].read()
    @handle_s3_client_errors("list_objects")
    async def _list_objects(
        self, prefix: str = "", *, bucket: str | None = None
    ) -> list[str]:
        """List object keys under ``prefix``, relative to the configured folder.

        The listing is paginated with list_objects_v2. When the storage has a
        folder, that folder is prepended to the prefix for the request and
        stripped from the returned keys; a key equal to the folder itself is
        skipped.
        """
        target_bucket = bucket or self._bucket_name
        base = self.aws_folder
        # Combine folder and prefix
        full_prefix = f"{base}/{prefix}" if base else prefix
        logger.info(f"Listing objects from S3 {target_bucket} with prefix '{full_prefix}'")
        folder_slash = f"{base}/"
        found = []
        async with self.session.client("s3", config=self.boto_config) as client:
            pages = client.get_paginator("list_objects_v2").paginate(
                Bucket=target_bucket, Prefix=full_prefix
            )
            async for page in pages:
                if "Contents" not in page:
                    continue
                for entry in page["Contents"]:
                    name = entry["Key"]
                    if not base:
                        found.append(name)
                    elif name.startswith(folder_slash):
                        # Strip folder prefix from keys if present
                        found.append(name[len(folder_slash) :])
                    elif name != base:
                        # Only the folder marker itself is dropped
                        found.append(name)
        return found
    @handle_s3_client_errors("stream")
    async def _stream_to_fileobj(
        self, filename: str, fileobj: BinaryIO, *, bucket: str | None = None
    ):
        """Download an S3 object straight into ``fileobj`` via download_fileobj.

        The object key is ``filename`` prefixed with the configured folder, if
        any; ``bucket`` overrides the instance's default bucket when given.
        """
        target_bucket = bucket or self._bucket_name
        prefix = self.aws_folder
        logger.info(f"Streaming {filename} from S3 {target_bucket}/{prefix}")
        object_key = filename if not prefix else f"{prefix}/{filename}"
        async with self.session.client("s3", config=self.boto_config) as s3:
            await s3.download_fileobj(
                Bucket=target_bucket, Key=object_key, Fileobj=fileobj
            )
 Storage.register("aws", AwsStorage)
--- a/server/reflector/tools/cli_multitrack.py
+++ b/server/reflector/tools/cli_multitrack.py
@@ -1,347 +0,0 @@
 import asyncio
 import sys
 import time
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Protocol
 import structlog
 from celery.result import AsyncResult
 from reflector.db import get_database
 from reflector.db.transcripts import SourceKind, Transcript, transcripts_controller
 from reflector.pipelines.main_multitrack_pipeline import (
    task_pipeline_multitrack_process,
 )
 from reflector.storage import get_transcripts_storage
 from reflector.tools.process import (
    extract_result_from_entry,
    parse_s3_url,
    validate_s3_objects,
 )
 logger = structlog.get_logger(__name__)
 # Default for the timeout_seconds parameter of wait_for_task / process_multitrack
 DEFAULT_PROCESSING_TIMEOUT_SECONDS = 3600
 # Default for max_error_length in update_transcript_status (longer errors are truncated)
 MAX_ERROR_MESSAGE_LENGTH = 500
 # Default for the poll_interval parameter of wait_for_task
 TASK_POLL_INTERVAL_SECONDS = 2
 class StatusCallback(Protocol):
    """Callable receiving a task state string and the elapsed time in whole seconds."""
    def __call__(self, state: str, elapsed_seconds: int) -> None: ...
@dataclass
 class MultitrackTaskResult:
    """Outcome of waiting for a multitrack processing task."""
    # False when the awaited task reported failure, True otherwise
    success: bool
    # ID of the transcript the task was processing
    transcript_id: str
    # Error text (optionally including a traceback) when success is False
    error: Optional[str] = None
 async def create_multitrack_transcript(
    bucket_name: str,
    track_keys: List[str],
    source_language: str,
    target_language: str,
    user_id: Optional[str] = None,
 ) -> Transcript:
    """Create the transcript entry that a multitrack run will fill in.
    The transcript is named after the number of tracks and stored with
    ``SourceKind.FILE``. ``bucket_name`` is only used for logging here.
    """
    track_count = len(track_keys)
    noun = "tracks" if track_count != 1 else "track"
    title = f"Multitrack ({track_count} {noun})"
    created = await transcripts_controller.add(
        title,
        source_kind=SourceKind.FILE,
        source_language=source_language,
        target_language=target_language,
        user_id=user_id,
    )
    logger.info(
        "Created multitrack transcript",
        transcript_id=created.id,
        name=title,
        bucket=bucket_name,
        num_tracks=track_count,
    )
    return created
 def submit_multitrack_task(
    transcript_id: str, bucket_name: str, track_keys: List[str]
 ) -> AsyncResult:
    """Queue the multitrack pipeline task and return its ``AsyncResult`` handle."""
    task_kwargs = {
        "transcript_id": transcript_id,
        "bucket_name": bucket_name,
        "track_keys": track_keys,
    }
    async_result = task_pipeline_multitrack_process.delay(**task_kwargs)
    logger.info(
        "Multitrack task submitted",
        transcript_id=transcript_id,
        task_id=async_result.id,
        bucket=bucket_name,
        num_tracks=len(track_keys),
    )
    return async_result
 async def wait_for_task(
    result: AsyncResult,
    transcript_id: str,
    timeout_seconds: int = DEFAULT_PROCESSING_TIMEOUT_SECONDS,
    poll_interval: int = TASK_POLL_INTERVAL_SECONDS,
    status_callback: Optional[StatusCallback] = None,
 ) -> MultitrackTaskResult:
    """Poll a submitted task until it is ready, failed, or the timeout expires.
    Args:
        result: Handle of the submitted task.
        transcript_id: Transcript the task works on (used for logs and result).
        timeout_seconds: Maximum time to wait before giving up.
        poll_interval: Seconds to sleep between two readiness checks.
        status_callback: Optional callable invoked each time the task state
            changes, with the new state and the elapsed whole seconds.
    Returns:
        A MultitrackTaskResult; ``success`` is False and ``error`` is set when
        the task failed.
    Raises:
        TimeoutError: If the task is not ready within ``timeout_seconds``.
    """
    # Monotonic clock: the timeout must not be affected by wall-clock adjustments.
    start_time = time.monotonic()
    last_status = None
    while not result.ready():
        elapsed = time.monotonic() - start_time
        if elapsed > timeout_seconds:
            error_msg = (
                f"Task {result.id} did not complete within {timeout_seconds}s "
                f"for transcript {transcript_id}"
            )
            logger.error(
                "Task timeout",
                task_id=result.id,
                transcript_id=transcript_id,
                elapsed_seconds=elapsed,
            )
            raise TimeoutError(error_msg)
        # Read the state once per iteration so the value compared, reported
        # and remembered is the same one.
        current_state = result.state
        if current_state != last_status:
            if status_callback:
                status_callback(current_state, int(elapsed))
            last_status = current_state
        await asyncio.sleep(poll_interval)
    if result.failed():
        error_info = result.info
        traceback_info = getattr(result, "traceback", None)
        logger.error(
            "Multitrack task failed",
            transcript_id=transcript_id,
            task_id=result.id,
            error=str(error_info),
            has_traceback=bool(traceback_info),
        )
        error_detail = str(error_info)
        if traceback_info:
            error_detail += f"\nTraceback:\n{traceback_info}"
        return MultitrackTaskResult(
            success=False, transcript_id=transcript_id, error=error_detail
        )
    logger.info(
        "Multitrack task completed",
        transcript_id=transcript_id,
        task_id=result.id,
        state=result.state,
    )
    return MultitrackTaskResult(success=True, transcript_id=transcript_id)
 async def update_transcript_status(
    transcript_id: str,
    status: str,
    error: Optional[str] = None,
    max_error_length: int = MAX_ERROR_MESSAGE_LENGTH,
 ) -> None:
    """Best-effort update of a transcript's status (and optional error text).
    Opens its own database connection, updates the transcript if it exists,
    and always disconnects afterwards. Failures are logged as warnings and
    never raised to the caller.
    Args:
        transcript_id: ID of the transcript to update.
        status: New status value to store.
        error: Optional error text; truncated to ``max_error_length`` characters.
        max_error_length: Maximum stored length of ``error``, ellipsis included.
    """
    database = get_database()
    # Tracks whether connect() succeeded so that disconnect() is only attempted then
    connected = False
    try:
        await database.connect()
        connected = True
        transcript = await transcripts_controller.get_by_id(transcript_id)
        # A missing transcript is silently ignored (nothing is logged)
        if transcript:
            update_data: Dict[str, Any] = {"status": status}
            if error:
                if len(error) > max_error_length:
                    # Keep room for the "..." suffix so the result is max_error_length long
                    # NOTE(review): assumes max_error_length >= 3 — confirm callers
                    error = error[: max_error_length - 3] + "..."
                update_data["error"] = error
            await transcripts_controller.update(transcript, update_data)
            logger.info(
                "Updated transcript status",
                transcript_id=transcript_id,
                status=status,
                has_error=bool(error),
            )
    except Exception as e:
        # Deliberately swallowed: a failed status update is only logged
        logger.warning(
            "Failed to update transcript status",
            transcript_id=transcript_id,
            error=str(e),
        )
    finally:
        if connected:
            try:
                await database.disconnect()
            except Exception as e:
                logger.warning(f"Database disconnect failed: {e}")
 async def process_multitrack(
    bucket_name: str,
    track_keys: List[str],
    source_language: str,
    target_language: str,
    user_id: Optional[str] = None,
    timeout_seconds: int = DEFAULT_PROCESSING_TIMEOUT_SECONDS,
    status_callback: Optional[StatusCallback] = None,
 ) -> MultitrackTaskResult:
    """High-level orchestration for multitrack processing.
    Creates the transcript and submits the pipeline task while connected to
    the database, then polls the task outside of that connection.
    Args:
        bucket_name: Bucket holding all tracks.
        track_keys: Object keys of the tracks inside ``bucket_name``.
        source_language: Source language code for the transcript.
        target_language: Target language code for the transcript.
        user_id: Optional owner of the created transcript.
        timeout_seconds: Maximum time to wait for the task.
        status_callback: Optional callable notified of task state changes.
    Returns:
        The MultitrackTaskResult produced by ``wait_for_task``.
    Raises:
        Exception: Any error raised while creating the transcript or
            submitting the task is re-raised after a best-effort attempt to
            mark the transcript as failed.
        TimeoutError: Propagated from ``wait_for_task``.
    """
    database = get_database()
    transcript = None
    connected = False
    try:
        await database.connect()
        connected = True
        transcript = await create_multitrack_transcript(
            bucket_name=bucket_name,
            track_keys=track_keys,
            source_language=source_language,
            target_language=target_language,
            user_id=user_id,
        )
        result = submit_multitrack_task(
            transcript_id=transcript.id, bucket_name=bucket_name, track_keys=track_keys
        )
    except Exception as e:
        # Only a transcript that was actually created can be marked as failed
        if transcript:
            try:
                await update_transcript_status(
                    transcript_id=transcript.id, status="failed", error=str(e)
                )
            except Exception as update_error:
                logger.error(
                    "Failed to update transcript status after error",
                    original_error=str(e),
                    update_error=str(update_error),
                    transcript_id=transcript.id,
                )
        raise
    finally:
        if connected:
            try:
                await database.disconnect()
            except Exception as e:
                logger.warning(f"Database disconnect failed: {e}")
    # Poll outside database connection
    # NOTE(review): a TimeoutError raised here propagates without updating the
    # transcript status — confirm this is intended.
    task_result = await wait_for_task(
        result=result,
        transcript_id=transcript.id,
        timeout_seconds=timeout_seconds,
        poll_interval=TASK_POLL_INTERVAL_SECONDS,
        status_callback=status_callback,
    )
    if not task_result.success:
        await update_transcript_status(
            transcript_id=transcript.id, status="failed", error=task_result.error
        )
    return task_result
 def print_progress(message: str) -> None:
    """Write a progress line to stderr so it is visible when running the CLI."""
    progress_line = f"{message}"
    print(progress_line, file=sys.stderr)
 def create_status_callback() -> StatusCallback:
    """Build the callback that reports task state changes while polling."""
    def report_status(state: str, elapsed_seconds: int) -> None:
        status_line = (
            f"Multitrack pipeline status: {state} (elapsed: {elapsed_seconds}s)"
        )
        print_progress(status_line)
    return report_status
 async def process_multitrack_cli(
    s3_urls: List[str],
    source_language: str,
    target_language: str,
    output_path: Optional[str] = None,
 ) -> None:
    """CLI entry point: process several S3-hosted tracks as one transcript.
    Parses and validates the URLs, checks that every object exists, runs the
    multitrack pipeline and finally extracts the result to ``output_path``.
    Args:
        s3_urls: URLs of the tracks; all must live in the same bucket.
        source_language: Source language code.
        target_language: Target language code.
        output_path: Destination passed to ``extract_result_from_entry``.
    Raises:
        ValueError: If no URL is given, a URL is invalid, or the tracks span
            more than one bucket.
        RuntimeError: If the pipeline reports failure.
    """
    if not s3_urls:
        raise ValueError("At least one track required for multitrack processing")
    bucket_keys = []
    for url in s3_urls:
        try:
            bucket, key = parse_s3_url(url)
            bucket_keys.append((bucket, key))
        except ValueError as e:
            raise ValueError(f"Invalid S3 URL '{url}': {e}") from e
    buckets = {bucket for bucket, _ in bucket_keys}
    if len(buckets) > 1:
        raise ValueError(
            f"All tracks must be in the same S3 bucket. "
            f"Found {len(buckets)} different buckets: {sorted(buckets)}. "
            f"Please upload all files to a single bucket."
        )
    primary_bucket = bucket_keys[0][0]
    track_keys = [key for _, key in bucket_keys]
    print_progress(
        f"Starting multitrack CLI processing: "
        f"bucket={primary_bucket}, num_tracks={len(track_keys)}, "
        f"source_language={source_language}, target_language={target_language}"
    )
    storage = get_transcripts_storage()
    await validate_s3_objects(storage, bucket_keys)
    print_progress(f"S3 validation complete: {len(bucket_keys)} objects verified")
    result = await process_multitrack(
        bucket_name=primary_bucket,
        track_keys=track_keys,
        source_language=source_language,
        target_language=target_language,
        user_id=None,
        # Use the shared default instead of repeating the literal
        timeout_seconds=DEFAULT_PROCESSING_TIMEOUT_SECONDS,
        status_callback=create_status_callback(),
    )
    if not result.success:
        error_msg = (
            f"Multitrack pipeline failed for transcript {result.transcript_id}\n"
        )
        if result.error:
            error_msg += f"Error: {result.error}\n"
        raise RuntimeError(error_msg)
    print_progress(
        f"Multitrack processing complete for transcript {result.transcript_id}"
    )
    # Result extraction needs its own database connection
    database = get_database()
    await database.connect()
    try:
        await extract_result_from_entry(result.transcript_id, output_path)
    finally:
        await database.disconnect()
--- a/server/reflector/tools/process.py
+++ b/server/reflector/tools/process.py
@@ -9,10 +9,7 @@ import shutil
 import sys
 import time
 from pathlib import Path
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, Dict, List, Literal
 from urllib.parse import unquote, urlparse
 from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
 from reflector.db.transcripts import SourceKind, TranscriptTopic, transcripts_controller
 from reflector.logger import logger
@@ -23,119 +20,10 @@ from reflector.pipelines.main_live_pipeline import pipeline_post as live_pipelin
 from reflector.pipelines.main_live_pipeline import (
    pipeline_process as live_pipeline_process,
 )
 from reflector.storage import Storage
 def validate_s3_bucket_name(bucket: str) -> None:
    """Reject empty bucket names and names longer than 255 characters.
    Raises:
        ValueError: If ``bucket`` is empty or too long.
    """
    if bucket:
        name_length = len(bucket)
        # Absolute max for any region
        if name_length > 255:
            raise ValueError(f"Bucket name too long: {name_length} characters (max 255)")
        return
    raise ValueError("Bucket name cannot be empty")
 def validate_s3_key(key: str) -> None:
    """Reject empty object keys and keys longer than 1024 characters.
    Raises:
        ValueError: If ``key`` is empty or too long.
    """
    if key:
        key_length = len(key)
        if key_length > 1024:
            raise ValueError(f"S3 key too long: {key_length} characters (max 1024)")
        return
    raise ValueError("S3 key cannot be empty")
 def parse_s3_url(url: str) -> Tuple[str, str]:
    """Split an S3 URL into ``(bucket, key)``.
    Recognised forms:
        - ``s3://bucket/key``
        - virtual-hosted style, any http(s) host containing ``.s3.``
          (e.g. ``https://bucket.s3.amazonaws.com/key``)
        - path style, an http(s) host starting with ``s3.`` and containing
          ``amazonaws.com`` (e.g. ``https://s3.amazonaws.com/bucket/key``)
    Bucket and key are percent-decoded and validated before being returned.
    URL fragments are ignored (and logged at debug level).
    Raises:
        ValueError: If the URL is not a recognised S3 URL, lacks a bucket or
            key, or the bucket/key fail validation.
    """
    parsed = urlparse(url)
    if parsed.scheme == "s3":
        bucket = parsed.netloc
        key = parsed.path.lstrip("/")
        if parsed.fragment:
            logger.debug(
                "URL fragment ignored (not part of S3 key)",
                url=url,
                fragment=parsed.fragment,
            )
        if not bucket or not key:
            raise ValueError(f"Invalid S3 URL: {url} (missing bucket or key)")
        bucket = unquote(bucket)
        key = unquote(key)
        validate_s3_bucket_name(bucket)
        validate_s3_key(key)
        return bucket, key
    elif parsed.scheme in ("http", "https"):
        if ".s3." in parsed.netloc:
            # Virtual-hosted style: the bucket is everything before the last
            # ".s3." label. Bucket names may contain dots, so taking only the
            # first host label would truncate them.
            bucket = parsed.netloc.rpartition(".s3.")[0]
            key = parsed.path.lstrip("/")
            if parsed.fragment:
                logger.debug("URL fragment ignored", url=url, fragment=parsed.fragment)
            if not bucket or not key:
                raise ValueError(f"Invalid S3 URL: {url} (missing bucket or key)")
            bucket = unquote(bucket)
            key = unquote(key)
            validate_s3_bucket_name(bucket)
            validate_s3_key(key)
            return bucket, key
        elif parsed.netloc.startswith("s3.") and "amazonaws.com" in parsed.netloc:
            # Path style: the first path segment is the bucket, the rest is the key
            path_parts = parsed.path.lstrip("/").split("/", 1)
            if len(path_parts) != 2:
                raise ValueError(f"Invalid S3 URL: {url} (missing bucket or key)")
            bucket, key = path_parts
            if parsed.fragment:
                logger.debug("URL fragment ignored", url=url, fragment=parsed.fragment)
            bucket = unquote(bucket)
            key = unquote(key)
            validate_s3_bucket_name(bucket)
            validate_s3_key(key)
            return bucket, key
        else:
            raise ValueError(f"Invalid S3 URL format: {url} (not recognized as S3 URL)")
    else:
        raise ValueError(f"Invalid S3 URL scheme: {url} (must be s3:// or https://)")
 async def validate_s3_objects(
    storage: Storage, bucket_keys: List[Tuple[str, str]]
 ) -> None:
    """Check that every ``(bucket, key)`` pair refers to an accessible S3 object.
    Issues a ``head_object`` request per pair, all run concurrently over a
    single client obtained from ``storage.session``.
    Raises:
        ValueError: For any object that is missing, forbidden, or cannot be
            checked; the original exception is attached as the cause.
    """
    async with storage.session.client("s3") as client:
        async def check_object(bucket: str, key: str) -> None:
            # Translate every failure mode of head_object into a ValueError
            try:
                await client.head_object(Bucket=bucket, Key=key)
            except ClientError as e:
                error_code = e.response["Error"]["Code"]
                if error_code in ("404", "NoSuchKey"):
                    raise ValueError(f"S3 object not found: s3://{bucket}/{key}") from e
                elif error_code in ("403", "Forbidden", "AccessDenied"):
                    raise ValueError(
                        f"Access denied for S3 object: s3://{bucket}/{key}. "
                        f"Check AWS credentials and permissions"
                    ) from e
                else:
                    raise ValueError(
                        f"S3 error {error_code} for s3://{bucket}/{key}: "
                        f"{e.response['Error'].get('Message', 'Unknown error')}"
                    ) from e
            # Listed before BotoCoreError so the credentials-specific message is used
            except NoCredentialsError as e:
                raise ValueError(
                    "AWS credentials not configured. Set AWS_ACCESS_KEY_ID and "
                    "AWS_SECRET_ACCESS_KEY environment variables"
                ) from e
            except BotoCoreError as e:
                raise ValueError(
                    f"AWS service error for s3://{bucket}/{key}: {str(e)}"
                ) from e
            except Exception as e:
                raise ValueError(
                    f"Unexpected error validating s3://{bucket}/{key}: {str(e)}"
                ) from e
        # Run all checks concurrently; the first ValueError raised propagates to the caller
        await asyncio.gather(
            *(check_object(bucket, key) for bucket, key in bucket_keys)
        )
 def serialize_topics(topics: List[TranscriptTopic]) -> List[Dict[str, Any]]:
    """Convert TranscriptTopic objects to JSON-serializable dicts"""
    serialized = []
    for topic in topics:
        topic_dict = topic.model_dump()
@@ -144,6 +32,7 @@ def serialize_topics(topics: List[TranscriptTopic]) -> List[Dict[str, Any]]:
 def debug_print_speakers(serialized_topics: List[Dict[str, Any]]) -> None:
    """Print debug info about speakers found in topics"""
    all_speakers = set()
    for topic_dict in serialized_topics:
        for word in topic_dict.get("words", []):
@@ -158,6 +47,8 @@ def debug_print_speakers(serialized_topics: List[Dict[str, Any]]) -> None:
 TranscriptId = str
 # common interface for every flow: it needs an Entry in db with specific ceremony (file path + status + actual file in file system)
 # ideally we want to get rid of it at some point
 async def prepare_entry(
    source_path: str,
    source_language: str,
@@ -174,7 +65,9 @@ async def prepare_entry(
        user_id=None,
    )
-    logger.info(f"Created transcript {transcript.id} for {file_path.name}")
+    logger.info(
        f"Created empty transcript {transcript.id} for file {file_path.name} because technically we need an empty transcript before we start transcript"
    )
    # pipelines expect files as upload.*
@@ -190,6 +83,7 @@ async def prepare_entry(
    return transcript.id
 # same reason as prepare_entry
 async def extract_result_from_entry(
    transcript_id: TranscriptId, output_path: str
 ) -> None:
@@ -299,20 +193,13 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Process audio files with speaker diarization"
    )
-    parser.add_argument(
+    parser.add_argument("source", help="Source file (mp3, wav, mp4...)")
        "source",
        help="Source file (mp3, wav, mp4...) or comma-separated S3 URLs with --multitrack",
    )
    parser.add_argument(
        "--pipeline",
        required=True,
        choices=["live", "file"],
        help="Pipeline type to use for processing (live: streaming/incremental, file: batch/parallel)",
    )
    parser.add_argument(
        "--multitrack",
        action="store_true",
        help="Process multiple audio tracks from comma-separated S3 URLs",
    )
    parser.add_argument(
        "--source-language", default="en", help="Source language code (default: en)"
    )
@@ -322,34 +209,6 @@ if __name__ == "__main__":
    parser.add_argument("--output", "-o", help="Output file (output.jsonl)")
    args = parser.parse_args()
    if args.multitrack:
        if not args.source:
            parser.error("Source URLs required for multitrack processing")
        s3_urls = [url.strip() for url in args.source.split(",") if url.strip()]
        if not s3_urls:
            parser.error("At least one S3 URL required for multitrack processing")
        from reflector.tools.cli_multitrack import process_multitrack_cli
        asyncio.run(
            process_multitrack_cli(
                s3_urls,
                args.source_language,
                args.target_language,
                args.output,
            )
        )
    else:
        if not args.pipeline:
            parser.error("--pipeline is required for single-track processing")
        if "," in args.source:
            parser.error(
                "Multiple files detected. Use --multitrack flag for multitrack processing"
            )
    asyncio.run(
        process(
            args.source,
--- a/server/reflector/tools/process_transcript.py
+++ b/server/reflector/tools/process_transcript.py
@@ -1,171 +0,0 @@
 """
 Process transcript by ID - auto-detects multitrack vs file pipeline.
 Usage:
    uv run -m reflector.tools.process_transcript <transcript_id>
    # Or via docker:
    docker compose exec server uv run -m reflector.tools.process_transcript <transcript_id>
 """
 import argparse
 import asyncio
 import sys
 import time
 from typing import Callable
 from celery.result import AsyncResult
 from hatchet_sdk.clients.rest.models import V1TaskStatus
 from reflector.db import get_database
 from reflector.db.transcripts import Transcript, transcripts_controller
 from reflector.hatchet.client import HatchetClientManager
 from reflector.services.transcript_process import (
    FileProcessingConfig,
    MultitrackProcessingConfig,
    PrepareResult,
    ProcessError,
    ValidationError,
    ValidationResult,
    dispatch_transcript_processing,
    prepare_transcript_processing,
    validate_transcript_for_processing,
 )
 async def process_transcript_inner(
    transcript: Transcript,
    on_validation: Callable[[ValidationResult], None],
    on_preprocess: Callable[[PrepareResult], None],
    force: bool = False,
 ) -> AsyncResult | None:
    """Validate, prepare and dispatch processing for ``transcript``.
    ``on_validation`` is called with the validation result and
    ``on_preprocess`` with the prepared configuration, each before the next
    step runs. Returns whatever ``dispatch_transcript_processing`` returns.
    """
    validation_outcome = await validate_transcript_for_processing(transcript)
    on_validation(validation_outcome)
    prepared = await prepare_transcript_processing(validation_outcome)
    on_preprocess(prepared)
    dispatched = await dispatch_transcript_processing(prepared, force=force)
    return dispatched
 async def process_transcript(
    transcript_id: str, sync: bool = False, force: bool = False
 ) -> None:
    """
    Process a transcript by ID, auto-detecting multitrack vs file pipeline.
    Progress and errors are printed to stderr; the process exits with status 1
    when the transcript is missing, validation/preparation fails, or (with
    ``sync``) the task or workflow does not complete successfully.
    Args:
        transcript_id: The transcript UUID
        sync: If True, wait for task completion. If False, dispatch and exit.
        force: If True, cancel old workflow and start new (latest code). If False, replay failed workflow.
    """
    database = get_database()
    await database.connect()
    try:
        transcript = await transcripts_controller.get_by_id(transcript_id)
        if not transcript:
            print(f"Error: Transcript {transcript_id} not found", file=sys.stderr)
            sys.exit(1)
        print(f"Found transcript: {transcript.title or transcript_id}", file=sys.stderr)
        print(f"  Status: {transcript.status}", file=sys.stderr)
        print(f"  Recording ID: {transcript.recording_id or 'None'}", file=sys.stderr)
        def on_validation(validation: ValidationResult) -> None:
            if isinstance(validation, ValidationError):
                print(f"Error: {validation.detail}", file=sys.stderr)
                sys.exit(1)
        def on_preprocess(config: PrepareResult) -> None:
            if isinstance(config, ProcessError):
                print(f"Error: {config.detail}", file=sys.stderr)
                sys.exit(1)
            elif isinstance(config, MultitrackProcessingConfig):
                print("Dispatching multitrack pipeline", file=sys.stderr)
                print(f"  Bucket: {config.bucket_name}", file=sys.stderr)
                print(f"  Tracks: {len(config.track_keys)}", file=sys.stderr)
            elif isinstance(config, FileProcessingConfig):
                print("Dispatching file pipeline", file=sys.stderr)
        result = await process_transcript_inner(
            transcript,
            on_validation=on_validation,
            on_preprocess=on_preprocess,
            force=force,
        )
        if result is None:
            # Hatchet workflow dispatched
            if sync:
                # Re-fetch transcript to get workflow_run_id
                transcript = await transcripts_controller.get_by_id(transcript_id)
                if not transcript or not transcript.workflow_run_id:
                    print("Error: workflow_run_id not found", file=sys.stderr)
                    sys.exit(1)
                print("Waiting for Hatchet workflow...", file=sys.stderr)
                while True:
                    status = await HatchetClientManager.get_workflow_run_status(
                        transcript.workflow_run_id
                    )
                    print(f"  Status: {status.value}", file=sys.stderr)
                    if status == V1TaskStatus.COMPLETED:
                        print("Workflow completed successfully", file=sys.stderr)
                        break
                    elif status in (V1TaskStatus.FAILED, V1TaskStatus.CANCELLED):
                        print(f"Workflow failed: {status}", file=sys.stderr)
                        sys.exit(1)
                    await asyncio.sleep(5)
            else:
                print(
                    "Task dispatched (use --sync to wait for completion)",
                    file=sys.stderr,
                )
        elif sync:
            print("Waiting for task completion...", file=sys.stderr)
            while not result.ready():
                print(f"  Status: {result.state}", file=sys.stderr)
                # Yield to the event loop while waiting instead of blocking it
                await asyncio.sleep(5)
            if result.successful():
                print("Task completed successfully", file=sys.stderr)
            else:
                print(f"Task failed: {result.result}", file=sys.stderr)
                sys.exit(1)
        else:
            print(
                "Task dispatched (use --sync to wait for completion)", file=sys.stderr
            )
    finally:
        # Runs on every exit path, including the sys.exit() calls above
        await database.disconnect()
 def main():
    """Parse the command line and run ``process_transcript`` to completion."""
    cli = argparse.ArgumentParser(
        description="Process transcript by ID - auto-detects multitrack vs file pipeline"
    )
    cli.add_argument("transcript_id", help="Transcript UUID to process")
    cli.add_argument(
        "--sync",
        action="store_true",
        help="Wait for task completion instead of just dispatching",
    )
    cli.add_argument(
        "--force",
        action="store_true",
        help="Cancel old workflow and start new (uses latest code instead of replaying)",
    )
    options = cli.parse_args()
    job = process_transcript(
        options.transcript_id, sync=options.sync, force=options.force
    )
    asyncio.run(job)
 # Run the CLI only when this module is executed as a script
 if __name__ == "__main__":
    main()
--- a/server/reflector/utils/audio_constants.py
+++ b/server/reflector/utils/audio_constants.py
@@ -1,15 +0,0 @@
 """
 Shared audio processing constants.
 Used by both Hatchet workflows and Celery pipelines for consistent audio encoding.
 """
 # Opus codec settings
 OPUS_STANDARD_SAMPLE_RATE = 48000  # presumably in Hz (48 kHz) — confirm against encoder usage
 OPUS_DEFAULT_BIT_RATE = 128000  # 128kbps for good speech quality
 # S3 presigned URL expiration
 PRESIGNED_URL_EXPIRATION_SECONDS = 7200  # 2 hours
 # Waveform visualization
 # NOTE(review): presumably the number of segments a waveform is reduced to — confirm with consumers
 WAVEFORM_SEGMENTS = 255
--- a/server/reflector/utils/audio_mixdown.py
+++ b/server/reflector/utils/audio_mixdown.py
@@ -1,227 +0,0 @@
 """
 Audio track mixdown utilities.
 Shared PyAV-based functions for mixing multiple audio tracks into a single output.
 Used by both Hatchet workflows and Celery pipelines.
 """
 from fractions import Fraction
 import av
 from av.audio.resampler import AudioResampler
 def detect_sample_rate_from_tracks(track_urls: list[str], logger=None) -> int | None:
    """Detect sample rate from first decodable audio frame.
    Args:
        track_urls: List of URLs to audio files (S3 presigned or local)
        logger: Optional logger instance
    Returns:
        Sample rate in Hz, or None if no decodable frames found
    """
    for url in track_urls:
        if not url:
            continue
        container = None
        try:
            container = av.open(url)
            for frame in container.decode(audio=0):
                return frame.sample_rate
        except Exception:
            continue
        finally:
            if container is not None:
                container.close()
    return None
 async def mixdown_tracks_pyav(
    track_urls: list[str],
    writer,
    target_sample_rate: int,
    offsets_seconds: list[float] | None = None,
    logger=None,
 ) -> None:
    """Multi-track mixdown using PyAV filter graph (amix).
    Builds a filter graph: N abuffer -> optional adelay -> amix -> aformat -> sink
    Reads from S3 presigned URLs or local files, pushes mixed frames to writer.
    Args:
        track_urls: List of URLs to audio tracks (S3 presigned or local)
        writer: AudioFileWriterProcessor instance with async push() method
        target_sample_rate: Sample rate for output (Hz)
        offsets_seconds: Optional per-track delays in seconds for alignment.
            If provided, must have same length as track_urls. Delays are relative
            to the minimum offset (earliest track has delay=0).
        logger: Optional logger instance
    Raises:
        ValueError: If offsets_seconds length doesn't match track_urls,
            no valid tracks provided, or no containers can be opened
    """
    if offsets_seconds is not None and len(offsets_seconds) != len(track_urls):
        raise ValueError(
            f"offsets_seconds length ({len(offsets_seconds)}) must match track_urls ({len(track_urls)})"
        )
    valid_track_urls = [url for url in track_urls if url]
    if not valid_track_urls:
        if logger:
            logger.error("Mixdown failed - no valid track URLs provided")
        raise ValueError("Mixdown failed: No valid track URLs")
    # Calculate per-input delays if offsets provided
    input_offsets_seconds = None
    if offsets_seconds is not None:
        input_offsets_seconds = [
            offsets_seconds[i] for i, url in enumerate(track_urls) if url
        ]
    # Build PyAV filter graph:
    # N abuffer (s32/stereo)
    #   -> optional adelay per input (for alignment)
    #   -> amix (s32)
    #   -> aformat(s16)
    #   -> sink
    graph = av.filter.Graph()
    inputs = []
    for idx, url in enumerate(valid_track_urls):
        args = (
            f"time_base=1/{target_sample_rate}:"
            f"sample_rate={target_sample_rate}:"
            f"sample_fmt=s32:"
            f"channel_layout=stereo"
        )
        in_ctx = graph.add("abuffer", args=args, name=f"in{idx}")
        inputs.append(in_ctx)
    if not inputs:
        if logger:
            logger.error("Mixdown failed - no valid inputs for graph")
        raise ValueError("Mixdown failed: No valid inputs for filter graph")
    mixer = graph.add("amix", args=f"inputs={len(inputs)}:normalize=0", name="mix")
    fmt = graph.add(
        "aformat",
        args=f"sample_fmts=s32:channel_layouts=stereo:sample_rates={target_sample_rate}",
        name="fmt",
    )
    sink = graph.add("abuffersink", name="out")
    # Optional per-input delay before mixing
    delays_ms: list[int] = []
    if input_offsets_seconds is not None:
        base = min(input_offsets_seconds) if input_offsets_seconds else 0.0
        delays_ms = [
            max(0, int(round((o - base) * 1000))) for o in input_offsets_seconds
        ]
    else:
        delays_ms = [0 for _ in inputs]
    for idx, in_ctx in enumerate(inputs):
        delay_ms = delays_ms[idx] if idx < len(delays_ms) else 0
        if delay_ms > 0:
            # adelay requires one value per channel; use same for stereo
            adelay = graph.add(
                "adelay",
                args=f"delays={delay_ms}|{delay_ms}:all=1",
                name=f"delay{idx}",
            )
            in_ctx.link_to(adelay)
            adelay.link_to(mixer, 0, idx)
        else:
            in_ctx.link_to(mixer, 0, idx)
    mixer.link_to(fmt)
    fmt.link_to(sink)
    graph.configure()
    containers = []
    try:
        # Open all containers with cleanup guaranteed
        for i, url in enumerate(valid_track_urls):
            try:
                c = av.open(
                    url,
                    options={
                        # S3 streaming options
                        "reconnect": "1",
                        "reconnect_streamed": "1",
                        "reconnect_delay_max": "5",
                    },
                )
                containers.append(c)
            except Exception as e:
                if logger:
                    logger.warning(
                        "Mixdown: failed to open container from URL",
                        input=i,
                        url=url,
                        error=str(e),
                    )
        if not containers:
            if logger:
                logger.error("Mixdown failed - no valid containers opened")
            raise ValueError("Mixdown failed: Could not open any track containers")
        decoders = [c.decode(audio=0) for c in containers]
        active = [True] * len(decoders)
        resamplers = [
            AudioResampler(format="s32", layout="stereo", rate=target_sample_rate)
            for _ in decoders
        ]
        while any(active):
            for i, (dec, is_active) in enumerate(zip(decoders, active)):
                if not is_active:
                    continue
                try:
                    frame = next(dec)
                except StopIteration:
                    active[i] = False
                    # Signal end of stream to filter graph
                    inputs[i].push(None)
                    continue
                if frame.sample_rate != target_sample_rate:
                    continue
                out_frames = resamplers[i].resample(frame) or []
                for rf in out_frames:
                    rf.sample_rate = target_sample_rate
                    rf.time_base = Fraction(1, target_sample_rate)
                    inputs[i].push(rf)
                while True:
                    try:
                        mixed = sink.pull()
                    except Exception:
                        break
                    mixed.sample_rate = target_sample_rate
                    mixed.time_base = Fraction(1, target_sample_rate)
                    await writer.push(mixed)
        # Flush remaining frames from filter graph
        while True:
            try:
                mixed = sink.pull()
            except Exception:
                break
            mixed.sample_rate = target_sample_rate
            mixed.time_base = Fraction(1, target_sample_rate)
            await writer.push(mixed)
    finally:
        # Cleanup all containers, even if processing failed
        for c in containers:
            if c is not None:
                try:
                    c.close()
                except Exception:
                    pass  # Best effort cleanup
--- a/server/reflector/utils/audio_padding.py
+++ b/server/reflector/utils/audio_padding.py
@@ -1,186 +0,0 @@
 """
 Audio track padding utilities.
 Shared PyAV-based functions for extracting stream metadata and applying
 silence padding to audio tracks. Used by both Hatchet workflows and Celery pipelines.
 """
 import math
 from fractions import Fraction
 import av
 from av.audio.resampler import AudioResampler
 from reflector.utils.audio_constants import (
    OPUS_DEFAULT_BIT_RATE,
    OPUS_STANDARD_SAMPLE_RATE,
 )
 def extract_stream_start_time_from_container(
    container,
    track_idx: int,
    logger=None,
 ) -> float:
    """Extract meeting-relative start time from WebM stream metadata.
    Uses PyAV to read stream.start_time from WebM container.
    More accurate than filename timestamps by ~209ms due to network/encoding delays.
    Three sources are tried in order; a later one is consulted only while the
    value found so far is not positive:
      1. stream.start_time scaled by stream.time_base
      2. container.start_time, converted from av.time_base ticks to seconds
      3. DTS of the first demuxed packet that carries one, scaled by stream.time_base
    Args:
        container: PyAV container opened from audio file/URL
        track_idx: Track index for logging context
        logger: Optional logger instance; it is called with structlog-style
            keyword arguments (track_idx=..., error=...)
    Returns:
        Start time in seconds (0.0 if extraction fails)
    """
    start_time_seconds = 0.0
    try:
        # Prefer the first audio stream; fall back to whatever stream comes first.
        audio_streams = [s for s in container.streams if s.type == "audio"]
        stream = audio_streams[0] if audio_streams else container.streams[0]
        # 1) Try stream-level start_time (most reliable for Daily.co tracks)
        if stream.start_time is not None and stream.time_base is not None:
            start_time_seconds = float(stream.start_time * stream.time_base)
        # 2) Fallback to container-level start_time. It is expressed in ticks of
        #    1/av.time_base seconds (av.time_base is the integer AV_TIME_BASE),
        #    so seconds are obtained by dividing, not multiplying.
        if (start_time_seconds <= 0) and (container.start_time is not None):
            start_time_seconds = float(container.start_time / av.time_base)
        # 3) Fallback to first packet DTS in stream.time_base
        # NOTE(review): demuxing here advances the container; confirm callers
        # do not depend on reading the stream from its first packet afterwards.
        if start_time_seconds <= 0:
            for packet in container.demux(stream):
                if packet.dts is not None:
                    start_time_seconds = float(packet.dts * stream.time_base)
                    break
    except Exception as e:
        # Metadata extraction is best effort: any failure degrades to "no offset".
        if logger:
            logger.warning(
                "PyAV metadata read failed; assuming 0 start_time",
                track_idx=track_idx,
                error=str(e),
            )
        start_time_seconds = 0.0
    if logger:
        logger.info(
            f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s",
            track_idx=track_idx,
        )
    return start_time_seconds
 def apply_audio_padding_to_file(
    in_container,
    output_path: str,
    start_time_seconds: float,
    track_idx: int,
    logger=None,
 ) -> None:
    """Write a copy of an audio track with leading silence, as WebM/Opus.
    The source audio is decoded, resampled to stereo s16 at
    OPUS_STANDARD_SAMPLE_RATE, pushed through an
    abuffer -> aresample -> adelay -> abuffersink filter graph, and the delayed
    frames are encoded with libopus into output_path.
    Args:
        in_container: PyAV container opened from source audio
        output_path: Path for output WebM file
        start_time_seconds: Amount of silence to prepend (in seconds); floored
            to whole milliseconds before being passed to adelay
        track_idx: Track index for logging context
        logger: Optional logger instance; it is called with structlog-style
            keyword arguments
    Raises:
        Exception: If no audio stream found or PyAV processing fails. The
            failure is logged first when a logger is given, then re-raised.
    """
    delay_ms = math.floor(start_time_seconds * 1000)
    if logger:
        logger.info(
            f"Padding track {track_idx} with {delay_ms}ms delay using PyAV",
            track_idx=track_idx,
            delay_ms=delay_ms,
        )
    try:
        with av.open(output_path, "w", format="webm") as out_container:
            # Pick the first audio stream of the input, if there is one.
            in_stream = None
            for candidate in in_container.streams:
                if candidate.type == "audio":
                    in_stream = candidate
                    break
            if in_stream is None:
                raise Exception("No audio stream in input")
            out_stream = out_container.add_stream(
                "libopus", rate=OPUS_STANDARD_SAMPLE_RATE
            )
            out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE
            graph = av.filter.Graph()
            abuffer_args = ":".join(
                (
                    f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}",
                    f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}",
                    "sample_fmt=s16",
                    "channel_layout=stereo",
                )
            )
            src = graph.add("abuffer", args=abuffer_args, name="src")
            aresample_f = graph.add("aresample", args="async=1", name="ares")
            # adelay takes one delay per channel, '|'-separated (stereo -> two values)
            adelay_f = graph.add(
                "adelay", args=f"delays={delay_ms}|{delay_ms}:all=1", name="delay"
            )
            sink = graph.add("abuffersink", name="sink")
            src.link_to(aresample_f)
            aresample_f.link_to(adelay_f)
            adelay_f.link_to(sink)
            graph.configure()
            resampler = AudioResampler(
                format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
            )
            def encode_available_frames() -> None:
                # Pull frames from the sink until pull() raises, encoding and
                # muxing each one. Only the pull itself is guarded; errors from
                # encoding or muxing propagate to the caller.
                while True:
                    try:
                        padded = sink.pull()
                    except Exception:
                        return
                    padded.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                    padded.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                    for packet in out_stream.encode(padded):
                        out_container.mux(packet)
            # Decode -> resample -> push through graph -> encode Opus
            for decoded in in_container.decode(in_stream):
                for resampled in resampler.resample(decoded) or []:
                    resampled.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                    resampled.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                    src.push(resampled)
                    encode_available_frames()
            # Signal end of input, then collect whatever the graph still holds
            src.push(None)
            encode_available_frames()
            # Flush encoder
            for packet in out_stream.encode(None):
                out_container.mux(packet)
    except Exception as e:
        if logger:
            logger.error(
                "PyAV padding failed for track",
                track_idx=track_idx,
                delay_ms=delay_ms,
                error=str(e),
                exc_info=True,
            )
        raise
--- a/server/reflector/utils/common.py
+++ b/server/reflector/utils/common.py
@@ -1,4 +0,0 @@
 def assert_not_none[T](value: T | None, message: str = "Value is None") -> T:
    if value is None:
        raise ValueError(message)
    return value
--- a/server/reflector/utils/daily.py
+++ b/server/reflector/utils/daily.py
@@ -1,92 +0,0 @@
 import os
 import re
 from typing import NamedTuple
 from reflector.utils.string import NonEmptyString
 DailyRoomName = NonEmptyString
 class DailyRecordingFilename(NamedTuple):
    """Parsed components of a Daily.co recording filename.
    Format: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
    Example: 1763152299562-12f0b87c-97d4-4dd3-a65c-cee1f854a79c-cam-audio-1763152314582
    Note: S3 object keys have no extension. Browsers append .webm when a file is
    downloaded from the S3 UI (because of its MIME type header), so a manually
    downloaded copy may carry an extension that the object key does not have.
    """
    # Leading numeric component of the filename.
    # NOTE(review): 13-digit value in the example, presumably Unix epoch
    # milliseconds - confirm against Daily.co documentation.
    recording_start_ts: int
    # Participant identifier between the two timestamps (UUID-shaped in the example).
    participant_id: str
    # Trailing numeric component, following "-cam-audio-".
    track_start_ts: int
 def parse_daily_recording_filename(filename: str) -> DailyRecordingFilename:
    """Split a Daily.co recording filename into its timestamps and participant ID.
    Only the basename of the given path is examined, and the pattern is searched
    for anywhere inside it (it is not anchored to the start or end).
    Args:
        filename: Full path or basename of Daily.co recording file
                 Format: {recording_start_ts}-{participant_id}-cam-audio-{track_start_ts}
    Returns:
        DailyRecordingFilename with parsed components
    Raises:
        ValueError: If filename doesn't match expected format
    Examples:
        >>> parse_daily_recording_filename("1763152299562-12f0b87c-97d4-4dd3-a65c-cee1f854a79c-cam-audio-1763152314582")
        DailyRecordingFilename(recording_start_ts=1763152299562, participant_id='12f0b87c-97d4-4dd3-a65c-cee1f854a79c', track_start_ts=1763152314582)
    """
    found = re.search(
        r"(\d{13,})-([0-9a-fA-F-]{36})-cam-audio-(\d{13,})",
        os.path.basename(filename),
    )
    if found is None:
        raise ValueError(
            f"Invalid Daily.co recording filename: {filename}. "
            f"Expected format: {{recording_start_ts}}-{{participant_id}}-cam-audio-{{track_start_ts}}"
        )
    recording_ts_text, participant, track_ts_text = found.groups()
    return DailyRecordingFilename(
        recording_start_ts=int(recording_ts_text),
        participant_id=participant,
        track_start_ts=int(track_ts_text),
    )
 def recording_lock_key(recording_id: NonEmptyString) -> NonEmptyString:
    """Build the lock key for a recording: "recording:" followed by its ID."""
    lock_key = f"recording:{recording_id}"
    return lock_key
 def filter_cam_audio_tracks(track_keys: list[str]) -> list[str]:
    """Keep only the keys containing "cam-audio" (drops screen-audio, etc.), in order."""
    cam_tracks: list[str] = []
    for key in track_keys:
        if "cam-audio" in key:
            cam_tracks.append(key)
    return cam_tracks
 def extract_base_room_name(daily_room_name: DailyRoomName) -> NonEmptyString:
    """
    Extract base room name from Daily.co timestamped room name.
    Timestamped rooms are named {base_name}-YYYYMMDDHHMMSS. The trailing segment
    is removed only when it is such a 14-digit timestamp; a name without one is
    returned unchanged instead of losing its last hyphen-separated segment.
    Examples:
        "daily-20251020193458" → "daily"
        "daily-2-20251020193458" → "daily-2"
        "my-room-name-20251020193458" → "my-room-name"
        "daily-2" → "daily-2"
    Args:
        daily_room_name: Full Daily.co room name with optional timestamp
    Returns:
        Base room name without timestamp suffix
    Raises:
        AssertionError: If the resulting base name is empty
    """
    head, separator, suffix = daily_room_name.rpartition("-")
    # Strip only a real timestamp suffix; "daily-2" must not collapse to "daily".
    has_timestamp = bool(separator) and re.fullmatch(r"[0-9]{14}", suffix) is not None
    base_name = head if has_timestamp else daily_room_name
    assert base_name, f"Extracted base name is empty from: {daily_room_name}"
    return base_name
--- a/server/reflector/utils/datetime.py
+++ b/server/reflector/utils/datetime.py
@@ -1,9 +0,0 @@
 from datetime import datetime, timezone
 def parse_datetime_with_timezone(iso_string: str) -> datetime:
    """Parse an ISO datetime string; a naive result is tagged as UTC, an aware one is kept."""
    parsed = datetime.fromisoformat(iso_string)
    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
--- a/server/reflector/utils/string.py
+++ b/server/reflector/utils/string.py
@@ -1,18 +1,7 @@
-from typing import Annotated, TypeVar
+from typing import Annotated
 from pydantic import Field, TypeAdapter, constr
 T_NotNone = TypeVar("T_NotNone")
 def assert_not_none(
    value: T_NotNone | None, message: str = "Value is None"
 ) -> T_NotNone:
    """Return value unchanged; raise ValueError(message) when it is None."""
    if value is not None:
        return value
    raise ValueError(message)
 NonEmptyStringBase = constr(min_length=1, strip_whitespace=False)
 NonEmptyString = Annotated[
    NonEmptyStringBase,
@@ -32,20 +21,3 @@ def try_parse_non_empty_string(s: str) -> NonEmptyString | None:
    if not s:
        return None
    return parse_non_empty_string(s)
 T_Str = TypeVar("T_Str", bound=str)
 def assert_equal(s1: T_Str, s2: T_Str) -> T_Str:
    """Return s1 when both strings compare equal; raise ValueError otherwise."""
    differs = s1 != s2
    if differs:
        raise ValueError(f"assert_equal: {s1} != {s2}")
    return s1
 def assert_non_none_and_non_empty(
    value: str | None, error: str | None = None
 ) -> NonEmptyString:
    """Reject None via assert_not_none, then pass the value to parse_non_empty_string.
    The error text, when given, is forwarded to both checks; the None check
    falls back to "Value is None" when no error text is supplied.
    """
    present = assert_not_none(value, error or "Value is None")
    return parse_non_empty_string(present, error)
--- a/server/reflector/utils/transcript_formats.py
+++ b/server/reflector/utils/transcript_formats.py
@@ -1,133 +0,0 @@
 """Utilities for converting transcript data to various output formats."""
 import webvtt
 from reflector.db.transcripts import TranscriptParticipant, TranscriptTopic
 from reflector.processors.types import (
    Transcript as ProcessorTranscript,
 )
 from reflector.schemas.transcript_formats import TranscriptSegment
 from reflector.utils.webvtt import seconds_to_timestamp
 def get_speaker_name(
    speaker: int, participants: list[TranscriptParticipant] | None
 ) -> str:
    """Return the name of the first participant whose speaker matches, else 'Speaker N'."""
    for candidate in participants or ():
        if candidate.speaker == speaker:
            return candidate.name
    return f"Speaker {speaker}"
 def format_timestamp_mmss(seconds: float | int) -> str:
    """Format seconds as MM:SS timestamp."""
    minutes = int(seconds // 60)
    secs = int(seconds % 60)
    return f"{minutes:02d}:{secs:02d}"
 def transcript_to_text(
    topics: list[TranscriptTopic],
    participants: list[TranscriptParticipant] | None,
    is_multitrack: bool = False,
 ) -> str:
    """Render topics as plain text, one "<speaker name>: <text>" line per segment.
    Topics without words are skipped; lines are joined with newlines.
    """
    rendered: list[str] = []
    for topic in topics:
        if topic.words:
            segments = ProcessorTranscript(words=topic.words).as_segments(
                is_multitrack
            )
            rendered.extend(
                f"{get_speaker_name(seg.speaker, participants)}: {seg.text.strip()}"
                for seg in segments
            )
    return "\n".join(rendered)
 def transcript_to_text_timestamped(
    topics: list[TranscriptTopic],
    participants: list[TranscriptParticipant] | None,
    is_multitrack: bool = False,
 ) -> str:
    """Render topics as text lines of the form "[MM:SS] <speaker name>: <text>".
    The timestamp is the segment start; topics without words are skipped.
    """
    rendered: list[str] = []
    for topic in topics:
        if topic.words:
            segments = ProcessorTranscript(words=topic.words).as_segments(
                is_multitrack
            )
            for seg in segments:
                who = get_speaker_name(seg.speaker, participants)
                rendered.append(
                    f"[{format_timestamp_mmss(seg.start)}] {who}: {seg.text.strip()}"
                )
    return "\n".join(rendered)
 def topics_to_webvtt_named(
    topics: list[TranscriptTopic],
    participants: list[TranscriptParticipant] | None,
    is_multitrack: bool = False,
 ) -> str:
    """Render topics as WebVTT, one caption per segment with a <v name> voice tag.
    Topics without words are skipped; the returned string is the document content.
    """
    document = webvtt.WebVTT()
    for topic in topics:
        if topic.words:
            segments = ProcessorTranscript(words=topic.words).as_segments(
                is_multitrack
            )
            for seg in segments:
                voice = get_speaker_name(seg.speaker, participants)
                payload = f"<v {voice}>{seg.text.strip()}"
                document.captions.append(
                    webvtt.Caption(
                        start=seconds_to_timestamp(seg.start),
                        end=seconds_to_timestamp(seg.end),
                        text=payload,
                    )
                )
    return document.content
 def transcript_to_json_segments(
    topics: list[TranscriptTopic],
    participants: list[TranscriptParticipant] | None,
    is_multitrack: bool = False,
 ) -> list[TranscriptSegment]:
    """Flatten topics into one list of TranscriptSegment objects, in topic order.
    Each entry carries the speaker, the resolved speaker name, the stripped text
    and the segment's start/end. Topics without words are skipped.
    """
    flattened: list[TranscriptSegment] = []
    for topic in topics:
        if topic.words:
            segments = ProcessorTranscript(words=topic.words).as_segments(
                is_multitrack
            )
            flattened.extend(
                TranscriptSegment(
                    speaker=seg.speaker,
                    speaker_name=get_speaker_name(seg.speaker, participants),
                    text=seg.text.strip(),
                    start=seg.start,
                    end=seg.end,
                )
                for seg in segments
            )
    return flattened
--- a/server/reflector/utils/url.py
+++ b/server/reflector/utils/url.py
@@ -1,37 +0,0 @@
 """URL manipulation utilities."""
 from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
 def add_query_param(url: str, key: str, value: str) -> str:
    """
    Set a single query parameter on a URL, replacing any existing values for it.
    The existing query string is parsed (blank values are kept), the parameter
    is set, and the URL is reassembled with its other components untouched.
    Args:
        url: The URL to modify
        key: The query parameter name
        value: The query parameter value
    Returns:
        The URL with the query parameter added or updated
    Examples:
        >>> add_query_param("https://example.com/room", "t", "token123")
        'https://example.com/room?t=token123'
        >>> add_query_param("https://example.com/room?existing=param", "t", "token123")
        'https://example.com/room?existing=param&t=token123'
    """
    parts = urlparse(url)
    params = parse_qs(parts.query, keep_blank_values=True)
    params[key] = [value]
    return urlunparse(parts._replace(query=urlencode(params, doseq=True)))
--- a/server/reflector/utils/webvtt.py
+++ b/server/reflector/utils/webvtt.py
@@ -13,7 +13,7 @@ VttTimestamp = Annotated[str, "vtt_timestamp"]
 WebVTTStr = Annotated[str, "webvtt_str"]
-def seconds_to_timestamp(seconds: Seconds) -> VttTimestamp:
+def _seconds_to_timestamp(seconds: Seconds) -> VttTimestamp:
    # lib doesn't do that
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
@@ -37,8 +37,8 @@ def words_to_webvtt(words: list[Word]) -> WebVTTStr:
        text = f"<v Speaker{segment.speaker}>{text}"
        caption = webvtt.Caption(
-            start=seconds_to_timestamp(segment.start),
+            start=_seconds_to_timestamp(segment.start),
-            end=seconds_to_timestamp(segment.end),
+            end=_seconds_to_timestamp(segment.end),
            text=text,
        )
        vtt.captions.append(caption)
--- a/server/reflector/video_platforms/init.py
+++ b/server/reflector/video_platforms/init.py
@@ -1,3 +1,10 @@
 # Video Platform Abstraction Layer
 """
 This module provides an abstraction layer for different video conferencing platforms.
 It allows seamless switching between providers (Whereby, Daily.co, etc.) without
 changing the core application logic.
 """
 from .base import VideoPlatformClient
 from .models import MeetingData, VideoPlatformConfig
 from .registry import get_platform_client, register_platform
--- a/server/reflector/video_platforms/base.py
+++ b/server/reflector/video_platforms/base.py
@@ -2,18 +2,17 @@ from abc import ABC, abstractmethod
 from datetime import datetime
 from typing import TYPE_CHECKING, Any, Dict, Optional
-from ..schemas.platform import Platform
+from reflector.platform_types import Platform
-from ..utils.string import NonEmptyString
+
-from .models import MeetingData, SessionData, VideoPlatformConfig
+from .models import MeetingData, VideoPlatformConfig
 if TYPE_CHECKING:
    from reflector.db.rooms import Room
 # separator doesn't guarantee there's no more "ROOM_PREFIX_SEPARATOR" strings in room name
 ROOM_PREFIX_SEPARATOR = "-"
 class VideoPlatformClient(ABC):
    """Abstract base class for video platform integrations."""
    PLATFORM_NAME: Platform
    def __init__(self, config: VideoPlatformConfig):
@@ -21,26 +20,36 @@ class VideoPlatformClient(ABC):
    @abstractmethod
    async def create_meeting(
-        self, room_name_prefix: NonEmptyString, end_date: datetime, room: "Room"
+        self, room_name_prefix: str, end_date: datetime, room: "Room"
    ) -> MeetingData:
        """Create a new meeting room."""
        pass
    @abstractmethod
-    async def get_room_sessions(self, room_name: str) -> list[SessionData]:
+    async def get_room_sessions(self, room_name: str) -> Dict[str, Any]:
-        """Get session history for a room."""
+        """Get session information for a room."""
        pass
    @abstractmethod
    async def delete_room(self, room_name: str) -> bool:
        """Delete a room. Returns True if successful."""
        pass
    @abstractmethod
    async def upload_logo(self, room_name: str, logo_path: str) -> bool:
        """Upload a logo to the room. Returns True if successful."""
        pass
    @abstractmethod
    def verify_webhook_signature(
        self, body: bytes, signature: str, timestamp: Optional[str] = None
    ) -> bool:
        """Verify webhook signature for security."""
        pass
    def format_recording_config(self, room: "Room") -> Dict[str, Any]:
        """Format recording configuration for the platform.
        Can be overridden by specific implementations."""
        if room.recording_type == "cloud" and self.config.s3_bucket:
            return {
                "type": room.recording_type,
--- a/server/reflector/video_platforms/daily.py
+++ b/server/reflector/video_platforms/daily.py
@@ -1,204 +1,178 @@
 import hmac
 from datetime import datetime
 from hashlib import sha256
 from http import HTTPStatus
 from typing import Any, Dict, Optional
 import httpx
 from reflector.dailyco_api import (
    CreateMeetingTokenRequest,
    CreateRoomRequest,
    DailyApiClient,
    MeetingParticipantsResponse,
    MeetingTokenProperties,
    RecordingResponse,
    RecordingsBucketConfig,
    RoomPresenceResponse,
    RoomProperties,
    verify_webhook_signature,
 )
 from reflector.db.daily_participant_sessions import (
    daily_participant_sessions_controller,
 )
 from reflector.db.rooms import Room
-from reflector.logger import logger
+from reflector.platform_types import Platform
 from reflector.storage import get_dailyco_storage
-from ..dailyco_api.responses import RecordingStatus
+from .base import VideoPlatformClient
-from ..schemas.platform import Platform
+from .models import MeetingData, RecordingType, VideoPlatformConfig
 from ..utils.daily import DailyRoomName
 from ..utils.string import NonEmptyString
 from .base import ROOM_PREFIX_SEPARATOR, VideoPlatformClient
 from .models import MeetingData, RecordingType, SessionData, VideoPlatformConfig
 class DailyClient(VideoPlatformClient):
    PLATFORM_NAME: Platform = "daily"
    TIMEOUT = 10
    BASE_URL = "https://api.daily.co/v1"
    TIMESTAMP_FORMAT = "%Y%m%d%H%M%S"
    RECORDING_NONE: RecordingType = "none"
    RECORDING_LOCAL: RecordingType = "local"
    RECORDING_CLOUD: RecordingType = "cloud"
    def __init__(self, config: VideoPlatformConfig):
        super().__init__(config)
-        self._api_client = DailyApiClient(
+        self.headers = {
-            api_key=config.api_key,
+            "Authorization": f"Bearer {config.api_key}",
-            webhook_secret=config.webhook_secret,
+            "Content-Type": "application/json",
-            timeout=10.0,
+        }
        )
    async def create_meeting(
-        self, room_name_prefix: NonEmptyString, end_date: datetime, room: Room
+        self, room_name_prefix: str, end_date: datetime, room: Room
    ) -> MeetingData:
-        """
+        """Create a Daily.co room."""
        Daily.co rooms vs meetings:
        - We create a NEW Daily.co room for each Reflector meeting
        - Daily.co meeting/session starts automatically when first participant joins
        - Room auto-deletes after exp time
        - Meeting.room_name stores the timestamped Daily.co room name
        """
        timestamp = datetime.now().strftime(self.TIMESTAMP_FORMAT)
-        room_name = f"{room_name_prefix}{ROOM_PREFIX_SEPARATOR}{timestamp}"
+        if room_name_prefix:
            room_name = f"{room_name_prefix}-{timestamp}"
        else:
            room_name = f"room-{timestamp}"
-        enable_recording = None
+        data = {
-        if room.recording_type == self.RECORDING_LOCAL:
+            "name": room_name,
-            enable_recording = "local"
+            "privacy": "private" if room.is_locked else "public",
-        elif room.recording_type == self.RECORDING_CLOUD:
+            "properties": {
-            enable_recording = "raw-tracks"
+                "enable_recording": "raw-tracks"
                if room.recording_type != self.RECORDING_NONE
                else False,
                "enable_chat": True,
                "enable_screenshare": True,
                "start_video_off": False,
                "start_audio_off": False,
                "exp": int(end_date.timestamp()),
            },
        }
-        properties = RoomProperties(
+        # Configure S3 bucket for recordings
-            enable_recording=enable_recording,
+        # NOTE: Not checking room.recording_type - figure out later if conditional needed
-            enable_chat=True,
+        assert self.config.s3_bucket, "S3 bucket must be configured"
-            enable_screenshare=True,
+        data["properties"]["recordings_bucket"] = {
-            enable_knocking=room.is_locked,
+            "bucket_name": self.config.s3_bucket,
-            start_video_off=False,
+            "bucket_region": self.config.s3_region,
-            start_audio_off=False,
+            "assume_role_arn": self.config.aws_role_arn,
-            exp=int(end_date.timestamp()),
+            "allow_api_access": True,
        }
        from reflector.logger import logger
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.BASE_URL}/rooms",
                headers=self.headers,
                json=data,
                timeout=self.TIMEOUT,
            )
-
+            if response.status_code >= 400:
-        if room.recording_type == self.RECORDING_CLOUD:
+                logger.error(
-            daily_storage = get_dailyco_storage()
+                    "Daily.co API error",
-            assert daily_storage.bucket_name, "S3 bucket must be configured"
+                    status_code=response.status_code,
-            properties.recordings_bucket = RecordingsBucketConfig(
+                    response_body=response.text,
-                bucket_name=daily_storage.bucket_name,
+                    request_data=data,
                bucket_region=daily_storage.region,
                assume_role_arn=daily_storage.role_credential,
                allow_api_access=True,
                )
            response.raise_for_status()
            result = response.json()
-        request = CreateRoomRequest(
+        # Format response to match our standard
-            name=room_name,
+        room_url = result["url"]
            privacy="private" if room.is_locked else "public",
            properties=properties,
        )
        result = await self._api_client.create_room(request)
        return MeetingData(
-            meeting_id=result.id,
+            meeting_id=result["id"],
-            room_name=result.name,
+            room_name=result["name"],
-            room_url=result.url,
+            room_url=room_url,
-            host_room_url=result.url,
+            host_room_url=room_url,
            platform=self.PLATFORM_NAME,
-            extra_data=result.model_dump(),
+            extra_data=result,
        )
-    async def get_room_sessions(self, room_name: str) -> list[SessionData]:
+    async def get_room_sessions(self, room_name: str) -> Dict[str, Any]:
-        """Get room session history from database (webhook-stored sessions).
+        """Get Daily.co room information."""
-
+        async with httpx.AsyncClient() as client:
-        Daily.co doesn't provide historical session API, so we query our database
+            response = await client.get(
-        where participant.joined/left webhooks are stored.
+                f"{self.BASE_URL}/rooms/{room_name}",
-        """
+                headers=self.headers,
-        from reflector.db.meetings import meetings_controller  # noqa: PLC0415
+                timeout=self.TIMEOUT,
        meeting = await meetings_controller.get_by_room_name(room_name)
        if not meeting:
            return []
        sessions = await daily_participant_sessions_controller.get_by_meeting(
            meeting.id
            )
            response.raise_for_status()
            return response.json()
-        return [
+    async def get_room_presence(self, room_name: str) -> Dict[str, Any]:
-            SessionData(
+        """Get real-time participant data - Daily.co specific feature."""
-                session_id=s.id,
+        async with httpx.AsyncClient() as client:
-                started_at=s.joined_at,
+            response = await client.get(
-                ended_at=s.left_at,
+                f"{self.BASE_URL}/rooms/{room_name}/presence",
                headers=self.headers,
                timeout=self.TIMEOUT,
            )
-            for s in sessions
+            response.raise_for_status()
-        ]
+            return response.json()
-    async def get_room_presence(self, room_name: str) -> RoomPresenceResponse:
+    async def delete_room(self, room_name: str) -> bool:
-        """Get room presence/session data for a Daily.co room."""
+        """Delete a Daily.co room."""
-        return await self._api_client.get_room_presence(room_name)
+        async with httpx.AsyncClient() as client:
-
+            response = await client.delete(
-    async def get_meeting_participants(
+                f"{self.BASE_URL}/rooms/{room_name}",
-        self, meeting_id: str
+                headers=self.headers,
-    ) -> MeetingParticipantsResponse:
+                timeout=self.TIMEOUT,
        """Get participant data for a specific Daily.co meeting."""
        return await self._api_client.get_meeting_participants(meeting_id)
    async def get_recording(self, recording_id: str) -> RecordingResponse:
        return await self._api_client.get_recording(recording_id)
    async def list_recordings(
        self,
        room_name: NonEmptyString | None = None,
        starting_after: str | None = None,
        ending_before: str | None = None,
        limit: int = 100,
    ) -> list[RecordingResponse]:
        return await self._api_client.list_recordings(
            room_name=room_name,
            starting_after=starting_after,
            ending_before=ending_before,
            limit=limit,
            )
-
+            # Daily.co returns 200 for success, 404 if room doesn't exist
-    async def get_recording_status(
+            return response.status_code in (HTTPStatus.OK, HTTPStatus.NOT_FOUND)
        self, recording_id: NonEmptyString
    ) -> RecordingStatus:
        recording = await self.get_recording(recording_id)
        return recording.status
    async def upload_logo(self, room_name: str, logo_path: str) -> bool:
        """Daily.co doesn't support custom logos per room - this is a no-op."""
        return True
    def verify_webhook_signature(
-        self, body: bytes, signature: str, timestamp: str | None = None
+        self, body: bytes, signature: str, timestamp: Optional[str] = None
    ) -> bool:
-        """Verify Daily.co webhook signature using dailyco_api module."""
+        """Verify Daily.co webhook signature.
-        if not self.config.webhook_secret:
+
-            logger.warning("Webhook secret not configured")
+        Daily.co uses:
        - X-Webhook-Signature header
        - X-Webhook-Timestamp header
        - Signature format: HMAC-SHA256(base64_decode(secret), timestamp + '.' + body)
        - Result is base64 encoded
        """
        if not signature or not timestamp:
            return False
-        return verify_webhook_signature(
+        try:
-            body=body,
+            import base64
-            signature=signature,
+
-            timestamp=timestamp or "",
+            secret_bytes = base64.b64decode(self.config.webhook_secret)
-            webhook_secret=self.config.webhook_secret,
+
            signed_content = timestamp.encode() + b"." + body
            expected = hmac.new(secret_bytes, signed_content, sha256).digest()
            expected_b64 = base64.b64encode(expected).decode()
            return hmac.compare_digest(expected_b64, signature)
        except Exception:
            return False
    async def create_meeting_token(self, room_name: str, enable_recording: bool) -> str:
        """Create meeting token for auto-recording."""
        data = {"properties": {"room_name": room_name}}
        if enable_recording:
            data["properties"]["start_cloud_recording"] = True
            data["properties"]["enable_recording_ui"] = False
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self.BASE_URL}/meeting-tokens",
                headers=self.headers,
                json=data,
                timeout=self.TIMEOUT,
            )
-
+            response.raise_for_status()
-    async def create_meeting_token(
+            return response.json()["token"]
        self,
        room_name: DailyRoomName,
        start_cloud_recording: bool,
        enable_recording_ui: bool,
        user_id: NonEmptyString | None = None,
        is_owner: bool = False,
    ) -> NonEmptyString:
        properties = MeetingTokenProperties(
            room_name=room_name,
            user_id=user_id,
            start_cloud_recording=start_cloud_recording,
            enable_recording_ui=enable_recording_ui,
            is_owner=is_owner,
        )
        request = CreateMeetingTokenRequest(properties=properties)
        result = await self._api_client.create_meeting_token(request)
        return result.token
    async def close(self):
        """Clean up API client resources."""
        await self._api_client.close()
    async def __aenter__(self):
        return self
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()
--- a/server/reflector/video_platforms/factory.py
+++ b/server/reflector/video_platforms/factory.py
@@ -1,28 +1,29 @@
-from reflector.settings import settings
+"""Factory for creating video platform clients based on configuration."""
 from reflector.storage import get_dailyco_storage, get_whereby_storage
-from ..schemas.platform import WHEREBY_PLATFORM, Platform
+from typing import Optional
-from .base import VideoPlatformClient, VideoPlatformConfig
+
 from reflector.settings import settings
 from .base import Platform, VideoPlatformClient, VideoPlatformConfig
 from .registry import get_platform_client
 def get_platform_config(platform: Platform) -> VideoPlatformConfig:
-    if platform == WHEREBY_PLATFORM:
+    """Get configuration for a specific platform."""
    if platform == "whereby":
        if not settings.WHEREBY_API_KEY:
            raise ValueError(
                "WHEREBY_API_KEY is required when platform='whereby'. "
                "Set WHEREBY_API_KEY environment variable."
            )
        whereby_storage = get_whereby_storage()
        key_id, secret = whereby_storage.key_credentials
        return VideoPlatformConfig(
            api_key=settings.WHEREBY_API_KEY,
            webhook_secret=settings.WHEREBY_WEBHOOK_SECRET or "",
            api_url=settings.WHEREBY_API_URL,
-            s3_bucket=whereby_storage.bucket_name,
+            s3_bucket=settings.RECORDING_STORAGE_AWS_BUCKET_NAME,
-            s3_region=whereby_storage.region,
+            s3_region=settings.RECORDING_STORAGE_AWS_REGION,
-            aws_access_key_id=key_id,
+            aws_access_key_id=settings.AWS_WHEREBY_ACCESS_KEY_ID,
-            aws_access_key_secret=secret,
+            aws_access_key_secret=settings.AWS_WHEREBY_ACCESS_KEY_SECRET,
        )
    elif platform == "daily":
        if not settings.DAILY_API_KEY:
@@ -35,19 +36,45 @@ def get_platform_config(platform: Platform) -> VideoPlatformConfig:
                "DAILY_SUBDOMAIN is required when platform='daily'. "
                "Set DAILY_SUBDOMAIN environment variable."
            )
        daily_storage = get_dailyco_storage()
        return VideoPlatformConfig(
            api_key=settings.DAILY_API_KEY,
            webhook_secret=settings.DAILY_WEBHOOK_SECRET or "",
            subdomain=settings.DAILY_SUBDOMAIN,
-            s3_bucket=daily_storage.bucket_name,
+            s3_bucket=settings.AWS_DAILY_S3_BUCKET,
-            s3_region=daily_storage.region,
+            s3_region=settings.AWS_DAILY_S3_REGION,
-            aws_role_arn=daily_storage.role_credential,
+            aws_role_arn=settings.AWS_DAILY_ROLE_ARN,
        )
    else:
        raise ValueError(f"Unknown platform: {platform}")
 def create_platform_client(platform: Platform) -> VideoPlatformClient:
    """Build the client for ``platform`` from that platform's configuration.

    The configuration comes from ``get_platform_config(platform)`` and is
    handed, together with ``platform``, to ``get_platform_client``.
    """
    return get_platform_client(platform, get_platform_config(platform))
 def get_platform_for_room(
    room_id: Optional[str] = None, room_platform: Optional[Platform] = None
 ) -> Platform:
    """Pick the video platform a room should use.

    Decision order:
    0. settings.DAILY_MIGRATION_ENABLED is falsy -> always "whereby"
    1. room_id is listed in settings.DAILY_MIGRATION_ROOM_IDS -> "daily"
    2. room_platform, when given (the platform stored for the room)
    3. settings.DEFAULT_VIDEO_PLATFORM
    """
    if settings.DAILY_MIGRATION_ENABLED:
        # Explicit per-room override wins over whatever the room has stored.
        if room_id and room_id in settings.DAILY_MIGRATION_ROOM_IDS:
            return "daily"
        # Stored platform if present, otherwise the configured default.
        return room_platform or settings.DEFAULT_VIDEO_PLATFORM
    # Migration switched off: nothing else is consulted.
    return "whereby"
--- a/server/reflector/video_platforms/models.py
+++ b/server/reflector/video_platforms/models.py
@@ -1,44 +1,31 @@
-from datetime import datetime
+"""Video platform data models.
 Standard data models used across all video platform implementations.
 """
 from typing import Any, Dict, Literal, Optional
 from pydantic import BaseModel, Field
-from reflector.schemas.platform import WHEREBY_PLATFORM, Platform
+from reflector.platform_types import Platform
 from reflector.utils.string import NonEmptyString
 RecordingType = Literal["none", "local", "cloud"]
 class SessionData(BaseModel):
    """Platform-agnostic session data.

    Represents a participant session in a meeting room, regardless of platform.
    Used to determine if a meeting is still active or has ended.
    """
    # Identifier of the session; typed as NonEmptyString.
    session_id: NonEmptyString = Field(description="Unique session identifier")
    # Start time of the session (described as UTC).
    started_at: datetime = Field(description="When session started (UTC)")
    # End time, or None while the session is still active.
    # NOTE(review): no default is given, so pydantic presumably requires the
    # caller to pass this explicitly (possibly as None) - confirm.
    ended_at: datetime | None = Field(
        description="When session ended (UTC), None if still active"
    )
 class MeetingData(BaseModel):
    """Standardized meeting data returned by all providers."""
    platform: Platform
-    meeting_id: NonEmptyString = Field(
+    meeting_id: str = Field(description="Platform-specific meeting identifier")
-        description="Platform-specific meeting identifier"
+    room_url: str = Field(description="URL for participants to join")
-    )
+    host_room_url: str = Field(description="URL for hosts (may be same as room_url)")
-    room_url: NonEmptyString = Field(description="URL for participants to join")
+    room_name: str = Field(description="Human-readable room name")
    host_room_url: NonEmptyString = Field(
        description="URL for hosts (may be same as room_url)"
    )
    room_name: NonEmptyString = Field(description="Human-readable room name")
    extra_data: Dict[str, Any] = Field(default_factory=dict)
    class Config:
        json_schema_extra = {
            "example": {
-                "platform": WHEREBY_PLATFORM,
+                "platform": "whereby",
                "meeting_id": "12345678",
                "room_url": "https://subdomain.whereby.com/room-20251008120000",
                "host_room_url": "https://subdomain.whereby.com/room-20251008120000?roomKey=abc123",
@@ -48,6 +35,8 @@ class MeetingData(BaseModel):
 class VideoPlatformConfig(BaseModel):
    """Platform-agnostic configuration model."""
    api_key: str
    webhook_secret: str
    api_url: Optional[str] = None
--- a/server/reflector/video_platforms/registry.py
+++ b/server/reflector/video_platforms/registry.py
@@ -1,18 +1,20 @@
 from typing import Dict, Type
-from ..schemas.platform import DAILY_PLATFORM, WHEREBY_PLATFORM, Platform
+from .base import Platform, VideoPlatformClient, VideoPlatformConfig
 from .base import VideoPlatformClient, VideoPlatformConfig
 # Registry of available video platforms
 _PLATFORMS: Dict[Platform, Type[VideoPlatformClient]] = {}
 def register_platform(name: Platform, client_class: Type[VideoPlatformClient]):
    """Store ``client_class`` under ``name`` in the module-level registry.

    An entry already registered under the same name is overwritten.
    """
    _PLATFORMS.update({name: client_class})
 def get_platform_client(
    platform: Platform, config: VideoPlatformConfig
 ) -> VideoPlatformClient:
    """Get a video platform client instance."""
    if platform not in _PLATFORMS:
        raise ValueError(f"Unknown video platform: {platform}")
@@ -21,15 +23,17 @@ def get_platform_client(
 def get_available_platforms() -> list[Platform]:
    """Return the names of all registered platforms as a new list."""
    return [platform_name for platform_name in _PLATFORMS]
 # Auto-register built-in platforms
 def _register_builtin_platforms():
    from .daily import DailyClient  # noqa: PLC0415
    from .whereby import WherebyClient  # noqa: PLC0415
-    register_platform(WHEREBY_PLATFORM, WherebyClient)
+    register_platform("whereby", WherebyClient)
-    register_platform(DAILY_PLATFORM, DailyClient)
+    register_platform("daily", DailyClient)
 _register_builtin_platforms()
--- a/server/reflector/video_platforms/whereby.py
+++ b/server/reflector/video_platforms/whereby.py
@@ -4,22 +4,19 @@ import re
 import time
 from datetime import datetime
 from hashlib import sha256
-from typing import Optional
+from typing import Any, Dict, Optional
 import httpx
 from reflector.db.rooms import Room
 from reflector.storage import get_whereby_storage
-from ..schemas.platform import WHEREBY_PLATFORM, Platform
+from .base import MeetingData, Platform, VideoPlatformClient, VideoPlatformConfig
 from ..utils.string import NonEmptyString
 from .base import VideoPlatformClient
 from .models import MeetingData, SessionData, VideoPlatformConfig
 from .whereby_utils import whereby_room_name_prefix
 class WherebyClient(VideoPlatformClient):
-    PLATFORM_NAME: Platform = WHEREBY_PLATFORM
+    """Whereby video platform implementation."""
    PLATFORM_NAME: Platform = "whereby"
    TIMEOUT = 10  # seconds
    MAX_ELAPSED_TIME = 60 * 1000  # 1 minute in milliseconds
@@ -31,28 +28,27 @@ class WherebyClient(VideoPlatformClient):
        }
    async def create_meeting(
-        self, room_name_prefix: NonEmptyString, end_date: datetime, room: Room
+        self, room_name_prefix: str, end_date: datetime, room: Room
    ) -> MeetingData:
        """Create a Whereby meeting."""
        data = {
            "isLocked": room.is_locked,
-            "roomNamePrefix": whereby_room_name_prefix(room_name_prefix),
+            "roomNamePrefix": room_name_prefix,
            "roomNamePattern": "uuid",
            "roomMode": room.room_mode,
            "endDate": end_date.isoformat(),
            "fields": ["hostRoomUrl"],
        }
        # Add recording configuration if cloud recording is enabled
        if room.recording_type == "cloud":
            # Get storage config for passing credentials to Whereby API
            whereby_storage = get_whereby_storage()
            key_id, secret = whereby_storage.key_credentials
            data["recording"] = {
                "type": room.recording_type,
                "destination": {
                    "provider": "s3",
-                    "bucket": whereby_storage.bucket_name,
+                    "bucket": self.config.s3_bucket,
-                    "accessKeyId": key_id,
+                    "accessKeyId": self.config.aws_access_key_id,
-                    "accessKeySecret": secret,
+                    "accessKeySecret": self.config.aws_access_key_secret,
                    "fileFormat": "mp4",
                },
                "startTrigger": room.recording_trigger,
@@ -77,52 +73,23 @@ class WherebyClient(VideoPlatformClient):
            extra_data=result,
        )
-    async def get_room_sessions(self, room_name: str) -> list[SessionData]:
+    async def get_room_sessions(self, room_name: str) -> Dict[str, Any]:
-        """Get room session history from Whereby API.
+        """Get Whereby room session information."""
        Whereby API returns: [{"sessionId": "...", "startedAt": "...", "endedAt": "..." | null}, ...]
        """
        async with httpx.AsyncClient() as client:
            """
                        {
              "cursor": "text",
              "results": [
                {
                  "roomSessionId": "e2f29530-46ec-4cee-8b27-e565cb5bb2e9",
                  "roomName": "/room-prefix-793e9ec1-c686-423d-9043-9b7a10c553fd",
                  "startedAt": "2025-01-01T00:00:00.000Z",
                  "endedAt": "2025-01-01T01:00:00.000Z",
                  "totalParticipantMinutes": 124,
                  "totalRecorderMinutes": 120,
                  "totalStreamerMinutes": 120,
                  "totalUniqueParticipants": 4,
                  "totalUniqueRecorders": 3,
                  "totalUniqueStreamers": 2
                }
              ]
            }"""
            response = await client.get(
                f"{self.config.api_url}/insights/room-sessions?roomName={room_name}",
                headers=self.headers,
                timeout=self.TIMEOUT,
            )
            response.raise_for_status()
-            results = response.json().get("results", [])
+            return response.json()
-            return [
+    async def delete_room(self, room_name: str) -> bool:
-                SessionData(
+        """Whereby doesn't support room deletion - meetings expire automatically."""
-                    session_id=s["roomSessionId"],
+        return True
                    started_at=datetime.fromisoformat(
                        s["startedAt"].replace("Z", "+00:00")
                    ),
                    ended_at=datetime.fromisoformat(s["endedAt"].replace("Z", "+00:00"))
                    if s.get("endedAt")
                    else None,
                )
                for s in results
            ]
    async def upload_logo(self, room_name: str, logo_path: str) -> bool:
        """Upload logo to Whereby room."""
        async with httpx.AsyncClient() as client:
            with open(logo_path, "rb") as f:
                response = await client.put(
@@ -139,6 +106,7 @@ class WherebyClient(VideoPlatformClient):
    def verify_webhook_signature(
        self, body: bytes, signature: str, timestamp: Optional[str] = None
    ) -> bool:
        """Verify Whereby webhook signature."""
        if not signature:
            return False
@@ -148,11 +116,13 @@ class WherebyClient(VideoPlatformClient):
        ts, sig = matches.groups()
        # Check timestamp to prevent replay attacks
        current_time = int(time.time() * 1000)
        diff_time = current_time - int(ts) * 1000
        if diff_time >= self.MAX_ELAPSED_TIME:
            return False
        # Verify signature
        body_dict = json.loads(body)
        signed_payload = f"{ts}.{json.dumps(body_dict, separators=(',', ':'))}"
        hmac_obj = hmac.new(
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Igor Loskutov	1bf73c8199	sync with parent	2025-10-21 11:59:26 -04:00
Sergey Mankovsky	d82abf65ba	Emit multriack pipeline events	2025-10-21 16:31:31 +02:00
Igor Loskutov	7d239fe380	dailico track merge vibe	2025-10-21 10:30:19 -04:00
Sergey Mankovsky	acb6e90f28	Generate waveforms for the mixed audio	2025-10-21 13:33:31 +02:00
Igor Loskutov	f844b9fc1f	Merge branch 'igor/dailico-2' of github-monadical:Monadical-SAS/reflector into igor/dailico-2	2025-10-17 10:00:40 -04:00
Sergey Mankovsky	96f05020cc	Align tracks of a multitrack recording	2025-10-17 15:27:27 +02:00
Sergey Mankovsky	fc79ff3114	Use explicit track keys for processing	2025-10-17 14:42:07 +02:00
Igor Loskutov	3641e2e599	apply platform from envs in priority: non-dry	2025-10-16 15:08:19 -04:00
Sergey Mankovsky	c23518d2e3	Trigger multitrack processing for daily recordings	2025-10-16 20:05:26 +02:00
Sergey Mankovsky	23edffe2a2	Mixdown with pyav filter graph	2025-10-16 17:14:55 +02:00
Sergey Mankovsky	e59770ecc9	Mixdown audio tracks	2025-10-16 17:14:55 +02:00
Sergey Mankovsky	6301f2afa6	Add multitrack pipeline	2025-10-16 17:14:55 +02:00
Mathieu Virbel	9ac7f0e8e2	chore(main): release 0.14.0 (#670 )	2025-10-16 17:14:55 +02:00
Igor Loskutov	0a84a9351a	stub processor (vibe) self-review	2025-10-10 20:41:08 -04:00
Igor Loskutov	ca22084845	stub processor (vibe) self-review	2025-10-10 18:45:19 -04:00
Igor Loskutov	f945f84be9	stub processor (vibe)	2025-10-10 18:05:31 -04:00
Igor Loskutov	4c523c8eec	dont show recording ui on call	2025-10-10 12:45:10 -04:00
Igor Loskutov	0fcf8b6875	doc update (vibe)	2025-10-10 10:57:35 -04:00
Igor Loskutov	446cb748ae	vibe dailyco	2025-10-09 17:04:16 -04:00
Igor Loskutov	3e1339a8ea	vibe dailyco	2025-10-09 15:52:23 -04:00
Igor Loskutov	807819bb2f	llm instructions	2025-10-08 13:06:04 -04:00
		`@@ -1,2 +0,0 @@`
			`-- Create hatchet database for Hatchet workflow engine`
			`CREATE DATABASE hatchet;`
		`@@ -1 +0,0 @@`
			`"""Pipeline modules for audio processing."""`