Mirror of https://github.com/Monadical-SAS/reflector.git (synced 2025-12-21 04:39:06 +00:00)

Commit: hatchet no-mistake
@@ -48,6 +48,20 @@ services:
       conductor:
         condition: service_healthy

+  hatchet-worker:
+    build:
+      context: server
+    volumes:
+      - ./server/:/app/
+      - /app/.venv
+    env_file:
+      - ./server/.env
+    environment:
+      ENTRYPOINT: hatchet-worker
+    depends_on:
+      hatchet:
+        condition: service_healthy
+
   redis:
     image: redis:7.2
     ports:
@@ -81,8 +95,8 @@ services:
   conductor:
     image: conductoross/conductor-standalone:3.15.0
     ports:
-      - 8180:8080
-      - 5001:5000
+      - "8180:8080"
+      - "5001:5000"
     environment:
       - conductor.db.type=memory
     healthcheck:
@@ -91,6 +105,54 @@ services:
       timeout: 10s
       retries: 5

+  hatchet-postgres:
+    image: postgres:15.6
+    command: postgres -c 'max_connections=200'
+    restart: always
+    environment:
+      - POSTGRES_USER=hatchet
+      - POSTGRES_PASSWORD=hatchet
+      - POSTGRES_DB=hatchet
+    ports:
+      - "5436:5432"
+    volumes:
+      - ./data/hatchet-postgres:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -d hatchet -U hatchet"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+      start_period: 10s
+
+  hatchet:
+    image: ghcr.io/hatchet-dev/hatchet/hatchet-lite:latest
+    ports:
+      - "8889:8888"
+      - "7078:7077"
+    depends_on:
+      hatchet-postgres:
+        condition: service_healthy
+    environment:
+      DATABASE_URL: "postgresql://hatchet:hatchet@hatchet-postgres:5432/hatchet?sslmode=disable"
+      SERVER_AUTH_COOKIE_DOMAIN: localhost
+      SERVER_AUTH_COOKIE_INSECURE: "t"
+      SERVER_GRPC_BIND_ADDRESS: "0.0.0.0"
+      SERVER_GRPC_INSECURE: "t"
+      SERVER_GRPC_BROADCAST_ADDRESS: hatchet:7077
+      SERVER_GRPC_PORT: "7077"
+      SERVER_URL: http://localhost:8889
+      SERVER_AUTH_SET_EMAIL_VERIFIED: "t"
+      SERVER_DEFAULT_ENGINE_VERSION: "V1"
+      SERVER_INTERNAL_CLIENT_INTERNAL_GRPC_BROADCAST_ADDRESS: hatchet:7077
+    volumes:
+      - ./data/hatchet-config:/config
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8888/api/live"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+
 networks:
   default:
     attachable: true

server/HATCHET_LLM_OBSERVATIONS.md (new file, 339 lines)
@@ -0,0 +1,339 @@
# Hatchet Migration - LLM Debugging Observations

This document captures hard-won debugging insights from implementing the multitrack diarization pipeline with Hatchet. These observations are particularly relevant for LLM assistants working on this codebase.

## Architecture Context

- **Hatchet SDK v1.21+** uses async workers with gRPC for task polling
- Workers connect to the Hatchet server via gRPC (port 7077) and trigger workflows via REST (port 8888)
- The `hatchet-lite` image bundles server, engine, and database in one container
- Tasks are decorated with `@workflow.task()` (not `@hatchet.step()` as in older examples)
- Workflow input is validated via Pydantic models passed through the `input_validator=` parameter (see the sketch after this list)
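
A minimal sketch of these conventions with the current SDK style (the names `ExampleInput`, `example_workflow`, and `say_hello` are illustrative only; the real definitions live under `server/reflector/hatchet/workflows/`):

```python
from hatchet_sdk import Context, Hatchet
from pydantic import BaseModel

hatchet = Hatchet()  # reads HATCHET_CLIENT_TOKEN from the environment


class ExampleInput(BaseModel):
    transcript_id: str


# Input payloads are validated against the Pydantic model before any task runs
example_workflow = hatchet.workflow(name="ExampleWorkflow", input_validator=ExampleInput)


@example_workflow.task()
async def say_hello(input: ExampleInput, ctx: Context) -> dict:
    # Each task receives the validated input plus a Context with run metadata
    return {"transcript_id": input.transcript_id}
```
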
---

## Challenge 1: SDK Version API Breaking Changes

### Symptoms

```
AttributeError: 'V1WorkflowRunDetails' object has no attribute 'workflow_run_id'
```

### Root Cause

Hatchet SDK v1.21+ changed the response structure for workflow creation. Old examples show:

```python
result = await client.runs.aio_create(workflow_name, input_data)
return result.workflow_run_id  # OLD - doesn't work
```

### Resolution

Access the run ID through the new nested structure:

```python
result = await client.runs.aio_create(workflow_name, input_data)
return result.run.metadata.id  # NEW - SDK v1.21+
```

### Key Insight

**Don't trust documentation or examples.** Read the SDK source code or use IDE autocomplete to discover actual attribute names. The SDK evolves faster than docs.

---

## Challenge 2: Worker Appears Hung at "starting runner..."

### Symptoms

```
[INFO] Starting Hatchet workers
[INFO] Starting Hatchet worker polling...
[INFO] STARTING HATCHET...
[INFO] starting runner...
# ... nothing else, appears stuck
```

### Root Cause

Without debug mode, the Hatchet SDK doesn't log:

- Workflow registration
- gRPC connection status
- Heartbeat activity
- Action listener acquisition

The worker IS working; you just can't see it.

### Resolution

Always enable debug mode during development:

```bash
HATCHET_DEBUG=true
```

With debug enabled, you'll see the actual activity:

```
[DEBUG] 'worker-name' waiting for ['workflow:task1', 'workflow:task2']
[DEBUG] starting action listener: worker-name
[DEBUG] acquired action listener: 562d00a8-8895-42a1-b65b-46f905c902f9
[DEBUG] sending heartbeat
```

### Key Insight

**Start every Hatchet debugging session with `HATCHET_DEBUG=true`.** Silent workers waste hours of debugging time.
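
In this codebase the flag is wired through the client wrapper rather than set ad hoc; a rough sketch of the pattern used in `server/reflector/hatchet/client.py` (reading the flag straight from the environment here for illustration):

```python
import os

from hatchet_sdk import Hatchet

# The real wrapper reads HATCHET_DEBUG via reflector.settings
debug = os.environ.get("HATCHET_DEBUG", "false").lower() in ("1", "t", "true")
client = Hatchet(debug=debug)  # debug=True surfaces registration and heartbeat logs
```
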
---

## Challenge 3: Docker Networking + JWT Token URL Conflicts

### Symptoms

```
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
    status = StatusCode.UNAVAILABLE
    details = "failed to connect to all addresses"
```

### Root Cause

The Hatchet API token embeds URLs:

```json
{
  "aud": "http://localhost:8889",
  "grpc_broadcast_address": "localhost:7077",
  "server_url": "http://localhost:8889"
}
```

Inside Docker containers, `localhost` refers to the container itself, not the Hatchet server.
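
To check what a particular token embeds, the JWT payload can be decoded locally; a hedged sketch assuming the standard three-part `header.payload.signature` layout shown above:

```python
import base64
import json
import os

token = os.environ["HATCHET_CLIENT_TOKEN"]
payload = token.split(".")[1]
payload += "=" * (-len(payload) % 4)  # restore stripped base64 padding
claims = json.loads(base64.urlsafe_b64decode(payload))
print(claims.get("server_url"), claims.get("grpc_broadcast_address"))
```
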
### Resolution

Override the token-embedded URLs with environment variables:

```bash
# In .env or docker-compose environment
HATCHET_CLIENT_HOST_PORT=hatchet:7077
HATCHET_CLIENT_SERVER_URL=http://hatchet:8888
HATCHET_CLIENT_TLS_STRATEGY=none
```

### Key Insight

**The JWT token is not the final word on connection settings.** Environment variables override token-embedded URLs, which is essential for Docker networking.

---

## Challenge 4: Workflow Name Case Sensitivity

### Symptoms

```
BadRequestException: (400)
HTTP response body: errors=[APIError(description='workflow names not found: diarizationpipeline')]
```

### Root Cause

Hatchet uses the exact workflow name you define for triggering:

```python
diarization_pipeline = hatchet.workflow(
    name="DiarizationPipeline",  # Use THIS exact name to trigger
    input_validator=PipelineInput
)
```

Internally, task identifiers are lowercased (`diarizationpipeline:get_recording`), but workflow triggers must match the defined name.

### Resolution

```python
# Correct
await client.start_workflow('DiarizationPipeline', input_data)

# Wrong
await client.start_workflow('diarizationpipeline', input_data)
```

### Key Insight

**Workflow names are case-sensitive for triggering, but task refs are lowercase.** Don't conflate the two.

---

## Challenge 5: Pydantic Response Object Iteration

### Symptoms

```
AttributeError: 'tuple' object has no attribute 'participant_id'
```

### Root Cause

When API responses return Pydantic models with list fields:

```python
class MeetingParticipantsResponse(BaseModel):
    data: List[MeetingParticipant]
```

Iterating the response object directly is wrong:

```python
for p in participants:  # WRONG - iterates over model fields as tuples
```

### Resolution

Access the `.data` attribute explicitly:

```python
for p in participants.data:  # CORRECT - iterates over list items
    print(p.participant_id)
```

### Key Insight

**Pydantic models with list fields require explicit `.data` access.** The model itself is not iterable in the expected way.

---

## Challenge 6: Database Connections in Async Workers

### Symptoms

```
InterfaceError: cannot perform operation: another operation is in progress
```

### Root Cause

Similar to Conductor, Hatchet workers may inherit stale database connections. Each task runs in an async context that may not share the same event loop as cached connections.

### Resolution

Create fresh database connections per task:

```python
async def _get_fresh_db_connection():
    """Create fresh database connection for worker task."""
    import databases

    from reflector.db import _database_context
    from reflector.settings import settings

    _database_context.set(None)
    db = databases.Database(settings.DATABASE_URL)
    _database_context.set(db)
    await db.connect()
    return db


async def _close_db_connection(db):
    from reflector.db import _database_context

    await db.disconnect()
    _database_context.set(None)
```

### Key Insight

**Cached singletons (DB, HTTP clients) are unsafe in workflow workers.** Always create fresh connections.
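
The helpers above are used with `try`/`finally` inside each task so the connection is always released; a condensed sketch of the per-task pattern (`load_transcript` is an illustrative task body, not a function in this repo):

```python
async def load_transcript(transcript_id: str) -> None:
    # One fresh connection per task invocation, always closed afterwards
    db = await _get_fresh_db_connection()
    try:
        from reflector.db.transcripts import transcripts_controller

        transcript = await transcripts_controller.get_by_id(transcript_id)
        # ... work with the transcript ...
    finally:
        await _close_db_connection(db)
```
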
---

## Challenge 7: Child Workflow Fan-out Pattern

### Symptoms

Child workflows spawn but parent doesn't wait for completion, or results aren't collected.

### Root Cause

Hatchet child workflows need explicit spawning and result collection:

```python
# Spawning children
child_runs = await asyncio.gather(*[
    child_workflow.aio_run(child_input)
    for child_input in inputs
])

# Results are returned directly from aio_run()
```

### Resolution

Use `aio_run()` for child workflows and `asyncio.gather()` for parallelism:

```python
@parent_workflow.task(parents=[setup_task])
async def process_tracks(input: ParentInput, ctx: Context) -> dict:
    child_coroutines = [
        track_workflow.aio_run(TrackInput(track_index=i, ...))
        for i in range(len(input.tracks))
    ]

    results = await asyncio.gather(*child_coroutines, return_exceptions=True)

    # Handle failures
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            logger.error(f"Track {i} failed: {result}")

    return {"track_results": [r for r in results if not isinstance(r, Exception)]}
```

### Key Insight

**Child workflows in Hatchet return results directly.** No need to poll for completion like in Conductor.

---

## Debugging Workflow

### 1. Enable Debug Mode First

```bash
HATCHET_DEBUG=true
```

### 2. Verify Worker Registration

Look for this in debug logs:

```
[DEBUG] 'worker-name' waiting for ['workflow:task1', 'workflow:task2', ...]
[DEBUG] acquired action listener: {uuid}
```

### 3. Test Workflow Trigger Separately

```bash
docker exec server uv run python -c "
from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.workflows.diarization_pipeline import PipelineInput
import asyncio

async def test():
    input_data = PipelineInput(
        transcript_id='test',
        recording_id=None,
        room_name='test-room',
        bucket_name='bucket',
        tracks=[],
    )
    run_id = await HatchetClientManager.start_workflow(
        'DiarizationPipeline',
        input_data.model_dump()
    )
    print(f'Triggered: {run_id}')

asyncio.run(test())
"
```

### 4. Check Hatchet Server Logs

```bash
docker logs reflector-hatchet-1 --tail 50
```

Look for `WRN` entries indicating API errors or connection issues.

### 5. Verify gRPC Connectivity

```bash
docker exec worker python -c "
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('hatchet', 7077))
print(f'gRPC port 7077: {\"reachable\" if result == 0 else \"blocked\"}')"
```

### 6. Force Container Rebuild

Volume mounts may cache old bytecode:

```bash
docker compose up -d --build --force-recreate hatchet-worker
```

---

## Common Gotchas Summary

| Issue | Signal | Fix |
|-------|--------|-----|
| SDK API changed | `AttributeError` on result | Check SDK source for actual attributes |
| Worker appears stuck | Only "starting runner..." | Enable `HATCHET_DEBUG=true` |
| Can't connect from Docker | gRPC unavailable | Set `HATCHET_CLIENT_HOST_PORT` and `_SERVER_URL` |
| Workflow not found | 400 Bad Request | Use exact case-sensitive workflow name |
| Tuple iteration error | `'tuple' has no attribute` | Access `.data` on Pydantic response models |
| DB conflicts | "another operation in progress" | Fresh DB connection per task |
| Old code running | Fixed code but same error | Force rebuild container, clear `__pycache__` |

---

## Files Most Likely to Need Hatchet-Specific Handling

- `server/reflector/hatchet/workflows/*.py` - Workflow and task definitions
- `server/reflector/hatchet/client.py` - Client wrapper, SDK version compatibility
- `server/reflector/hatchet/run_workers.py` - Worker startup and registration
- `server/reflector/hatchet/progress.py` - Progress emission for UI updates
- `docker-compose.yml` - Hatchet infrastructure services

@@ -40,6 +40,7 @@ dependencies = [
     "webvtt-py>=0.5.0",
     "icalendar>=6.0.0",
     "conductor-python>=1.2.3",
+    "hatchet-sdk>=0.47.0",
 ]

 [dependency-groups]
@@ -135,5 +136,10 @@ select = [
 "reflector/processors/summary/summary_builder.py" = ["E501"]
 "gpu/modal_deployments/**.py" = ["PLC0415"]
 "reflector/tools/**.py" = ["PLC0415"]
+"reflector/hatchet/run_workers.py" = ["PLC0415"]
+"reflector/hatchet/workflows/**.py" = ["PLC0415"]
+"reflector/conductor/run_workers.py" = ["PLC0415"]
+"reflector/conductor/workers/**.py" = ["PLC0415"]
+"reflector/views/hatchet.py" = ["PLC0415"]
 "migrations/versions/**.py" = ["PLC0415"]
 "tests/**.py" = ["PLC0415"]
@@ -14,6 +14,7 @@ from reflector.metrics import metrics_init
 from reflector.settings import settings
 from reflector.views.conductor import router as conductor_router
 from reflector.views.daily import router as daily_router
+from reflector.views.hatchet import router as hatchet_router
 from reflector.views.meetings import router as meetings_router
 from reflector.views.rooms import router as rooms_router
 from reflector.views.rtc_offer import router as rtc_offer_router

@@ -100,6 +101,7 @@ app.include_router(zulip_router, prefix="/v1")
 app.include_router(whereby_router, prefix="/v1")
 app.include_router(daily_router, prefix="/v1/daily")
 app.include_router(conductor_router, prefix="/v1")
+app.include_router(hatchet_router, prefix="/v1")
 add_pagination(app)

 # prepare celery
server/reflector/hatchet/__init__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
"""Hatchet workflow orchestration for Reflector."""

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress, emit_progress_async

__all__ = ["HatchetClientManager", "emit_progress", "emit_progress_async"]
server/reflector/hatchet/client.py (new file, 48 lines)
@@ -0,0 +1,48 @@
"""Hatchet Python client wrapper."""

from hatchet_sdk import Hatchet

from reflector.settings import settings


class HatchetClientManager:
    """Singleton manager for Hatchet client connections."""

    _instance: Hatchet | None = None

    @classmethod
    def get_client(cls) -> Hatchet:
        """Get or create the Hatchet client."""
        if cls._instance is None:
            if not settings.HATCHET_CLIENT_TOKEN:
                raise ValueError("HATCHET_CLIENT_TOKEN must be set")

            cls._instance = Hatchet(
                debug=settings.HATCHET_DEBUG,
            )
        return cls._instance

    @classmethod
    async def start_workflow(
        cls, workflow_name: str, input_data: dict, key: str | None = None
    ) -> str:
        """Start a workflow and return the workflow run ID."""
        client = cls.get_client()
        result = await client.runs.aio_create(
            workflow_name,
            input_data,
        )
        # SDK v1.21+ returns V1WorkflowRunDetails with run.metadata.id
        return result.run.metadata.id

    @classmethod
    async def get_workflow_status(cls, workflow_run_id: str) -> dict:
        """Get the current status of a workflow run."""
        client = cls.get_client()
        run = await client.runs.aio_get(workflow_run_id)
        return run.to_dict()

    @classmethod
    def reset(cls) -> None:
        """Reset the client instance (for testing)."""
        cls._instance = None
server/reflector/hatchet/progress.py (new file, 120 lines)
@@ -0,0 +1,120 @@
"""Progress event emission for Hatchet workers."""

import asyncio
from typing import Literal

from reflector.db.transcripts import PipelineProgressData
from reflector.logger import logger
from reflector.ws_manager import get_ws_manager

# Step mapping for progress tracking (matches Conductor pipeline)
PIPELINE_STEPS = {
    "get_recording": 1,
    "get_participants": 2,
    "pad_track": 3,  # Fork tasks share same step
    "mixdown_tracks": 4,
    "generate_waveform": 5,
    "transcribe_track": 6,  # Fork tasks share same step
    "merge_transcripts": 7,
    "detect_topics": 8,
    "generate_title": 9,  # Fork tasks share same step
    "generate_summary": 9,  # Fork tasks share same step
    "finalize": 10,
    "cleanup_consent": 11,
    "post_zulip": 12,
    "send_webhook": 13,
}

TOTAL_STEPS = 13


async def _emit_progress_async(
    transcript_id: str,
    step: str,
    status: Literal["pending", "in_progress", "completed", "failed"],
    workflow_id: str | None = None,
) -> None:
    """Async implementation of progress emission."""
    ws_manager = get_ws_manager()
    step_index = PIPELINE_STEPS.get(step, 0)

    data = PipelineProgressData(
        workflow_id=workflow_id,
        current_step=step,
        step_index=step_index,
        total_steps=TOTAL_STEPS,
        step_status=status,
    )

    await ws_manager.send_json(
        room_id=f"ts:{transcript_id}",
        message={
            "event": "PIPELINE_PROGRESS",
            "data": data.model_dump(),
        },
    )

    logger.debug(
        "[Hatchet Progress] Emitted",
        transcript_id=transcript_id,
        step=step,
        status=status,
        step_index=step_index,
    )


def emit_progress(
    transcript_id: str,
    step: str,
    status: Literal["pending", "in_progress", "completed", "failed"],
    workflow_id: str | None = None,
) -> None:
    """Emit a pipeline progress event (sync wrapper for Hatchet workers).

    Args:
        transcript_id: The transcript ID to emit progress for
        step: The current step name (e.g., "transcribe_track")
        status: The step status
        workflow_id: Optional workflow run ID
    """
    try:
        # Get or create event loop for sync context
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        if loop is not None and loop.is_running():
            # Already in async context, schedule the coroutine
            asyncio.create_task(
                _emit_progress_async(transcript_id, step, status, workflow_id)
            )
        else:
            # Not in async context, run synchronously
            asyncio.run(_emit_progress_async(transcript_id, step, status, workflow_id))
    except Exception as e:
        # Progress emission should never break the pipeline
        logger.warning(
            "[Hatchet Progress] Failed to emit progress event",
            error=str(e),
            transcript_id=transcript_id,
            step=step,
        )


async def emit_progress_async(
    transcript_id: str,
    step: str,
    status: Literal["pending", "in_progress", "completed", "failed"],
    workflow_id: str | None = None,
) -> None:
    """Async version of emit_progress for use in async Hatchet tasks."""
    try:
        await _emit_progress_async(transcript_id, step, status, workflow_id)
    except Exception as e:
        logger.warning(
            "[Hatchet Progress] Failed to emit progress event",
            error=str(e),
            transcript_id=transcript_id,
            step=step,
        )
server/reflector/hatchet/run_workers.py (new file, 59 lines)
@@ -0,0 +1,59 @@
"""
Run Hatchet workers for the diarization pipeline.

Usage:
    uv run -m reflector.hatchet.run_workers

    # Or via docker:
    docker compose exec server uv run -m reflector.hatchet.run_workers
"""

import signal
import sys

from reflector.logger import logger
from reflector.settings import settings


def main() -> None:
    """Start Hatchet worker polling."""
    if not settings.HATCHET_ENABLED:
        logger.error("HATCHET_ENABLED is False, not starting workers")
        sys.exit(1)

    if not settings.HATCHET_CLIENT_TOKEN:
        logger.error("HATCHET_CLIENT_TOKEN is not set")
        sys.exit(1)

    logger.info(
        "Starting Hatchet workers",
        debug=settings.HATCHET_DEBUG,
    )

    # Import workflows to register them
    from reflector.hatchet.client import HatchetClientManager
    from reflector.hatchet.workflows import diarization_pipeline, track_workflow

    hatchet = HatchetClientManager.get_client()

    # Create worker with both workflows
    worker = hatchet.worker(
        "reflector-diarization-worker",
        workflows=[diarization_pipeline, track_workflow],
    )

    # Handle graceful shutdown
    def shutdown_handler(signum: int, frame) -> None:
        logger.info("Received shutdown signal, stopping workers...")
        # Worker cleanup happens automatically on exit
        sys.exit(0)

    signal.signal(signal.SIGINT, shutdown_handler)
    signal.signal(signal.SIGTERM, shutdown_handler)

    logger.info("Starting Hatchet worker polling...")
    worker.start()


if __name__ == "__main__":
    main()
server/reflector/hatchet/workflows/__init__.py (new file, 14 lines)
@@ -0,0 +1,14 @@
"""Hatchet workflow definitions."""

from reflector.hatchet.workflows.diarization_pipeline import (
    PipelineInput,
    diarization_pipeline,
)
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow

__all__ = [
    "diarization_pipeline",
    "track_workflow",
    "PipelineInput",
    "TrackInput",
]
server/reflector/hatchet/workflows/diarization_pipeline.py (new file, 808 lines)
@@ -0,0 +1,808 @@
|
|||||||
|
"""
|
||||||
|
Hatchet main workflow: DiarizationPipeline
|
||||||
|
|
||||||
|
Multitrack diarization pipeline for Daily.co recordings.
|
||||||
|
Orchestrates the full processing flow from recording metadata to final transcript.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import tempfile
|
||||||
|
from datetime import timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import av
|
||||||
|
from hatchet_sdk import Context
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from reflector.hatchet.client import HatchetClientManager
|
||||||
|
from reflector.hatchet.progress import emit_progress_async
|
||||||
|
from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow
|
||||||
|
from reflector.logger import logger
|
||||||
|
|
||||||
|
# Audio constants
|
||||||
|
OPUS_STANDARD_SAMPLE_RATE = 48000
|
||||||
|
OPUS_DEFAULT_BIT_RATE = 64000
|
||||||
|
PRESIGNED_URL_EXPIRATION_SECONDS = 7200
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineInput(BaseModel):
|
||||||
|
"""Input to trigger the diarization pipeline."""
|
||||||
|
|
||||||
|
recording_id: str | None
|
||||||
|
room_name: str | None
|
||||||
|
tracks: list[dict] # List of {"s3_key": str}
|
||||||
|
bucket_name: str
|
||||||
|
transcript_id: str
|
||||||
|
room_id: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# Get hatchet client and define workflow
|
||||||
|
hatchet = HatchetClientManager.get_client()
|
||||||
|
|
||||||
|
diarization_pipeline = hatchet.workflow(
|
||||||
|
name="DiarizationPipeline", input_validator=PipelineInput
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Helper Functions
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_fresh_db_connection():
|
||||||
|
"""Create fresh database connection for subprocess."""
|
||||||
|
import databases
|
||||||
|
|
||||||
|
from reflector.db import _database_context
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
_database_context.set(None)
|
||||||
|
db = databases.Database(settings.DATABASE_URL)
|
||||||
|
_database_context.set(db)
|
||||||
|
await db.connect()
|
||||||
|
return db
|
||||||
|
|
||||||
|
|
||||||
|
async def _close_db_connection(db):
|
||||||
|
"""Close database connection."""
|
||||||
|
from reflector.db import _database_context
|
||||||
|
|
||||||
|
await db.disconnect()
|
||||||
|
_database_context.set(None)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_storage():
|
||||||
|
"""Create fresh storage instance."""
|
||||||
|
from reflector.settings import settings
|
||||||
|
from reflector.storage.storage_aws import AwsStorage
|
||||||
|
|
||||||
|
return AwsStorage(
|
||||||
|
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
|
||||||
|
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
|
||||||
|
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
|
||||||
|
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Pipeline Tasks
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(execution_timeout=timedelta(seconds=60), retries=3)
|
||||||
|
async def get_recording(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Fetch recording metadata from Daily.co API."""
|
||||||
|
logger.info("[Hatchet] get_recording", recording_id=input.recording_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from reflector.dailyco_api.client import DailyApiClient
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
if not input.recording_id:
|
||||||
|
# No recording_id in reprocess path - return minimal data
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"id": None,
|
||||||
|
"mtg_session_id": None,
|
||||||
|
"room_name": input.room_name,
|
||||||
|
"duration": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
if not settings.DAILY_API_KEY:
|
||||||
|
raise ValueError("DAILY_API_KEY not configured")
|
||||||
|
|
||||||
|
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
|
||||||
|
recording = await client.get_recording(input.recording_id)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] get_recording complete",
|
||||||
|
recording_id=input.recording_id,
|
||||||
|
room_name=recording.room_name,
|
||||||
|
duration=recording.duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"id": recording.id,
|
||||||
|
"mtg_session_id": recording.mtgSessionId,
|
||||||
|
"room_name": recording.room_name,
|
||||||
|
"duration": recording.duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] get_recording failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_recording", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[get_recording], execution_timeout=timedelta(seconds=60), retries=3
|
||||||
|
)
|
||||||
|
async def get_participants(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Fetch participant list from Daily.co API."""
|
||||||
|
logger.info("[Hatchet] get_participants", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_participants", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
recording_data = ctx.task_output(get_recording)
|
||||||
|
mtg_session_id = recording_data.get("mtg_session_id")
|
||||||
|
|
||||||
|
from reflector.dailyco_api.client import DailyApiClient
|
||||||
|
from reflector.settings import settings
|
||||||
|
|
||||||
|
if not mtg_session_id or not settings.DAILY_API_KEY:
|
||||||
|
# Return empty participants if no session ID
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id,
|
||||||
|
"get_participants",
|
||||||
|
"completed",
|
||||||
|
ctx.workflow_run_id,
|
||||||
|
)
|
||||||
|
return {"participants": [], "num_tracks": len(input.tracks)}
|
||||||
|
|
||||||
|
async with DailyApiClient(api_key=settings.DAILY_API_KEY) as client:
|
||||||
|
participants = await client.get_meeting_participants(mtg_session_id)
|
||||||
|
|
||||||
|
participants_list = [
|
||||||
|
{"participant_id": p.participant_id, "user_name": p.user_name}
|
||||||
|
for p in participants.data
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] get_participants complete",
|
||||||
|
participant_count=len(participants_list),
|
||||||
|
)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_participants", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"participants": participants_list, "num_tracks": len(input.tracks)}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] get_participants failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "get_participants", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[get_participants], execution_timeout=timedelta(seconds=600), retries=3
|
||||||
|
)
|
||||||
|
async def process_tracks(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Spawn child workflows for each track (dynamic fan-out).
|
||||||
|
|
||||||
|
Processes pad_track and transcribe_track for each audio track in parallel.
|
||||||
|
"""
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] process_tracks",
|
||||||
|
num_tracks=len(input.tracks),
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Spawn child workflows for each track
|
||||||
|
child_coroutines = [
|
||||||
|
track_workflow.aio_run(
|
||||||
|
TrackInput(
|
||||||
|
track_index=i,
|
||||||
|
s3_key=track["s3_key"],
|
||||||
|
bucket_name=input.bucket_name,
|
||||||
|
transcript_id=input.transcript_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for i, track in enumerate(input.tracks)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Wait for all child workflows to complete
|
||||||
|
results = await asyncio.gather(*child_coroutines)
|
||||||
|
|
||||||
|
# Collect all track results
|
||||||
|
all_words = []
|
||||||
|
padded_urls = []
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
transcribe_result = result.get("transcribe_track", {})
|
||||||
|
all_words.extend(transcribe_result.get("words", []))
|
||||||
|
|
||||||
|
pad_result = result.get("pad_track", {})
|
||||||
|
padded_urls.append(pad_result.get("padded_url"))
|
||||||
|
|
||||||
|
# Sort words by start time
|
||||||
|
all_words.sort(key=lambda w: w.get("start", 0))
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] process_tracks complete",
|
||||||
|
num_tracks=len(input.tracks),
|
||||||
|
total_words=len(all_words),
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"all_words": all_words,
|
||||||
|
"padded_urls": padded_urls,
|
||||||
|
"word_count": len(all_words),
|
||||||
|
"num_tracks": len(input.tracks),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[process_tracks], execution_timeout=timedelta(seconds=300), retries=3
|
||||||
|
)
|
||||||
|
async def mixdown_tracks(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Mix all padded tracks into single audio file."""
|
||||||
|
logger.info("[Hatchet] mixdown_tracks", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "mixdown_tracks", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
padded_urls = track_data.get("padded_urls", [])
|
||||||
|
|
||||||
|
if not padded_urls:
|
||||||
|
raise ValueError("No padded tracks to mixdown")
|
||||||
|
|
||||||
|
storage = _get_storage()
|
||||||
|
|
||||||
|
# Download all tracks and mix
|
||||||
|
temp_inputs = []
|
||||||
|
try:
|
||||||
|
for i, url in enumerate(padded_urls):
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
temp_input = tempfile.NamedTemporaryFile(suffix=".webm", delete=False)
|
||||||
|
temp_inputs.append(temp_input.name)
|
||||||
|
|
||||||
|
# Download track
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
response = await client.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
with open(temp_input.name, "wb") as f:
|
||||||
|
f.write(response.content)
|
||||||
|
|
||||||
|
# Mix using PyAV amix filter
|
||||||
|
if len(temp_inputs) == 0:
|
||||||
|
raise ValueError("No valid tracks to mixdown")
|
||||||
|
|
||||||
|
output_path = tempfile.mktemp(suffix=".mp3")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use ffmpeg-style mixing via PyAV
|
||||||
|
containers = [av.open(path) for path in temp_inputs]
|
||||||
|
|
||||||
|
# Get the longest duration
|
||||||
|
max_duration = 0.0
|
||||||
|
for container in containers:
|
||||||
|
if container.duration:
|
||||||
|
duration = float(container.duration * av.time_base)
|
||||||
|
max_duration = max(max_duration, duration)
|
||||||
|
|
||||||
|
# Close containers for now
|
||||||
|
for container in containers:
|
||||||
|
container.close()
|
||||||
|
|
||||||
|
# Use subprocess for mixing (simpler than complex PyAV graph)
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
# Build ffmpeg command
|
||||||
|
cmd = ["ffmpeg", "-y"]
|
||||||
|
for path in temp_inputs:
|
||||||
|
cmd.extend(["-i", path])
|
||||||
|
|
||||||
|
# Build filter for N inputs
|
||||||
|
n = len(temp_inputs)
|
||||||
|
filter_str = f"amix=inputs={n}:duration=longest:normalize=0"
|
||||||
|
cmd.extend(["-filter_complex", filter_str])
|
||||||
|
cmd.extend(["-ac", "2", "-ar", "48000", "-b:a", "128k", output_path])
|
||||||
|
|
||||||
|
subprocess.run(cmd, check=True, capture_output=True)
|
||||||
|
|
||||||
|
# Upload mixed file
|
||||||
|
file_size = Path(output_path).stat().st_size
|
||||||
|
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/mixed.mp3"
|
||||||
|
|
||||||
|
with open(output_path, "rb") as mixed_file:
|
||||||
|
await storage.put_file(storage_path, mixed_file)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] mixdown_tracks uploaded",
|
||||||
|
key=storage_path,
|
||||||
|
size=file_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
Path(output_path).unlink(missing_ok=True)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
for path in temp_inputs:
|
||||||
|
Path(path).unlink(missing_ok=True)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "mixdown_tracks", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"audio_key": storage_path,
|
||||||
|
"duration": max_duration,
|
||||||
|
"tracks_mixed": len(temp_inputs),
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] mixdown_tracks failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "mixdown_tracks", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[mixdown_tracks], execution_timeout=timedelta(seconds=120), retries=3
|
||||||
|
)
|
||||||
|
async def generate_waveform(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Generate audio waveform visualization."""
|
||||||
|
logger.info("[Hatchet] generate_waveform", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_waveform", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
mixdown_data = ctx.task_output(mixdown_tracks)
|
||||||
|
audio_key = mixdown_data.get("audio_key")
|
||||||
|
|
||||||
|
storage = _get_storage()
|
||||||
|
audio_url = await storage.get_file_url(
|
||||||
|
audio_key,
|
||||||
|
operation="get_object",
|
||||||
|
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||||
|
)
|
||||||
|
|
||||||
|
from reflector.pipelines.waveform_helpers import generate_waveform_data
|
||||||
|
|
||||||
|
waveform = await generate_waveform_data(audio_url)
|
||||||
|
|
||||||
|
# Store waveform
|
||||||
|
waveform_key = f"file_pipeline_hatchet/{input.transcript_id}/waveform.json"
|
||||||
|
import json
|
||||||
|
|
||||||
|
waveform_bytes = json.dumps(waveform).encode()
|
||||||
|
import io
|
||||||
|
|
||||||
|
await storage.put_file(waveform_key, io.BytesIO(waveform_bytes))
|
||||||
|
|
||||||
|
logger.info("[Hatchet] generate_waveform complete")
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_waveform", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"waveform_key": waveform_key}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] generate_waveform failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_waveform", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[mixdown_tracks], execution_timeout=timedelta(seconds=300), retries=3
|
||||||
|
)
|
||||||
|
async def detect_topics(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Detect topics using LLM."""
|
||||||
|
logger.info("[Hatchet] detect_topics", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "detect_topics", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
words = track_data.get("all_words", [])
|
||||||
|
|
||||||
|
from reflector.pipelines import topic_processing
|
||||||
|
from reflector.processors.types import Transcript as TranscriptType
|
||||||
|
from reflector.processors.types import Word
|
||||||
|
|
||||||
|
# Convert word dicts to Word objects
|
||||||
|
word_objects = [Word(**w) for w in words]
|
||||||
|
transcript = TranscriptType(words=word_objects)
|
||||||
|
|
||||||
|
empty_pipeline = topic_processing.EmptyPipeline(logger=logger)
|
||||||
|
|
||||||
|
async def noop_callback(t):
|
||||||
|
pass
|
||||||
|
|
||||||
|
topics = await topic_processing.detect_topics(
|
||||||
|
transcript,
|
||||||
|
"en", # target_language
|
||||||
|
on_topic_callback=noop_callback,
|
||||||
|
empty_pipeline=empty_pipeline,
|
||||||
|
)
|
||||||
|
|
||||||
|
topics_list = [t.model_dump() for t in topics]
|
||||||
|
|
||||||
|
logger.info("[Hatchet] detect_topics complete", topic_count=len(topics_list))
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "detect_topics", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"topics": topics_list}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] detect_topics failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "detect_topics", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[detect_topics], execution_timeout=timedelta(seconds=120), retries=3
|
||||||
|
)
|
||||||
|
async def generate_title(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Generate meeting title using LLM."""
|
||||||
|
logger.info("[Hatchet] generate_title", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_title", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
topics_data = ctx.task_output(detect_topics)
|
||||||
|
topics = topics_data.get("topics", [])
|
||||||
|
|
||||||
|
from reflector.pipelines import topic_processing
|
||||||
|
from reflector.processors.types import Topic
|
||||||
|
|
||||||
|
topic_objects = [Topic(**t) for t in topics]
|
||||||
|
|
||||||
|
title = await topic_processing.generate_title(topic_objects)
|
||||||
|
|
||||||
|
logger.info("[Hatchet] generate_title complete", title=title)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_title", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"title": title}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] generate_title failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_title", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[detect_topics], execution_timeout=timedelta(seconds=300), retries=3
|
||||||
|
)
|
||||||
|
async def generate_summary(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Generate meeting summary using LLM."""
|
||||||
|
logger.info("[Hatchet] generate_summary", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_summary", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
topics_data = ctx.task_output(detect_topics)
|
||||||
|
|
||||||
|
words = track_data.get("all_words", [])
|
||||||
|
topics = topics_data.get("topics", [])
|
||||||
|
|
||||||
|
from reflector.pipelines import topic_processing
|
||||||
|
from reflector.processors.types import Topic, Word
|
||||||
|
from reflector.processors.types import Transcript as TranscriptType
|
||||||
|
|
||||||
|
word_objects = [Word(**w) for w in words]
|
||||||
|
transcript = TranscriptType(words=word_objects)
|
||||||
|
topic_objects = [Topic(**t) for t in topics]
|
||||||
|
|
||||||
|
summary, short_summary = await topic_processing.generate_summary(
|
||||||
|
transcript, topic_objects
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info("[Hatchet] generate_summary complete")
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_summary", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"summary": summary, "short_summary": short_summary}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] generate_summary failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "generate_summary", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[generate_waveform, generate_title, generate_summary],
|
||||||
|
execution_timeout=timedelta(seconds=60),
|
||||||
|
retries=3,
|
||||||
|
)
|
||||||
|
async def finalize(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Finalize transcript status and update database."""
|
||||||
|
logger.info("[Hatchet] finalize", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "finalize", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
title_data = ctx.task_output(generate_title)
|
||||||
|
summary_data = ctx.task_output(generate_summary)
|
||||||
|
mixdown_data = ctx.task_output(mixdown_tracks)
|
||||||
|
track_data = ctx.task_output(process_tracks)
|
||||||
|
|
||||||
|
title = title_data.get("title", "")
|
||||||
|
summary = summary_data.get("summary", "")
|
||||||
|
short_summary = summary_data.get("short_summary", "")
|
||||||
|
duration = mixdown_data.get("duration", 0)
|
||||||
|
all_words = track_data.get("all_words", [])
|
||||||
|
|
||||||
|
db = await _get_fresh_db_connection()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from reflector.db.transcripts import transcripts_controller
|
||||||
|
from reflector.processors.types import Word
|
||||||
|
|
||||||
|
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||||
|
if transcript is None:
|
||||||
|
raise ValueError(
|
||||||
|
f"Transcript {input.transcript_id} not found in database"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Convert words back to Word objects for storage
|
||||||
|
word_objects = [Word(**w) for w in all_words]
|
||||||
|
|
||||||
|
await transcripts_controller.update(
|
||||||
|
transcript,
|
||||||
|
{
|
||||||
|
"status": "ended",
|
||||||
|
"title": title,
|
||||||
|
"long_summary": summary,
|
||||||
|
"short_summary": short_summary,
|
||||||
|
"duration": duration,
|
||||||
|
"words": word_objects,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[Hatchet] finalize complete", transcript_id=input.transcript_id
|
||||||
|
)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
await _close_db_connection(db)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "finalize", "completed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"status": "COMPLETED"}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("[Hatchet] finalize failed", error=str(e), exc_info=True)
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "finalize", "failed", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@diarization_pipeline.task(
|
||||||
|
parents=[finalize], execution_timeout=timedelta(seconds=60), retries=3
|
||||||
|
)
|
||||||
|
async def cleanup_consent(input: PipelineInput, ctx: Context) -> dict:
|
||||||
|
"""Check and handle consent requirements."""
|
||||||
|
logger.info("[Hatchet] cleanup_consent", transcript_id=input.transcript_id)
|
||||||
|
|
||||||
|
await emit_progress_async(
|
||||||
|
input.transcript_id, "cleanup_consent", "in_progress", ctx.workflow_run_id
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
db = await _get_fresh_db_connection()
|
||||||
|
|
||||||
|
try:
|
||||||
|
            from reflector.db.meetings import meetings_controller
            from reflector.db.transcripts import transcripts_controller

            transcript = await transcripts_controller.get_by_id(input.transcript_id)
            if transcript and transcript.meeting_id:
                meeting = await meetings_controller.get_by_id(transcript.meeting_id)
                if meeting:
                    # Check consent logic here
                    # For now just mark as checked
                    pass

            logger.info(
                "[Hatchet] cleanup_consent complete", transcript_id=input.transcript_id
            )

        finally:
            await _close_db_connection(db)

        await emit_progress_async(
            input.transcript_id, "cleanup_consent", "completed", ctx.workflow_run_id
        )

        return {"consent_checked": True}

    except Exception as e:
        logger.error("[Hatchet] cleanup_consent failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "cleanup_consent", "failed", ctx.workflow_run_id
        )
        raise


@diarization_pipeline.task(
    parents=[cleanup_consent], execution_timeout=timedelta(seconds=60), retries=5
)
async def post_zulip(input: PipelineInput, ctx: Context) -> dict:
    """Post notification to Zulip."""
    logger.info("[Hatchet] post_zulip", transcript_id=input.transcript_id)

    await emit_progress_async(
        input.transcript_id, "post_zulip", "in_progress", ctx.workflow_run_id
    )

    try:
        from reflector.settings import settings

        if not settings.ZULIP_REALM:
            logger.info("[Hatchet] post_zulip skipped (Zulip not configured)")
            await emit_progress_async(
                input.transcript_id, "post_zulip", "completed", ctx.workflow_run_id
            )
            return {"zulip_message_id": None, "skipped": True}

        from reflector.zulip import post_transcript_notification

        db = await _get_fresh_db_connection()

        try:
            from reflector.db.transcripts import transcripts_controller

            transcript = await transcripts_controller.get_by_id(input.transcript_id)
            if transcript:
                message_id = await post_transcript_notification(transcript)
                logger.info(
                    "[Hatchet] post_zulip complete", zulip_message_id=message_id
                )
            else:
                message_id = None

        finally:
            await _close_db_connection(db)

        await emit_progress_async(
            input.transcript_id, "post_zulip", "completed", ctx.workflow_run_id
        )

        return {"zulip_message_id": message_id}

    except Exception as e:
        logger.error("[Hatchet] post_zulip failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "post_zulip", "failed", ctx.workflow_run_id
        )
        raise


@diarization_pipeline.task(
    parents=[post_zulip], execution_timeout=timedelta(seconds=120), retries=30
)
async def send_webhook(input: PipelineInput, ctx: Context) -> dict:
    """Send completion webhook to external service."""
    logger.info("[Hatchet] send_webhook", transcript_id=input.transcript_id)

    await emit_progress_async(
        input.transcript_id, "send_webhook", "in_progress", ctx.workflow_run_id
    )

    try:
        if not input.room_id:
            logger.info("[Hatchet] send_webhook skipped (no room_id)")
            await emit_progress_async(
                input.transcript_id, "send_webhook", "completed", ctx.workflow_run_id
            )
            return {"webhook_sent": False, "skipped": True}

        db = await _get_fresh_db_connection()

        try:
            from reflector.db.rooms import rooms_controller
            from reflector.db.transcripts import transcripts_controller

            room = await rooms_controller.get_by_id(input.room_id)
            transcript = await transcripts_controller.get_by_id(input.transcript_id)

            if room and room.webhook_url and transcript:
                import httpx

                webhook_payload = {
                    "event": "transcript.completed",
                    "transcript_id": input.transcript_id,
                    "title": transcript.title,
                    "duration": transcript.duration,
                }

                async with httpx.AsyncClient() as client:
                    response = await client.post(
                        room.webhook_url, json=webhook_payload, timeout=30
                    )
                    response.raise_for_status()

                logger.info(
                    "[Hatchet] send_webhook complete", status_code=response.status_code
                )

                await emit_progress_async(
                    input.transcript_id,
                    "send_webhook",
                    "completed",
                    ctx.workflow_run_id,
                )

                return {"webhook_sent": True, "response_code": response.status_code}

        finally:
            await _close_db_connection(db)

        await emit_progress_async(
            input.transcript_id, "send_webhook", "completed", ctx.workflow_run_id
        )

        return {"webhook_sent": False, "skipped": True}

    except Exception as e:
        logger.error("[Hatchet] send_webhook failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "send_webhook", "failed", ctx.workflow_run_id
        )
        raise
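Editor's note: the webhook body assembled in send_webhook above is a small JSON object with four fields. A minimal sketch of a receiving endpoint is shown below; the FastAPI app, route path, and model name are illustrative assumptions, not part of this commit.

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class TranscriptCompletedEvent(BaseModel):
    # Mirrors the payload built by send_webhook: event, transcript_id, title, duration
    event: str
    transcript_id: str
    title: str | None = None
    duration: float | None = None


@app.post("/reflector-webhook")
async def receive_reflector_webhook(payload: TranscriptCompletedEvent):
    # A real consumer would verify the caller and enqueue its own processing here.
    return {"received": payload.transcript_id}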
server/reflector/hatchet/workflows/track_processing.py (new file, 337 lines)
@@ -0,0 +1,337 @@
"""
Hatchet child workflow: TrackProcessing

Handles individual audio track processing: padding and transcription.
Spawned dynamically by the main diarization pipeline for each track.
"""

import math
import tempfile
from datetime import timedelta
from fractions import Fraction
from pathlib import Path

import av
from av.audio.resampler import AudioResampler
from hatchet_sdk import Context
from pydantic import BaseModel

from reflector.hatchet.client import HatchetClientManager
from reflector.hatchet.progress import emit_progress_async
from reflector.logger import logger

# Audio constants matching existing pipeline
OPUS_STANDARD_SAMPLE_RATE = 48000
OPUS_DEFAULT_BIT_RATE = 64000
PRESIGNED_URL_EXPIRATION_SECONDS = 7200


class TrackInput(BaseModel):
    """Input for individual track processing."""

    track_index: int
    s3_key: str
    bucket_name: str
    transcript_id: str
    language: str = "en"


# Get hatchet client and define workflow
hatchet = HatchetClientManager.get_client()

track_workflow = hatchet.workflow(name="TrackProcessing", input_validator=TrackInput)


def _extract_stream_start_time_from_container(container, track_idx: int) -> float:
    """Extract meeting-relative start time from WebM stream metadata.

    Uses PyAV to read stream.start_time from WebM container.
    More accurate than filename timestamps by ~209ms due to network/encoding delays.
    """
    start_time_seconds = 0.0
    try:
        audio_streams = [s for s in container.streams if s.type == "audio"]
        stream = audio_streams[0] if audio_streams else container.streams[0]

        # 1) Try stream-level start_time (most reliable for Daily.co tracks)
        if stream.start_time is not None and stream.time_base is not None:
            start_time_seconds = float(stream.start_time * stream.time_base)

        # 2) Fallback to container-level start_time
        if (start_time_seconds <= 0) and (container.start_time is not None):
            start_time_seconds = float(container.start_time * av.time_base)

        # 3) Fallback to first packet DTS
        if start_time_seconds <= 0:
            for packet in container.demux(stream):
                if packet.dts is not None:
                    start_time_seconds = float(packet.dts * stream.time_base)
                    break
    except Exception as e:
        logger.warning(
            "PyAV metadata read failed; assuming 0 start_time",
            track_idx=track_idx,
            error=str(e),
        )
        start_time_seconds = 0.0

    logger.info(
        f"Track {track_idx} stream metadata: start_time={start_time_seconds:.3f}s",
        track_idx=track_idx,
    )
    return start_time_seconds


def _apply_audio_padding_to_file(
    in_container,
    output_path: str,
    start_time_seconds: float,
    track_idx: int,
) -> None:
    """Apply silence padding to audio track using PyAV filter graph."""
    delay_ms = math.floor(start_time_seconds * 1000)

    logger.info(
        f"Padding track {track_idx} with {delay_ms}ms delay using PyAV",
        track_idx=track_idx,
        delay_ms=delay_ms,
    )

    with av.open(output_path, "w", format="webm") as out_container:
        in_stream = next((s for s in in_container.streams if s.type == "audio"), None)
        if in_stream is None:
            raise Exception("No audio stream in input")

        out_stream = out_container.add_stream("libopus", rate=OPUS_STANDARD_SAMPLE_RATE)
        out_stream.bit_rate = OPUS_DEFAULT_BIT_RATE
        graph = av.filter.Graph()

        abuf_args = (
            f"time_base=1/{OPUS_STANDARD_SAMPLE_RATE}:"
            f"sample_rate={OPUS_STANDARD_SAMPLE_RATE}:"
            f"sample_fmt=s16:"
            f"channel_layout=stereo"
        )
        src = graph.add("abuffer", args=abuf_args, name="src")
        aresample_f = graph.add("aresample", args="async=1", name="ares")
        delays_arg = f"{delay_ms}|{delay_ms}"
        adelay_f = graph.add("adelay", args=f"delays={delays_arg}:all=1", name="delay")
        sink = graph.add("abuffersink", name="sink")

        src.link_to(aresample_f)
        aresample_f.link_to(adelay_f)
        adelay_f.link_to(sink)
        graph.configure()

        resampler = AudioResampler(
            format="s16", layout="stereo", rate=OPUS_STANDARD_SAMPLE_RATE
        )

        for frame in in_container.decode(in_stream):
            out_frames = resampler.resample(frame) or []
            for rframe in out_frames:
                rframe.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                rframe.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                src.push(rframe)

            while True:
                try:
                    f_out = sink.pull()
                except Exception:
                    break
                f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
                f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
                for packet in out_stream.encode(f_out):
                    out_container.mux(packet)

        # Flush remaining frames
        src.push(None)
        while True:
            try:
                f_out = sink.pull()
            except Exception:
                break
            f_out.sample_rate = OPUS_STANDARD_SAMPLE_RATE
            f_out.time_base = Fraction(1, OPUS_STANDARD_SAMPLE_RATE)
            for packet in out_stream.encode(f_out):
                out_container.mux(packet)

        for packet in out_stream.encode(None):
            out_container.mux(packet)


@track_workflow.task(execution_timeout=timedelta(seconds=300), retries=3)
async def pad_track(input: TrackInput, ctx: Context) -> dict:
    """Pad single audio track with silence for alignment.

    Extracts stream.start_time from WebM container metadata and applies
    silence padding using PyAV filter graph (adelay).
    """
    logger.info(
        "[Hatchet] pad_track",
        track_index=input.track_index,
        s3_key=input.s3_key,
        transcript_id=input.transcript_id,
    )

    await emit_progress_async(
        input.transcript_id, "pad_track", "in_progress", ctx.workflow_run_id
    )

    try:
        # Create fresh storage instance to avoid aioboto3 fork issues
        from reflector.settings import settings
        from reflector.storage.storage_aws import AwsStorage

        storage = AwsStorage(
            aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
            aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
            aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
        )

        # Get presigned URL for source file
        source_url = await storage.get_file_url(
            input.s3_key,
            operation="get_object",
            expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            bucket=input.bucket_name,
        )

        # Open container and extract start time
        with av.open(source_url) as in_container:
            start_time_seconds = _extract_stream_start_time_from_container(
                in_container, input.track_index
            )

            # If no padding needed, return original URL
            if start_time_seconds <= 0:
                logger.info(
                    f"Track {input.track_index} requires no padding",
                    track_index=input.track_index,
                )
                await emit_progress_async(
                    input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
                )
                return {
                    "padded_url": source_url,
                    "size": 0,
                    "track_index": input.track_index,
                }

            # Create temp file for padded output
            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_file:
                temp_path = temp_file.name

            try:
                _apply_audio_padding_to_file(
                    in_container, temp_path, start_time_seconds, input.track_index
                )

                file_size = Path(temp_path).stat().st_size
                storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"

                logger.info(
                    f"About to upload padded track",
                    key=storage_path,
                    size=file_size,
                )

                with open(temp_path, "rb") as padded_file:
                    await storage.put_file(storage_path, padded_file)

                logger.info(
                    f"Uploaded padded track to S3",
                    key=storage_path,
                    size=file_size,
                )
            finally:
                Path(temp_path).unlink(missing_ok=True)

            # Get presigned URL for padded file
            padded_url = await storage.get_file_url(
                storage_path,
                operation="get_object",
                expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
            )

            logger.info(
                "[Hatchet] pad_track complete",
                track_index=input.track_index,
                padded_url=padded_url[:50] + "...",
            )

            await emit_progress_async(
                input.transcript_id, "pad_track", "completed", ctx.workflow_run_id
            )

            return {
                "padded_url": padded_url,
                "size": file_size,
                "track_index": input.track_index,
            }

    except Exception as e:
        logger.error("[Hatchet] pad_track failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "pad_track", "failed", ctx.workflow_run_id
        )
        raise


@track_workflow.task(
    parents=[pad_track], execution_timeout=timedelta(seconds=600), retries=3
)
async def transcribe_track(input: TrackInput, ctx: Context) -> dict:
    """Transcribe audio track using GPU (Modal.com) or local Whisper."""
    logger.info(
        "[Hatchet] transcribe_track",
        track_index=input.track_index,
        language=input.language,
    )

    await emit_progress_async(
        input.transcript_id, "transcribe_track", "in_progress", ctx.workflow_run_id
    )

    try:
        pad_result = ctx.task_output(pad_track)
        audio_url = pad_result.get("padded_url")

        if not audio_url:
            raise ValueError("Missing padded_url from pad_track")

        from reflector.pipelines.transcription_helpers import (
            transcribe_file_with_processor,
        )

        transcript = await transcribe_file_with_processor(audio_url, input.language)

        # Tag all words with speaker index
        words = []
        for word in transcript.words:
            word_dict = word.model_dump()
            word_dict["speaker"] = input.track_index
            words.append(word_dict)

        logger.info(
            "[Hatchet] transcribe_track complete",
            track_index=input.track_index,
            word_count=len(words),
        )

        await emit_progress_async(
            input.transcript_id, "transcribe_track", "completed", ctx.workflow_run_id
        )

        return {
            "words": words,
            "track_index": input.track_index,
        }

    except Exception as e:
        logger.error("[Hatchet] transcribe_track failed", error=str(e), exc_info=True)
        await emit_progress_async(
            input.transcript_id, "transcribe_track", "failed", ctx.workflow_run_id
        )
        raise
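Editor's note: this module only defines the child workflow; per its docstring, the main diarization pipeline spawns one TrackProcessing run per track. A rough sketch of what that fan-out could look like with the hatchet-sdk v1 Python API is shown below; the aio_run call, the result shape, and the surrounding function are assumptions, not code from this commit.

import asyncio

from reflector.hatchet.workflows.track_processing import TrackInput, track_workflow


async def run_all_tracks(tracks: list[dict], bucket_name: str, transcript_id: str) -> list:
    # One child workflow run per audio track, awaited concurrently.
    runs = [
        track_workflow.aio_run(
            TrackInput(
                track_index=index,
                s3_key=track["s3_key"],
                bucket_name=bucket_name,
                transcript_id=transcript_id,
            )
        )
        for index, track in enumerate(tracks)
    ]
    # Each result is expected to hold per-task outputs,
    # e.g. result["transcribe_track"]["words"] (shape assumed, not verified here).
    return await asyncio.gather(*runs)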
@@ -15,6 +15,7 @@ from celery.result import AsyncResult
 from reflector.conductor.client import ConductorClientManager
 from reflector.db.recordings import recordings_controller
 from reflector.db.transcripts import Transcript
+from reflector.hatchet.client import HatchetClientManager
 from reflector.logger import logger
 from reflector.pipelines.main_file_pipeline import task_pipeline_file_process
 from reflector.pipelines.main_multitrack_pipeline import (
@@ -156,8 +157,47 @@ async def prepare_transcript_processing(
 
 def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult | None:
     if isinstance(config, MultitrackProcessingConfig):
-        # Start Conductor workflow if enabled
-        if settings.CONDUCTOR_ENABLED:
+        # Start durable workflow if enabled (Hatchet or Conductor)
+        durable_started = False
+
+        if settings.HATCHET_ENABLED:
+            import asyncio
+
+            async def _start_hatchet():
+                return await HatchetClientManager.start_workflow(
+                    workflow_name="DiarizationPipeline",
+                    input_data={
+                        "recording_id": config.recording_id,
+                        "room_name": None,  # Not available in reprocess path
+                        "tracks": [{"s3_key": k} for k in config.track_keys],
+                        "bucket_name": config.bucket_name,
+                        "transcript_id": config.transcript_id,
+                        "room_id": config.room_id,
+                    },
+                )
+
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:
+                loop = None
+
+            if loop and loop.is_running():
+                # Already in async context
+                import concurrent.futures
+
+                with concurrent.futures.ThreadPoolExecutor() as pool:
+                    workflow_id = pool.submit(asyncio.run, _start_hatchet()).result()
+            else:
+                workflow_id = asyncio.run(_start_hatchet())
+
+            logger.info(
+                "Started Hatchet workflow (reprocess)",
+                workflow_id=workflow_id,
+                transcript_id=config.transcript_id,
+            )
+            durable_started = True
+
+        elif settings.CONDUCTOR_ENABLED:
             workflow_id = ConductorClientManager.start_workflow(
                 name="diarization_pipeline",
                 version=1,
@@ -175,11 +215,13 @@ def dispatch_transcript_processing(config: ProcessingConfig) -> AsyncResult | No
                 workflow_id=workflow_id,
                 transcript_id=config.transcript_id,
             )
+            durable_started = True
 
-            if not settings.CONDUCTOR_SHADOW_MODE:
-                return None  # Conductor-only, no Celery result
+        # If durable workflow started and not in shadow mode, skip Celery
+        if durable_started and not settings.DURABLE_WORKFLOW_SHADOW_MODE:
+            return None
 
-        # Celery pipeline (shadow mode or Conductor disabled)
+        # Celery pipeline (shadow mode or durable workflows disabled)
         return task_pipeline_multitrack_process.delay(
             transcript_id=config.transcript_id,
             bucket_name=config.bucket_name,
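Editor's note: the reprocess path above has to start an async Hatchet workflow from a caller that may or may not already be inside a running event loop; that is what the get_running_loop / ThreadPoolExecutor dance does. Distilled into a standalone helper for clarity (the helper name is mine; the logic mirrors the diff):

import asyncio
import concurrent.futures
from typing import Any, Coroutine


def run_coroutine_blocking(coro: Coroutine[Any, Any, Any]) -> Any:
    """Run a coroutine to completion whether or not an event loop is already running."""
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop and loop.is_running():
        # Inside a running loop: hand the coroutine to a worker thread with its own loop.
        with concurrent.futures.ThreadPoolExecutor() as pool:
            return pool.submit(asyncio.run, coro).result()
    # No loop running: asyncio.run is safe to call directly.
    return asyncio.run(coro)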
@@ -150,11 +150,34 @@ class Settings(BaseSettings):
     ZULIP_API_KEY: str | None = None
     ZULIP_BOT_EMAIL: str | None = None
 
+    # Durable workflow orchestration
+    # Provider: "hatchet" or "conductor" (or "none" to disable)
+    DURABLE_WORKFLOW_PROVIDER: str = "none"
+    DURABLE_WORKFLOW_SHADOW_MODE: bool = False  # Run both provider + Celery
+
     # Conductor workflow orchestration
     CONDUCTOR_SERVER_URL: str = "http://conductor:8080/api"
     CONDUCTOR_DEBUG: bool = False
-    CONDUCTOR_ENABLED: bool = False
-    CONDUCTOR_SHADOW_MODE: bool = False
+
+    # Hatchet workflow orchestration
+    HATCHET_CLIENT_TOKEN: str | None = None
+    HATCHET_CLIENT_TLS_STRATEGY: str = "none"  # none, tls, mtls
+    HATCHET_DEBUG: bool = False
+
+    @property
+    def CONDUCTOR_ENABLED(self) -> bool:
+        """Legacy compatibility: True if Conductor is the active provider."""
+        return self.DURABLE_WORKFLOW_PROVIDER == "conductor"
+
+    @property
+    def HATCHET_ENABLED(self) -> bool:
+        """True if Hatchet is the active provider."""
+        return self.DURABLE_WORKFLOW_PROVIDER == "hatchet"
+
+    @property
+    def CONDUCTOR_SHADOW_MODE(self) -> bool:
+        """Legacy compatibility for shadow mode."""
+        return self.DURABLE_WORKFLOW_SHADOW_MODE and self.CONDUCTOR_ENABLED
 
 
 settings = Settings()
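Editor's note: with this change the provider is selected by a single setting, and the old Conductor flags become derived properties. A small illustration of the resulting behavior, assuming DURABLE_WORKFLOW_PROVIDER is read from the environment by pydantic-settings and that any other required settings are already present:

import os

os.environ["DURABLE_WORKFLOW_PROVIDER"] = "hatchet"

from reflector.settings import Settings

settings = Settings()
assert settings.HATCHET_ENABLED is True
assert settings.CONDUCTOR_ENABLED is False
# Shadow mode is scoped to the active provider: with Hatchet selected,
# the legacy CONDUCTOR_SHADOW_MODE property stays False even if the shadow flag is set.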
server/reflector/views/hatchet.py (new file, 57 lines)
@@ -0,0 +1,57 @@
"""Hatchet health and status endpoints."""

from fastapi import APIRouter

from reflector.settings import settings

router = APIRouter(prefix="/hatchet", tags=["hatchet"])


@router.get("/health")
async def hatchet_health():
    """Check Hatchet connectivity and status."""
    if not settings.HATCHET_ENABLED:
        return {"status": "disabled", "connected": False}

    if not settings.HATCHET_CLIENT_TOKEN:
        return {
            "status": "unhealthy",
            "connected": False,
            "error": "HATCHET_CLIENT_TOKEN not configured",
        }

    try:
        from reflector.hatchet.client import HatchetClientManager

        # Get client to verify token is valid
        client = HatchetClientManager.get_client()

        # Try to get the client's gRPC connection status.
        # The SDK doesn't have a simple health check, so we just verify we can create the client.
        if client is not None:
            return {"status": "healthy", "connected": True}
        else:
            return {
                "status": "unhealthy",
                "connected": False,
                "error": "Failed to create client",
            }
    except ValueError as e:
        return {"status": "unhealthy", "connected": False, "error": str(e)}
    except Exception as e:
        return {"status": "unhealthy", "connected": False, "error": str(e)}


@router.get("/workflow/{workflow_run_id}")
async def get_workflow_status(workflow_run_id: str):
    """Get the status of a workflow run."""
    if not settings.HATCHET_ENABLED:
        return {"error": "Hatchet is disabled"}

    try:
        from reflector.hatchet.client import HatchetClientManager

        status = await HatchetClientManager.get_workflow_status(workflow_run_id)
        return status
    except Exception as e:
        return {"error": str(e)}
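Editor's note: once the router above is included in the main FastAPI application (the include_router call is not part of this hunk), the health check can be probed directly. A short sketch; the base URL is an assumption about the local deployment:

import httpx

# Hypothetical local API address; substitute the real host/port.
response = httpx.get("http://localhost:8000/hatchet/health")
print(response.json())
# -> {"status": "disabled", "connected": False} when DURABLE_WORKFLOW_PROVIDER != "hatchet"
# -> {"status": "healthy", "connected": True} once a valid HATCHET_CLIENT_TOKEN is configured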
@@ -286,8 +286,34 @@ async def _process_multitrack_recording_inner(
        room_id=room.id,
    )
 
-    # Start Conductor workflow if enabled
-    if settings.CONDUCTOR_ENABLED:
+    # Start durable workflow if enabled (Hatchet or Conductor)
+    durable_started = False
+
+    if settings.HATCHET_ENABLED:
+        from reflector.hatchet.client import HatchetClientManager  # noqa: PLC0415
+
+        workflow_id = await HatchetClientManager.start_workflow(
+            workflow_name="DiarizationPipeline",
+            input_data={
+                "recording_id": recording_id,
+                "room_name": daily_room_name,
+                "tracks": [{"s3_key": k} for k in filter_cam_audio_tracks(track_keys)],
+                "bucket_name": bucket_name,
+                "transcript_id": transcript.id,
+                "room_id": room.id,
+            },
+        )
+        logger.info(
+            "Started Hatchet workflow",
+            workflow_id=workflow_id,
+            transcript_id=transcript.id,
+        )
+
+        # Store workflow_id on recording for status tracking
+        await recordings_controller.update(recording, {"workflow_id": workflow_id})
+        durable_started = True
+
+    elif settings.CONDUCTOR_ENABLED:
         from reflector.conductor.client import ConductorClientManager  # noqa: PLC0415
 
         workflow_id = ConductorClientManager.start_workflow(
@@ -310,11 +336,13 @@ async def _process_multitrack_recording_inner(
 
         # Store workflow_id on recording for status tracking
         await recordings_controller.update(recording, {"workflow_id": workflow_id})
+        durable_started = True
 
-    if not settings.CONDUCTOR_SHADOW_MODE:
-        return  # Don't trigger Celery
+    # If durable workflow started and not in shadow mode, skip Celery
+    if durable_started and not settings.DURABLE_WORKFLOW_SHADOW_MODE:
+        return
 
-    # Celery pipeline (runs when Conductor disabled OR in shadow mode)
+    # Celery pipeline (runs when durable workflows disabled OR in shadow mode)
     task_pipeline_multitrack_process.delay(
         transcript_id=transcript.id,
         bucket_name=bucket_name,
@@ -9,6 +9,8 @@ elif [ "${ENTRYPOINT}" = "beat" ]; then
     uv run celery -A reflector.worker.app beat --loglevel=info
 elif [ "${ENTRYPOINT}" = "conductor-worker" ]; then
     uv run python -m reflector.conductor.run_workers
+elif [ "${ENTRYPOINT}" = "hatchet-worker" ]; then
+    uv run python -m reflector.hatchet.run_workers
 else
     echo "Unknown command"
 fi
server/uv.lock (generated, 87 lines)
@@ -1218,6 +1218,70 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/bf/c4/a839fcc28bebfa72925d9121c4d39398f77f95bcba0cf26c972a0cfb1de7/griffe-1.8.0-py3-none-any.whl", hash = "sha256:110faa744b2c5c84dd432f4fa9aa3b14805dd9519777dd55e8db214320593b02", size = 132487 },
 ]
 
+[[package]]
+name = "grpcio"
+version = "1.76.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/00/8163a1beeb6971f66b4bbe6ac9457b97948beba8dd2fc8e1281dce7f79ec/grpcio-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a", size = 5843567 },
+    { url = "https://files.pythonhosted.org/packages/10/c1/934202f5cf335e6d852530ce14ddb0fef21be612ba9ecbbcbd4d748ca32d/grpcio-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c", size = 11848017 },
+    { url = "https://files.pythonhosted.org/packages/11/0b/8dec16b1863d74af6eb3543928600ec2195af49ca58b16334972f6775663/grpcio-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465", size = 6412027 },
+    { url = "https://files.pythonhosted.org/packages/d7/64/7b9e6e7ab910bea9d46f2c090380bab274a0b91fb0a2fe9b0cd399fffa12/grpcio-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48", size = 7075913 },
+    { url = "https://files.pythonhosted.org/packages/68/86/093c46e9546073cefa789bd76d44c5cb2abc824ca62af0c18be590ff13ba/grpcio-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da", size = 6615417 },
+    { url = "https://files.pythonhosted.org/packages/f7/b6/5709a3a68500a9c03da6fb71740dcdd5ef245e39266461a03f31a57036d8/grpcio-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397", size = 7199683 },
+    { url = "https://files.pythonhosted.org/packages/91/d3/4b1f2bf16ed52ce0b508161df3a2d186e4935379a159a834cb4a7d687429/grpcio-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749", size = 8163109 },
+    { url = "https://files.pythonhosted.org/packages/5c/61/d9043f95f5f4cf085ac5dd6137b469d41befb04bd80280952ffa2a4c3f12/grpcio-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00", size = 7626676 },
+    { url = "https://files.pythonhosted.org/packages/36/95/fd9a5152ca02d8881e4dd419cdd790e11805979f499a2e5b96488b85cf27/grpcio-1.76.0-cp311-cp311-win32.whl", hash = "sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054", size = 3997688 },
+    { url = "https://files.pythonhosted.org/packages/60/9c/5c359c8d4c9176cfa3c61ecd4efe5affe1f38d9bae81e81ac7186b4c9cc8/grpcio-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d", size = 4709315 },
+    { url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718 },
+    { url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627 },
+    { url = "https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167 },
+    { url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267 },
+    { url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963 },
+    { url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484 },
+    { url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777 },
+    { url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014 },
+    { url = "https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750 },
+    { url = "https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003 },
+]
+
+[[package]]
+name = "grpcio-tools"
+version = "1.76.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "grpcio" },
+    { name = "protobuf" },
+    { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a0/77/17d60d636ccd86a0db0eccc24d02967bbc3eea86b9db7324b04507ebaa40/grpcio_tools-1.76.0.tar.gz", hash = "sha256:ce80169b5e6adf3e8302f3ebb6cb0c3a9f08089133abca4b76ad67f751f5ad88", size = 5390807 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/73/d1/efbeed1a864c846228c0a3b322e7a2d6545f025e35246aebf96496a36004/grpcio_tools-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6480f6af6833850a85cca1c6b435ef4ffd2ac8e88ef683b4065233827950243", size = 2545931 },
+    { url = "https://files.pythonhosted.org/packages/af/8e/f257c0f565d9d44658301238b01a9353bc6f3b272bb4191faacae042579d/grpcio_tools-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c7c23fe1dc09818e16a48853477806ad77dd628b33996f78c05a293065f8210c", size = 5844794 },
+    { url = "https://files.pythonhosted.org/packages/c7/c0/6c1e89c67356cb20e19ed670c5099b13e40fd678cac584c778f931666a86/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fcdce7f7770ff052cd4e60161764b0b3498c909bde69138f8bd2e7b24a3ecd8f", size = 2591772 },
+    { url = "https://files.pythonhosted.org/packages/c0/10/5f33aa7bc3ddaad0cfd2f4e950ac4f1a310e8d0c7b1358622a581e8b7a2f/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b598fdcebffa931c7da5c9e90b5805fff7e9bc6cf238319358a1b85704c57d33", size = 2905140 },
+    { url = "https://files.pythonhosted.org/packages/f4/3e/23e3a52a77368f47188ed83c34eb53866d3ce0f73835b2f6764844ae89eb/grpcio_tools-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6a9818ff884796b12dcf8db32126e40ec1098cacf5697f27af9cfccfca1c1fae", size = 2656475 },
+    { url = "https://files.pythonhosted.org/packages/51/85/a74ae87ec7dbd3d2243881f5c548215aed1148660df7945be3a125ba9a21/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:105e53435b2eed3961da543db44a2a34479d98d18ea248219856f30a0ca4646b", size = 3106158 },
+    { url = "https://files.pythonhosted.org/packages/54/d5/a6ed1e5823bc5d55a1eb93e0c14ccee0b75951f914832ab51fb64d522a0f/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:454a1232c7f99410d92fa9923c7851fd4cdaf657ee194eac73ea1fe21b406d6e", size = 3654980 },
+    { url = "https://files.pythonhosted.org/packages/f9/29/c05d5501ba156a242079ef71d073116d2509c195b5e5e74c545f0a3a3a69/grpcio_tools-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ca9ccf667afc0268d45ab202af4556c72e57ea36ebddc93535e1a25cbd4f8aba", size = 3322658 },
+    { url = "https://files.pythonhosted.org/packages/02/b6/ee0317b91da19a7537d93c4161cbc2a45a165c8893209b0bbd470d830ffa/grpcio_tools-1.76.0-cp311-cp311-win32.whl", hash = "sha256:a83c87513b708228b4cad7619311daba65b40937745103cadca3db94a6472d9c", size = 993837 },
+    { url = "https://files.pythonhosted.org/packages/81/63/9623cadf0406b264737f16d4ed273bb2d65001d87fbd803b565c45d665d1/grpcio_tools-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:2ce5e87ec71f2e4041dce4351f2a8e3b713e3bca6b54c69c3fbc6c7ad1f4c386", size = 1158634 },
+    { url = "https://files.pythonhosted.org/packages/4f/ca/a931c1439cabfe305c9afd07e233150cd0565aa062c20d1ee412ed188852/grpcio_tools-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:4ad555b8647de1ebaffb25170249f89057721ffb74f7da96834a07b4855bb46a", size = 2546852 },
+    { url = "https://files.pythonhosted.org/packages/4c/07/935cfbb7dccd602723482a86d43fbd992f91e9867bca0056a1e9f348473e/grpcio_tools-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:243af7c8fc7ff22a40a42eb8e0f6f66963c1920b75aae2a2ec503a9c3c8b31c1", size = 5841777 },
+    { url = "https://files.pythonhosted.org/packages/e4/92/8fcb5acebdccb647e0fa3f002576480459f6cf81e79692d7b3c4d6e29605/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8207b890f423142cc0025d041fb058f7286318df6a049565c27869d73534228b", size = 2594004 },
+    { url = "https://files.pythonhosted.org/packages/9d/ea/64838e8113b7bfd4842b15c815a7354cb63242fdce9d6648d894b5d50897/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3dafa34c2626a6691d103877e8a145f54c34cf6530975f695b396ed2fc5c98f8", size = 2905563 },
+    { url = "https://files.pythonhosted.org/packages/a6/d6/53798827d821098219e58518b6db52161ce4985620850aa74ce3795da8a7/grpcio_tools-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:30f1d2dda6ece285b3d9084e94f66fa721ebdba14ae76b2bc4c581c8a166535c", size = 2656936 },
+    { url = "https://files.pythonhosted.org/packages/89/a3/d9c1cefc46a790eec520fe4e70e87279abb01a58b1a3b74cf93f62b824a2/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a889af059dc6dbb82d7b417aa581601316e364fe12eb54c1b8d95311ea50916d", size = 3109811 },
+    { url = "https://files.pythonhosted.org/packages/50/75/5997752644b73b5d59377d333a51c8a916606df077f5a487853e37dca289/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c3f2c3c44c56eb5d479ab178f0174595d0a974c37dade442f05bb73dfec02f31", size = 3658786 },
+    { url = "https://files.pythonhosted.org/packages/84/47/dcf8380df4bd7931ffba32fc6adc2de635b6569ca27fdec7121733797062/grpcio_tools-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:479ce02dff684046f909a487d452a83a96b4231f7c70a3b218a075d54e951f56", size = 3325144 },
+    { url = "https://files.pythonhosted.org/packages/04/88/ea3e5fdb874d8c2d04488e4b9d05056537fba70915593f0c283ac77df188/grpcio_tools-1.76.0-cp312-cp312-win32.whl", hash = "sha256:9ba4bb539936642a44418b38ee6c3e8823c037699e2cb282bd8a44d76a4be833", size = 993523 },
+    { url = "https://files.pythonhosted.org/packages/de/b1/ce7d59d147675ec191a55816be46bc47a343b5ff07279eef5817c09cc53e/grpcio_tools-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:0cd489016766b05f9ed8a6b6596004b62c57d323f49593eac84add032a6d43f7", size = 1158493 },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"
@@ -1227,6 +1291,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
 ]
 
+[[package]]
+name = "hatchet-sdk"
+version = "1.21.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp" },
+    { name = "grpcio" },
+    { name = "grpcio-tools" },
+    { name = "prometheus-client" },
+    { name = "protobuf" },
+    { name = "pydantic" },
+    { name = "pydantic-settings" },
+    { name = "python-dateutil" },
+    { name = "tenacity" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7c/df/75dd02e1dc6b99f7151a57f084876c50f739ad4d643b060078f65d51d717/hatchet_sdk-1.21.6.tar.gz", hash = "sha256:b65741324ad721ce57f5fe3f960e2942c4ac2ceec6ca483dd35f84137ff7c46c", size = 219345 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/86/e4cd7928bcabd33c634c33d4e878e2454e03f97c87b72947c7ff5762d813/hatchet_sdk-1.21.6-py3-none-any.whl", hash = "sha256:589fba9104a6517e1ba677b9865fa0a20e221863a8c2a2724051198994c11399", size = 529167 },
+]
+
 [[package]]
 name = "hf-xet"
 version = "1.1.5"
@@ -3150,6 +3235,7 @@ dependencies = [
     { name = "databases", extra = ["aiosqlite", "asyncpg"] },
     { name = "fastapi", extra = ["standard"] },
     { name = "fastapi-pagination" },
+    { name = "hatchet-sdk" },
     { name = "httpx" },
     { name = "icalendar" },
     { name = "jsonschema" },
@@ -3227,6 +3313,7 @@ requires-dist = [
     { name = "databases", extras = ["aiosqlite", "asyncpg"], specifier = ">=0.7.0" },
     { name = "fastapi", extras = ["standard"], specifier = ">=0.100.1" },
     { name = "fastapi-pagination", specifier = ">=0.12.6" },
+    { name = "hatchet-sdk", specifier = ">=0.47.0" },
    { name = "httpx", specifier = ">=0.24.1" },
     { name = "icalendar", specifier = ">=6.0.0" },
     { name = "jsonschema", specifier = ">=4.23.0" },