mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-02-04 18:06:48 +00:00
Compare commits
6 Commits
transcript
...
v0.28.1
| Author | SHA1 | Date | |
|---|---|---|---|
| 4dc49e5b25 | |||
| 23d2bc283d | |||
| c8743fdf1c | |||
| 8a293882ad | |||
| d83c4a30b4 | |||
| 3b6540eae5 |
14
CHANGELOG.md
14
CHANGELOG.md
@@ -1,5 +1,19 @@
|
||||
# Changelog
|
||||
|
||||
## [0.28.1](https://github.com/Monadical-SAS/reflector/compare/v0.28.0...v0.28.1) (2026-01-21)
|
||||
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* ics non-sync bugfix ([#823](https://github.com/Monadical-SAS/reflector/issues/823)) ([23d2bc2](https://github.com/Monadical-SAS/reflector/commit/23d2bc283d4d02187b250d2055103e0374ee93d6))
|
||||
|
||||
## [0.28.0](https://github.com/Monadical-SAS/reflector/compare/v0.27.0...v0.28.0) (2026-01-20)
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
* worker affinity ([#819](https://github.com/Monadical-SAS/reflector/issues/819)) ([3b6540e](https://github.com/Monadical-SAS/reflector/commit/3b6540eae5b597449f98661bdf15483b77be3268))
|
||||
|
||||
## [0.27.0](https://github.com/Monadical-SAS/reflector/compare/v0.26.0...v0.27.0) (2025-12-26)
|
||||
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ services:
|
||||
environment:
|
||||
ENTRYPOINT: beat
|
||||
|
||||
hatchet-worker:
|
||||
hatchet-worker-cpu:
|
||||
build:
|
||||
context: server
|
||||
volumes:
|
||||
@@ -43,7 +43,20 @@ services:
|
||||
env_file:
|
||||
- ./server/.env
|
||||
environment:
|
||||
ENTRYPOINT: hatchet-worker
|
||||
ENTRYPOINT: hatchet-worker-cpu
|
||||
depends_on:
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
hatchet-worker-llm:
|
||||
build:
|
||||
context: server
|
||||
volumes:
|
||||
- ./server/:/app/
|
||||
- /app/.venv
|
||||
env_file:
|
||||
- ./server/.env
|
||||
environment:
|
||||
ENTRYPOINT: hatchet-worker-llm
|
||||
depends_on:
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
"""
|
||||
Run Hatchet workers for the multitrack pipeline.
|
||||
Runs as a separate process, just like Celery workers.
|
||||
|
||||
Usage:
|
||||
uv run -m reflector.hatchet.run_workers
|
||||
|
||||
# Or via docker:
|
||||
docker compose exec server uv run -m reflector.hatchet.run_workers
|
||||
"""
|
||||
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from hatchet_sdk.rate_limit import RateLimitDuration
|
||||
|
||||
from reflector.hatchet.constants import LLM_RATE_LIMIT_KEY, LLM_RATE_LIMIT_PER_SECOND
|
||||
from reflector.logger import logger
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Start Hatchet worker polling."""
|
||||
if not settings.HATCHET_ENABLED:
|
||||
logger.error("HATCHET_ENABLED is False, not starting workers")
|
||||
sys.exit(1)
|
||||
|
||||
if not settings.HATCHET_CLIENT_TOKEN:
|
||||
logger.error("HATCHET_CLIENT_TOKEN is not set")
|
||||
sys.exit(1)
|
||||
|
||||
logger.info(
|
||||
"Starting Hatchet workers",
|
||||
debug=settings.HATCHET_DEBUG,
|
||||
)
|
||||
|
||||
# Import here (not top-level) - workflow modules call HatchetClientManager.get_client()
|
||||
# at module level because Hatchet SDK decorators (@workflow.task) bind at import time.
|
||||
# Can't use lazy init: decorators need the client object when function is defined.
|
||||
from reflector.hatchet.client import HatchetClientManager # noqa: PLC0415
|
||||
from reflector.hatchet.workflows import ( # noqa: PLC0415
|
||||
daily_multitrack_pipeline,
|
||||
subject_workflow,
|
||||
topic_chunk_workflow,
|
||||
track_workflow,
|
||||
)
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
hatchet.rate_limits.put(
|
||||
LLM_RATE_LIMIT_KEY, LLM_RATE_LIMIT_PER_SECOND, RateLimitDuration.SECOND
|
||||
)
|
||||
|
||||
worker = hatchet.worker(
|
||||
"reflector-pipeline-worker",
|
||||
workflows=[
|
||||
daily_multitrack_pipeline,
|
||||
subject_workflow,
|
||||
topic_chunk_workflow,
|
||||
track_workflow,
|
||||
],
|
||||
)
|
||||
|
||||
def shutdown_handler(signum: int, frame) -> None:
|
||||
logger.info("Received shutdown signal, stopping workers...")
|
||||
# Worker cleanup happens automatically on exit
|
||||
sys.exit(0)
|
||||
|
||||
signal.signal(signal.SIGINT, shutdown_handler)
|
||||
signal.signal(signal.SIGTERM, shutdown_handler)
|
||||
|
||||
logger.info("Starting Hatchet worker polling...")
|
||||
worker.start()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
48
server/reflector/hatchet/run_workers_cpu.py
Normal file
48
server/reflector/hatchet/run_workers_cpu.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
CPU-heavy worker pool for audio processing tasks.
|
||||
Handles ONLY: mixdown_tracks
|
||||
|
||||
Configuration:
|
||||
- slots=1: Only mixdown (already serialized globally with max_runs=1)
|
||||
- Worker affinity: pool=cpu-heavy
|
||||
"""
|
||||
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
daily_multitrack_pipeline,
|
||||
)
|
||||
from reflector.logger import logger
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
def main():
|
||||
if not settings.HATCHET_ENABLED:
|
||||
logger.error("HATCHET_ENABLED is False, not starting CPU workers")
|
||||
return
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
logger.info(
|
||||
"Starting Hatchet CPU worker pool (mixdown only)",
|
||||
worker_name="cpu-worker-pool",
|
||||
slots=1,
|
||||
labels={"pool": "cpu-heavy"},
|
||||
)
|
||||
|
||||
cpu_worker = hatchet.worker(
|
||||
"cpu-worker-pool",
|
||||
slots=1, # Only 1 mixdown at a time (already serialized globally)
|
||||
labels={
|
||||
"pool": "cpu-heavy",
|
||||
},
|
||||
workflows=[daily_multitrack_pipeline],
|
||||
)
|
||||
|
||||
try:
|
||||
cpu_worker.start()
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Received shutdown signal, stopping CPU workers...")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
56
server/reflector/hatchet/run_workers_llm.py
Normal file
56
server/reflector/hatchet/run_workers_llm.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
LLM/I/O worker pool for all non-CPU tasks.
|
||||
Handles: all tasks except mixdown_tracks (transcription, LLM inference, orchestration)
|
||||
"""
|
||||
|
||||
from reflector.hatchet.client import HatchetClientManager
|
||||
from reflector.hatchet.workflows.daily_multitrack_pipeline import (
|
||||
daily_multitrack_pipeline,
|
||||
)
|
||||
from reflector.hatchet.workflows.subject_processing import subject_workflow
|
||||
from reflector.hatchet.workflows.topic_chunk_processing import topic_chunk_workflow
|
||||
from reflector.hatchet.workflows.track_processing import track_workflow
|
||||
from reflector.logger import logger
|
||||
from reflector.settings import settings
|
||||
|
||||
SLOTS = 10
|
||||
WORKER_NAME = "llm-worker-pool"
|
||||
POOL = "llm-io"
|
||||
|
||||
|
||||
def main():
|
||||
if not settings.HATCHET_ENABLED:
|
||||
logger.error("HATCHET_ENABLED is False, not starting LLM workers")
|
||||
return
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
|
||||
logger.info(
|
||||
"Starting Hatchet LLM worker pool (all tasks except mixdown)",
|
||||
worker_name=WORKER_NAME,
|
||||
slots=SLOTS,
|
||||
labels={"pool": POOL},
|
||||
)
|
||||
|
||||
llm_worker = hatchet.worker(
|
||||
WORKER_NAME,
|
||||
slots=SLOTS, # not all slots are probably used
|
||||
labels={
|
||||
"pool": POOL,
|
||||
},
|
||||
workflows=[
|
||||
daily_multitrack_pipeline,
|
||||
topic_chunk_workflow,
|
||||
subject_workflow,
|
||||
track_workflow,
|
||||
],
|
||||
)
|
||||
|
||||
try:
|
||||
llm_worker.start()
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Received shutdown signal, stopping LLM workers...")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -23,7 +23,12 @@ from pathlib import Path
|
||||
from typing import Any, Callable, Coroutine, Protocol, TypeVar
|
||||
|
||||
import httpx
|
||||
from hatchet_sdk import Context
|
||||
from hatchet_sdk import (
|
||||
ConcurrencyExpression,
|
||||
ConcurrencyLimitStrategy,
|
||||
Context,
|
||||
)
|
||||
from hatchet_sdk.labels import DesiredWorkerLabel
|
||||
from pydantic import BaseModel
|
||||
|
||||
from reflector.dailyco_api.client import DailyApiClient
|
||||
@@ -467,6 +472,20 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
|
||||
parents=[process_tracks],
|
||||
execution_timeout=timedelta(seconds=TIMEOUT_AUDIO),
|
||||
retries=3,
|
||||
desired_worker_labels={
|
||||
"pool": DesiredWorkerLabel(
|
||||
value="cpu-heavy",
|
||||
required=True,
|
||||
weight=100,
|
||||
),
|
||||
},
|
||||
concurrency=[
|
||||
ConcurrencyExpression(
|
||||
expression="'mixdown-global'",
|
||||
max_runs=1, # serialize mixdown to prevent resource contention
|
||||
limit_strategy=ConcurrencyLimitStrategy.GROUP_ROUND_ROBIN, # Queue
|
||||
)
|
||||
],
|
||||
)
|
||||
@with_error_handling(TaskName.MIXDOWN_TRACKS)
|
||||
async def mixdown_tracks(input: PipelineInput, ctx: Context) -> MixdownResult:
|
||||
|
||||
@@ -7,7 +7,11 @@ Spawned dynamically by detect_topics via aio_run_many() for parallel processing.
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from hatchet_sdk import ConcurrencyExpression, ConcurrencyLimitStrategy, Context
|
||||
from hatchet_sdk import (
|
||||
ConcurrencyExpression,
|
||||
ConcurrencyLimitStrategy,
|
||||
Context,
|
||||
)
|
||||
from hatchet_sdk.rate_limit import RateLimit
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -34,11 +38,13 @@ hatchet = HatchetClientManager.get_client()
|
||||
topic_chunk_workflow = hatchet.workflow(
|
||||
name="TopicChunkProcessing",
|
||||
input_validator=TopicChunkInput,
|
||||
concurrency=ConcurrencyExpression(
|
||||
expression="'global'", # constant string = global limit across all runs
|
||||
max_runs=20,
|
||||
limit_strategy=ConcurrencyLimitStrategy.GROUP_ROUND_ROBIN,
|
||||
),
|
||||
concurrency=[
|
||||
ConcurrencyExpression(
|
||||
expression="'global'", # constant string = global limit across all runs
|
||||
max_runs=20,
|
||||
limit_strategy=ConcurrencyLimitStrategy.GROUP_ROUND_ROBIN,
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -319,21 +319,6 @@ class ICSSyncService:
|
||||
calendar = self.fetch_service.parse_ics(ics_content)
|
||||
|
||||
content_hash = hashlib.md5(ics_content.encode()).hexdigest()
|
||||
if room.ics_last_etag == content_hash:
|
||||
logger.info("No changes in ICS for room", room_id=room.id)
|
||||
room_url = f"{settings.UI_BASE_URL}/{room.name}"
|
||||
events, total_events = self.fetch_service.extract_room_events(
|
||||
calendar, room.name, room_url
|
||||
)
|
||||
return {
|
||||
"status": SyncStatus.UNCHANGED,
|
||||
"hash": content_hash,
|
||||
"events_found": len(events),
|
||||
"total_events": total_events,
|
||||
"events_created": 0,
|
||||
"events_updated": 0,
|
||||
"events_deleted": 0,
|
||||
}
|
||||
|
||||
# Extract matching events
|
||||
room_url = f"{settings.UI_BASE_URL}/{room.name}"
|
||||
@@ -371,6 +356,44 @@ class ICSSyncService:
|
||||
time_since_sync = datetime.now(timezone.utc) - room.ics_last_sync
|
||||
return time_since_sync.total_seconds() >= room.ics_fetch_interval
|
||||
|
||||
def _event_data_changed(self, existing: CalendarEvent, new_data: EventData) -> bool:
|
||||
"""Check if event data has changed by comparing relevant fields.
|
||||
|
||||
IMPORTANT: When adding fields to CalendarEvent/EventData, update this method
|
||||
and the _COMPARED_FIELDS set below for runtime validation.
|
||||
"""
|
||||
# Fields that come from ICS and should trigger updates when changed
|
||||
_COMPARED_FIELDS = {
|
||||
"title",
|
||||
"description",
|
||||
"start_time",
|
||||
"end_time",
|
||||
"location",
|
||||
"attendees",
|
||||
"ics_raw_data",
|
||||
}
|
||||
|
||||
# Runtime exhaustiveness check: ensure we're comparing all EventData fields
|
||||
event_data_fields = set(EventData.__annotations__.keys()) - {"ics_uid"}
|
||||
if event_data_fields != _COMPARED_FIELDS:
|
||||
missing = event_data_fields - _COMPARED_FIELDS
|
||||
extra = _COMPARED_FIELDS - event_data_fields
|
||||
raise RuntimeError(
|
||||
f"_event_data_changed() field mismatch: "
|
||||
f"missing={missing}, extra={extra}. "
|
||||
f"Update the comparison logic when adding/removing fields."
|
||||
)
|
||||
|
||||
return (
|
||||
existing.title != new_data["title"]
|
||||
or existing.description != new_data["description"]
|
||||
or existing.start_time != new_data["start_time"]
|
||||
or existing.end_time != new_data["end_time"]
|
||||
or existing.location != new_data["location"]
|
||||
or existing.attendees != new_data["attendees"]
|
||||
or existing.ics_raw_data != new_data["ics_raw_data"]
|
||||
)
|
||||
|
||||
async def _sync_events_to_database(
|
||||
self, room_id: str, events: list[EventData]
|
||||
) -> SyncStats:
|
||||
@@ -386,11 +409,14 @@ class ICSSyncService:
|
||||
)
|
||||
|
||||
if existing:
|
||||
updated += 1
|
||||
# Only count as updated if data actually changed
|
||||
if self._event_data_changed(existing, event_data):
|
||||
updated += 1
|
||||
await calendar_events_controller.upsert(calendar_event)
|
||||
else:
|
||||
created += 1
|
||||
await calendar_events_controller.upsert(calendar_event)
|
||||
|
||||
await calendar_events_controller.upsert(calendar_event)
|
||||
current_ics_uids.append(event_data["ics_uid"])
|
||||
|
||||
# Soft delete events that are no longer in calendar
|
||||
|
||||
@@ -7,8 +7,10 @@ elif [ "${ENTRYPOINT}" = "worker" ]; then
|
||||
uv run celery -A reflector.worker.app worker --loglevel=info
|
||||
elif [ "${ENTRYPOINT}" = "beat" ]; then
|
||||
uv run celery -A reflector.worker.app beat --loglevel=info
|
||||
elif [ "${ENTRYPOINT}" = "hatchet-worker" ]; then
|
||||
uv run python -m reflector.hatchet.run_workers
|
||||
elif [ "${ENTRYPOINT}" = "hatchet-worker-cpu" ]; then
|
||||
uv run python -m reflector.hatchet.run_workers_cpu
|
||||
elif [ "${ENTRYPOINT}" = "hatchet-worker-llm" ]; then
|
||||
uv run python -m reflector.hatchet.run_workers_llm
|
||||
else
|
||||
echo "Unknown command"
|
||||
fi
|
||||
|
||||
@@ -189,14 +189,17 @@ async def test_ics_sync_service_sync_room_calendar():
|
||||
assert events[0].ics_uid == "sync-event-1"
|
||||
assert events[0].title == "Sync Test Meeting"
|
||||
|
||||
# Second sync with same content (should be unchanged)
|
||||
# Second sync with same content (calendar unchanged, but sync always runs)
|
||||
# Refresh room to get updated etag and force sync by setting old sync time
|
||||
room = await rooms_controller.get_by_id(room.id)
|
||||
await rooms_controller.update(
|
||||
room, {"ics_last_sync": datetime.now(timezone.utc) - timedelta(minutes=10)}
|
||||
)
|
||||
result = await sync_service.sync_room_calendar(room)
|
||||
assert result["status"] == "unchanged"
|
||||
assert result["status"] == "success"
|
||||
assert result["events_created"] == 0
|
||||
assert result["events_updated"] == 0
|
||||
assert result["events_deleted"] == 0
|
||||
|
||||
# Third sync with updated event
|
||||
event["summary"] = "Updated Meeting Title"
|
||||
@@ -288,3 +291,43 @@ async def test_ics_sync_service_error_handling():
|
||||
result = await sync_service.sync_room_calendar(room)
|
||||
assert result["status"] == "error"
|
||||
assert "Network error" in result["error"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_event_data_changed_exhaustiveness():
|
||||
"""Test that _event_data_changed compares all EventData fields (except ics_uid).
|
||||
|
||||
This test ensures programmers don't forget to update the comparison logic
|
||||
when adding new fields to EventData/CalendarEvent.
|
||||
"""
|
||||
from reflector.services.ics_sync import EventData
|
||||
|
||||
sync_service = ICSSyncService()
|
||||
|
||||
from reflector.db.calendar_events import CalendarEvent
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
event_data: EventData = {
|
||||
"ics_uid": "test-123",
|
||||
"title": "Test",
|
||||
"description": "Desc",
|
||||
"location": "Loc",
|
||||
"start_time": now,
|
||||
"end_time": now + timedelta(hours=1),
|
||||
"attendees": [],
|
||||
"ics_raw_data": "raw",
|
||||
}
|
||||
|
||||
existing = CalendarEvent(
|
||||
room_id="room1",
|
||||
**event_data,
|
||||
)
|
||||
|
||||
# Will raise RuntimeError if fields are missing from comparison
|
||||
result = sync_service._event_data_changed(existing, event_data)
|
||||
assert result is False
|
||||
|
||||
modified_data = event_data.copy()
|
||||
modified_data["title"] = "Changed Title"
|
||||
result = sync_service._event_data_changed(existing, modified_data)
|
||||
assert result is True
|
||||
|
||||
Reference in New Issue
Block a user