mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-03-21 22:56:47 +00:00
feat: enable daily co in selfhosted + only schedule tasks when necessary (#883)
* feat: enable daily co in selfhosted + only schedule tasks when necessary * feat: refactor aws storage to be platform agnostic + add local pad tracking with slfhosted support
This commit is contained in:
committed by
GitHub
parent
f6cc03286b
commit
045eae8ff2
@@ -86,6 +86,18 @@ LLM_API_KEY=not-needed
|
||||
## Context size for summary generation (tokens)
|
||||
LLM_CONTEXT_WINDOW=16000
|
||||
|
||||
## =======================================================
|
||||
## Audio Padding
|
||||
##
|
||||
## backends: local (in-process PyAV), modal (HTTP API client)
|
||||
## Default is "local" — no external service needed.
|
||||
## Set to "modal" when using Modal.com or self-hosted gpu/self_hosted/ container.
|
||||
## =======================================================
|
||||
#PADDING_BACKEND=local
|
||||
#PADDING_BACKEND=modal
|
||||
#PADDING_URL=https://xxxxx--reflector-padding-web.modal.run
|
||||
#PADDING_MODAL_API_KEY=xxxxx
|
||||
|
||||
## =======================================================
|
||||
## Diarization
|
||||
##
|
||||
@@ -137,6 +149,10 @@ TRANSCRIPT_STORAGE_AWS_REGION=us-east-1
|
||||
#DAILYCO_STORAGE_AWS_ROLE_ARN=... # IAM role ARN for Daily.co S3 access
|
||||
#DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco
|
||||
#DAILYCO_STORAGE_AWS_REGION=us-west-2
|
||||
# Worker credentials for reading/deleting from Daily's recording bucket
|
||||
# Required when transcript storage is separate from Daily's bucket (e.g., selfhosted with Garage)
|
||||
#DAILYCO_STORAGE_AWS_ACCESS_KEY_ID=your-aws-access-key
|
||||
#DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY=your-aws-secret-key
|
||||
|
||||
## Whereby (optional separate bucket)
|
||||
#WHEREBY_STORAGE_AWS_BUCKET_NAME=reflector-whereby
|
||||
|
||||
@@ -47,6 +47,9 @@ DIARIZATION_URL=http://transcription:8000
|
||||
TRANSLATION_BACKEND=modal
|
||||
TRANSLATE_URL=http://transcription:8000
|
||||
|
||||
PADDING_BACKEND=modal
|
||||
PADDING_URL=http://transcription:8000
|
||||
|
||||
# HuggingFace token — optional, for gated models (e.g. pyannote).
|
||||
# Falls back to public S3 model bundle if not set.
|
||||
# HF_TOKEN=hf_xxxxx
|
||||
@@ -93,15 +96,42 @@ TRANSCRIPT_STORAGE_AWS_REGION=us-east-1
|
||||
# =======================================================
|
||||
# Daily.co Live Rooms (Optional)
|
||||
# Enable real-time meeting rooms with Daily.co integration.
|
||||
# Requires a Daily.co account: https://www.daily.co/
|
||||
# Configure these BEFORE running setup-selfhosted.sh and the
|
||||
# script will auto-detect and start Hatchet workflow services.
|
||||
#
|
||||
# Prerequisites:
|
||||
# 1. Daily.co account: https://www.daily.co/
|
||||
# 2. API key: Dashboard → Developers → API Keys
|
||||
# 3. S3 bucket for recordings: https://docs.daily.co/guides/products/live-streaming-recording/storing-recordings-in-a-custom-s3-bucket
|
||||
# 4. IAM role ARN for Daily.co to write recordings to your bucket
|
||||
#
|
||||
# After configuring, run: ./scripts/setup-selfhosted.sh <your-flags>
|
||||
# The script will detect DAILY_API_KEY and automatically:
|
||||
# - Start Hatchet workflow engine + CPU/LLM workers
|
||||
# - Generate a Hatchet API token
|
||||
# - Enable FEATURE_ROOMS in the frontend
|
||||
# =======================================================
|
||||
# DEFAULT_VIDEO_PLATFORM=daily
|
||||
# DAILY_API_KEY=your-daily-api-key
|
||||
# DAILY_SUBDOMAIN=your-subdomain
|
||||
# DAILY_WEBHOOK_SECRET=your-daily-webhook-secret
|
||||
# DEFAULT_VIDEO_PLATFORM=daily
|
||||
# DAILYCO_STORAGE_AWS_BUCKET_NAME=reflector-dailyco
|
||||
# DAILYCO_STORAGE_AWS_REGION=us-east-1
|
||||
# DAILYCO_STORAGE_AWS_ROLE_ARN=arn:aws:iam::role/DailyCoAccess
|
||||
# Worker credentials for reading/deleting from Daily's recording bucket
|
||||
# Required when transcript storage is separate from Daily's bucket (e.g., selfhosted with Garage)
|
||||
# DAILYCO_STORAGE_AWS_ACCESS_KEY_ID=your-aws-access-key
|
||||
# DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY=your-aws-secret-key
|
||||
# DAILY_WEBHOOK_SECRET=your-daily-webhook-secret # optional, for faster recording discovery
|
||||
|
||||
# =======================================================
|
||||
# Hatchet Workflow Engine (Auto-configured for Daily.co)
|
||||
# Required for Daily.co multitrack recording processing.
|
||||
# The setup script generates HATCHET_CLIENT_TOKEN automatically.
|
||||
# Do not set these manually unless you know what you're doing.
|
||||
# =======================================================
|
||||
# HATCHET_CLIENT_TOKEN=<auto-generated-by-script>
|
||||
# HATCHET_CLIENT_SERVER_URL=http://hatchet:8888
|
||||
# HATCHET_CLIENT_HOST_PORT=hatchet:7077
|
||||
|
||||
# =======================================================
|
||||
# Feature Flags
|
||||
|
||||
@@ -90,7 +90,6 @@ from reflector.processors.summary.summary_builder import SummaryBuilder
|
||||
from reflector.processors.types import TitleSummary, Word
|
||||
from reflector.processors.types import Transcript as TranscriptType
|
||||
from reflector.settings import settings
|
||||
from reflector.storage.storage_aws import AwsStorage
|
||||
from reflector.utils.audio_constants import (
|
||||
PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||
WAVEFORM_SEGMENTS,
|
||||
@@ -117,6 +116,7 @@ class PipelineInput(BaseModel):
|
||||
bucket_name: NonEmptyString
|
||||
transcript_id: NonEmptyString
|
||||
room_id: NonEmptyString | None = None
|
||||
source_platform: str = "daily"
|
||||
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
@@ -170,15 +170,10 @@ async def set_workflow_error_status(transcript_id: NonEmptyString) -> bool:
|
||||
|
||||
|
||||
def _spawn_storage():
|
||||
"""Create fresh storage instance."""
|
||||
# TODO: replace direct AwsStorage construction with get_transcripts_storage() factory
|
||||
return AwsStorage(
|
||||
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
|
||||
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
|
||||
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||
aws_endpoint_url=settings.TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL,
|
||||
)
|
||||
"""Create fresh storage instance for writing to our transcript bucket."""
|
||||
from reflector.storage import get_transcripts_storage # noqa: PLC0415
|
||||
|
||||
return get_transcripts_storage()
|
||||
|
||||
|
||||
class Loggable(Protocol):
|
||||
@@ -434,6 +429,7 @@ async def process_tracks(input: PipelineInput, ctx: Context) -> ProcessTracksRes
|
||||
bucket_name=input.bucket_name,
|
||||
transcript_id=input.transcript_id,
|
||||
language=source_language,
|
||||
source_platform=input.source_platform,
|
||||
)
|
||||
)
|
||||
for i, track in enumerate(input.tracks)
|
||||
@@ -1195,7 +1191,10 @@ async def cleanup_consent(input: PipelineInput, ctx: Context) -> ConsentResult:
|
||||
)
|
||||
from reflector.db.recordings import recordings_controller # noqa: PLC0415
|
||||
from reflector.db.transcripts import transcripts_controller # noqa: PLC0415
|
||||
from reflector.storage import get_transcripts_storage # noqa: PLC0415
|
||||
from reflector.storage import ( # noqa: PLC0415
|
||||
get_source_storage,
|
||||
get_transcripts_storage,
|
||||
)
|
||||
|
||||
transcript = await transcripts_controller.get_by_id(input.transcript_id)
|
||||
if not transcript:
|
||||
@@ -1245,7 +1244,7 @@ async def cleanup_consent(input: PipelineInput, ctx: Context) -> ConsentResult:
|
||||
deletion_errors = []
|
||||
|
||||
if input_track_keys and input.bucket_name:
|
||||
master_storage = get_transcripts_storage()
|
||||
master_storage = get_source_storage(input.source_platform)
|
||||
for key in input_track_keys:
|
||||
try:
|
||||
await master_storage.delete_file(key, bucket=input.bucket_name)
|
||||
|
||||
@@ -24,6 +24,7 @@ class PaddingInput(BaseModel):
|
||||
s3_key: str
|
||||
bucket_name: str
|
||||
transcript_id: str
|
||||
source_platform: str = "daily"
|
||||
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
@@ -45,20 +46,14 @@ async def pad_track(input: PaddingInput, ctx: Context) -> PadTrackResult:
|
||||
)
|
||||
|
||||
try:
|
||||
# Create fresh storage instance to avoid aioboto3 fork issues
|
||||
from reflector.settings import settings # noqa: PLC0415
|
||||
from reflector.storage.storage_aws import AwsStorage # noqa: PLC0415
|
||||
|
||||
# TODO: replace direct AwsStorage construction with get_transcripts_storage() factory
|
||||
storage = AwsStorage(
|
||||
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
|
||||
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
|
||||
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||
aws_endpoint_url=settings.TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL,
|
||||
from reflector.storage import ( # noqa: PLC0415
|
||||
get_source_storage,
|
||||
get_transcripts_storage,
|
||||
)
|
||||
|
||||
source_url = await storage.get_file_url(
|
||||
# Source reads: use platform-specific credentials
|
||||
source_storage = get_source_storage(input.source_platform)
|
||||
source_url = await source_storage.get_file_url(
|
||||
input.s3_key,
|
||||
operation="get_object",
|
||||
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||
@@ -96,52 +91,28 @@ async def pad_track(input: PaddingInput, ctx: Context) -> PadTrackResult:
|
||||
|
||||
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
|
||||
|
||||
# Presign PUT URL for output (Modal will upload directly)
|
||||
output_url = await storage.get_file_url(
|
||||
# Output writes: use transcript storage (our own bucket)
|
||||
output_storage = get_transcripts_storage()
|
||||
output_url = await output_storage.get_file_url(
|
||||
storage_path,
|
||||
operation="put_object",
|
||||
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||
)
|
||||
|
||||
import httpx # noqa: PLC0415
|
||||
|
||||
from reflector.processors.audio_padding_modal import ( # noqa: PLC0415
|
||||
AudioPaddingModalProcessor,
|
||||
from reflector.processors.audio_padding_auto import ( # noqa: PLC0415
|
||||
AudioPaddingAutoProcessor,
|
||||
)
|
||||
|
||||
try:
|
||||
processor = AudioPaddingModalProcessor()
|
||||
result = await processor.pad_track(
|
||||
track_url=source_url,
|
||||
output_url=output_url,
|
||||
start_time_seconds=start_time_seconds,
|
||||
track_index=input.track_index,
|
||||
)
|
||||
file_size = result.size
|
||||
processor = AudioPaddingAutoProcessor()
|
||||
result = await processor.pad_track(
|
||||
track_url=source_url,
|
||||
output_url=output_url,
|
||||
start_time_seconds=start_time_seconds,
|
||||
track_index=input.track_index,
|
||||
)
|
||||
file_size = result.size
|
||||
|
||||
ctx.log(f"pad_track: Modal returned size={file_size}")
|
||||
except httpx.HTTPStatusError as e:
|
||||
error_detail = e.response.text if hasattr(e.response, "text") else str(e)
|
||||
logger.error(
|
||||
"[Hatchet] Modal padding HTTP error",
|
||||
transcript_id=input.transcript_id,
|
||||
track_index=input.track_index,
|
||||
status_code=e.response.status_code if hasattr(e, "response") else None,
|
||||
error=error_detail,
|
||||
exc_info=True,
|
||||
)
|
||||
raise Exception(
|
||||
f"Modal padding failed: HTTP {e.response.status_code}"
|
||||
) from e
|
||||
except httpx.TimeoutException as e:
|
||||
logger.error(
|
||||
"[Hatchet] Modal padding timeout",
|
||||
transcript_id=input.transcript_id,
|
||||
track_index=input.track_index,
|
||||
error=str(e),
|
||||
exc_info=True,
|
||||
)
|
||||
raise Exception("Modal padding timeout") from e
|
||||
ctx.log(f"pad_track: padding returned size={file_size}")
|
||||
|
||||
logger.info(
|
||||
"[Hatchet] pad_track complete",
|
||||
|
||||
@@ -36,6 +36,7 @@ class TrackInput(BaseModel):
|
||||
bucket_name: str
|
||||
transcript_id: str
|
||||
language: str = "en"
|
||||
source_platform: str = "daily"
|
||||
|
||||
|
||||
hatchet = HatchetClientManager.get_client()
|
||||
@@ -59,20 +60,14 @@ async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
|
||||
)
|
||||
|
||||
try:
|
||||
# Create fresh storage instance to avoid aioboto3 fork issues
|
||||
# TODO: replace direct AwsStorage construction with get_transcripts_storage() factory
|
||||
from reflector.settings import settings # noqa: PLC0415
|
||||
from reflector.storage.storage_aws import AwsStorage # noqa: PLC0415
|
||||
|
||||
storage = AwsStorage(
|
||||
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
|
||||
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
|
||||
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||
aws_endpoint_url=settings.TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL,
|
||||
from reflector.storage import ( # noqa: PLC0415
|
||||
get_source_storage,
|
||||
get_transcripts_storage,
|
||||
)
|
||||
|
||||
source_url = await storage.get_file_url(
|
||||
# Source reads: use platform-specific credentials
|
||||
source_storage = get_source_storage(input.source_platform)
|
||||
source_url = await source_storage.get_file_url(
|
||||
input.s3_key,
|
||||
operation="get_object",
|
||||
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||
@@ -99,18 +94,19 @@ async def pad_track(input: TrackInput, ctx: Context) -> PadTrackResult:
|
||||
|
||||
storage_path = f"file_pipeline_hatchet/{input.transcript_id}/tracks/padded_{input.track_index}.webm"
|
||||
|
||||
# Presign PUT URL for output (Modal uploads directly)
|
||||
output_url = await storage.get_file_url(
|
||||
# Output writes: use transcript storage (our own bucket)
|
||||
output_storage = get_transcripts_storage()
|
||||
output_url = await output_storage.get_file_url(
|
||||
storage_path,
|
||||
operation="put_object",
|
||||
expires_in=PRESIGNED_URL_EXPIRATION_SECONDS,
|
||||
)
|
||||
|
||||
from reflector.processors.audio_padding_modal import ( # noqa: PLC0415
|
||||
AudioPaddingModalProcessor,
|
||||
from reflector.processors.audio_padding_auto import ( # noqa: PLC0415
|
||||
AudioPaddingAutoProcessor,
|
||||
)
|
||||
|
||||
processor = AudioPaddingModalProcessor()
|
||||
processor = AudioPaddingAutoProcessor()
|
||||
result = await processor.pad_track(
|
||||
track_url=source_url,
|
||||
output_url=output_url,
|
||||
@@ -161,18 +157,18 @@ async def transcribe_track(input: TrackInput, ctx: Context) -> TranscribeTrackRe
|
||||
raise ValueError("Missing padded_key from pad_track")
|
||||
|
||||
# Presign URL on demand (avoids stale URLs on workflow replay)
|
||||
# TODO: replace direct AwsStorage construction with get_transcripts_storage() factory
|
||||
from reflector.settings import settings # noqa: PLC0415
|
||||
from reflector.storage.storage_aws import AwsStorage # noqa: PLC0415
|
||||
|
||||
storage = AwsStorage(
|
||||
aws_bucket_name=settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME,
|
||||
aws_region=settings.TRANSCRIPT_STORAGE_AWS_REGION,
|
||||
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||
aws_endpoint_url=settings.TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL,
|
||||
from reflector.storage import ( # noqa: PLC0415
|
||||
get_source_storage,
|
||||
get_transcripts_storage,
|
||||
)
|
||||
|
||||
# If bucket_name is set, file is still in the platform's source bucket (no padding applied).
|
||||
# If bucket_name is None, padded file was written to our transcript storage.
|
||||
if bucket_name:
|
||||
storage = get_source_storage(input.source_platform)
|
||||
else:
|
||||
storage = get_transcripts_storage()
|
||||
|
||||
audio_url = await storage.get_file_url(
|
||||
padded_key,
|
||||
operation="get_object",
|
||||
|
||||
31
server/reflector/processors/audio_padding_auto.py
Normal file
31
server/reflector/processors/audio_padding_auto.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import importlib
|
||||
|
||||
from reflector.settings import settings
|
||||
|
||||
|
||||
class AudioPaddingAutoProcessor:
|
||||
_registry = {}
|
||||
|
||||
@classmethod
|
||||
def register(cls, name, kclass):
|
||||
cls._registry[name] = kclass
|
||||
|
||||
def __new__(cls, name: str | None = None, **kwargs):
|
||||
if name is None:
|
||||
name = settings.PADDING_BACKEND
|
||||
if name not in cls._registry:
|
||||
module_name = f"reflector.processors.audio_padding_{name}"
|
||||
importlib.import_module(module_name)
|
||||
|
||||
# gather specific configuration for the processor
|
||||
# search `PADDING_XXX_YYY`, push to constructor as `xxx_yyy`
|
||||
config = {}
|
||||
name_upper = name.upper()
|
||||
settings_prefix = "PADDING_"
|
||||
config_prefix = f"{settings_prefix}{name_upper}_"
|
||||
for key, value in settings:
|
||||
if key.startswith(config_prefix):
|
||||
config_name = key[len(settings_prefix) :].lower()
|
||||
config[config_name] = value
|
||||
|
||||
return cls._registry[name](**config | kwargs)
|
||||
133
server/reflector/processors/audio_padding_local.py
Normal file
133
server/reflector/processors/audio_padding_local.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Local audio padding processor using PyAV.
|
||||
|
||||
Pads audio tracks with silence directly in-process (no HTTP).
|
||||
Reuses the shared PyAV utilities from reflector.utils.audio_padding.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import av
|
||||
|
||||
from reflector.logger import logger
|
||||
from reflector.processors.audio_padding_auto import AudioPaddingAutoProcessor
|
||||
from reflector.processors.audio_padding_modal import PaddingResponse
|
||||
from reflector.utils.audio_padding import apply_audio_padding_to_file
|
||||
|
||||
S3_TIMEOUT = 60
|
||||
|
||||
|
||||
class AudioPaddingLocalProcessor:
|
||||
"""Audio padding processor using local PyAV (no HTTP backend)."""
|
||||
|
||||
async def pad_track(
|
||||
self,
|
||||
track_url: str,
|
||||
output_url: str,
|
||||
start_time_seconds: float,
|
||||
track_index: int,
|
||||
) -> PaddingResponse:
|
||||
"""Pad audio track with silence locally via PyAV.
|
||||
|
||||
Args:
|
||||
track_url: Presigned GET URL for source audio track
|
||||
output_url: Presigned PUT URL for output WebM
|
||||
start_time_seconds: Amount of silence to prepend
|
||||
track_index: Track index for logging
|
||||
"""
|
||||
if not track_url:
|
||||
raise ValueError("track_url cannot be empty")
|
||||
if start_time_seconds <= 0:
|
||||
raise ValueError(
|
||||
f"start_time_seconds must be positive, got {start_time_seconds}"
|
||||
)
|
||||
|
||||
log = logger.bind(track_index=track_index, padding_seconds=start_time_seconds)
|
||||
log.info("Starting local PyAV padding")
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(
|
||||
None,
|
||||
self._pad_track_blocking,
|
||||
track_url,
|
||||
output_url,
|
||||
start_time_seconds,
|
||||
track_index,
|
||||
)
|
||||
|
||||
def _pad_track_blocking(
|
||||
self,
|
||||
track_url: str,
|
||||
output_url: str,
|
||||
start_time_seconds: float,
|
||||
track_index: int,
|
||||
) -> PaddingResponse:
|
||||
"""Blocking padding work: download, pad with PyAV, upload."""
|
||||
import requests
|
||||
|
||||
log = logger.bind(track_index=track_index, padding_seconds=start_time_seconds)
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
input_path = None
|
||||
output_path = None
|
||||
|
||||
try:
|
||||
# Download source audio
|
||||
log.info("Downloading track for local padding")
|
||||
response = requests.get(track_url, stream=True, timeout=S3_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
|
||||
input_path = os.path.join(temp_dir, "track.webm")
|
||||
total_bytes = 0
|
||||
with open(input_path, "wb") as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
total_bytes += len(chunk)
|
||||
log.info("Track downloaded", bytes=total_bytes)
|
||||
|
||||
# Apply padding using shared PyAV utility
|
||||
output_path = os.path.join(temp_dir, "padded.webm")
|
||||
with av.open(input_path) as in_container:
|
||||
apply_audio_padding_to_file(
|
||||
in_container,
|
||||
output_path,
|
||||
start_time_seconds,
|
||||
track_index,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
file_size = os.path.getsize(output_path)
|
||||
log.info("Local padding complete", size=file_size)
|
||||
|
||||
# Upload padded track
|
||||
log.info("Uploading padded track to S3")
|
||||
with open(output_path, "rb") as f:
|
||||
upload_response = requests.put(output_url, data=f, timeout=S3_TIMEOUT)
|
||||
upload_response.raise_for_status()
|
||||
log.info("Upload complete", size=file_size)
|
||||
|
||||
return PaddingResponse(size=file_size)
|
||||
|
||||
except Exception as e:
|
||||
log.error("Local padding failed", error=str(e), exc_info=True)
|
||||
raise
|
||||
finally:
|
||||
if input_path and os.path.exists(input_path):
|
||||
try:
|
||||
os.unlink(input_path)
|
||||
except Exception as e:
|
||||
log.warning("Failed to cleanup input file", error=str(e))
|
||||
if output_path and os.path.exists(output_path):
|
||||
try:
|
||||
os.unlink(output_path)
|
||||
except Exception as e:
|
||||
log.warning("Failed to cleanup output file", error=str(e))
|
||||
try:
|
||||
os.rmdir(temp_dir)
|
||||
except Exception as e:
|
||||
log.warning("Failed to cleanup temp directory", error=str(e))
|
||||
|
||||
|
||||
AudioPaddingAutoProcessor.register("local", AudioPaddingLocalProcessor)
|
||||
@@ -10,6 +10,7 @@ from pydantic import BaseModel
|
||||
|
||||
from reflector.hatchet.constants import TIMEOUT_AUDIO
|
||||
from reflector.logger import logger
|
||||
from reflector.processors.audio_padding_auto import AudioPaddingAutoProcessor
|
||||
|
||||
|
||||
class PaddingResponse(BaseModel):
|
||||
@@ -111,3 +112,6 @@ class AudioPaddingModalProcessor:
|
||||
except Exception as e:
|
||||
log.error("Modal padding unexpected error", error=str(e), exc_info=True)
|
||||
raise
|
||||
|
||||
|
||||
AudioPaddingAutoProcessor.register("modal", AudioPaddingModalProcessor)
|
||||
|
||||
@@ -40,6 +40,7 @@ class MultitrackProcessingConfig:
|
||||
track_keys: list[str]
|
||||
recording_id: NonEmptyString | None = None
|
||||
room_id: NonEmptyString | None = None
|
||||
source_platform: str = "daily"
|
||||
mode: Literal["multitrack"] = "multitrack"
|
||||
|
||||
|
||||
@@ -256,6 +257,7 @@ async def dispatch_transcript_processing(
|
||||
"bucket_name": config.bucket_name,
|
||||
"transcript_id": config.transcript_id,
|
||||
"room_id": config.room_id,
|
||||
"source_platform": config.source_platform,
|
||||
},
|
||||
additional_metadata={
|
||||
"transcript_id": config.transcript_id,
|
||||
|
||||
@@ -73,6 +73,9 @@ class Settings(BaseSettings):
|
||||
DAILYCO_STORAGE_AWS_BUCKET_NAME: str | None = None
|
||||
DAILYCO_STORAGE_AWS_REGION: str | None = None
|
||||
DAILYCO_STORAGE_AWS_ROLE_ARN: str | None = None
|
||||
# Worker credentials for reading/deleting from Daily's recording bucket
|
||||
DAILYCO_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
|
||||
DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
|
||||
|
||||
# Translate into the target language
|
||||
TRANSLATION_BACKEND: str = "passthrough"
|
||||
@@ -106,7 +109,11 @@ class Settings(BaseSettings):
|
||||
# Diarization: modal backend
|
||||
DIARIZATION_MODAL_API_KEY: str | None = None
|
||||
|
||||
# Audio Padding (Modal.com backend)
|
||||
# Audio Padding
|
||||
# backends:
|
||||
# - local: in-process PyAV padding (no HTTP, runs in same process)
|
||||
# - modal: HTTP API client (works with Modal.com OR self-hosted gpu/self_hosted/)
|
||||
PADDING_BACKEND: str = "local"
|
||||
PADDING_URL: str | None = None
|
||||
PADDING_MODAL_API_KEY: str | None = None
|
||||
|
||||
|
||||
@@ -17,6 +17,49 @@ def get_transcripts_storage() -> Storage:
|
||||
)
|
||||
|
||||
|
||||
def get_source_storage(platform: str) -> Storage:
|
||||
"""Get storage for reading/deleting source recording files from the platform's bucket.
|
||||
|
||||
Returns an AwsStorage configured with the platform's worker credentials
|
||||
(access keys), or falls back to get_transcripts_storage() when platform-specific
|
||||
credentials aren't configured (e.g., single-bucket setups).
|
||||
|
||||
Args:
|
||||
platform: Recording platform name ("daily", "whereby", or other).
|
||||
"""
|
||||
if platform == "daily":
|
||||
if (
|
||||
settings.DAILYCO_STORAGE_AWS_ACCESS_KEY_ID
|
||||
and settings.DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY
|
||||
and settings.DAILYCO_STORAGE_AWS_BUCKET_NAME
|
||||
):
|
||||
from reflector.storage.storage_aws import AwsStorage
|
||||
|
||||
return AwsStorage(
|
||||
aws_bucket_name=settings.DAILYCO_STORAGE_AWS_BUCKET_NAME,
|
||||
aws_region=settings.DAILYCO_STORAGE_AWS_REGION or "us-east-1",
|
||||
aws_access_key_id=settings.DAILYCO_STORAGE_AWS_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=settings.DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||
)
|
||||
|
||||
elif platform == "whereby":
|
||||
if (
|
||||
settings.WHEREBY_STORAGE_AWS_ACCESS_KEY_ID
|
||||
and settings.WHEREBY_STORAGE_AWS_SECRET_ACCESS_KEY
|
||||
and settings.WHEREBY_STORAGE_AWS_BUCKET_NAME
|
||||
):
|
||||
from reflector.storage.storage_aws import AwsStorage
|
||||
|
||||
return AwsStorage(
|
||||
aws_bucket_name=settings.WHEREBY_STORAGE_AWS_BUCKET_NAME,
|
||||
aws_region=settings.WHEREBY_STORAGE_AWS_REGION or "us-east-1",
|
||||
aws_access_key_id=settings.WHEREBY_STORAGE_AWS_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=settings.WHEREBY_STORAGE_AWS_SECRET_ACCESS_KEY,
|
||||
)
|
||||
|
||||
return get_transcripts_storage()
|
||||
|
||||
|
||||
def get_whereby_storage() -> Storage:
|
||||
"""
|
||||
Get storage config for Whereby (for passing to Whereby API).
|
||||
|
||||
@@ -24,6 +24,118 @@ RECONCILIATION_INTERVAL = _override or 30.0
|
||||
ICS_SYNC_INTERVAL = _override or 60.0
|
||||
UPCOMING_MEETINGS_INTERVAL = _override or 30.0
|
||||
|
||||
|
||||
def build_beat_schedule(
|
||||
*,
|
||||
whereby_api_key=None,
|
||||
aws_process_recording_queue_url=None,
|
||||
daily_api_key=None,
|
||||
public_mode=False,
|
||||
public_data_retention_days=None,
|
||||
healthcheck_url=None,
|
||||
):
|
||||
"""Build the Celery beat schedule based on configured services.
|
||||
|
||||
Only registers tasks for services that are actually configured,
|
||||
avoiding unnecessary worker wake-ups in selfhosted deployments.
|
||||
"""
|
||||
beat_schedule = {}
|
||||
|
||||
_whereby_enabled = bool(whereby_api_key) or bool(aws_process_recording_queue_url)
|
||||
if _whereby_enabled:
|
||||
beat_schedule["process_messages"] = {
|
||||
"task": "reflector.worker.process.process_messages",
|
||||
"schedule": SQS_POLL_INTERVAL,
|
||||
}
|
||||
beat_schedule["reprocess_failed_recordings"] = {
|
||||
"task": "reflector.worker.process.reprocess_failed_recordings",
|
||||
"schedule": crontab(hour=5, minute=0), # Midnight EST
|
||||
}
|
||||
logger.info(
|
||||
"Whereby beat tasks enabled",
|
||||
tasks=["process_messages", "reprocess_failed_recordings"],
|
||||
)
|
||||
else:
|
||||
logger.info("Whereby beat tasks disabled (no WHEREBY_API_KEY or SQS URL)")
|
||||
|
||||
_daily_enabled = bool(daily_api_key)
|
||||
if _daily_enabled:
|
||||
beat_schedule["poll_daily_recordings"] = {
|
||||
"task": "reflector.worker.process.poll_daily_recordings",
|
||||
"schedule": POLL_DAILY_RECORDINGS_INTERVAL_SEC,
|
||||
}
|
||||
beat_schedule["trigger_daily_reconciliation"] = {
|
||||
"task": "reflector.worker.process.trigger_daily_reconciliation",
|
||||
"schedule": RECONCILIATION_INTERVAL,
|
||||
}
|
||||
beat_schedule["reprocess_failed_daily_recordings"] = {
|
||||
"task": "reflector.worker.process.reprocess_failed_daily_recordings",
|
||||
"schedule": crontab(hour=5, minute=0), # Midnight EST
|
||||
}
|
||||
logger.info(
|
||||
"Daily.co beat tasks enabled",
|
||||
tasks=[
|
||||
"poll_daily_recordings",
|
||||
"trigger_daily_reconciliation",
|
||||
"reprocess_failed_daily_recordings",
|
||||
],
|
||||
)
|
||||
else:
|
||||
logger.info("Daily.co beat tasks disabled (no DAILY_API_KEY)")
|
||||
|
||||
_any_platform = _whereby_enabled or _daily_enabled
|
||||
if _any_platform:
|
||||
beat_schedule["process_meetings"] = {
|
||||
"task": "reflector.worker.process.process_meetings",
|
||||
"schedule": SQS_POLL_INTERVAL,
|
||||
}
|
||||
beat_schedule["sync_all_ics_calendars"] = {
|
||||
"task": "reflector.worker.ics_sync.sync_all_ics_calendars",
|
||||
"schedule": ICS_SYNC_INTERVAL,
|
||||
}
|
||||
beat_schedule["create_upcoming_meetings"] = {
|
||||
"task": "reflector.worker.ics_sync.create_upcoming_meetings",
|
||||
"schedule": UPCOMING_MEETINGS_INTERVAL,
|
||||
}
|
||||
logger.info(
|
||||
"Platform tasks enabled",
|
||||
tasks=[
|
||||
"process_meetings",
|
||||
"sync_all_ics_calendars",
|
||||
"create_upcoming_meetings",
|
||||
],
|
||||
)
|
||||
else:
|
||||
logger.info("Platform tasks disabled (no video platform configured)")
|
||||
|
||||
if public_mode:
|
||||
beat_schedule["cleanup_old_public_data"] = {
|
||||
"task": "reflector.worker.cleanup.cleanup_old_public_data_task",
|
||||
"schedule": crontab(hour=3, minute=0),
|
||||
}
|
||||
logger.info(
|
||||
"Public mode cleanup enabled",
|
||||
retention_days=public_data_retention_days,
|
||||
)
|
||||
|
||||
if healthcheck_url:
|
||||
beat_schedule["healthcheck_ping"] = {
|
||||
"task": "reflector.worker.healthcheck.healthcheck_ping",
|
||||
"schedule": 60.0 * 10,
|
||||
}
|
||||
logger.info("Healthcheck enabled", url=healthcheck_url)
|
||||
else:
|
||||
logger.warning("Healthcheck disabled, no url configured")
|
||||
|
||||
logger.info(
|
||||
"Beat schedule configured",
|
||||
total_tasks=len(beat_schedule),
|
||||
task_names=sorted(beat_schedule.keys()),
|
||||
)
|
||||
|
||||
return beat_schedule
|
||||
|
||||
|
||||
if celery.current_app.main != "default":
|
||||
logger.info(f"Celery already configured ({celery.current_app})")
|
||||
app = celery.current_app
|
||||
@@ -42,57 +154,11 @@ else:
|
||||
]
|
||||
)
|
||||
|
||||
# crontab
|
||||
app.conf.beat_schedule = {
|
||||
"process_messages": {
|
||||
"task": "reflector.worker.process.process_messages",
|
||||
"schedule": SQS_POLL_INTERVAL,
|
||||
},
|
||||
"process_meetings": {
|
||||
"task": "reflector.worker.process.process_meetings",
|
||||
"schedule": SQS_POLL_INTERVAL,
|
||||
},
|
||||
"reprocess_failed_recordings": {
|
||||
"task": "reflector.worker.process.reprocess_failed_recordings",
|
||||
"schedule": crontab(hour=5, minute=0), # Midnight EST
|
||||
},
|
||||
"reprocess_failed_daily_recordings": {
|
||||
"task": "reflector.worker.process.reprocess_failed_daily_recordings",
|
||||
"schedule": crontab(hour=5, minute=0), # Midnight EST
|
||||
},
|
||||
"poll_daily_recordings": {
|
||||
"task": "reflector.worker.process.poll_daily_recordings",
|
||||
"schedule": POLL_DAILY_RECORDINGS_INTERVAL_SEC,
|
||||
},
|
||||
"trigger_daily_reconciliation": {
|
||||
"task": "reflector.worker.process.trigger_daily_reconciliation",
|
||||
"schedule": RECONCILIATION_INTERVAL,
|
||||
},
|
||||
"sync_all_ics_calendars": {
|
||||
"task": "reflector.worker.ics_sync.sync_all_ics_calendars",
|
||||
"schedule": ICS_SYNC_INTERVAL,
|
||||
},
|
||||
"create_upcoming_meetings": {
|
||||
"task": "reflector.worker.ics_sync.create_upcoming_meetings",
|
||||
"schedule": UPCOMING_MEETINGS_INTERVAL,
|
||||
},
|
||||
}
|
||||
|
||||
if settings.PUBLIC_MODE:
|
||||
app.conf.beat_schedule["cleanup_old_public_data"] = {
|
||||
"task": "reflector.worker.cleanup.cleanup_old_public_data_task",
|
||||
"schedule": crontab(hour=3, minute=0),
|
||||
}
|
||||
logger.info(
|
||||
"Public mode cleanup enabled",
|
||||
retention_days=settings.PUBLIC_DATA_RETENTION_DAYS,
|
||||
)
|
||||
|
||||
if settings.HEALTHCHECK_URL:
|
||||
app.conf.beat_schedule["healthcheck_ping"] = {
|
||||
"task": "reflector.worker.healthcheck.healthcheck_ping",
|
||||
"schedule": 60.0 * 10,
|
||||
}
|
||||
logger.info("Healthcheck enabled", url=settings.HEALTHCHECK_URL)
|
||||
else:
|
||||
logger.warning("Healthcheck disabled, no url configured")
|
||||
app.conf.beat_schedule = build_beat_schedule(
|
||||
whereby_api_key=settings.WHEREBY_API_KEY,
|
||||
aws_process_recording_queue_url=settings.AWS_PROCESS_RECORDING_QUEUE_URL,
|
||||
daily_api_key=settings.DAILY_API_KEY,
|
||||
public_mode=settings.PUBLIC_MODE,
|
||||
public_data_retention_days=settings.PUBLIC_DATA_RETENTION_DAYS,
|
||||
healthcheck_url=settings.HEALTHCHECK_URL,
|
||||
)
|
||||
|
||||
@@ -357,6 +357,7 @@ async def _process_multitrack_recording_inner(
|
||||
"bucket_name": bucket_name,
|
||||
"transcript_id": transcript.id,
|
||||
"room_id": room.id,
|
||||
"source_platform": "daily",
|
||||
},
|
||||
additional_metadata={
|
||||
"transcript_id": transcript.id,
|
||||
@@ -1068,6 +1069,7 @@ async def reprocess_failed_daily_recordings():
|
||||
"bucket_name": bucket_name,
|
||||
"transcript_id": transcript.id,
|
||||
"room_id": room.id if room else None,
|
||||
"source_platform": "daily",
|
||||
},
|
||||
additional_metadata={
|
||||
"transcript_id": transcript.id,
|
||||
|
||||
247
server/tests/test_beat_schedule.py
Normal file
247
server/tests/test_beat_schedule.py
Normal file
@@ -0,0 +1,247 @@
|
||||
"""Tests for conditional Celery beat schedule registration.
|
||||
|
||||
Verifies that beat tasks are only registered when their corresponding
|
||||
services are configured (WHEREBY_API_KEY, DAILY_API_KEY, etc.).
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from reflector.worker.app import build_beat_schedule
|
||||
|
||||
|
||||
# Override autouse fixtures from conftest — these tests don't need database or websockets
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_database():
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def ws_manager_in_memory():
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_hatchet_client():
|
||||
yield
|
||||
|
||||
|
||||
# Task name sets for each group
|
||||
WHEREBY_TASKS = {"process_messages", "reprocess_failed_recordings"}
|
||||
DAILY_TASKS = {
|
||||
"poll_daily_recordings",
|
||||
"trigger_daily_reconciliation",
|
||||
"reprocess_failed_daily_recordings",
|
||||
}
|
||||
PLATFORM_TASKS = {
|
||||
"process_meetings",
|
||||
"sync_all_ics_calendars",
|
||||
"create_upcoming_meetings",
|
||||
}
|
||||
|
||||
|
||||
class TestNoPlatformConfigured:
|
||||
"""When no video platform is configured, no platform tasks should be registered."""
|
||||
|
||||
def test_no_platform_tasks(self):
|
||||
schedule = build_beat_schedule()
|
||||
task_names = set(schedule.keys())
|
||||
assert not task_names & WHEREBY_TASKS
|
||||
assert not task_names & DAILY_TASKS
|
||||
assert not task_names & PLATFORM_TASKS
|
||||
|
||||
def test_only_healthcheck_disabled_warning(self):
|
||||
"""With no config at all, schedule should be empty (healthcheck needs URL)."""
|
||||
schedule = build_beat_schedule()
|
||||
assert len(schedule) == 0
|
||||
|
||||
def test_healthcheck_only(self):
|
||||
schedule = build_beat_schedule(healthcheck_url="https://hc.example.com/ping")
|
||||
assert set(schedule.keys()) == {"healthcheck_ping"}
|
||||
|
||||
def test_public_mode_only(self):
|
||||
schedule = build_beat_schedule(public_mode=True)
|
||||
assert set(schedule.keys()) == {"cleanup_old_public_data"}
|
||||
|
||||
|
||||
class TestWherebyOnly:
|
||||
"""When only Whereby is configured."""
|
||||
|
||||
def test_whereby_api_key(self):
|
||||
schedule = build_beat_schedule(whereby_api_key="test-key")
|
||||
task_names = set(schedule.keys())
|
||||
assert WHEREBY_TASKS <= task_names
|
||||
assert PLATFORM_TASKS <= task_names
|
||||
assert not task_names & DAILY_TASKS
|
||||
|
||||
def test_whereby_sqs_url(self):
|
||||
schedule = build_beat_schedule(
|
||||
aws_process_recording_queue_url="https://sqs.us-east-1.amazonaws.com/123/queue"
|
||||
)
|
||||
task_names = set(schedule.keys())
|
||||
assert WHEREBY_TASKS <= task_names
|
||||
assert PLATFORM_TASKS <= task_names
|
||||
assert not task_names & DAILY_TASKS
|
||||
|
||||
def test_whereby_task_count(self):
|
||||
schedule = build_beat_schedule(whereby_api_key="test-key")
|
||||
# Whereby (2) + Platform (3) = 5
|
||||
assert len(schedule) == 5
|
||||
|
||||
|
||||
class TestDailyOnly:
|
||||
"""When only Daily.co is configured."""
|
||||
|
||||
def test_daily_api_key(self):
|
||||
schedule = build_beat_schedule(daily_api_key="test-daily-key")
|
||||
task_names = set(schedule.keys())
|
||||
assert DAILY_TASKS <= task_names
|
||||
assert PLATFORM_TASKS <= task_names
|
||||
assert not task_names & WHEREBY_TASKS
|
||||
|
||||
def test_daily_task_count(self):
|
||||
schedule = build_beat_schedule(daily_api_key="test-daily-key")
|
||||
# Daily (3) + Platform (3) = 6
|
||||
assert len(schedule) == 6
|
||||
|
||||
|
||||
class TestBothPlatforms:
|
||||
"""When both Whereby and Daily.co are configured."""
|
||||
|
||||
def test_all_tasks_registered(self):
|
||||
schedule = build_beat_schedule(
|
||||
whereby_api_key="test-key",
|
||||
daily_api_key="test-daily-key",
|
||||
)
|
||||
task_names = set(schedule.keys())
|
||||
assert WHEREBY_TASKS <= task_names
|
||||
assert DAILY_TASKS <= task_names
|
||||
assert PLATFORM_TASKS <= task_names
|
||||
|
||||
def test_combined_task_count(self):
|
||||
schedule = build_beat_schedule(
|
||||
whereby_api_key="test-key",
|
||||
daily_api_key="test-daily-key",
|
||||
)
|
||||
# Whereby (2) + Daily (3) + Platform (3) = 8
|
||||
assert len(schedule) == 8
|
||||
|
||||
|
||||
class TestConditionalFlags:
|
||||
"""Test PUBLIC_MODE and HEALTHCHECK_URL interact correctly with platform tasks."""
|
||||
|
||||
def test_all_flags_enabled(self):
|
||||
schedule = build_beat_schedule(
|
||||
whereby_api_key="test-key",
|
||||
daily_api_key="test-daily-key",
|
||||
public_mode=True,
|
||||
healthcheck_url="https://hc.example.com/ping",
|
||||
)
|
||||
task_names = set(schedule.keys())
|
||||
assert "cleanup_old_public_data" in task_names
|
||||
assert "healthcheck_ping" in task_names
|
||||
assert WHEREBY_TASKS <= task_names
|
||||
assert DAILY_TASKS <= task_names
|
||||
assert PLATFORM_TASKS <= task_names
|
||||
# Whereby (2) + Daily (3) + Platform (3) + cleanup (1) + healthcheck (1) = 10
|
||||
assert len(schedule) == 10
|
||||
|
||||
def test_public_mode_with_whereby(self):
|
||||
schedule = build_beat_schedule(
|
||||
whereby_api_key="test-key",
|
||||
public_mode=True,
|
||||
)
|
||||
task_names = set(schedule.keys())
|
||||
assert "cleanup_old_public_data" in task_names
|
||||
assert WHEREBY_TASKS <= task_names
|
||||
assert PLATFORM_TASKS <= task_names
|
||||
|
||||
def test_healthcheck_with_daily(self):
|
||||
schedule = build_beat_schedule(
|
||||
daily_api_key="test-daily-key",
|
||||
healthcheck_url="https://hc.example.com/ping",
|
||||
)
|
||||
task_names = set(schedule.keys())
|
||||
assert "healthcheck_ping" in task_names
|
||||
assert DAILY_TASKS <= task_names
|
||||
assert PLATFORM_TASKS <= task_names
|
||||
|
||||
|
||||
class TestTaskDefinitions:
|
||||
"""Verify task definitions have correct structure."""
|
||||
|
||||
def test_whereby_task_paths(self):
|
||||
schedule = build_beat_schedule(whereby_api_key="test-key")
|
||||
assert (
|
||||
schedule["process_messages"]["task"]
|
||||
== "reflector.worker.process.process_messages"
|
||||
)
|
||||
assert (
|
||||
schedule["reprocess_failed_recordings"]["task"]
|
||||
== "reflector.worker.process.reprocess_failed_recordings"
|
||||
)
|
||||
|
||||
def test_daily_task_paths(self):
|
||||
schedule = build_beat_schedule(daily_api_key="test-daily-key")
|
||||
assert (
|
||||
schedule["poll_daily_recordings"]["task"]
|
||||
== "reflector.worker.process.poll_daily_recordings"
|
||||
)
|
||||
assert (
|
||||
schedule["trigger_daily_reconciliation"]["task"]
|
||||
== "reflector.worker.process.trigger_daily_reconciliation"
|
||||
)
|
||||
assert (
|
||||
schedule["reprocess_failed_daily_recordings"]["task"]
|
||||
== "reflector.worker.process.reprocess_failed_daily_recordings"
|
||||
)
|
||||
|
||||
def test_platform_task_paths(self):
|
||||
schedule = build_beat_schedule(daily_api_key="test-daily-key")
|
||||
assert (
|
||||
schedule["process_meetings"]["task"]
|
||||
== "reflector.worker.process.process_meetings"
|
||||
)
|
||||
assert (
|
||||
schedule["sync_all_ics_calendars"]["task"]
|
||||
== "reflector.worker.ics_sync.sync_all_ics_calendars"
|
||||
)
|
||||
assert (
|
||||
schedule["create_upcoming_meetings"]["task"]
|
||||
== "reflector.worker.ics_sync.create_upcoming_meetings"
|
||||
)
|
||||
|
||||
def test_all_tasks_have_schedule(self):
|
||||
"""Every registered task must have a 'schedule' key."""
|
||||
schedule = build_beat_schedule(
|
||||
whereby_api_key="test-key",
|
||||
daily_api_key="test-daily-key",
|
||||
public_mode=True,
|
||||
healthcheck_url="https://hc.example.com/ping",
|
||||
)
|
||||
for name, config in schedule.items():
|
||||
assert "schedule" in config, f"Task '{name}' missing 'schedule' key"
|
||||
assert "task" in config, f"Task '{name}' missing 'task' key"
|
||||
|
||||
|
||||
class TestEmptyStringValues:
|
||||
"""Empty strings should be treated as not configured (falsy)."""
|
||||
|
||||
def test_empty_whereby_key(self):
|
||||
schedule = build_beat_schedule(whereby_api_key="")
|
||||
assert not set(schedule.keys()) & WHEREBY_TASKS
|
||||
|
||||
def test_empty_daily_key(self):
|
||||
schedule = build_beat_schedule(daily_api_key="")
|
||||
assert not set(schedule.keys()) & DAILY_TASKS
|
||||
|
||||
def test_empty_sqs_url(self):
|
||||
schedule = build_beat_schedule(aws_process_recording_queue_url="")
|
||||
assert not set(schedule.keys()) & WHEREBY_TASKS
|
||||
|
||||
def test_none_values(self):
|
||||
schedule = build_beat_schedule(
|
||||
whereby_api_key=None,
|
||||
daily_api_key=None,
|
||||
aws_process_recording_queue_url=None,
|
||||
)
|
||||
assert len(schedule) == 0
|
||||
@@ -367,3 +367,126 @@ async def test_aws_storage_none_endpoint_url():
|
||||
assert storage.base_url == "https://reflector-bucket.s3.amazonaws.com/"
|
||||
# No s3 addressing_style override — boto_config should only have retries
|
||||
assert not hasattr(storage.boto_config, "s3") or storage.boto_config.s3 is None
|
||||
|
||||
|
||||
# --- Tests for get_source_storage() ---
|
||||
|
||||
|
||||
def test_get_source_storage_daily_with_credentials():
|
||||
"""Daily platform with access keys returns AwsStorage with Daily credentials."""
|
||||
with patch("reflector.storage.settings") as mock_settings:
|
||||
mock_settings.DAILYCO_STORAGE_AWS_ACCESS_KEY_ID = "daily-key"
|
||||
mock_settings.DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY = "daily-secret"
|
||||
mock_settings.DAILYCO_STORAGE_AWS_BUCKET_NAME = "daily-bucket"
|
||||
mock_settings.DAILYCO_STORAGE_AWS_REGION = "us-west-2"
|
||||
|
||||
from reflector.storage import get_source_storage
|
||||
|
||||
storage = get_source_storage("daily")
|
||||
|
||||
assert isinstance(storage, AwsStorage)
|
||||
assert storage._bucket_name == "daily-bucket"
|
||||
assert storage._region == "us-west-2"
|
||||
assert storage._access_key_id == "daily-key"
|
||||
assert storage._secret_access_key == "daily-secret"
|
||||
assert storage._endpoint_url is None
|
||||
|
||||
|
||||
def test_get_source_storage_daily_falls_back_without_credentials():
|
||||
"""Daily platform without access keys falls back to transcript storage."""
|
||||
with patch("reflector.storage.settings") as mock_settings:
|
||||
mock_settings.DAILYCO_STORAGE_AWS_ACCESS_KEY_ID = None
|
||||
mock_settings.DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY = None
|
||||
mock_settings.DAILYCO_STORAGE_AWS_BUCKET_NAME = "daily-bucket"
|
||||
mock_settings.TRANSCRIPT_STORAGE_BACKEND = "aws"
|
||||
mock_settings.TRANSCRIPT_STORAGE_AWS_BUCKET_NAME = "transcript-bucket"
|
||||
mock_settings.TRANSCRIPT_STORAGE_AWS_REGION = "us-east-1"
|
||||
mock_settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID = "transcript-key"
|
||||
mock_settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY = "transcript-secret"
|
||||
mock_settings.TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL = None
|
||||
|
||||
from reflector.storage import get_source_storage
|
||||
|
||||
with patch("reflector.storage.get_transcripts_storage") as mock_get_transcripts:
|
||||
fallback = AwsStorage(
|
||||
aws_bucket_name="transcript-bucket",
|
||||
aws_region="us-east-1",
|
||||
aws_access_key_id="transcript-key",
|
||||
aws_secret_access_key="transcript-secret",
|
||||
)
|
||||
mock_get_transcripts.return_value = fallback
|
||||
|
||||
storage = get_source_storage("daily")
|
||||
|
||||
mock_get_transcripts.assert_called_once()
|
||||
assert storage is fallback
|
||||
|
||||
|
||||
def test_get_source_storage_whereby_with_credentials():
|
||||
"""Whereby platform with access keys returns AwsStorage with Whereby credentials."""
|
||||
with patch("reflector.storage.settings") as mock_settings:
|
||||
mock_settings.WHEREBY_STORAGE_AWS_ACCESS_KEY_ID = "whereby-key"
|
||||
mock_settings.WHEREBY_STORAGE_AWS_SECRET_ACCESS_KEY = "whereby-secret"
|
||||
mock_settings.WHEREBY_STORAGE_AWS_BUCKET_NAME = "whereby-bucket"
|
||||
mock_settings.WHEREBY_STORAGE_AWS_REGION = "eu-west-1"
|
||||
|
||||
from reflector.storage import get_source_storage
|
||||
|
||||
storage = get_source_storage("whereby")
|
||||
|
||||
assert isinstance(storage, AwsStorage)
|
||||
assert storage._bucket_name == "whereby-bucket"
|
||||
assert storage._region == "eu-west-1"
|
||||
assert storage._access_key_id == "whereby-key"
|
||||
assert storage._secret_access_key == "whereby-secret"
|
||||
|
||||
|
||||
def test_get_source_storage_unknown_platform_falls_back():
|
||||
"""Unknown platform falls back to transcript storage."""
|
||||
with patch("reflector.storage.settings"):
|
||||
from reflector.storage import get_source_storage
|
||||
|
||||
with patch("reflector.storage.get_transcripts_storage") as mock_get_transcripts:
|
||||
fallback = MagicMock()
|
||||
mock_get_transcripts.return_value = fallback
|
||||
|
||||
storage = get_source_storage("unknown-platform")
|
||||
|
||||
mock_get_transcripts.assert_called_once()
|
||||
assert storage is fallback
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_source_storage_presigns_for_correct_bucket():
|
||||
"""Source storage presigns URLs using the platform's credentials and the override bucket."""
|
||||
with patch("reflector.storage.settings") as mock_settings:
|
||||
mock_settings.DAILYCO_STORAGE_AWS_ACCESS_KEY_ID = "daily-key"
|
||||
mock_settings.DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY = "daily-secret"
|
||||
mock_settings.DAILYCO_STORAGE_AWS_BUCKET_NAME = "daily-bucket"
|
||||
mock_settings.DAILYCO_STORAGE_AWS_REGION = "us-west-2"
|
||||
|
||||
from reflector.storage import get_source_storage
|
||||
|
||||
storage = get_source_storage("daily")
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.generate_presigned_url = AsyncMock(
|
||||
return_value="https://daily-bucket.s3.amazonaws.com/track.webm?signed"
|
||||
)
|
||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||
mock_client.__aexit__ = AsyncMock(return_value=None)
|
||||
|
||||
with patch.object(storage.session, "client", return_value=mock_client):
|
||||
url = await storage.get_file_url(
|
||||
"track.webm",
|
||||
operation="get_object",
|
||||
expires_in=3600,
|
||||
bucket="override-bucket",
|
||||
)
|
||||
|
||||
assert "track.webm" in url
|
||||
mock_client.generate_presigned_url.assert_called_once()
|
||||
call_kwargs = mock_client.generate_presigned_url.call_args
|
||||
params = call_kwargs[1].get("Params") or call_kwargs[0][1]
|
||||
assert params["Bucket"] == "override-bucket"
|
||||
assert params["Key"] == "track.webm"
|
||||
|
||||
Reference in New Issue
Block a user