# reflector/server/reflector/settings.py

from pydantic.types import PositiveInt
from pydantic_settings import BaseSettings, SettingsConfigDict

from reflector.schemas.platform import DAILY_PLATFORM, Platform
from reflector.utils.string import NonEmptyString


class Settings(BaseSettings):
    """Typed configuration for the Reflector server.

    Every attribute below is one setting with its default value. Values are
    loaded by pydantic-settings from the environment and from a UTF-8 encoded
    ``.env`` file; entries that do not match a declared field are ignored
    (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    ROOT_PATH: str = "/"

    # ------------------------------------------------------------------
    # WebRTC
    # ------------------------------------------------------------------
    # Port range used for ICE candidates, written as "50000-50100".
    # If provided, aioice is monkey-patched so its UDP sockets are bound
    # inside this range, which makes plain Docker port mapping possible
    # (no need for network_mode: host).
    WEBRTC_PORT_RANGE: str | None = None
    # IP address or hostname advertised in ICE candidates in place of the
    # container-internal IP. Typical values: "host.docker.internal" (Docker
    # with extra_hosts) or an explicit LAN IP. Resolution happens at
    # connection time.
    WEBRTC_HOST: str | None = None

    # ------------------------------------------------------------------
    # CORS
    # ------------------------------------------------------------------
    UI_BASE_URL: str = "http://localhost:3000"
    CORS_ORIGIN: str = "*"
    CORS_ALLOW_CREDENTIALS: bool = False

    # ------------------------------------------------------------------
    # Database
    # ------------------------------------------------------------------
    DATABASE_URL: str = (
        "postgresql+asyncpg://reflector:reflector@localhost:5432/reflector"
    )

    # Directory for local data
    DATA_DIR: str = "./data"

    # ------------------------------------------------------------------
    # Audio chunking
    # ------------------------------------------------------------------
    # Available backends: silero, frames
    AUDIO_CHUNKER_BACKEND: str = "frames"

    # HuggingFace token, needed for gated models
    # (pyannote diarization in --cpu mode)
    HF_TOKEN: str | None = None

    # ------------------------------------------------------------------
    # Audio transcription
    # ------------------------------------------------------------------
    # Available backends:
    #   - whisper: model is loaded in-process (no HTTP, same process)
    #   - modal: HTTP API client (Modal.com OR self-hosted gpu/self_hosted/)
    TRANSCRIPT_BACKEND: str = "whisper"

    # Whisper model size used for local transcription.
    # One of: "tiny", "base", "small", "medium", "large-v2"
    WHISPER_CHUNK_MODEL: str = "tiny"
    WHISPER_FILE_MODEL: str = "tiny"
    TRANSCRIPT_URL: str | None = None
    TRANSCRIPT_TIMEOUT: int = 90
    # Kept below Hatchet TIMEOUT_HEAVY (600) to avoid a timeout race
    TRANSCRIPT_FILE_TIMEOUT: int = 540

    # Transcription, modal backend only
    TRANSCRIPT_MODAL_API_KEY: str | None = None

    # Storage backend for audio transcription
    TRANSCRIPT_STORAGE_BACKEND: str | None = None

    # AWS storage configuration
    TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: str = "reflector-bucket"
    TRANSCRIPT_STORAGE_AWS_REGION: str = "us-east-1"
    TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
    TRANSCRIPT_STORAGE_AWS_ENDPOINT_URL: str | None = None

    # ------------------------------------------------------------------
    # Per-platform recording storage.
    # Names follow the {PREFIX}_STORAGE_AWS_{CREDENTIAL} pattern.
    # ------------------------------------------------------------------
    # Whereby
    WHEREBY_STORAGE_AWS_BUCKET_NAME: str | None = None
    WHEREBY_STORAGE_AWS_REGION: str | None = None
    WHEREBY_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    WHEREBY_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None

    # Daily.co
    DAILYCO_STORAGE_AWS_BUCKET_NAME: str | None = None
    DAILYCO_STORAGE_AWS_REGION: str | None = None
    DAILYCO_STORAGE_AWS_ROLE_ARN: str | None = None
    # Credentials the worker uses to read/delete from Daily's recording bucket
    DAILYCO_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    DAILYCO_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None

    # ------------------------------------------------------------------
    # Translation into the target language
    # ------------------------------------------------------------------
    TRANSLATION_BACKEND: str = "passthrough"
    TRANSLATE_URL: str | None = None
    TRANSLATE_TIMEOUT: int = 90

    # Translation, modal backend only
    TRANSLATE_MODAL_API_KEY: str | None = None

    # ------------------------------------------------------------------
    # LLM
    # ------------------------------------------------------------------
    LLM_MODEL: str = "microsoft/phi-4"
    LLM_URL: str | None = None
    LLM_API_KEY: str | None = None
    LLM_CONTEXT_WINDOW: int = 16000
    # HTTP request timeout for LLM calls, in seconds
    LLM_REQUEST_TIMEOUT: float = 300.0

    # Maximum retries on JSON/validation errors
    # (total attempts = retries + 1)
    LLM_PARSE_MAX_RETRIES: int = 3
    # Timeout for structured responses, in seconds (5 minutes)
    LLM_STRUCTURED_RESPONSE_TIMEOUT: int = 300

    # ------------------------------------------------------------------
    # Diarization
    # ------------------------------------------------------------------
    DIARIZATION_ENABLED: bool = True
    # Available backends:
    #   - modal: HTTP API client
    #   - pyannote: in-process pyannote.audio
    DIARIZATION_BACKEND: str = "modal"
    DIARIZATION_URL: str | None = None
    DIARIZATION_FILE_TIMEOUT: int = 600

    # Diarization, modal backend only
    DIARIZATION_MODAL_API_KEY: str | None = None

    # ------------------------------------------------------------------
    # Audio padding
    # ------------------------------------------------------------------
    # Available backends:
    #   - pyav: PyAV padding done in-process (no HTTP, same process)
    #   - modal: HTTP API client (Modal.com OR self-hosted gpu/self_hosted/)
    PADDING_BACKEND: str = "pyav"
    PADDING_URL: str | None = None
    PADDING_MODAL_API_KEY: str | None = None

    # ------------------------------------------------------------------
    # Audio mixdown
    # ------------------------------------------------------------------
    # Available backends:
    #   - pyav: PyAV mixdown done in-process (no HTTP, same process)
    #   - modal: HTTP API client (Modal.com OR self-hosted gpu/self_hosted/)
    MIXDOWN_BACKEND: str = "pyav"
    MIXDOWN_URL: str | None = None
    MIXDOWN_MODAL_API_KEY: str | None = None

    # ------------------------------------------------------------------
    # Sentry
    # ------------------------------------------------------------------
    SENTRY_DSN: str | None = None

    # ------------------------------------------------------------------
    # User authentication
    # ------------------------------------------------------------------
    # Available backends: none, jwt, password
    AUTH_BACKEND: str = "none"

    # JWT-based authentication
    AUTH_JWT_ALGORITHM: str = "RS256"
    AUTH_JWT_PUBLIC_KEY: str | None = "authentik.monadical.com_public.pem"
    AUTH_JWT_AUDIENCE: str | None = None

    # Password-based authentication (selfhosted)
    ADMIN_EMAIL: str | None = None
    ADMIN_PASSWORD_HASH: str | None = None

    PUBLIC_MODE: bool = False
    PUBLIC_DATA_RETENTION_DAYS: PositiveInt = 7

    # Minimum transcript length required to generate topic + summary
    MIN_TRANSCRIPT_LENGTH: int = 750

    # ------------------------------------------------------------------
    # Celery
    # ------------------------------------------------------------------
    CELERY_BROKER_URL: str = "redis://localhost:6379/1"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"

    # ------------------------------------------------------------------
    # Redis
    # ------------------------------------------------------------------
    REDIS_HOST: str = "localhost"
    REDIS_PORT: int = 6379
    REDIS_CACHE_DB: int = 2

    # Secret key. The default is a "changeme-" placeholder value.
    # NOTE(review): presumably meant to be overridden in real deployments.
    SECRET_KEY: str = "changeme-f02f86fd8b3e4fd892c6043e5a298e21"

    # Hosting/domain this instance currently runs under
    BASE_URL: str = "http://localhost:1250"

    # Profiling toggle
    PROFILING: bool = False

    # Healthcheck endpoint
    HEALTHCHECK_URL: str | None = None

    # ------------------------------------------------------------------
    # Whereby integration
    # ------------------------------------------------------------------
    WHEREBY_API_URL: str = "https://api.whereby.dev/v1"
    WHEREBY_API_KEY: NonEmptyString | None = None
    WHEREBY_WEBHOOK_SECRET: str | None = None
    AWS_PROCESS_RECORDING_QUEUE_URL: str | None = None
    SQS_POLLING_TIMEOUT_SECONDS: int = 60
    # 0 = use individual defaults; set e.g. 300 for 5-min polling
    CELERY_BEAT_POLL_INTERVAL: int = 0

    # ------------------------------------------------------------------
    # Daily.co integration
    # ------------------------------------------------------------------
    DAILY_API_URL: str = "https://api.daily.co/v1"
    DAILY_API_KEY: str | None = None
    DAILY_WEBHOOK_SECRET: str | None = None
    DAILY_SUBDOMAIN: str | None = None
    # Webhook UUID for this environment. Not used by production code
    DAILY_WEBHOOK_UUID: str | None = None

    # ------------------------------------------------------------------
    # LiveKit integration (self-hosted open-source video platform)
    # ------------------------------------------------------------------
    # e.g. ws://livekit:7880 (internal) or wss://livekit.example.com
    LIVEKIT_URL: str | None = None
    LIVEKIT_API_KEY: str | None = None
    LIVEKIT_API_SECRET: str | None = None
    # Defaults to API_SECRET if not set
    LIVEKIT_WEBHOOK_SECRET: str | None = None
    # S3 storage for LiveKit egress
    # (Track Egress writes per-participant audio here)
    LIVEKIT_STORAGE_AWS_BUCKET_NAME: str | None = None
    LIVEKIT_STORAGE_AWS_REGION: str | None = None
    LIVEKIT_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    LIVEKIT_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None
    # For Garage/MinIO
    LIVEKIT_STORAGE_AWS_ENDPOINT_URL: str | None = None
    # Public LiveKit URL (used in frontend room_url,
    # e.g. wss://livekit.example.com)
    LIVEKIT_PUBLIC_URL: str | None = None

    # ------------------------------------------------------------------
    # Platform configuration
    # ------------------------------------------------------------------
    DEFAULT_VIDEO_PLATFORM: Platform = DAILY_PLATFORM

    # ------------------------------------------------------------------
    # Zulip integration
    # ------------------------------------------------------------------
    ZULIP_REALM: str | None = None
    ZULIP_API_KEY: str | None = None
    ZULIP_BOT_EMAIL: str | None = None
    ZULIP_DAG_STREAM: str | None = None
    ZULIP_DAG_TOPIC: str | None = None

    # ------------------------------------------------------------------
    # Email / SMTP integration (transcript email notifications)
    # ------------------------------------------------------------------
    SMTP_HOST: str | None = None
    SMTP_PORT: int = 587
    SMTP_USERNAME: str | None = None
    SMTP_PASSWORD: str | None = None
    SMTP_FROM_EMAIL: str | None = None
    SMTP_USE_TLS: bool = True

    # ------------------------------------------------------------------
    # Hatchet workflow orchestration
    # (always enabled for multitrack processing)
    # ------------------------------------------------------------------
    HATCHET_CLIENT_TOKEN: str | None = None
    # One of: none, tls, mtls
    HATCHET_CLIENT_TLS_STRATEGY: str = "none"
    HATCHET_DEBUG: bool = False


# Module-level settings instance, constructed once when this module is
# imported. Field values come from the defaults declared on ``Settings``
# unless overridden by the environment or the ``.env`` file.
settings = Settings()