mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
* feat: improve pipeline threading, and transcriber (parakeet and silero vad) * refactor: remove whisperx, implement parakeet * refactor: make audio_chunker more smart and wait for speech, instead of fixed frame * refactor: make audio merge to always downscale the audio to 16k for transcription * refactor: make the audio transcript modal accepting batches * refactor: improve type safety and remove prometheus metrics - Add DiarizationSegment TypedDict for proper diarization typing - Replace List/Optional with modern Python list/| None syntax - Remove all Prometheus metrics from TranscriptDiarizationAssemblerProcessor - Add comprehensive file processing pipeline with parallel execution - Update processor imports and type annotations throughout - Implement optimized file pipeline as default in process.py tool * refactor: convert FileDiarizationProcessor I/O types to BaseModel Update FileDiarizationInput and FileDiarizationOutput to inherit from BaseModel instead of plain classes, following the standard pattern used by other processors in the codebase. 
* test: add tests for file transcript and diarization with pytest-recording * build: add pytest-recording * feat: add local pyannote for testing * fix: replace PyAV AudioResampler with torchaudio for reliable audio processing - Replace problematic PyAV AudioResampler that was causing ValueError: [Errno 22] Invalid argument - Use torchaudio.functional.resample for robust sample rate conversion - Optimize processing: skip conversion for already 16kHz mono audio - Add direct WAV writing with Python wave module for better performance - Consolidate duplicate downsample checks for cleaner code - Maintain list[av.AudioFrame] input interface - Required for Silero VAD which needs 16kHz mono audio * fix: replace PyAV AudioResampler with torchaudio solution - Resolves ValueError: [Errno 22] Invalid argument in AudioMergeProcessor - Replaces problematic PyAV AudioResampler with torchaudio.functional.resample - Optimizes processing to skip unnecessary conversions when audio is already 16kHz mono - Uses direct WAV writing with Python's wave module for better performance - Fixes test_basic_process to disable diarization (pyannote dependency not installed) - Updates test expectations to match actual processor behavior - Removes unused pydub dependency from pyproject.toml - Adds comprehensive TEST_ANALYSIS.md documenting test suite status * feat: add parameterized test for both diarization modes - Adds @pytest.mark.parametrize to test_basic_process with enable_diarization=[False, True] - Test with diarization=False always passes (tests core AudioMergeProcessor functionality) - Test with diarization=True gracefully skips when pyannote.audio is not installed - Provides comprehensive test coverage for both pipeline configurations * fix: resolve pipeline property naming conflict in AudioDiarizationPyannoteProcessor - Renames 'pipeline' property to 'diarization_pipeline' to avoid conflict with base Processor.pipeline attribute - Fixes AttributeError: 'property 'pipeline' object has no 
setter' when set_pipeline() is called - Updates property usage in _diarize method to use new name - Now correctly supports pipeline initialization for diarization processing * fix: add local for pyannote * test: add diarization test * fix: resample on audio merge now working * fix: correctly restore timestamp * fix: display exception in a threaded processor if that happen * Update pyproject.toml * ci: remove option * ci: update astral-sh/setup-uv * test: add monadical url for pytest-recording * refactor: remove previous version * build: move faster whisper to local dep * test: fix missing import * refactor: improve main_file_pipeline organization and error handling - Move all imports to the top of the file - Create unified EmptyPipeline class to replace duplicate mock pipeline code - Remove timeout and fallback logic - let processors handle their own retries - Fix error handling to raise any exception from parallel tasks - Add proper type hints and validation for captured results * fix: wrong function * fix: remove task_done * feat: add configurable file processing timeouts for modal processors - Add TRANSCRIPT_FILE_TIMEOUT setting (default: 600s) for file transcription - Add DIARIZATION_FILE_TIMEOUT setting (default: 600s) for file diarization - Replace hardcoded timeout=600 with configurable settings in modal processors - Allows customization of timeout values via environment variables * fix: use logger * fix: worker process meetings now use file pipeline * fix: topic not gathered * refactor: remove prepare(), pipeline now work * refactor: implement many review from Igor * test: add test for test_pipeline_main_file * refactor: remove doc * doc: add doc * ci: update build to use native arm64 builder * fix: merge fixes * refactor: changes from Igor review + add test (not by default) to test gpu modal part * ci: update to our own runner linux-amd64 * ci: try using suggested mode=min * fix: update diarizer for latest modal, and use volume * fix: modal file extension 
detection * fix: put the diarizer as A100
133 lines
3.7 KiB
Python
133 lines
3.7 KiB
Python
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    # Typed application configuration. Each attribute is one setting together
    # with its default value. Per ``model_config`` below, values are also read
    # from a UTF-8 ``.env`` file and entries that do not correspond to a
    # declared setting are ignored rather than rejected.
    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", extra="ignore"
    )

    # ----- CORS -----
    UI_BASE_URL: str = "http://localhost:3000"
    CORS_ORIGIN: str = "*"
    CORS_ALLOW_CREDENTIALS: bool = False

    # ----- Database -----
    # Default URL points at a PostgreSQL server on localhost via asyncpg.
    DATABASE_URL: str = (
        "postgresql+asyncpg://reflector:reflector@localhost:5432/reflector"
    )

    # ----- Local data directory -----
    DATA_DIR: str = "./data"

    # ----- Audio transcription -----
    # Available backends: whisper, modal
    TRANSCRIPT_BACKEND: str = "whisper"
    TRANSCRIPT_URL: str | None = None
    TRANSCRIPT_TIMEOUT: int = 90
    TRANSCRIPT_FILE_TIMEOUT: int = 600

    # Audio transcription, modal backend only
    TRANSCRIPT_MODAL_API_KEY: str | None = None

    # ----- Audio transcription storage -----
    TRANSCRIPT_STORAGE_BACKEND: str | None = None

    # AWS configuration for transcription storage
    TRANSCRIPT_STORAGE_AWS_BUCKET_NAME: str = "reflector-bucket"
    TRANSCRIPT_STORAGE_AWS_REGION: str = "us-east-1"
    TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None

    # ----- Recording storage -----
    RECORDING_STORAGE_BACKEND: str | None = None

    # AWS configuration for recording storage
    RECORDING_STORAGE_AWS_BUCKET_NAME: str = "recording-bucket"
    RECORDING_STORAGE_AWS_REGION: str = "us-east-1"
    RECORDING_STORAGE_AWS_ACCESS_KEY_ID: str | None = None
    RECORDING_STORAGE_AWS_SECRET_ACCESS_KEY: str | None = None

    # ----- Translation into the target language -----
    TRANSLATION_BACKEND: str = "passthrough"
    TRANSLATE_URL: str | None = None
    TRANSLATE_TIMEOUT: int = 90

    # Translation, modal backend only
    TRANSLATE_MODAL_API_KEY: str | None = None

    # ----- LLM -----
    LLM_MODEL: str = "microsoft/phi-4"
    LLM_URL: str | None = None
    LLM_API_KEY: str | None = None
    LLM_CONTEXT_WINDOW: int = 16000

    # ----- Diarization -----
    DIARIZATION_ENABLED: bool = True
    DIARIZATION_BACKEND: str = "modal"
    DIARIZATION_URL: str | None = None
    DIARIZATION_FILE_TIMEOUT: int = 600

    # Diarization, modal backend only
    DIARIZATION_MODAL_API_KEY: str | None = None

    # Diarization, local pyannote.audio only
    DIARIZATION_PYANNOTE_AUTH_TOKEN: str | None = None

    # ----- Sentry -----
    SENTRY_DSN: str | None = None

    # ----- User authentication -----
    # Available backends: none, jwt
    AUTH_BACKEND: str = "none"

    # JWT-based user authentication
    AUTH_JWT_ALGORITHM: str = "RS256"
    AUTH_JWT_PUBLIC_KEY: str | None = "authentik.monadical.com_public.pem"
    AUTH_JWT_AUDIENCE: str | None = None

    # ----- API public mode -----
    # When set, all anonymous records will be public.
    PUBLIC_MODE: bool = False

    # ----- Minimum transcript length to generate topic + summary -----
    MIN_TRANSCRIPT_LENGTH: int = 750

    # ----- Celery -----
    CELERY_BROKER_URL: str = "redis://localhost:6379/1"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"

    # ----- Redis -----
    REDIS_HOST: str = "localhost"
    REDIS_PORT: int = 6379
    REDIS_CACHE_DB: int = 2

    # ----- Secret key -----
    # NOTE(review): the default looks like a placeholder ("changeme-...");
    # presumably it must be overridden in real deployments -- confirm.
    SECRET_KEY: str = "changeme-f02f86fd8b3e4fd892c6043e5a298e21"

    # ----- Current hosting/domain -----
    BASE_URL: str = "http://localhost:1250"

    # ----- Profiling -----
    PROFILING: bool = False

    # ----- Healthcheck -----
    HEALTHCHECK_URL: str | None = None

    # ----- Whereby integration -----
    WHEREBY_API_URL: str = "https://api.whereby.dev/v1"
    WHEREBY_API_KEY: str | None = None
    WHEREBY_WEBHOOK_SECRET: str | None = None
    AWS_WHEREBY_ACCESS_KEY_ID: str | None = None
    AWS_WHEREBY_ACCESS_KEY_SECRET: str | None = None
    AWS_PROCESS_RECORDING_QUEUE_URL: str | None = None
    SQS_POLLING_TIMEOUT_SECONDS: int = 60

    # ----- Zulip integration -----
    ZULIP_REALM: str | None = None
    ZULIP_API_KEY: str | None = None
    ZULIP_BOT_EMAIL: str | None = None
|
|
|
|
|
|
# Module-level Settings instance, constructed once when this module is imported.
settings = Settings()
|