Files
reflector/server/tests/conftest.py
Sergey Mankovsky 9a71af145e fix: update transcript list on reprocess (#676)
* Update transcript list on reprocess

* Fix transcript create

* Fix multiple sockets issue

* Pass token in sec websocket protocol

* userEvent parse example

* transcript list invalidation non-abstraction

* Emit only relevant events to the user room

* Add ws close code const

* Refactor user websocket endpoint

* Refactor user events provider

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2025-10-07 19:11:30 +02:00

567 lines
18 KiB
Python

import os
from contextlib import asynccontextmanager
from tempfile import NamedTemporaryFile
from unittest.mock import patch
import pytest
@pytest.fixture(scope="session", autouse=True)
def settings_configuration():
    """Point the transcript and diarization backends at the Modal endpoints.

    These URLs are tied to Monadical because the pytest-recording cassettes
    were captured against them. A fork that needs to refresh its cassettes
    has to substitute its own URLs, and the Modal API keys must be supplied
    by the user.
    """
    from reflector.settings import settings

    modal_overrides = {
        "TRANSCRIPT_BACKEND": "modal",
        "TRANSCRIPT_URL": (
            "https://monadical-sas--reflector-transcriber-parakeet-web.modal.run"
        ),
        "DIARIZATION_BACKEND": "modal",
        "DIARIZATION_URL": "https://monadical-sas--reflector-diarizer-web.modal.run",
    }
    # Applied in insertion order, i.e. the same order as plain assignments.
    for option, value in modal_overrides.items():
        setattr(settings, option, value)
@pytest.fixture(scope="module")
def vcr_config():
    """Return VCR options that replace the authorization header with a dummy value."""
    masked_headers = [("authorization", "DUMMY_API_KEY")]
    return {"filter_headers": masked_headers}
@pytest.fixture(scope="session")
def docker_compose_file(pytestconfig):
    """Return the path to ``tests/docker-compose.test.yml`` under the pytest root dir."""
    root_dir = str(pytestconfig.rootdir)
    return os.path.join(root_dir, "tests", "docker-compose.test.yml")
@pytest.fixture(scope="session")
def postgres_service(docker_ip, docker_services):
    """Ensure that the PostgreSQL service is up and return its connection parameters.

    Polls the ``postgres_test`` container until a connection can be opened
    (30 second timeout, 0.1 second pause between attempts).

    Returns:
        dict with the ``host``, ``port``, ``dbname``, ``user`` and ``password``
        that were used for the successful connection.
    """
    # Imported outside the probe: a missing driver should fail right away
    # rather than being swallowed and reported as a readiness timeout.
    import psycopg2

    connection_params = {
        "host": docker_ip,
        "port": docker_services.port_for("postgres_test", 5432),
        "dbname": "reflector_test",
        "user": "test_user",
        "password": "test_password",
    }

    def is_responsive():
        # Readiness probe: any connection failure simply means "not ready yet".
        try:
            conn = psycopg2.connect(**connection_params)
            conn.close()
        except Exception:
            return False
        return True

    docker_services.wait_until_responsive(timeout=30.0, pause=0.1, check=is_responsive)
    return connection_params
@pytest.fixture(scope="function", autouse=True)
async def setup_database(postgres_service):
    """Recreate the schema and hold a database connection open for each test.

    All tables are dropped and created again before the test runs, so every
    test starts from an empty schema. The connection is closed on teardown
    even if the test fails.
    """
    # NOTE: ``@pytest.mark.asyncio`` was removed from this fixture. Marks have
    # no effect on fixtures and pytest deprecates applying them.
    from reflector.db import engine, metadata, get_database  # noqa

    metadata.drop_all(bind=engine)
    metadata.create_all(bind=engine)
    database = get_database()
    try:
        await database.connect()
        yield
    finally:
        await database.disconnect()
@pytest.fixture
def dummy_processors():
    """Stub the topic, title and summary getters with canned values.

    Yields:
        A tuple of the four mocks, in the order
        ``(topic, title, long_summary, short_summary)``.
    """
    topic_cls = (
        "reflector.processors.transcript_topic_detector.TranscriptTopicDetectorProcessor"
    )
    title_cls = (
        "reflector.processors.transcript_final_title.TranscriptFinalTitleProcessor"
    )
    summary_cls = (
        "reflector.processors.transcript_final_summary.TranscriptFinalSummaryProcessor"
    )

    with (
        patch(f"{topic_cls}.get_topic") as mock_topic,
        patch(f"{title_cls}.get_title") as mock_title,
        patch(f"{summary_cls}.get_long_summary") as mock_long_summary,
        patch(f"{summary_cls}.get_short_summary") as mock_short_summary,
    ):
        from reflector.processors.transcript_topic_detector import TopicResponse

        canned_results = (
            (mock_topic, TopicResponse(title="LLM TITLE", summary="LLM SUMMARY")),
            (mock_title, "LLM Title"),
            (mock_long_summary, "LLM LONG SUMMARY"),
            (mock_short_summary, "LLM SHORT SUMMARY"),
        )
        for stub, result in canned_results:
            stub.return_value = result

        yield (mock_topic, mock_title, mock_long_summary, mock_short_summary)  # noqa
@pytest.fixture
async def whisper_transcript():
    """Make ``AudioTranscriptAutoProcessor.__new__`` return a Whisper processor."""
    from reflector.processors.audio_transcript_whisper import (
        AudioTranscriptWhisperProcessor,
    )

    auto_constructor = (
        "reflector.processors.audio_transcript_auto"
        ".AudioTranscriptAutoProcessor.__new__"
    )
    with patch(auto_constructor) as mock_audio:
        # Built after the patch is active, as in the original ordering.
        whisper_processor = AudioTranscriptWhisperProcessor()
        mock_audio.return_value = whisper_processor
        yield
@pytest.fixture
async def dummy_transcript():
    """Swap the auto audio transcriber for one that always returns "Hello world.".

    Each call to the fake ``_transcript`` advances its time cursor by two, so
    successive results carry consecutive word timings.
    """
    from reflector.processors.audio_transcript import AudioTranscriptProcessor
    from reflector.processors.types import AudioFile, Transcript, Word

    class TestAudioTranscriptProcessor(AudioTranscriptProcessor):
        _time_idx = 0

        async def _transcript(self, data: AudioFile):
            begin = self._time_idx
            self._time_idx = begin + 2
            hello = Word(start=begin, end=begin + 1, text="Hello", speaker=0)
            world = Word(start=begin + 1, end=begin + 2, text=" world.", speaker=0)
            return Transcript(text="Hello world.", words=[hello, world])

    auto_constructor = (
        "reflector.processors.audio_transcript_auto"
        ".AudioTranscriptAutoProcessor.__new__"
    )
    with patch(auto_constructor) as mock_audio:
        mock_audio.return_value = TestAudioTranscriptProcessor()
        yield
@pytest.fixture
async def dummy_diarization():
    """Swap the auto audio diarizer for one that alternates speakers 0 and 1.

    Each call to the fake ``_diarize`` returns two one-unit segments and moves
    its time cursor forward by two.
    """
    from reflector.processors.audio_diarization import AudioDiarizationProcessor

    class TestAudioDiarizationProcessor(AudioDiarizationProcessor):
        _time_idx = 0

        async def _diarize(self, data):
            begin = self._time_idx
            self._time_idx = begin + 2
            first_segment = {"start": begin, "end": begin + 1, "speaker": 0}
            second_segment = {"start": begin + 1, "end": begin + 2, "speaker": 1}
            return [first_segment, second_segment]

    auto_constructor = (
        "reflector.processors.audio_diarization_auto"
        ".AudioDiarizationAutoProcessor.__new__"
    )
    with patch(auto_constructor) as mock_audio:
        mock_audio.return_value = TestAudioDiarizationProcessor()
        yield
@pytest.fixture
async def dummy_file_transcript():
    """Swap the auto file transcriber for one that returns a fixed transcript."""
    from reflector.processors.file_transcript import FileTranscriptProcessor
    from reflector.processors.types import Transcript, Word

    # (start, end, text) for every word of the canned transcript, speaker 0.
    word_timeline = (
        (0.0, 0.5, "Hello"),
        (0.5, 0.6, " "),
        (0.6, 1.0, "world"),
        (1.0, 1.1, "."),
        (1.1, 1.2, " "),
        (1.2, 1.5, "How"),
        (1.5, 1.6, " "),
        (1.6, 1.8, "are"),
        (1.8, 1.9, " "),
        (1.9, 2.1, "you"),
        (2.1, 2.2, " "),
        (2.2, 2.5, "today"),
        (2.5, 2.6, "?"),
    )

    class TestFileTranscriptProcessor(FileTranscriptProcessor):
        async def _transcript(self, data):
            spoken_words = [
                Word(start=begin, end=finish, text=token, speaker=0)
                for begin, finish, token in word_timeline
            ]
            return Transcript(
                text="Hello world. How are you today?",
                words=spoken_words,
            )

    auto_constructor = (
        "reflector.processors.file_transcript_auto.FileTranscriptAutoProcessor.__new__"
    )
    with patch(auto_constructor) as mock_auto:
        mock_auto.return_value = TestFileTranscriptProcessor()
        yield
@pytest.fixture
async def dummy_file_diarization():
    """Swap the auto file diarizer for one that returns two fixed segments."""
    from reflector.processors.file_diarization import (
        FileDiarizationOutput,
        FileDiarizationProcessor,
    )
    from reflector.processors.types import DiarizationSegment

    class TestFileDiarizationProcessor(FileDiarizationProcessor):
        async def _diarize(self, data):
            first_speaker = DiarizationSegment(start=0.0, end=1.1, speaker=0)
            second_speaker = DiarizationSegment(start=1.2, end=2.6, speaker=1)
            return FileDiarizationOutput(diarization=[first_speaker, second_speaker])

    auto_constructor = (
        "reflector.processors.file_diarization_auto.FileDiarizationAutoProcessor.__new__"
    )
    with patch(auto_constructor) as mock_auto:
        mock_auto.return_value = TestFileDiarizationProcessor()
        yield
@pytest.fixture
async def dummy_transcript_translator():
    """Swap the auto translator for one that tags text with the language pair.

    The fake ``_translate`` returns ``"<source>:<target>:<text>"``, reading both
    languages from the processor preferences and defaulting each to ``"en"``.
    """
    from reflector.processors.transcript_translator import TranscriptTranslatorProcessor

    class TestTranscriptTranslatorProcessor(TranscriptTranslatorProcessor):
        async def _translate(self, text: str) -> str:
            src = self.get_pref("audio:source_language", "en")
            dst = self.get_pref("audio:target_language", "en")
            return f"{src}:{dst}:{text}"

    def _build_test_translator(cls, *args, **kwargs):
        # Stand-in for ``__new__``: ignores ``cls`` and forwards the arguments.
        return TestTranscriptTranslatorProcessor(*args, **kwargs)

    auto_constructor = (
        "reflector.processors.transcript_translator_auto"
        ".TranscriptTranslatorAutoProcessor.__new__"
    )
    with patch(auto_constructor, _build_test_translator):
        yield
@pytest.fixture
async def dummy_llm():
    """Replace ``reflector.llm.LLM`` with a mock that returns a dummy LLM instance."""
    from reflector.llm import LLM

    class TestLLM(LLM):
        def __init__(self):
            # Only these two attributes are set; the parent initializer is not called.
            self.model_name = "DUMMY MODEL"
            self.llm_tokenizer = "DUMMY TOKENIZER"

    # LLM has no get_instance any more, so the constructor itself is mocked.
    with patch("reflector.llm.LLM") as llm_constructor:
        llm_constructor.return_value = TestLLM()
        yield
@pytest.fixture
async def dummy_storage():
    """Route the storage lookups to a no-op storage serving a bundled MP3.

    Puts and deletes do nothing, the file URL is a fixed fake address, and
    reads return the bytes of ``records/test_mathieu_hello.mp3`` next to this
    file.
    """
    from reflector.storage.base import Storage

    class DummyStorage(Storage):
        async def _put_file(self, *args, **kwargs):
            return None

        async def _delete_file(self, *args, **kwargs):
            return None

        async def _get_file_url(self, *args, **kwargs):
            return "http://fake_server/audio.mp3"

        async def _get_file(self, *args, **kwargs):
            from pathlib import Path

            records_dir = Path(__file__).parent / "records"
            return (records_dir / "test_mathieu_hello.mp3").read_bytes()

    dummy = DummyStorage()
    pipeline_lookup = "reflector.pipelines.main_file_pipeline.get_transcripts_storage"
    with (
        patch("reflector.storage.base.Storage.get_instance") as mock_storage,
        patch("reflector.storage.get_transcripts_storage") as mock_get_transcripts,
        patch(pipeline_lookup) as mock_get_transcripts2,
    ):
        # Every lookup path hands back the same dummy instance.
        for lookup in (mock_storage, mock_get_transcripts, mock_get_transcripts2):
            lookup.return_value = dummy
        yield
@pytest.fixture(scope="session")
def celery_enable_logging():
    """Return ``True`` for the ``celery_enable_logging`` fixture value."""
    enabled = True
    return enabled
@pytest.fixture(scope="session")
def celery_config():
    """Yield a Celery config with an in-memory broker and a temp SQLite result backend.

    The temporary file backing the result database stays open for as long as
    the fixture is active.
    """
    with NamedTemporaryFile() as results_db:
        backend_url = f"db+sqlite:///{results_db.name}"
        yield {"broker_url": "memory://", "result_backend": backend_url}
@pytest.fixture(scope="session")
def celery_includes():
    """Return the pipeline module paths listed for Celery to include."""
    pipelines_package = "reflector.pipelines"
    return [
        f"{pipelines_package}.main_live_pipeline",
        f"{pipelines_package}.main_file_pipeline",
    ]
@pytest.fixture
async def client():
    """Yield an ``httpx.AsyncClient`` that calls the FastAPI app in-process.

    Requests are routed straight to the ASGI app with ``http://test/v1`` as
    base URL; the client is closed on teardown.
    """
    from httpx import ASGITransport, AsyncClient

    from reflector.app import app

    # The ``app=`` shortcut on AsyncClient is deprecated and removed in newer
    # httpx releases; an explicit ASGI transport works across versions.
    transport = ASGITransport(app=app)
    async with AsyncClient(transport=transport, base_url="http://test/v1") as ac:
        yield ac
@pytest.fixture(autouse=True)
async def ws_manager_in_memory(monkeypatch):
    """Replace Redis-based WS manager with an in-memory implementation for tests.

    Also makes the optional-user dependency anonymous and stubs the Redis
    cache client. Every replacement is registered through ``monkeypatch`` so
    it is reverted when the test finishes.
    """
    import asyncio
    import json

    from reflector.ws_manager import WebsocketManager

    class _InMemorySubscriber:
        """Reads messages from one room queue."""

        def __init__(self, queue: asyncio.Queue):
            self.queue = queue

        async def get_message(self, ignore_subscribe_messages: bool = True):
            # Wait briefly for a message; a timeout (or any other failure)
            # is reported as "no message" by returning None.
            try:
                return await asyncio.wait_for(self.queue.get(), timeout=0.05)
            except Exception:
                return None

    class InMemoryPubSubManager:
        """Pub/sub stand-in that keeps one asyncio queue per room."""

        def __init__(self):
            self.queues: dict[str, asyncio.Queue] = {}
            self.connected = False

        async def connect(self) -> None:
            self.connected = True

        async def disconnect(self) -> None:
            self.connected = False

        def _queue_for(self, room_id: str) -> asyncio.Queue:
            # Create the room queue on first use (publish or subscribe).
            if room_id not in self.queues:
                self.queues[room_id] = asyncio.Queue()
            return self.queues[room_id]

        async def send_json(self, room_id: str, message: dict) -> None:
            # Messages carry bytes for both channel and data.
            payload = json.dumps(message).encode("utf-8")
            await self._queue_for(room_id).put(
                {"channel": room_id.encode("utf-8"), "data": payload}
            )

        async def subscribe(self, room_id: str):
            return _InMemorySubscriber(self._queue_for(room_id))

        async def unsubscribe(self, room_id: str) -> None:
            # keep queue for potential later resubscribe within same test
            pass

    pubsub = InMemoryPubSubManager()
    ws_manager = WebsocketManager(pubsub_client=pubsub)

    def _get_ws_manager():
        return ws_manager

    # Patch all places that imported get_ws_manager at import time
    for target in (
        "reflector.ws_manager.get_ws_manager",
        "reflector.pipelines.main_live_pipeline.get_ws_manager",
        "reflector.views.transcripts_websocket.get_ws_manager",
        "reflector.views.user_websocket.get_ws_manager",
        "reflector.views.transcripts.get_ws_manager",
    ):
        monkeypatch.setattr(target, _get_ws_manager)

    # Websocket auth: avoid OAuth2 on websocket dependencies; allow anonymous
    import reflector.auth as auth

    # Ensure FastAPI uses our override for routes that captured the original callable
    from reflector.app import app as fastapi_app

    # setitem (rather than a bare dict assignment) so the override is undone
    # at teardown like every other patch here; the undo tolerates the key
    # having already been removed by another fixture.
    monkeypatch.setitem(
        fastapi_app.dependency_overrides, auth.current_user_optional, lambda: None
    )

    # Stub Redis cache used by profanity filter to avoid external Redis
    from reflector import redis_cache as rc

    class _FakeRedis:
        """Dict-backed stand-in that stores and returns values as bytes."""

        def __init__(self):
            self._data = {}

        def get(self, key):
            value = self._data.get(key)
            if value is None:
                return None
            if isinstance(value, bytes):
                return value
            return str(value).encode("utf-8")

        def setex(self, key, duration, value):
            # ignore duration for tests
            if isinstance(value, bytes):
                self._data[key] = value
            else:
                self._data[key] = str(value).encode("utf-8")

    fake_redises: dict[int, _FakeRedis] = {}

    def _get_redis_client(db=0):
        # One fake client per database number, created on demand.
        if db not in fake_redises:
            fake_redises[db] = _FakeRedis()
        return fake_redises[db]

    monkeypatch.setattr(rc, "get_redis_client", _get_redis_client)

    yield
@pytest.fixture
async def authenticated_client():
    """Run the test with the auth overrides from ``authenticated_client_ctx`` active."""
    # NOTE: ``@pytest.mark.asyncio`` was removed from this fixture. Marks have
    # no effect on fixtures and pytest deprecates applying them.
    async with authenticated_client_ctx():
        yield
@pytest.fixture
async def authenticated_client2():
    """Run the test with the auth overrides from ``authenticated_client2_ctx`` active."""
    # NOTE: ``@pytest.mark.asyncio`` was removed from this fixture. Marks have
    # no effect on fixtures and pytest deprecates applying them.
    async with authenticated_client2_ctx():
        yield
@asynccontextmanager
async def authenticated_client_ctx():
    """Override the auth dependencies so requests run as user ``randomuserid``.

    Both ``current_user`` and ``current_user_optional`` are overridden on the
    FastAPI app while the context is active. The overrides are removed on
    exit, including when the body raises, so a failing test cannot leave the
    fake user installed for later tests.
    """
    from reflector.app import app
    from reflector.auth import current_user, current_user_optional

    app.dependency_overrides[current_user] = lambda: {
        "sub": "randomuserid",
        "email": "test@mail.com",
    }
    app.dependency_overrides[current_user_optional] = lambda: {
        "sub": "randomuserid",
        "email": "test@mail.com",
    }
    try:
        yield
    finally:
        # pop() with a default tolerates an override that was already removed.
        app.dependency_overrides.pop(current_user, None)
        app.dependency_overrides.pop(current_user_optional, None)
@asynccontextmanager
async def authenticated_client2_ctx():
    """Override the auth dependencies so requests run as user ``randomuserid2``.

    Both ``current_user`` and ``current_user_optional`` are overridden on the
    FastAPI app while the context is active. The overrides are removed on
    exit, including when the body raises, so a failing test cannot leave the
    fake user installed for later tests.
    """
    from reflector.app import app
    from reflector.auth import current_user, current_user_optional

    app.dependency_overrides[current_user] = lambda: {
        "sub": "randomuserid2",
        "email": "test@mail.com",
    }
    app.dependency_overrides[current_user_optional] = lambda: {
        "sub": "randomuserid2",
        "email": "test@mail.com",
    }
    try:
        yield
    finally:
        # pop() with a default tolerates an override that was already removed.
        app.dependency_overrides.pop(current_user, None)
        app.dependency_overrides.pop(current_user_optional, None)
@pytest.fixture(scope="session")
def fake_mp3_upload():
    """Mock ``TranscriptController.move_mp3_to_storage`` so it just returns ``True``."""
    upload_target = "reflector.db.transcripts.TranscriptController.move_mp3_to_storage"
    with patch(upload_target) as mock_move:
        mock_move.return_value = True
        yield
@pytest.fixture
async def fake_transcript_with_topics(tmpdir, client):
    """Create an ended transcript with an MP3 on disk and two topics.

    ``settings.DATA_DIR`` points at ``tmpdir`` while the fixture is active and
    is restored on teardown, so the temporary directory does not leak into
    later tests.

    Yields:
        The transcript object as loaded right after creation; the status
        update and the two topics are applied through the controller.
    """
    import shutil
    from pathlib import Path

    from reflector.db.transcripts import TranscriptTopic
    from reflector.processors.types import Word
    from reflector.settings import settings
    from reflector.views.transcripts import transcripts_controller

    previous_data_dir = settings.DATA_DIR
    settings.DATA_DIR = Path(tmpdir)
    try:
        # create a transcript
        response = await client.post(
            "/transcripts", json={"name": "Test audio download"}
        )
        assert response.status_code == 200
        tid = response.json()["id"]

        transcript = await transcripts_controller.get_by_id(tid)
        assert transcript is not None

        await transcripts_controller.update(transcript, {"status": "ended"})

        # manually copy a file at the expected location
        audio_filename = transcript.audio_mp3_filename
        path = Path(__file__).parent / "records" / "test_mathieu_hello.mp3"
        audio_filename.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(path, audio_filename)

        # create some topics: "Topic 1" starts at 0 and "Topic 2" at 2, each
        # holding the words "Hello" and "world", one time unit per word
        for number, offset in ((1, 0), (2, 2)):
            await transcripts_controller.upsert_topic(
                transcript,
                TranscriptTopic(
                    title=f"Topic {number}",
                    summary=f"Topic {number} summary",
                    timestamp=offset,
                    transcript="Hello world",
                    words=[
                        Word(text="Hello", start=offset, end=offset + 1, speaker=0),
                        Word(
                            text="world", start=offset + 1, end=offset + 2, speaker=0
                        ),
                    ],
                ),
            )

        yield transcript
    finally:
        settings.DATA_DIR = previous_data_dir