mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
* feat: remove support of sqlite, 100% postgres * fix: more migration and make datetime timezone aware in postgres * fix: change how database is get, and use contextvar to have difference instance between different loops * test: properly use client fixture that handle lifetime/database connection * fix: add missing client fixture parameters to test functions This commit fixes NameError issues where test functions were trying to use the 'client' fixture but didn't have it as a parameter. The changes include: 1. Added 'client' parameter to test functions in: - test_transcripts_audio_download.py (6 functions including fixture) - test_transcripts_speaker.py (3 functions) - test_transcripts_upload.py (1 function) - test_transcripts_rtc_ws.py (2 functions + appserver fixture) 2. Resolved naming conflicts in test_transcripts_rtc_ws.py where both HTTP client and StreamClient were using variable name 'client'. StreamClient instances are now named 'stream_client' to avoid conflicts. 3. Added missing 'from reflector.app import app' import in rtc_ws tests. Background: Previously implemented contextvars solution with get_database() function resolves asyncio event loop conflicts in Celery tasks. The global client fixture was also created to replace manual AsyncClient instances, ensuring proper FastAPI application lifecycle management and database connections during tests. All tests now pass except for 2 pre-existing RTC WebSocket test failures related to asyncpg connection issues unrelated to these fixes. * fix: ensure task are correctly closed * fix: make separate event loop for the live server * fix: make default settings pointing at postgres * build: remove pytest-docker deps out of dev, just tests group
145 lines
5.1 KiB
Python
145 lines
5.1 KiB
Python
"""Tests for full-text search functionality."""
|
|
|
|
import json
|
|
from datetime import datetime, timezone
|
|
|
|
import pytest
|
|
from pydantic import ValidationError
|
|
|
|
from reflector.db import get_database
|
|
from reflector.db.search import SearchParameters, search_controller
|
|
from reflector.db.transcripts import transcripts
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_search_postgresql_only():
    """Run a search for an arbitrary phrase and check query-text validation.

    The search is expected to return an empty result list and a total of
    zero. NOTE(review): this presumes no stored transcript matches the
    phrase -- confirm against the test database setup.

    Empty and whitespace-only query texts must be rejected with a
    ``ValidationError`` when ``SearchParameters`` is constructed.
    """
    params = SearchParameters(query_text="any query here")
    results, total = await search_controller.search_transcripts(params)
    assert results == []
    assert total == 0

    # Test that empty query raises validation error
    with pytest.raises(ValidationError):
        SearchParameters(query_text="")

    # Test that whitespace query raises validation error
    with pytest.raises(ValidationError):
        SearchParameters(query_text=" ")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_search_input_validation():
    """Check that ``SearchParameters`` rejects unusable query texts.

    Both an empty string and a string made only of whitespace characters
    (spaces, tab, newline) must raise ``ValidationError`` at construction.
    """
    # Test that empty query raises validation error
    with pytest.raises(ValidationError):
        SearchParameters(query_text="")

    # Test that whitespace query raises validation error
    with pytest.raises(ValidationError):
        SearchParameters(query_text=" \t\n ")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_postgresql_search_with_data():
    """Insert one transcript row and verify that several searches find it.

    The test deletes any leftover row with the fixed test id, inserts a
    transcript with a known title and WebVTT body, and then checks that it
    is returned for: a title word, a word present only in the WebVTT text,
    a multi-word query, an ``OR`` query and a quoted phrase. It also checks
    the fields exposed on the matching search result.

    The row is deleted and the database disconnected in ``finally`` so the
    cleanup runs whether or not the assertions pass.
    """
    # collision is improbable
    test_id = "test-search-e2e-7f3a9b2c"

    try:
        # Remove any row left behind by a previous, interrupted run.
        await get_database().execute(
            transcripts.delete().where(transcripts.c.id == test_id)
        )

        test_data = {
            "id": test_id,
            "name": "Test Search Transcript",
            "title": "Engineering Planning Meeting Q4 2024",
            "status": "completed",
            "locked": False,
            "duration": 1800.0,
            "created_at": datetime.now(timezone.utc),
            "short_summary": "Team discussed search implementation",
            "long_summary": "The engineering team met to plan the search feature",
            "topics": json.dumps([]),
            "events": json.dumps([]),
            "participants": json.dumps([]),
            "source_language": "en",
            "target_language": "en",
            "reviewed": False,
            "audio_location": "local",
            "share_mode": "private",
            "source_kind": "room",
            "webvtt": """WEBVTT

00:00:00.000 --> 00:00:10.000
Welcome to our engineering planning meeting for Q4 2024.

00:00:10.000 --> 00:00:20.000
Today we'll discuss the implementation of full-text search.

00:00:20.000 --> 00:00:30.000
The search feature should support complex queries with ranking.

00:00:30.000 --> 00:00:40.000
We need to implement PostgreSQL tsvector for better performance.""",
        }

        await get_database().execute(transcripts.insert().values(**test_data))

        # Test 1: Search for a word in title
        params = SearchParameters(query_text="planning")
        results, total = await search_controller.search_transcripts(params)
        assert total >= 1
        found = any(r.id == test_id for r in results)
        assert found, "Should find test transcript by title word"

        # Test 2: Search for a word in webvtt content
        params = SearchParameters(query_text="tsvector")
        results, total = await search_controller.search_transcripts(params)
        assert total >= 1
        found = any(r.id == test_id for r in results)
        assert found, "Should find test transcript by webvtt content"

        # Test 3: Search with multiple words
        params = SearchParameters(query_text="engineering planning")
        results, total = await search_controller.search_transcripts(params)
        assert total >= 1
        found = any(r.id == test_id for r in results)
        assert found, "Should find test transcript by multiple words"

        # Test 4: Verify SearchResult structure (uses the Test 3 results)
        test_result = next((r for r in results if r.id == test_id), None)
        # Fail loudly instead of silently skipping the structure checks.
        assert test_result is not None, "Test transcript missing from results"
        assert test_result.title == "Engineering Planning Meeting Q4 2024"
        assert test_result.status == "completed"
        assert test_result.duration == 1800.0
        assert 0 <= test_result.rank <= 1, "Rank should be normalized to 0-1"

        # Test 5: Search with OR operator
        params = SearchParameters(query_text="tsvector OR nosuchword")
        results, total = await search_controller.search_transcripts(params)
        assert total >= 1
        found = any(r.id == test_id for r in results)
        assert found, "Should find test transcript with OR query"

        # Test 6: Quoted phrase search
        params = SearchParameters(query_text='"full-text search"')
        results, total = await search_controller.search_transcripts(params)
        assert total >= 1
        found = any(r.id == test_id for r in results)
        assert found, "Should find test transcript by exact phrase"

    finally:
        try:
            await get_database().execute(
                transcripts.delete().where(transcripts.c.id == test_id)
            )
        finally:
            # Disconnect even when the cleanup delete itself fails, so the
            # connection is not left open.
            await get_database().disconnect()
|