mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
feat: search backend (#537)
* docs: transient docs * chore: cleanup * webvtt WIP * webvtt field * chore: webvtt tests comments * chore: remove useless tests * feat: search TASK.md * feat: full text search by title/webvtt * chore: search api task * feat: search api * feat: search API * chore: rm task md * chore: roll back unnecessary validators * chore: pr review WIP * chore: pr review WIP * chore: pr review * chore: top imports * feat: better lint + ci * feat: better lint + ci * feat: better lint + ci * feat: better lint + ci * chore: lint * chore: lint * fix: db datetime definitions * fix: flush() params * fix: update transcript mutability expectation / test * fix: update transcript mutability expectation / test * chore: auto review * chore: new controller extraction * chore: new controller extraction * chore: cleanup * chore: review WIP * chore: pr WIP * chore: remove ci lint * chore: openapi regeneration * chore: openapi regeneration * chore: postgres test doc * fix: .dockerignore for arm binaries * fix: .dockerignore for arm binaries * fix: cap test loops * fix: cap test loops * fix: cap test loops * fix: get_transcript_topics * chore: remove flow.md docs and claude guidance * chore: remove claude.md db doc * chore: remove claude.md db doc * chore: remove claude.md db doc * chore: remove claude.md db doc
This commit is contained in:
163
server/tests/test_search.py
Normal file
163
server/tests/test_search.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""Tests for full-text search functionality."""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from reflector.db import database
|
||||
from reflector.db.search import SearchParameters, search_controller
|
||||
from reflector.db.transcripts import transcripts
|
||||
from reflector.db.utils import is_postgresql
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_postgresql_only():
|
||||
await database.connect()
|
||||
|
||||
try:
|
||||
params = SearchParameters(query_text="any query here")
|
||||
results, total = await search_controller.search_transcripts(params)
|
||||
assert results == []
|
||||
assert total == 0
|
||||
|
||||
try:
|
||||
SearchParameters(query_text="")
|
||||
assert False, "Should have raised validation error"
|
||||
except ValidationError:
|
||||
pass # Expected
|
||||
|
||||
# Test that whitespace query raises validation error
|
||||
try:
|
||||
SearchParameters(query_text=" ")
|
||||
assert False, "Should have raised validation error"
|
||||
except ValidationError:
|
||||
pass # Expected
|
||||
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_search_input_validation():
|
||||
await database.connect()
|
||||
|
||||
try:
|
||||
try:
|
||||
SearchParameters(query_text="")
|
||||
assert False, "Should have raised ValidationError"
|
||||
except ValidationError:
|
||||
pass # Expected
|
||||
|
||||
# Test that whitespace query raises validation error
|
||||
try:
|
||||
SearchParameters(query_text=" \t\n ")
|
||||
assert False, "Should have raised ValidationError"
|
||||
except ValidationError:
|
||||
pass # Expected
|
||||
finally:
|
||||
await database.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_postgresql_search_with_data():
|
||||
"""Test full-text search with actual data in PostgreSQL.
|
||||
|
||||
Example how to run: DATABASE_URL=postgresql://reflector:reflector@localhost:5432/reflector_test uv run pytest tests/test_search.py::test_postgresql_search_with_data -v -p no:env
|
||||
"""
|
||||
# Skip if not PostgreSQL
|
||||
if not is_postgresql():
|
||||
pytest.skip("Test requires PostgreSQL. Set DATABASE_URL=postgresql://...")
|
||||
|
||||
await database.connect()
|
||||
|
||||
# collision is improbable
|
||||
test_id = "test-search-e2e-7f3a9b2c"
|
||||
|
||||
try:
|
||||
await database.execute(transcripts.delete().where(transcripts.c.id == test_id))
|
||||
|
||||
test_data = {
|
||||
"id": test_id,
|
||||
"name": "Test Search Transcript",
|
||||
"title": "Engineering Planning Meeting Q4 2024",
|
||||
"status": "completed",
|
||||
"locked": False,
|
||||
"duration": 1800.0,
|
||||
"created_at": datetime.now(),
|
||||
"short_summary": "Team discussed search implementation",
|
||||
"long_summary": "The engineering team met to plan the search feature",
|
||||
"topics": json.dumps([]),
|
||||
"events": json.dumps([]),
|
||||
"participants": json.dumps([]),
|
||||
"source_language": "en",
|
||||
"target_language": "en",
|
||||
"reviewed": False,
|
||||
"audio_location": "local",
|
||||
"share_mode": "private",
|
||||
"source_kind": "room",
|
||||
"webvtt": """WEBVTT
|
||||
|
||||
00:00:00.000 --> 00:00:10.000
|
||||
Welcome to our engineering planning meeting for Q4 2024.
|
||||
|
||||
00:00:10.000 --> 00:00:20.000
|
||||
Today we'll discuss the implementation of full-text search.
|
||||
|
||||
00:00:20.000 --> 00:00:30.000
|
||||
The search feature should support complex queries with ranking.
|
||||
|
||||
00:00:30.000 --> 00:00:40.000
|
||||
We need to implement PostgreSQL tsvector for better performance.""",
|
||||
}
|
||||
|
||||
await database.execute(transcripts.insert().values(**test_data))
|
||||
|
||||
# Test 1: Search for a word in title
|
||||
params = SearchParameters(query_text="planning")
|
||||
results, total = await search_controller.search_transcripts(params)
|
||||
assert total >= 1
|
||||
found = any(r.id == test_id for r in results)
|
||||
assert found, "Should find test transcript by title word"
|
||||
|
||||
# Test 2: Search for a word in webvtt content
|
||||
params = SearchParameters(query_text="tsvector")
|
||||
results, total = await search_controller.search_transcripts(params)
|
||||
assert total >= 1
|
||||
found = any(r.id == test_id for r in results)
|
||||
assert found, "Should find test transcript by webvtt content"
|
||||
|
||||
# Test 3: Search with multiple words
|
||||
params = SearchParameters(query_text="engineering planning")
|
||||
results, total = await search_controller.search_transcripts(params)
|
||||
assert total >= 1
|
||||
found = any(r.id == test_id for r in results)
|
||||
assert found, "Should find test transcript by multiple words"
|
||||
|
||||
# Test 4: Verify SearchResult structure
|
||||
test_result = next((r for r in results if r.id == test_id), None)
|
||||
if test_result:
|
||||
assert test_result.title == "Engineering Planning Meeting Q4 2024"
|
||||
assert test_result.status == "completed"
|
||||
assert test_result.duration == 1800.0
|
||||
assert test_result.source_kind == "room"
|
||||
assert 0 <= test_result.rank <= 1, "Rank should be normalized to 0-1"
|
||||
|
||||
# Test 5: Search with OR operator
|
||||
params = SearchParameters(query_text="tsvector OR nosuchword")
|
||||
results, total = await search_controller.search_transcripts(params)
|
||||
assert total >= 1
|
||||
found = any(r.id == test_id for r in results)
|
||||
assert found, "Should find test transcript with OR query"
|
||||
|
||||
# Test 6: Quoted phrase search
|
||||
params = SearchParameters(query_text='"full-text search"')
|
||||
results, total = await search_controller.search_transcripts(params)
|
||||
assert total >= 1
|
||||
found = any(r.id == test_id for r in results)
|
||||
assert found, "Should find test transcript by exact phrase"
|
||||
|
||||
finally:
|
||||
await database.execute(transcripts.delete().where(transcripts.c.id == test_id))
|
||||
await database.disconnect()
|
||||
Reference in New Issue
Block a user