mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
* docs: transient docs * chore: cleanup * webvtt WIP * webvtt field * chore: webvtt tests comments * chore: remove useless tests * feat: search TASK.md * feat: full text search by title/webvtt * chore: search api task * feat: search api * feat: search API * chore: rm task md * chore: roll back unnecessary validators * chore: pr review WIP * chore: pr review WIP * chore: pr review * chore: top imports * feat: better lint + ci * feat: better lint + ci * feat: better lint + ci * feat: better lint + ci * chore: lint * chore: lint * fix: db datetime definitions * fix: flush() params * fix: update transcript mutability expectation / test * fix: update transcript mutability expectation / test * chore: auto review * chore: new controller extraction * chore: new controller extraction * chore: cleanup * chore: review WIP * chore: pr WIP * chore: remove ci lint * chore: openapi regeneration * chore: openapi regeneration * chore: postgres test doc * fix: .dockerignore for arm binaries * fix: .dockerignore for arm binaries * fix: cap test loops * fix: cap test loops * fix: cap test loops * fix: get_transcript_topics * chore: remove flow.md docs and claude guidance * chore: remove claude.md db doc * chore: remove claude.md db doc * chore: remove claude.md db doc * chore: remove claude.md db doc
164 lines
5.8 KiB
Python
164 lines
5.8 KiB
Python
"""Tests for full-text search functionality."""
|
|
|
|
import json
|
|
from datetime import datetime
|
|
|
|
import pytest
|
|
from pydantic import ValidationError
|
|
|
|
from reflector.db import database
|
|
from reflector.db.search import SearchParameters, search_controller
|
|
from reflector.db.transcripts import transcripts
|
|
from reflector.db.utils import is_postgresql
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_search_postgresql_only():
    """Check the baseline search result and query-text validation.

    Runs one search for an arbitrary phrase and expects an empty result list
    with a total of zero, then verifies that ``SearchParameters`` rejects
    empty and whitespace-only query text.

    NOTE(review): the empty-result expectation presumably relies on the
    non-PostgreSQL backend returning nothing (or on a database holding no
    matching transcript) -- confirm against ``search_controller``.
    """
    await database.connect()

    try:
        params = SearchParameters(query_text="any query here")
        results, total = await search_controller.search_transcripts(params)
        assert results == []
        assert total == 0

        # pytest.raises fails the test on its own when nothing is raised, so
        # the check no longer depends on an ``assert False`` sentinel that
        # ``python -O`` would strip.
        with pytest.raises(ValidationError):
            SearchParameters(query_text="")

        # Test that whitespace query raises validation error
        with pytest.raises(ValidationError):
            SearchParameters(query_text=" ")
    finally:
        await database.disconnect()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_search_input_validation():
    """Verify that ``SearchParameters`` rejects blank query text.

    Both an empty string and a string made only of spaces, a tab and a
    newline must raise ``ValidationError`` when the parameters are built.
    The database connection is opened and closed around the checks, as in
    the other tests of this module.
    """
    await database.connect()

    try:
        # pytest.raises fails the test on its own when nothing is raised, so
        # no ``assert False`` sentinel (stripped by ``python -O``) is needed.
        with pytest.raises(ValidationError):
            SearchParameters(query_text="")

        # Test that whitespace query raises validation error
        with pytest.raises(ValidationError):
            SearchParameters(query_text=" \t\n ")
    finally:
        await database.disconnect()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_postgresql_search_with_data():
    """Test full-text search with actual data in PostgreSQL.

    Inserts one transcript row with a known title and WebVTT body, runs
    several queries through ``search_controller.search_transcripts`` and
    checks that the row is returned each time. The row is deleted again and
    the connection is closed when the test ends, whether it passed or not.

    Skipped when ``is_postgresql()`` is false.

    Example of how to run:
    DATABASE_URL=postgresql://reflector:reflector@localhost:5432/reflector_test uv run pytest tests/test_search.py::test_postgresql_search_with_data -v -p no:env
    """
    # Skip if not PostgreSQL
    if not is_postgresql():
        pytest.skip("Test requires PostgreSQL. Set DATABASE_URL=postgresql://...")

    await database.connect()

    # collision is improbable
    test_id = "test-search-e2e-7f3a9b2c"

    async def search_for_test_transcript(query_text, failure_message):
        """Run one search and return the result row for ``test_id``.

        Fails the test when the search reports no hits or when the inserted
        transcript is not among the returned results.
        """
        params = SearchParameters(query_text=query_text)
        results, total = await search_controller.search_transcripts(params)
        assert total >= 1
        match = next((r for r in results if r.id == test_id), None)
        assert match is not None, failure_message
        return match

    try:
        # Remove any leftover row from an earlier, interrupted run.
        await database.execute(transcripts.delete().where(transcripts.c.id == test_id))

        test_data = {
            "id": test_id,
            "name": "Test Search Transcript",
            "title": "Engineering Planning Meeting Q4 2024",
            "status": "completed",
            "locked": False,
            "duration": 1800.0,
            "created_at": datetime.now(),
            "short_summary": "Team discussed search implementation",
            "long_summary": "The engineering team met to plan the search feature",
            "topics": json.dumps([]),
            "events": json.dumps([]),
            "participants": json.dumps([]),
            "source_language": "en",
            "target_language": "en",
            "reviewed": False,
            "audio_location": "local",
            "share_mode": "private",
            "source_kind": "room",
            "webvtt": """WEBVTT

00:00:00.000 --> 00:00:10.000
Welcome to our engineering planning meeting for Q4 2024.

00:00:10.000 --> 00:00:20.000
Today we'll discuss the implementation of full-text search.

00:00:20.000 --> 00:00:30.000
The search feature should support complex queries with ranking.

00:00:30.000 --> 00:00:40.000
We need to implement PostgreSQL tsvector for better performance.""",
        }

        await database.execute(transcripts.insert().values(**test_data))

        # Test 1: Search for a word in title
        await search_for_test_transcript(
            "planning", "Should find test transcript by title word"
        )

        # Test 2: Search for a word in webvtt content
        await search_for_test_transcript(
            "tsvector", "Should find test transcript by webvtt content"
        )

        # Test 3: Search with multiple words
        test_result = await search_for_test_transcript(
            "engineering planning", "Should find test transcript by multiple words"
        )

        # Test 4: Verify SearchResult structure
        # The helper already guarantees a match, so these checks always run
        # instead of being skipped silently when the row is missing.
        assert test_result.title == "Engineering Planning Meeting Q4 2024"
        assert test_result.status == "completed"
        assert test_result.duration == 1800.0
        assert test_result.source_kind == "room"
        assert 0 <= test_result.rank <= 1, "Rank should be normalized to 0-1"

        # Test 5: Search with OR operator
        await search_for_test_transcript(
            "tsvector OR nosuchword", "Should find test transcript with OR query"
        )

        # Test 6: Quoted phrase search
        await search_for_test_transcript(
            '"full-text search"', "Should find test transcript by exact phrase"
        )
    finally:
        # Nested so the connection is closed even if the cleanup delete fails.
        try:
            await database.execute(
                transcripts.delete().where(transcripts.c.id == test_id)
            )
        finally:
            await database.disconnect()
|