feat: search date filter (#710)

* search date filter

* search date filter

* search date filter

* search date filter

* pr comment

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
This commit is contained in:
Igor Monadical
2025-10-23 20:16:43 -04:00
committed by GitHub
parent 3c4b9f2103
commit 962c40e2b6
4 changed files with 361 additions and 54 deletions

View File

@@ -135,6 +135,8 @@ class SearchParameters(BaseModel):
user_id: str | None = None user_id: str | None = None
room_id: str | None = None room_id: str | None = None
source_kind: SourceKind | None = None source_kind: SourceKind | None = None
from_datetime: datetime | None = None
to_datetime: datetime | None = None
class SearchResultDB(BaseModel): class SearchResultDB(BaseModel):
@@ -402,6 +404,14 @@ class SearchController:
base_query = base_query.where( base_query = base_query.where(
transcripts.c.source_kind == params.source_kind transcripts.c.source_kind == params.source_kind
) )
if params.from_datetime:
base_query = base_query.where(
transcripts.c.created_at >= params.from_datetime
)
if params.to_datetime:
base_query = base_query.where(
transcripts.c.created_at <= params.to_datetime
)
if params.query_text is not None: if params.query_text is not None:
order_by = sqlalchemy.desc(sqlalchemy.text("rank")) order_by = sqlalchemy.desc(sqlalchemy.text("rank"))

View File

@@ -5,7 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi_pagination import Page from fastapi_pagination import Page
from fastapi_pagination.ext.databases import apaginate from fastapi_pagination.ext.databases import apaginate
from jose import jwt from jose import jwt
from pydantic import BaseModel, Field, constr, field_serializer from pydantic import AwareDatetime, BaseModel, Field, constr, field_serializer
import reflector.auth as auth import reflector.auth as auth
from reflector.db import get_database from reflector.db import get_database
@@ -133,6 +133,21 @@ SearchOffsetParam = Annotated[
SearchOffsetBase, Query(description="Number of results to skip") SearchOffsetBase, Query(description="Number of results to skip")
] ]
# Optional lower bound on a transcript's creation time.
# `from` is a reserved word in Python and cannot be used as a parameter name,
# so the handler argument is named differently and the public query-string
# key is set through `alias`. `AwareDatetime` is the pydantic datetime type
# that requires timezone information, matching the description text below.
SearchFromDatetimeParam = Annotated[
AwareDatetime | None,
Query(
alias="from",
description="Filter transcripts created on or after this datetime (ISO 8601 with timezone)",
),
]
# Optional upper bound on a transcript's creation time, exposed as the `to`
# query-string key; the counterpart of SearchFromDatetimeParam.
SearchToDatetimeParam = Annotated[
AwareDatetime | None,
Query(
alias="to",
description="Filter transcripts created on or before this datetime (ISO 8601 with timezone)",
),
]
class SearchResponse(BaseModel): class SearchResponse(BaseModel):
results: list[SearchResult] results: list[SearchResult]
@@ -174,18 +189,23 @@ async def transcripts_search(
offset: SearchOffsetParam = 0, offset: SearchOffsetParam = 0,
room_id: Optional[str] = None, room_id: Optional[str] = None,
source_kind: Optional[SourceKind] = None, source_kind: Optional[SourceKind] = None,
from_datetime: SearchFromDatetimeParam = None,
to_datetime: SearchToDatetimeParam = None,
user: Annotated[ user: Annotated[
Optional[auth.UserInfo], Depends(auth.current_user_optional) Optional[auth.UserInfo], Depends(auth.current_user_optional)
] = None, ] = None,
): ):
""" """Full-text search across transcript titles and content."""
Full-text search across transcript titles and content.
"""
if not user and not settings.PUBLIC_MODE: if not user and not settings.PUBLIC_MODE:
raise HTTPException(status_code=401, detail="Not authenticated") raise HTTPException(status_code=401, detail="Not authenticated")
user_id = user["sub"] if user else None user_id = user["sub"] if user else None
if from_datetime and to_datetime and from_datetime > to_datetime:
raise HTTPException(
status_code=400, detail="'from' must be less than or equal to 'to'"
)
search_params = SearchParameters( search_params = SearchParameters(
query_text=parse_search_query_param(q), query_text=parse_search_query_param(q),
limit=limit, limit=limit,
@@ -193,6 +213,8 @@ async def transcripts_search(
user_id=user_id, user_id=user_id,
room_id=room_id, room_id=room_id,
source_kind=source_kind, source_kind=source_kind,
from_datetime=from_datetime,
to_datetime=to_datetime,
) )
results, total = await search_controller.search_transcripts(search_params) results, total = await search_controller.search_transcripts(search_params)

View File

@@ -0,0 +1,256 @@
from datetime import datetime, timedelta, timezone
import pytest
from reflector.db import get_database
from reflector.db.search import SearchParameters, search_controller
from reflector.db.transcripts import SourceKind, transcripts
@pytest.mark.asyncio
class TestDateRangeIntegration:
    """Integration tests for the ``from_datetime`` / ``to_datetime`` search filters.

    Every test inserts five transcripts owned by one test user (created before,
    at the start of, inside, at the end of, and after June 2024), searches
    through ``search_controller.search_transcripts`` and deletes the rows again.
    The class-level ``asyncio`` mark covers every test method, so the methods
    carry no mark of their own.
    """

    # Owner of every fixture row; searches pass the same id so the rows match.
    TEST_USER_ID = "test-user-123"

    async def setup_test_transcripts(self):
        """Insert the fixture transcripts and return their descriptors.

        Returns:
            A list of dicts with ``id``, ``created_at``, ``title`` and
            ``user_id``; hand it to ``cleanup_test_transcripts`` when done.

        Raises:
            Whatever the database raises on insert. Rows written before the
            failure are deleted first so a broken setup leaves nothing behind.
        """
        test_data = [
            {
                "id": "test-before-range",
                "created_at": datetime(2024, 1, 15, tzinfo=timezone.utc),
                "title": "Before Range Transcript",
                "user_id": self.TEST_USER_ID,
            },
            {
                "id": "test-start-boundary",
                "created_at": datetime(2024, 6, 1, tzinfo=timezone.utc),
                "title": "Start Boundary Transcript",
                "user_id": self.TEST_USER_ID,
            },
            {
                "id": "test-middle-range",
                "created_at": datetime(2024, 6, 15, tzinfo=timezone.utc),
                "title": "Middle Range Transcript",
                "user_id": self.TEST_USER_ID,
            },
            {
                "id": "test-end-boundary",
                "created_at": datetime(2024, 6, 30, 23, 59, 59, tzinfo=timezone.utc),
                "title": "End Boundary Transcript",
                "user_id": self.TEST_USER_ID,
            },
            {
                "id": "test-after-range",
                "created_at": datetime(2024, 12, 31, tzinfo=timezone.utc),
                "title": "After Range Transcript",
                "user_id": self.TEST_USER_ID,
            },
        ]

        try:
            for data in test_data:
                full_data = {
                    "id": data["id"],
                    "name": data["id"],
                    "status": "ended",
                    "locked": False,
                    "duration": 60.0,
                    "created_at": data["created_at"],
                    "title": data["title"],
                    "short_summary": "Test summary",
                    "long_summary": "Test long summary",
                    "share_mode": "public",
                    "source_kind": SourceKind.FILE,
                    "audio_deleted": False,
                    "reviewed": False,
                    "user_id": data["user_id"],
                }
                await get_database().execute(transcripts.insert().values(**full_data))
        except Exception:
            # Deleting an id that was never inserted is a no-op, so it is safe
            # to sweep the whole list after a partial failure.
            await self.cleanup_test_transcripts(test_data)
            raise

        return test_data

    async def cleanup_test_transcripts(self, test_data):
        """Delete every transcript listed in ``test_data`` by id."""
        for data in test_data:
            await get_database().execute(
                transcripts.delete().where(transcripts.c.id == data["id"])
            )

    async def _search(self, **filters):
        """Search as the test user against freshly inserted fixture rows.

        Args:
            **filters: Extra ``SearchParameters`` fields (date bounds,
                ``source_kind``, ``room_id``).

        Returns:
            The ``(results, total)`` pair from ``search_transcripts``.

        The fixture rows are removed whether or not the search succeeds.
        """
        test_data = await self.setup_test_transcripts()
        try:
            params = SearchParameters(
                query_text=None,
                user_id=self.TEST_USER_ID,
                **filters,
            )
            return await search_controller.search_transcripts(params)
        finally:
            await self.cleanup_test_transcripts(test_data)

    async def test_filter_with_from_datetime_only(self):
        """Test filtering with only from_datetime parameter."""
        results, _total = await self._search(
            from_datetime=datetime(2024, 6, 1, tzinfo=timezone.utc),
            to_datetime=None,
        )

        # Should include: start_boundary, middle, end_boundary, after
        result_ids = {r.id for r in results}
        assert "test-before-range" not in result_ids
        assert "test-start-boundary" in result_ids
        assert "test-middle-range" in result_ids
        assert "test-end-boundary" in result_ids
        assert "test-after-range" in result_ids

    async def test_filter_with_to_datetime_only(self):
        """Test filtering with only to_datetime parameter."""
        results, _total = await self._search(
            from_datetime=None,
            to_datetime=datetime(2024, 6, 30, tzinfo=timezone.utc),
        )

        # The end-boundary row is stamped 23:59:59 on 6/30, i.e. after the
        # midnight upper bound, so it must be excluded.
        result_ids = {r.id for r in results}
        assert "test-before-range" in result_ids
        assert "test-start-boundary" in result_ids
        assert "test-middle-range" in result_ids
        assert "test-end-boundary" not in result_ids
        assert "test-after-range" not in result_ids

    async def test_filter_with_both_datetimes(self):
        """Test filtering with both bounds set."""
        results, _total = await self._search(
            from_datetime=datetime(2024, 6, 1, tzinfo=timezone.utc),
            # Midnight 7/1 as the upper bound keeps all of 6/30 in range.
            to_datetime=datetime(2024, 7, 1, tzinfo=timezone.utc),
        )

        result_ids = {r.id for r in results}
        assert "test-before-range" not in result_ids
        assert "test-start-boundary" in result_ids
        assert "test-middle-range" in result_ids
        assert "test-end-boundary" in result_ids
        assert "test-after-range" not in result_ids

    async def test_date_filter_with_room_and_source_kind(self):
        """Test the date bounds combined with the source_kind and room_id filters."""
        lower = datetime(2024, 6, 1, tzinfo=timezone.utc)
        upper = datetime(2024, 7, 1, tzinfo=timezone.utc)
        results, _total = await self._search(
            from_datetime=lower,
            to_datetime=upper,
            source_kind=SourceKind.FILE,
            room_id=None,
        )

        # Three fixture rows fall inside the window; without this check the
        # loop below would pass vacuously on an empty result list.
        assert results
        for result in results:
            assert result.source_kind == SourceKind.FILE
            assert result.created_at >= lower
            assert result.created_at <= upper

    async def test_empty_results_for_future_dates(self):
        """Test that a window with no transcripts yields an empty page."""
        results, total = await self._search(
            from_datetime=datetime(2099, 1, 1, tzinfo=timezone.utc),
            to_datetime=datetime(2099, 12, 31, tzinfo=timezone.utc),
        )

        assert results == []
        assert total == 0

    async def test_date_only_input_handling(self):
        """Test a one-day window expressed as two midnight datetimes."""
        # Pydantic will parse date-only strings to datetime at midnight
        from_dt = datetime(2024, 6, 15, 0, 0, 0, tzinfo=timezone.utc)
        to_dt = datetime(2024, 6, 16, 0, 0, 0, tzinfo=timezone.utc)
        results, _total = await self._search(from_datetime=from_dt, to_datetime=to_dt)

        result_ids = {r.id for r in results}
        assert "test-middle-range" in result_ids
        assert "test-before-range" not in result_ids
        assert "test-start-boundary" not in result_ids
        assert "test-end-boundary" not in result_ids
        assert "test-after-range" not in result_ids
class TestDateValidationEdgeCases:
    """Edge case tests for datetime validation.

    These pin down the standard-library behaviour the date filter relies on:
    timezone-aware datetimes compare by absolute instant, not by wall clock.
    """

    def test_timezone_aware_comparison(self):
        """Test that timezone-aware comparisons work correctly."""
        # PST time (UTC-8)
        pst = timezone(timedelta(hours=-8))
        pst_dt = datetime(2024, 6, 15, 8, 0, 0, tzinfo=pst)

        # UTC time equivalent (8AM PST = 4PM UTC)
        utc_dt = datetime(2024, 6, 15, 16, 0, 0, tzinfo=timezone.utc)

        assert pst_dt == utc_dt

    def test_mixed_timezone_input(self):
        """Test handling mixed timezone inputs."""
        pst = timezone(timedelta(hours=-8))
        ist = timezone(timedelta(hours=5, minutes=30))

        from_date = datetime(2024, 6, 15, 0, 0, 0, tzinfo=pst)  # PST midnight
        to_date = datetime(2024, 6, 15, 23, 59, 59, tzinfo=ist)  # IST end of day

        assert from_date.tzinfo is not None
        assert to_date.tzinfo is not None
        assert from_date < to_date

        # In the pair above the wall-clock order and the instant order agree,
        # so it would pass even if offsets were ignored. Here they disagree:
        # 10:00 IST is 04:30 UTC, while 01:00 PST is 09:00 UTC.
        ist_morning = datetime(2024, 6, 15, 10, 0, 0, tzinfo=ist)
        pst_night = datetime(2024, 6, 15, 1, 0, 0, tzinfo=pst)

        assert ist_morning.replace(tzinfo=None) > pst_night.replace(tzinfo=None)
        assert ist_morning < pst_night

View File

@@ -604,25 +604,25 @@ export interface paths {
patch?: never; patch?: never;
trace?: never; trace?: never;
}; };
"/v1/user/tokens": { "/v1/user/api-keys": {
parameters: { parameters: {
query?: never; query?: never;
header?: never; header?: never;
path?: never; path?: never;
cookie?: never; cookie?: never;
}; };
/** List Tokens */ /** List Api Keys */
get: operations["v1_list_tokens"]; get: operations["v1_list_api_keys"];
put?: never; put?: never;
/** Create Token */ /** Create Api Key */
post: operations["v1_create_token"]; post: operations["v1_create_api_key"];
delete?: never; delete?: never;
options?: never; options?: never;
head?: never; head?: never;
patch?: never; patch?: never;
trace?: never; trace?: never;
}; };
"/v1/user/tokens/{token_id}": { "/v1/user/api-keys/{key_id}": {
parameters: { parameters: {
query?: never; query?: never;
header?: never; header?: never;
@@ -632,8 +632,8 @@ export interface paths {
get?: never; get?: never;
put?: never; put?: never;
post?: never; post?: never;
/** Delete Token */ /** Delete Api Key */
delete: operations["v1_delete_token"]; delete: operations["v1_delete_api_key"];
options?: never; options?: never;
head?: never; head?: never;
patch?: never; patch?: never;
@@ -700,6 +700,26 @@ export interface paths {
export type webhooks = Record<string, never>; export type webhooks = Record<string, never>;
export interface components { export interface components {
schemas: { schemas: {
/** ApiKeyResponse */
ApiKeyResponse: {
/**
* Id
* @description A non-empty string
*/
id: string;
/**
* User Id
* @description A non-empty string
*/
user_id: string;
/** Name */
name: string | null;
/**
* Created At
* Format: date-time
*/
created_at: string;
};
/** AudioWaveform */ /** AudioWaveform */
AudioWaveform: { AudioWaveform: {
/** Data */ /** Data */
@@ -759,6 +779,36 @@ export interface components {
*/ */
updated_at: string; updated_at: string;
}; };
/** CreateApiKeyRequest */
CreateApiKeyRequest: {
/** Name */
name?: string | null;
};
/** CreateApiKeyResponse */
CreateApiKeyResponse: {
/**
* Id
* @description A non-empty string
*/
id: string;
/**
* User Id
* @description A non-empty string
*/
user_id: string;
/** Name */
name: string | null;
/**
* Created At
* Format: date-time
*/
created_at: string;
/**
* Key
* @description A non-empty string
*/
key: string;
};
/** CreateParticipant */ /** CreateParticipant */
CreateParticipant: { CreateParticipant: {
/** Speaker */ /** Speaker */
@@ -811,27 +861,6 @@ export interface components {
*/ */
allow_duplicated: boolean | null; allow_duplicated: boolean | null;
}; };
/** CreateTokenRequest */
CreateTokenRequest: {
/** Name */
name?: string | null;
};
/** CreateTokenResponse */
CreateTokenResponse: {
/** Id */
id: string;
/** User Id */
user_id: string;
/** Name */
name: string | null;
/**
* Created At
* Format: date-time
*/
created_at: string;
/** Token */
token: string;
};
/** CreateTranscript */ /** CreateTranscript */
CreateTranscript: { CreateTranscript: {
/** Name */ /** Name */
@@ -1425,20 +1454,6 @@ export interface components {
* @enum {string} * @enum {string}
*/ */
SyncStatus: "success" | "unchanged" | "error" | "skipped"; SyncStatus: "success" | "unchanged" | "error" | "skipped";
/** TokenResponse */
TokenResponse: {
/** Id */
id: string;
/** User Id */
user_id: string;
/** Name */
name: string | null;
/**
* Created At
* Format: date-time
*/
created_at: string;
};
/** Topic */ /** Topic */
Topic: { Topic: {
/** Name */ /** Name */
@@ -2263,6 +2278,10 @@ export interface operations {
offset?: number; offset?: number;
room_id?: string | null; room_id?: string | null;
source_kind?: components["schemas"]["SourceKind"] | null; source_kind?: components["schemas"]["SourceKind"] | null;
/** @description Filter transcripts created on or after this datetime (ISO 8601 with timezone) */
from?: string | null;
/** @description Filter transcripts created on or before this datetime (ISO 8601 with timezone) */
to?: string | null;
}; };
header?: never; header?: never;
path?: never; path?: never;
@@ -3004,7 +3023,7 @@ export interface operations {
}; };
}; };
}; };
v1_list_tokens: { v1_list_api_keys: {
parameters: { parameters: {
query?: never; query?: never;
header?: never; header?: never;
@@ -3019,12 +3038,12 @@ export interface operations {
[name: string]: unknown; [name: string]: unknown;
}; };
content: { content: {
"application/json": components["schemas"]["TokenResponse"][]; "application/json": components["schemas"]["ApiKeyResponse"][];
}; };
}; };
}; };
}; };
v1_create_token: { v1_create_api_key: {
parameters: { parameters: {
query?: never; query?: never;
header?: never; header?: never;
@@ -3033,7 +3052,7 @@ export interface operations {
}; };
requestBody: { requestBody: {
content: { content: {
"application/json": components["schemas"]["CreateTokenRequest"]; "application/json": components["schemas"]["CreateApiKeyRequest"];
}; };
}; };
responses: { responses: {
@@ -3043,7 +3062,7 @@ export interface operations {
[name: string]: unknown; [name: string]: unknown;
}; };
content: { content: {
"application/json": components["schemas"]["CreateTokenResponse"]; "application/json": components["schemas"]["CreateApiKeyResponse"];
}; };
}; };
/** @description Validation Error */ /** @description Validation Error */
@@ -3057,12 +3076,12 @@ export interface operations {
}; };
}; };
}; };
v1_delete_token: { v1_delete_api_key: {
parameters: { parameters: {
query?: never; query?: never;
header?: never; header?: never;
path: { path: {
token_id: string; key_id: string;
}; };
cookie?: never; cookie?: never;
}; };