From 962c40e2b6428ac42fd10aea926782d7a6f3f902 Mon Sep 17 00:00:00 2001 From: Igor Monadical Date: Thu, 23 Oct 2025 20:16:43 -0400 Subject: [PATCH] feat: search date filter (#710) * search date filter * search date filter * search date filter * search date filter * pr comment --------- Co-authored-by: Igor Loskutov --- server/reflector/db/search.py | 10 + server/reflector/views/transcripts.py | 30 ++- server/tests/test_search_date_filtering.py | 256 +++++++++++++++++++++ www/app/reflector-api.d.ts | 119 ++++++---- 4 files changed, 361 insertions(+), 54 deletions(-) create mode 100644 server/tests/test_search_date_filtering.py diff --git a/server/reflector/db/search.py b/server/reflector/db/search.py index caa21c65..5d9bc507 100644 --- a/server/reflector/db/search.py +++ b/server/reflector/db/search.py @@ -135,6 +135,8 @@ class SearchParameters(BaseModel): user_id: str | None = None room_id: str | None = None source_kind: SourceKind | None = None + from_datetime: datetime | None = None + to_datetime: datetime | None = None class SearchResultDB(BaseModel): @@ -402,6 +404,14 @@ class SearchController: base_query = base_query.where( transcripts.c.source_kind == params.source_kind ) + if params.from_datetime: + base_query = base_query.where( + transcripts.c.created_at >= params.from_datetime + ) + if params.to_datetime: + base_query = base_query.where( + transcripts.c.created_at <= params.to_datetime + ) if params.query_text is not None: order_by = sqlalchemy.desc(sqlalchemy.text("rank")) diff --git a/server/reflector/views/transcripts.py b/server/reflector/views/transcripts.py index 04d27e1a..37e806cb 100644 --- a/server/reflector/views/transcripts.py +++ b/server/reflector/views/transcripts.py @@ -5,7 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query from fastapi_pagination import Page from fastapi_pagination.ext.databases import apaginate from jose import jwt -from pydantic import BaseModel, Field, constr, field_serializer +from pydantic import 
AwareDatetime, BaseModel, Field, constr, field_serializer import reflector.auth as auth from reflector.db import get_database @@ -133,6 +133,21 @@ SearchOffsetParam = Annotated[ SearchOffsetBase, Query(description="Number of results to skip") ] +SearchFromDatetimeParam = Annotated[ + AwareDatetime | None, + Query( + alias="from", + description="Filter transcripts created on or after this datetime (ISO 8601 with timezone)", + ), +] +SearchToDatetimeParam = Annotated[ + AwareDatetime | None, + Query( + alias="to", + description="Filter transcripts created on or before this datetime (ISO 8601 with timezone)", + ), +] + class SearchResponse(BaseModel): results: list[SearchResult] @@ -174,18 +189,23 @@ async def transcripts_search( offset: SearchOffsetParam = 0, room_id: Optional[str] = None, source_kind: Optional[SourceKind] = None, + from_datetime: SearchFromDatetimeParam = None, + to_datetime: SearchToDatetimeParam = None, user: Annotated[ Optional[auth.UserInfo], Depends(auth.current_user_optional) ] = None, ): - """ - Full-text search across transcript titles and content. 
- """ + """Full-text search across transcript titles and content.""" if not user and not settings.PUBLIC_MODE: raise HTTPException(status_code=401, detail="Not authenticated") user_id = user["sub"] if user else None + if from_datetime and to_datetime and from_datetime > to_datetime: + raise HTTPException( + status_code=400, detail="'from' must be less than or equal to 'to'" + ) + search_params = SearchParameters( query_text=parse_search_query_param(q), limit=limit, @@ -193,6 +213,8 @@ async def transcripts_search( user_id=user_id, room_id=room_id, source_kind=source_kind, + from_datetime=from_datetime, + to_datetime=to_datetime, ) results, total = await search_controller.search_transcripts(search_params) diff --git a/server/tests/test_search_date_filtering.py b/server/tests/test_search_date_filtering.py new file mode 100644 index 00000000..58fd6446 --- /dev/null +++ b/server/tests/test_search_date_filtering.py @@ -0,0 +1,256 @@ +from datetime import datetime, timedelta, timezone + +import pytest + +from reflector.db import get_database +from reflector.db.search import SearchParameters, search_controller +from reflector.db.transcripts import SourceKind, transcripts + + +@pytest.mark.asyncio +class TestDateRangeIntegration: + async def setup_test_transcripts(self): + # Use a test user_id that will match in our search parameters + test_user_id = "test-user-123" + + test_data = [ + { + "id": "test-before-range", + "created_at": datetime(2024, 1, 15, tzinfo=timezone.utc), + "title": "Before Range Transcript", + "user_id": test_user_id, + }, + { + "id": "test-start-boundary", + "created_at": datetime(2024, 6, 1, tzinfo=timezone.utc), + "title": "Start Boundary Transcript", + "user_id": test_user_id, + }, + { + "id": "test-middle-range", + "created_at": datetime(2024, 6, 15, tzinfo=timezone.utc), + "title": "Middle Range Transcript", + "user_id": test_user_id, + }, + { + "id": "test-end-boundary", + "created_at": datetime(2024, 6, 30, 23, 59, 59, tzinfo=timezone.utc), + 
"title": "End Boundary Transcript", + "user_id": test_user_id, + }, + { + "id": "test-after-range", + "created_at": datetime(2024, 12, 31, tzinfo=timezone.utc), + "title": "After Range Transcript", + "user_id": test_user_id, + }, + ] + + for data in test_data: + full_data = { + "id": data["id"], + "name": data["id"], + "status": "ended", + "locked": False, + "duration": 60.0, + "created_at": data["created_at"], + "title": data["title"], + "short_summary": "Test summary", + "long_summary": "Test long summary", + "share_mode": "public", + "source_kind": SourceKind.FILE, + "audio_deleted": False, + "reviewed": False, + "user_id": data["user_id"], + } + + await get_database().execute(transcripts.insert().values(**full_data)) + + return test_data + + async def cleanup_test_transcripts(self, test_data): + """Clean up test transcripts.""" + for data in test_data: + await get_database().execute( + transcripts.delete().where(transcripts.c.id == data["id"]) + ) + + @pytest.mark.asyncio + async def test_filter_with_from_datetime_only(self): + """Test filtering with only from_datetime parameter.""" + test_data = await self.setup_test_transcripts() + test_user_id = "test-user-123" + + try: + params = SearchParameters( + query_text=None, + from_datetime=datetime(2024, 6, 1, tzinfo=timezone.utc), + to_datetime=None, + user_id=test_user_id, + ) + + results, total = await search_controller.search_transcripts(params) + + # Should include: start_boundary, middle, end_boundary, after + result_ids = [r.id for r in results] + assert "test-before-range" not in result_ids + assert "test-start-boundary" in result_ids + assert "test-middle-range" in result_ids + assert "test-end-boundary" in result_ids + assert "test-after-range" in result_ids + + finally: + await self.cleanup_test_transcripts(test_data) + + @pytest.mark.asyncio + async def test_filter_with_to_datetime_only(self): + """Test filtering with only to_datetime parameter.""" + test_data = await self.setup_test_transcripts() + 
test_user_id = "test-user-123" + + try: + params = SearchParameters( + query_text=None, + from_datetime=None, + to_datetime=datetime(2024, 6, 30, tzinfo=timezone.utc), + user_id=test_user_id, + ) + + results, total = await search_controller.search_transcripts(params) + + result_ids = [r.id for r in results] + assert "test-before-range" in result_ids + assert "test-start-boundary" in result_ids + assert "test-middle-range" in result_ids + assert "test-end-boundary" not in result_ids + assert "test-after-range" not in result_ids + + finally: + await self.cleanup_test_transcripts(test_data) + + @pytest.mark.asyncio + async def test_filter_with_both_datetimes(self): + test_data = await self.setup_test_transcripts() + test_user_id = "test-user-123" + + try: + params = SearchParameters( + query_text=None, + from_datetime=datetime(2024, 6, 1, tzinfo=timezone.utc), + to_datetime=datetime( + 2024, 7, 1, tzinfo=timezone.utc + ), # Inclusive of 6/30 + user_id=test_user_id, + ) + + results, total = await search_controller.search_transcripts(params) + + result_ids = [r.id for r in results] + assert "test-before-range" not in result_ids + assert "test-start-boundary" in result_ids + assert "test-middle-range" in result_ids + assert "test-end-boundary" in result_ids + assert "test-after-range" not in result_ids + + finally: + await self.cleanup_test_transcripts(test_data) + + @pytest.mark.asyncio + async def test_date_filter_with_room_and_source_kind(self): + test_data = await self.setup_test_transcripts() + test_user_id = "test-user-123" + + try: + params = SearchParameters( + query_text=None, + from_datetime=datetime(2024, 6, 1, tzinfo=timezone.utc), + to_datetime=datetime(2024, 7, 1, tzinfo=timezone.utc), + source_kind=SourceKind.FILE, + room_id=None, + user_id=test_user_id, + ) + + results, total = await search_controller.search_transcripts(params) + + for result in results: + assert result.source_kind == SourceKind.FILE + assert result.created_at >= datetime(2024, 6, 1, 
tzinfo=timezone.utc) + assert result.created_at <= datetime(2024, 7, 1, tzinfo=timezone.utc) + + finally: + await self.cleanup_test_transcripts(test_data) + + @pytest.mark.asyncio + async def test_empty_results_for_future_dates(self): + test_data = await self.setup_test_transcripts() + test_user_id = "test-user-123" + + try: + params = SearchParameters( + query_text=None, + from_datetime=datetime(2099, 1, 1, tzinfo=timezone.utc), + to_datetime=datetime(2099, 12, 31, tzinfo=timezone.utc), + user_id=test_user_id, + ) + + results, total = await search_controller.search_transcripts(params) + + assert results == [] + assert total == 0 + + finally: + await self.cleanup_test_transcripts(test_data) + + @pytest.mark.asyncio + async def test_date_only_input_handling(self): + test_data = await self.setup_test_transcripts() + test_user_id = "test-user-123" + + try: + # Emulate date-only input: midnight UTC boundaries are constructed directly (no string parsing is exercised here) + from_dt = datetime(2024, 6, 15, 0, 0, 0, tzinfo=timezone.utc) + to_dt = datetime(2024, 6, 16, 0, 0, 0, tzinfo=timezone.utc) + + params = SearchParameters( + query_text=None, + from_datetime=from_dt, + to_datetime=to_dt, + user_id=test_user_id, + ) + + results, total = await search_controller.search_transcripts(params) + + result_ids = [r.id for r in results] + assert "test-middle-range" in result_ids + assert "test-before-range" not in result_ids + assert "test-after-range" not in result_ids + + finally: + await self.cleanup_test_transcripts(test_data) + + +class TestDateValidationEdgeCases: + """Edge case tests for datetime validation.""" + + def test_timezone_aware_comparison(self): + """Test that timezone-aware comparisons work correctly.""" + # PST time (UTC-8) + pst = timezone(timedelta(hours=-8)) + pst_dt = datetime(2024, 6, 15, 8, 0, 0, tzinfo=pst) + + # UTC time equivalent (8AM PST = 4PM UTC) + utc_dt = datetime(2024, 6, 15, 16, 0, 0, tzinfo=timezone.utc) + + assert pst_dt == utc_dt + + def test_mixed_timezone_input(self): + """Test handling
mixed timezone inputs.""" + pst = timezone(timedelta(hours=-8)) + ist = timezone(timedelta(hours=5, minutes=30)) + + from_date = datetime(2024, 6, 15, 0, 0, 0, tzinfo=pst) # PST midnight + to_date = datetime(2024, 6, 15, 23, 59, 59, tzinfo=ist) # IST end of day + + assert from_date.tzinfo is not None + assert to_date.tzinfo is not None + assert from_date < to_date diff --git a/www/app/reflector-api.d.ts b/www/app/reflector-api.d.ts index 1e0a3819..1dc92f2b 100644 --- a/www/app/reflector-api.d.ts +++ b/www/app/reflector-api.d.ts @@ -604,25 +604,25 @@ export interface paths { patch?: never; trace?: never; }; - "/v1/user/tokens": { + "/v1/user/api-keys": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - /** List Tokens */ - get: operations["v1_list_tokens"]; + /** List Api Keys */ + get: operations["v1_list_api_keys"]; put?: never; - /** Create Token */ - post: operations["v1_create_token"]; + /** Create Api Key */ + post: operations["v1_create_api_key"]; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/v1/user/tokens/{token_id}": { + "/v1/user/api-keys/{key_id}": { parameters: { query?: never; header?: never; @@ -632,8 +632,8 @@ export interface paths { get?: never; put?: never; post?: never; - /** Delete Token */ - delete: operations["v1_delete_token"]; + /** Delete Api Key */ + delete: operations["v1_delete_api_key"]; options?: never; head?: never; patch?: never; @@ -700,6 +700,26 @@ export interface paths { export type webhooks = Record; export interface components { schemas: { + /** ApiKeyResponse */ + ApiKeyResponse: { + /** + * Id + * @description A non-empty string + */ + id: string; + /** + * User Id + * @description A non-empty string + */ + user_id: string; + /** Name */ + name: string | null; + /** + * Created At + * Format: date-time + */ + created_at: string; + }; /** AudioWaveform */ AudioWaveform: { /** Data */ @@ -759,6 +779,36 @@ export interface components { */ updated_at: string; }; 
+ /** CreateApiKeyRequest */ + CreateApiKeyRequest: { + /** Name */ + name?: string | null; + }; + /** CreateApiKeyResponse */ + CreateApiKeyResponse: { + /** + * Id + * @description A non-empty string + */ + id: string; + /** + * User Id + * @description A non-empty string + */ + user_id: string; + /** Name */ + name: string | null; + /** + * Created At + * Format: date-time + */ + created_at: string; + /** + * Key + * @description A non-empty string + */ + key: string; + }; /** CreateParticipant */ CreateParticipant: { /** Speaker */ @@ -811,27 +861,6 @@ export interface components { */ allow_duplicated: boolean | null; }; - /** CreateTokenRequest */ - CreateTokenRequest: { - /** Name */ - name?: string | null; - }; - /** CreateTokenResponse */ - CreateTokenResponse: { - /** Id */ - id: string; - /** User Id */ - user_id: string; - /** Name */ - name: string | null; - /** - * Created At - * Format: date-time - */ - created_at: string; - /** Token */ - token: string; - }; /** CreateTranscript */ CreateTranscript: { /** Name */ @@ -1425,20 +1454,6 @@ export interface components { * @enum {string} */ SyncStatus: "success" | "unchanged" | "error" | "skipped"; - /** TokenResponse */ - TokenResponse: { - /** Id */ - id: string; - /** User Id */ - user_id: string; - /** Name */ - name: string | null; - /** - * Created At - * Format: date-time - */ - created_at: string; - }; /** Topic */ Topic: { /** Name */ @@ -2263,6 +2278,10 @@ export interface operations { offset?: number; room_id?: string | null; source_kind?: components["schemas"]["SourceKind"] | null; + /** @description Filter transcripts created on or after this datetime (ISO 8601 with timezone) */ + from?: string | null; + /** @description Filter transcripts created on or before this datetime (ISO 8601 with timezone) */ + to?: string | null; }; header?: never; path?: never; @@ -3004,7 +3023,7 @@ export interface operations { }; }; }; - v1_list_tokens: { + v1_list_api_keys: { parameters: { query?: never; header?: 
never; @@ -3019,12 +3038,12 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["TokenResponse"][]; + "application/json": components["schemas"]["ApiKeyResponse"][]; }; }; }; }; - v1_create_token: { + v1_create_api_key: { parameters: { query?: never; header?: never; @@ -3033,7 +3052,7 @@ export interface operations { }; requestBody: { content: { - "application/json": components["schemas"]["CreateTokenRequest"]; + "application/json": components["schemas"]["CreateApiKeyRequest"]; }; }; responses: { @@ -3043,7 +3062,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["CreateTokenResponse"]; + "application/json": components["schemas"]["CreateApiKeyResponse"]; }; }; /** @description Validation Error */ @@ -3057,12 +3076,12 @@ export interface operations { }; }; }; - v1_delete_token: { + v1_delete_api_key: { parameters: { query?: never; header?: never; path: { - token_id: string; + key_id: string; }; cookie?: never; };