From 009590c080bd5a0d7e0a17576a40b1583862abed Mon Sep 17 00:00:00 2001 From: Igor Loskutov Date: Wed, 20 Aug 2025 20:56:45 -0400 Subject: [PATCH] feat: search frontend (#551) * feat: better highlight * feat(search): add long_summary to search vector for improved search results - Update search vector to include long_summary with weight B (between title A and webvtt C) - Modify SearchController to fetch long_summary and prioritize its snippets - Generate snippets from long_summary first (max 2), then from webvtt for remaining slots - Add comprehensive tests for long_summary search functionality - Create migration to update search_vector_en column in PostgreSQL This improves search quality by including summarized content which often contains key topics and themes that may not be explicitly mentioned in the transcript. * fix: address code review feedback for search enhancements - Fix test file inconsistencies by removing references to non-existent model fields - Comment out tests for unimplemented features (room_ids, status filters, date ranges) - Update tests to only use currently available fields (room_id singular, no room_name/processing_status) - Mark future functionality tests with @pytest.mark.skip - Make snippet counts configurable - Add LONG_SUMMARY_MAX_SNIPPETS constant (default: 2) - Replace hardcoded value with configurable constant - Improve error handling consistency in WebVTT parsing - Use different log levels for different error types (debug for malformed, warning for decode, error for unexpected) - Add catch-all exception handler for unexpected errors - Include stack trace for critical errors All existing tests pass with these changes. * fix: correct datetime test to include required duration field * feat: better highlight * feat: search room names * feat: acknowledge deleted room * feat: search filters fix and rank removal * chore: minor refactoring * feat: better matches frontend * chore: self-review (vibe) * chore: self-review WIP * chore: self-review WIP * chore: self-review WIP * chore: self-review WIP * chore: self-review WIP * chore: self-review WIP * chore: self-review WIP * remove swc (vibe) * search url query sync (vibe) * search url query sync (vibe) * better casts and cap while * PR review + simplify frontend hook * pr: remove search db timeouts * cleanup tests * tests cleanup * frontend cleanup * index declarations * refactor frontend (self-review) * fix search pagination * clear "x" for search input * pagination max pages fix * chore: cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * lockfile * pr review --- server/.gitignore | 3 +- ...faa16_add_long_summary_to_search_vector.py | 64 +++ ...bd09963_add_search_optimization_indexes.py | 41 ++ server/reflector/db/search.py | 417 ++++++++++++---- server/reflector/db/transcripts.py | 5 +- server/reflector/views/transcripts.py | 8 +- server/tests/test_search.py | 398 +++++++++++++-- server/tests/test_search_long_summary.py | 164 ++++++ server/tests/test_search_snippets.py | 436 ++++++++++++++-- .../(app)/browse/_components/Pagination.tsx | 55 ++- .../(app)/browse/_components/SearchBar.tsx | 34 -- .../_components/TranscriptActionsMenu.tsx | 14 +- .../browse/_components/TranscriptCards.tsx | 336 +++++++++++-- .../browse/_components/TranscriptTable.tsx | 99 ---- www/app/(app)/browse/page.tsx | 466 +++++++++++++----- www/app/(app)/layout.tsx | 3 +- www/app/(app)/rooms/page.tsx | 4 +- www/app/(app)/rooms/useRoomList.tsx | 3 +- www/app/(app)/transcripts/mockTopics.json | 32 -- 
www/app/(app)/transcripts/recorder.tsx | 3 +- .../(app)/transcripts/useSearchTranscripts.ts | 123 +++++ .../(app)/transcripts/useTranscriptList.ts | 59 --- www/app/api/schemas.gen.ts | 24 +- www/app/api/services.gen.ts | 2 + www/app/api/types.gen.ts | 7 + www/app/api/urls.ts | 2 + www/app/lib/textHighlight.tsx | 62 +++ www/app/lib/utils.ts | 7 + www/app/page.tsx | 3 +- www/app/providers.tsx | 15 +- www/package.json | 1 + www/pnpm-lock.yaml | 39 ++ 32 files changed, 2311 insertions(+), 618 deletions(-) create mode 100644 server/migrations/versions/0ab2d7ffaa16_add_long_summary_to_search_vector.py create mode 100644 server/migrations/versions/b1c33bd09963_add_search_optimization_indexes.py create mode 100644 server/tests/test_search_long_summary.py delete mode 100644 www/app/(app)/browse/_components/SearchBar.tsx delete mode 100644 www/app/(app)/browse/_components/TranscriptTable.tsx delete mode 100644 www/app/(app)/transcripts/mockTopics.json create mode 100644 www/app/(app)/transcripts/useSearchTranscripts.ts delete mode 100644 www/app/(app)/transcripts/useTranscriptList.ts create mode 100644 www/app/api/urls.ts create mode 100644 www/app/lib/textHighlight.tsx diff --git a/server/.gitignore b/server/.gitignore index 8042ce84..4057de5e 100644 --- a/server/.gitignore +++ b/server/.gitignore @@ -176,7 +176,8 @@ artefacts/ audio_*.wav # ignore local database -reflector.sqlite3 +*.sqlite3 +*.db data/ dump.rdb diff --git a/server/migrations/versions/0ab2d7ffaa16_add_long_summary_to_search_vector.py b/server/migrations/versions/0ab2d7ffaa16_add_long_summary_to_search_vector.py new file mode 100644 index 00000000..990f5932 --- /dev/null +++ b/server/migrations/versions/0ab2d7ffaa16_add_long_summary_to_search_vector.py @@ -0,0 +1,64 @@ +"""add_long_summary_to_search_vector + +Revision ID: 0ab2d7ffaa16 +Revises: b1c33bd09963 +Create Date: 2025-08-15 13:27:52.680211 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
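+# Note: PostgreSQL provides no portable way to change the expression of a
+# stored GENERATED ALWAYS column in place, so the upgrade below drops and
+# recreates search_vector_en (and its GIN index) with the new weighted
+# expression instead of altering it.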
+revision: str = "0ab2d7ffaa16" +down_revision: Union[str, None] = "b1c33bd09963" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Drop the existing search vector column and index + op.drop_index("idx_transcript_search_vector_en", table_name="transcript") + op.drop_column("transcript", "search_vector_en") + + # Recreate the search vector column with long_summary included + op.execute(""" + ALTER TABLE transcript ADD COLUMN search_vector_en tsvector + GENERATED ALWAYS AS ( + setweight(to_tsvector('english', coalesce(title, '')), 'A') || + setweight(to_tsvector('english', coalesce(long_summary, '')), 'B') || + setweight(to_tsvector('english', coalesce(webvtt, '')), 'C') + ) STORED + """) + + # Recreate the GIN index for the search vector + op.create_index( + "idx_transcript_search_vector_en", + "transcript", + ["search_vector_en"], + postgresql_using="gin", + ) + + +def downgrade() -> None: + # Drop the updated search vector column and index + op.drop_index("idx_transcript_search_vector_en", table_name="transcript") + op.drop_column("transcript", "search_vector_en") + + # Recreate the original search vector column without long_summary + op.execute(""" + ALTER TABLE transcript ADD COLUMN search_vector_en tsvector + GENERATED ALWAYS AS ( + setweight(to_tsvector('english', coalesce(title, '')), 'A') || + setweight(to_tsvector('english', coalesce(webvtt, '')), 'B') + ) STORED + """) + + # Recreate the GIN index for the search vector + op.create_index( + "idx_transcript_search_vector_en", + "transcript", + ["search_vector_en"], + postgresql_using="gin", + ) diff --git a/server/migrations/versions/b1c33bd09963_add_search_optimization_indexes.py b/server/migrations/versions/b1c33bd09963_add_search_optimization_indexes.py new file mode 100644 index 00000000..9d298309 --- /dev/null +++ b/server/migrations/versions/b1c33bd09963_add_search_optimization_indexes.py @@ -0,0 +1,41 @@ +"""add_search_optimization_indexes + +Revision ID: b1c33bd09963 +Revises: 9f5c78d352d6 +Create Date: 2025-08-14 17:26:02.117408 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
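For intuition about the weighted vector created above: a term matching in title (weight A) outranks the same term in long_summary (B), which in turn outranks webvtt (C), and ts_rank's normalization flag 32 maps scores to rank/(rank+1), keeping them in [0, 1). A minimal SQLAlchemy sketch of the query shape the search code builds against this column — illustrative only, not part of this patch:

import sqlalchemy

def build_search_query(q: str):
    # websearch_to_tsquery understands user-style syntax: quoted phrases,
    # OR, and -exclusions, matching the behaviour exercised in the tests.
    tsq = sqlalchemy.func.websearch_to_tsquery("english", q)
    # normalization flag 32 rescales the rank to rank/(rank+1), i.e. into [0, 1)
    rank = sqlalchemy.func.ts_rank(
        sqlalchemy.column("search_vector_en"), tsq, 32
    ).label("rank")
    return (
        sqlalchemy.select(sqlalchemy.column("id"), sqlalchemy.column("title"), rank)
        .select_from(sqlalchemy.table("transcript"))
        .where(sqlalchemy.column("search_vector_en").op("@@")(tsq))
        .order_by(sqlalchemy.desc(rank))
    )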
+revision: str = "b1c33bd09963" +down_revision: Union[str, None] = "9f5c78d352d6" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add indexes for actual search filtering patterns used in frontend + # Based on /browse page filters: room_id and source_kind + + # Index for room_id + created_at (for room-specific searches with date ordering) + op.create_index( + "idx_transcript_room_id_created_at", + "transcript", + ["room_id", "created_at"], + if_not_exists=True, + ) + + # Index for source_kind alone (actively used filter in frontend) + op.create_index( + "idx_transcript_source_kind", "transcript", ["source_kind"], if_not_exists=True + ) + + +def downgrade() -> None: + # Remove the indexes in reverse order + op.drop_index("idx_transcript_source_kind", "transcript", if_exists=True) + op.drop_index("idx_transcript_room_id_created_at", "transcript", if_exists=True) diff --git a/server/reflector/db/search.py b/server/reflector/db/search.py index 93707c75..553501c6 100644 --- a/server/reflector/db/search.py +++ b/server/reflector/db/search.py @@ -1,24 +1,37 @@ """Search functionality for transcripts and other entities.""" +import itertools +from dataclasses import dataclass from datetime import datetime from io import StringIO -from typing import Annotated, Any, Dict +from typing import Annotated, Any, Dict, Iterator import sqlalchemy import webvtt -from pydantic import BaseModel, Field, constr, field_serializer +from fastapi import HTTPException +from pydantic import ( + BaseModel, + Field, + NonNegativeFloat, + NonNegativeInt, + ValidationError, + constr, + field_serializer, +) from reflector.db import get_database +from reflector.db.rooms import rooms from reflector.db.transcripts import SourceKind, transcripts from reflector.db.utils import is_postgresql from reflector.logger import logger DEFAULT_SEARCH_LIMIT = 20 SNIPPET_CONTEXT_LENGTH = 50 # Characters before/after match to include -DEFAULT_SNIPPET_MAX_LENGTH = 150 -DEFAULT_MAX_SNIPPETS = 3 +DEFAULT_SNIPPET_MAX_LENGTH = NonNegativeInt(150) +DEFAULT_MAX_SNIPPETS = NonNegativeInt(3) +LONG_SUMMARY_MAX_SNIPPETS = 2 -SearchQueryBase = constr(min_length=1, strip_whitespace=True) +SearchQueryBase = constr(min_length=0, strip_whitespace=True) SearchLimitBase = Annotated[int, Field(ge=1, le=100)] SearchOffsetBase = Annotated[int, Field(ge=0)] SearchTotalBase = Annotated[int, Field(ge=0)] @@ -32,6 +45,82 @@ SearchTotal = Annotated[ SearchTotalBase, Field(description="Total number of search results") ] +WEBVTT_SPEC_HEADER = "WEBVTT\n\n" + +WebVTTContent = Annotated[ + str, + Field(min_length=len(WEBVTT_SPEC_HEADER), description="WebVTT content"), +] + + +class WebVTTProcessor: + """Stateless processor for WebVTT content operations.""" + + @staticmethod + def parse(raw_content: str) -> WebVTTContent: + """Parse WebVTT content and return it as a string.""" + if not raw_content.startswith(WEBVTT_SPEC_HEADER): + raise ValueError(f"Invalid WebVTT content, no header {WEBVTT_SPEC_HEADER}") + return raw_content + + @staticmethod + def extract_text(webvtt_content: WebVTTContent) -> str: + """Extract plain text from WebVTT content using webvtt library.""" + try: + buffer = StringIO(webvtt_content) + vtt = webvtt.read_buffer(buffer) + return " ".join(caption.text for caption in vtt if caption.text) + except webvtt.errors.MalformedFileError as e: + logger.warning(f"Malformed WebVTT content: {e}") + return "" + except (UnicodeDecodeError, ValueError) as e: + 
logger.warning(f"Failed to decode WebVTT content: {e}") + return "" + except AttributeError as e: + logger.error( + f"WebVTT parsing error - unexpected format: {e}", exc_info=True + ) + return "" + except Exception as e: + logger.error(f"Unexpected error parsing WebVTT: {e}", exc_info=True) + return "" + + @staticmethod + def generate_snippets( + webvtt_content: WebVTTContent, + query: str, + max_snippets: NonNegativeInt = DEFAULT_MAX_SNIPPETS, + ) -> list[str]: + """Generate snippets from WebVTT content.""" + return SnippetGenerator.generate( + WebVTTProcessor.extract_text(webvtt_content), + query, + max_snippets=max_snippets, + ) + + +@dataclass(frozen=True) +class SnippetCandidate: + """Represents a candidate snippet with its position.""" + + _text: str + start: NonNegativeInt + _original_text_length: int + + @property + def end(self) -> NonNegativeInt: + """Calculate end position from start and raw text length.""" + return self.start + len(self._text) + + def text(self) -> str: + """Get display text with ellipses added if needed.""" + result = self._text.strip() + if self.start > 0: + result = "..." + result + if self.end < self._original_text_length: + result = result + "..." + return result + class SearchParameters(BaseModel): """Validated search parameters for full-text search.""" @@ -41,6 +130,7 @@ class SearchParameters(BaseModel): offset: SearchOffset = 0 user_id: str | None = None room_id: str | None = None + source_kind: SourceKind | None = None class SearchResultDB(BaseModel): @@ -64,13 +154,18 @@ class SearchResult(BaseModel): title: str | None = None user_id: str | None = None room_id: str | None = None + room_name: str | None = None + source_kind: SourceKind created_at: datetime status: str = Field(..., min_length=1) rank: float = Field(..., ge=0, le=1) - duration: float | None = Field(..., ge=0, description="Duration in seconds") + duration: NonNegativeFloat | None = Field(..., description="Duration in seconds") search_snippets: list[str] = Field( description="Text snippets around search matches" ) + total_match_count: NonNegativeInt = Field( + default=0, description="Total number of matches found in the transcript" + ) @field_serializer("created_at", when_used="json") def serialize_datetime(self, dt: datetime) -> str: @@ -79,84 +174,153 @@ class SearchResult(BaseModel): return dt.isoformat() -class SearchController: - """Controller for search operations across different entities.""" +class SnippetGenerator: + """Stateless generator for text snippets and match operations.""" @staticmethod - def _extract_webvtt_text(webvtt_content: str) -> str: - """Extract plain text from WebVTT content using webvtt library.""" - if not webvtt_content: - return "" + def find_all_matches(text: str, query: str) -> Iterator[int]: + """Generate all match positions for a query in text.""" + if not text: + logger.warning("Empty text for search query in find_all_matches") + return + if not query: + logger.warning("Empty query for search text in find_all_matches") + return - try: - buffer = StringIO(webvtt_content) - vtt = webvtt.read_buffer(buffer) - return " ".join(caption.text for caption in vtt if caption.text) - except (webvtt.errors.MalformedFileError, UnicodeDecodeError, ValueError) as e: - logger.warning(f"Failed to parse WebVTT content: {e}", exc_info=e) - return "" - except AttributeError as e: - logger.warning(f"WebVTT parsing error - unexpected format: {e}", exc_info=e) - return "" + text_lower = text.lower() + query_lower = query.lower() + start = 0 + prev_start = start + while (pos 
:= text_lower.find(query_lower, start)) != -1:
+            yield pos
+            start = pos + len(query_lower)
+            if start <= prev_start:
+                raise ValueError("panic! find_all_matches is not incremental")
+            prev_start = start

     @staticmethod
-    def _generate_snippets(
+    def count_matches(text: str, query: str) -> NonNegativeInt:
+        """Count total number of matches for a query in text."""
+        ZERO = NonNegativeInt(0)
+        if not text:
+            logger.warning("Empty text for search query in count_matches")
+            return ZERO
+        if not query:
+            logger.warning("Empty query for search text in count_matches")
+            return ZERO
+        return NonNegativeInt(
+            sum(1 for _ in SnippetGenerator.find_all_matches(text, query))
+        )
+
+    @staticmethod
+    def create_snippet(
+        text: str, match_pos: int, max_length: int = DEFAULT_SNIPPET_MAX_LENGTH
+    ) -> SnippetCandidate:
+        """Create a snippet from a match position."""
+        snippet_start = NonNegativeInt(max(0, match_pos - SNIPPET_CONTEXT_LENGTH))
+        snippet_end = min(len(text), match_pos + max_length - SNIPPET_CONTEXT_LENGTH)
+
+        snippet_text = text[snippet_start:snippet_end]
+
+        return SnippetCandidate(
+            _text=snippet_text, start=snippet_start, _original_text_length=len(text)
+        )
+
+    @staticmethod
+    def filter_non_overlapping(
+        candidates: Iterator[SnippetCandidate],
+    ) -> Iterator[str]:
+        """Filter out overlapping snippets and return only display text."""
+        last_end = 0
+        for candidate in candidates:
+            display_text = candidate.text()
+            # Overlapping candidates are simply skipped. This is deliberately
+            # simplistic, and users are unlikely to care: they already have
+            # their search results just fine.
+            if candidate.start >= last_end and display_text:
+                yield display_text
+                last_end = candidate.end
+
+    @staticmethod
+    def generate(
         text: str,
-        q: SearchQuery,
-        max_length: int = DEFAULT_SNIPPET_MAX_LENGTH,
-        max_snippets: int = DEFAULT_MAX_SNIPPETS,
+        query: str,
+        max_length: NonNegativeInt = DEFAULT_SNIPPET_MAX_LENGTH,
+        max_snippets: NonNegativeInt = DEFAULT_MAX_SNIPPETS,
     ) -> list[str]:
-        """Generate multiple snippets around all occurrences of search term."""
-        if not text or not q:
+        """Generate snippets from text."""
+        if not text or not query:
+            logger.warning("Empty text or query for generate_snippets")
             return []

-        snippets = []
-        lower_text = text.lower()
-        search_lower = q.lower()
+        candidates = (
+            SnippetGenerator.create_snippet(text, pos, max_length)
+            for pos in SnippetGenerator.find_all_matches(text, query)
+        )
+        filtered = SnippetGenerator.filter_non_overlapping(candidates)
+        snippets = list(itertools.islice(filtered, max_snippets))

-        last_snippet_end = 0
-        start_pos = 0
-
-        while len(snippets) < max_snippets:
-            match_pos = lower_text.find(search_lower, start_pos)
-
-            if match_pos == -1:
-                if not snippets and search_lower.split():
-                    first_word = search_lower.split()[0]
-                    match_pos = lower_text.find(first_word, start_pos)
-                    if match_pos == -1:
-                        break
-                else:
-                    break
-
-            snippet_start = max(0, match_pos - SNIPPET_CONTEXT_LENGTH)
-            snippet_end = min(
-                len(text), match_pos + max_length - SNIPPET_CONTEXT_LENGTH
-            )
-
-            if snippet_start < last_snippet_end:
-                start_pos = match_pos + len(search_lower)
-                continue
-
-            snippet = text[snippet_start:snippet_end]
-
-            if snippet_start > 0:
-                snippet = "..." + snippet
-            if snippet_end < len(text):
-                snippet = snippet + "..."
- - snippet = snippet.strip() - - if snippet: - snippets.append(snippet) - last_snippet_end = snippet_end - - start_pos = match_pos + len(search_lower) - if start_pos >= len(text): - break + # Fallback to first word search if no full matches + # it's another assumption: proper snippet logic generation is quite complicated and tied to db logic, so simplification is used here + if not snippets and " " in query: + first_word = query.split()[0] + return SnippetGenerator.generate(text, first_word, max_length, max_snippets) return snippets + @staticmethod + def from_summary( + summary: str, + query: str, + max_snippets: NonNegativeInt = LONG_SUMMARY_MAX_SNIPPETS, + ) -> list[str]: + """Generate snippets from summary text.""" + return SnippetGenerator.generate(summary, query, max_snippets=max_snippets) + + @staticmethod + def combine_sources( + summary: str | None, + webvtt: WebVTTContent | None, + query: str, + max_total: NonNegativeInt = DEFAULT_MAX_SNIPPETS, + ) -> tuple[list[str], NonNegativeInt]: + """Combine snippets from multiple sources and return total match count. + + Returns (snippets, total_match_count) tuple. + + snippets can be empty for real in case of e.g. title match + """ + webvtt_matches = 0 + summary_matches = 0 + + if webvtt: + webvtt_text = WebVTTProcessor.extract_text(webvtt) + webvtt_matches = SnippetGenerator.count_matches(webvtt_text, query) + + if summary: + summary_matches = SnippetGenerator.count_matches(summary, query) + + total_matches = NonNegativeInt(webvtt_matches + summary_matches) + + summary_snippets = ( + SnippetGenerator.from_summary(summary, query) if summary else [] + ) + + if len(summary_snippets) >= max_total: + return summary_snippets[:max_total], total_matches + + remaining = max_total - len(summary_snippets) + webvtt_snippets = ( + WebVTTProcessor.generate_snippets(webvtt, query, remaining) + if webvtt + else [] + ) + + return summary_snippets + webvtt_snippets, total_matches + + +class SearchController: + """Controller for search operations across different entities.""" + @classmethod async def search_transcripts( cls, params: SearchParameters @@ -172,39 +336,64 @@ class SearchController: ) return [], 0 - search_query = sqlalchemy.func.websearch_to_tsquery( - "english", params.query_text + base_columns = [ + transcripts.c.id, + transcripts.c.title, + transcripts.c.created_at, + transcripts.c.duration, + transcripts.c.status, + transcripts.c.user_id, + transcripts.c.room_id, + transcripts.c.source_kind, + transcripts.c.webvtt, + transcripts.c.long_summary, + sqlalchemy.case( + ( + transcripts.c.room_id.isnot(None) & rooms.c.id.is_(None), + "Deleted Room", + ), + else_=rooms.c.name, + ).label("room_name"), + ] + + if params.query_text: + search_query = sqlalchemy.func.websearch_to_tsquery( + "english", params.query_text + ) + rank_column = sqlalchemy.func.ts_rank( + transcripts.c.search_vector_en, + search_query, + 32, # normalization flag: rank/(rank+1) for 0-1 range + ).label("rank") + else: + rank_column = sqlalchemy.cast(1.0, sqlalchemy.Float).label("rank") + + columns = base_columns + [rank_column] + base_query = sqlalchemy.select(columns).select_from( + transcripts.join(rooms, transcripts.c.room_id == rooms.c.id, isouter=True) ) - base_query = sqlalchemy.select( - [ - transcripts.c.id, - transcripts.c.title, - transcripts.c.created_at, - transcripts.c.duration, - transcripts.c.status, - transcripts.c.user_id, - transcripts.c.room_id, - transcripts.c.source_kind, - transcripts.c.webvtt, - sqlalchemy.func.ts_rank( - transcripts.c.search_vector_en, 
- search_query, - 32, # normalization flag: rank/(rank+1) for 0-1 range - ).label("rank"), - ] - ).where(transcripts.c.search_vector_en.op("@@")(search_query)) + if params.query_text: + base_query = base_query.where( + transcripts.c.search_vector_en.op("@@")(search_query) + ) if params.user_id: base_query = base_query.where(transcripts.c.user_id == params.user_id) if params.room_id: base_query = base_query.where(transcripts.c.room_id == params.room_id) + if params.source_kind: + base_query = base_query.where( + transcripts.c.source_kind == params.source_kind + ) + + if params.query_text: + order_by = sqlalchemy.desc(sqlalchemy.text("rank")) + else: + order_by = sqlalchemy.desc(transcripts.c.created_at) + + query = base_query.order_by(order_by).limit(params.limit).offset(params.offset) - query = ( - base_query.order_by(sqlalchemy.desc(sqlalchemy.text("rank"))) - .limit(params.limit) - .offset(params.offset) - ) rs = await get_database().fetch_all(query) count_query = sqlalchemy.select([sqlalchemy.func.count()]).select_from( @@ -214,18 +403,40 @@ class SearchController: def _process_result(r) -> SearchResult: r_dict: Dict[str, Any] = dict(r) - webvtt: str | None = r_dict.pop("webvtt", None) + webvtt_raw: str | None = r_dict.pop("webvtt", None) + if webvtt_raw: + webvtt = WebVTTProcessor.parse(webvtt_raw) + else: + webvtt = None + long_summary: str | None = r_dict.pop("long_summary", None) + room_name: str | None = r_dict.pop("room_name", None) db_result = SearchResultDB.model_validate(r_dict) - snippets = [] - if webvtt: - plain_text = cls._extract_webvtt_text(webvtt) - snippets = cls._generate_snippets(plain_text, params.query_text) + snippets, total_match_count = SnippetGenerator.combine_sources( + long_summary, webvtt, params.query_text, DEFAULT_MAX_SNIPPETS + ) - return SearchResult(**db_result.model_dump(), search_snippets=snippets) + return SearchResult( + **db_result.model_dump(), + room_name=room_name, + search_snippets=snippets, + total_match_count=total_match_count, + ) + + try: + results = [_process_result(r) for r in rs] + except ValidationError as e: + logger.error(f"Invalid search result data: {e}", exc_info=True) + raise HTTPException( + status_code=500, detail="Internal search result data consistency error" + ) + except Exception as e: + logger.error(f"Error processing search results: {e}", exc_info=True) + raise - results = [_process_result(r) for r in rs] return results, total search_controller = SearchController() +webvtt_processor = WebVTTProcessor() +snippet_generator = SnippetGenerator() diff --git a/server/reflector/db/transcripts.py b/server/reflector/db/transcripts.py index 7d3d6d1c..9dbcba9f 100644 --- a/server/reflector/db/transcripts.py +++ b/server/reflector/db/transcripts.py @@ -88,6 +88,8 @@ transcripts = sqlalchemy.Table( sqlalchemy.Index("idx_transcript_created_at", "created_at"), sqlalchemy.Index("idx_transcript_user_id_recording_id", "user_id", "recording_id"), sqlalchemy.Index("idx_transcript_room_id", "room_id"), + sqlalchemy.Index("idx_transcript_source_kind", "source_kind"), + sqlalchemy.Index("idx_transcript_room_id_created_at", "room_id", "created_at"), ) # Add PostgreSQL-specific full-text search column @@ -99,7 +101,8 @@ if is_postgresql(): TSVECTOR, sqlalchemy.Computed( "setweight(to_tsvector('english', coalesce(title, '')), 'A') || " - "setweight(to_tsvector('english', coalesce(webvtt, '')), 'B')", + "setweight(to_tsvector('english', coalesce(long_summary, '')), 'B') || " + "setweight(to_tsvector('english', coalesce(webvtt, '')), 'C')", 
persisted=True, ), ) diff --git a/server/reflector/views/transcripts.py b/server/reflector/views/transcripts.py index 49545e84..594dd711 100644 --- a/server/reflector/views/transcripts.py +++ b/server/reflector/views/transcripts.py @@ -160,6 +160,7 @@ async def transcripts_search( limit: SearchLimitParam = DEFAULT_SEARCH_LIMIT, offset: SearchOffsetParam = 0, room_id: Optional[str] = None, + source_kind: Optional[SourceKind] = None, user: Annotated[ Optional[auth.UserInfo], Depends(auth.current_user_optional) ] = None, @@ -173,7 +174,12 @@ async def transcripts_search( user_id = user["sub"] if user else None search_params = SearchParameters( - query_text=q, limit=limit, offset=offset, user_id=user_id, room_id=room_id + query_text=q, + limit=limit, + offset=offset, + user_id=user_id, + room_id=room_id, + source_kind=source_kind, ) results, total = await search_controller.search_transcripts(search_params) diff --git a/server/tests/test_search.py b/server/tests/test_search.py index c59bf631..5d2a22b0 100644 --- a/server/tests/test_search.py +++ b/server/tests/test_search.py @@ -2,13 +2,18 @@ import json from datetime import datetime, timezone +from unittest.mock import AsyncMock, patch import pytest -from pydantic import ValidationError from reflector.db import get_database -from reflector.db.search import SearchParameters, search_controller -from reflector.db.transcripts import transcripts +from reflector.db.search import ( + SearchController, + SearchParameters, + SearchResult, + search_controller, +) +from reflector.db.transcripts import SourceKind, transcripts @pytest.mark.asyncio @@ -18,39 +23,135 @@ async def test_search_postgresql_only(): assert results == [] assert total == 0 - try: - SearchParameters(query_text="") - assert False, "Should have raised validation error" - except ValidationError: - pass # Expected - - # Test that whitespace query raises validation error - try: - SearchParameters(query_text=" ") - assert False, "Should have raised validation error" - except ValidationError: - pass # Expected + params_empty = SearchParameters(query_text="") + results_empty, total_empty = await search_controller.search_transcripts( + params_empty + ) + assert isinstance(results_empty, list) + assert isinstance(total_empty, int) @pytest.mark.asyncio -async def test_search_input_validation(): - try: - SearchParameters(query_text="") - assert False, "Should have raised ValidationError" - except ValidationError: - pass # Expected +async def test_search_with_empty_query(): + """Test that empty query returns all transcripts.""" + params = SearchParameters(query_text="") + results, total = await search_controller.search_transcripts(params) + + assert isinstance(results, list) + assert isinstance(total, int) + if len(results) > 1: + for i in range(len(results) - 1): + assert results[i].created_at >= results[i + 1].created_at + + +@pytest.mark.asyncio +async def test_empty_transcript_title_only_match(): + """Test that transcripts with title-only matches return empty snippets.""" + test_id = "test-empty-9b3f2a8d" - # Test that whitespace query raises validation error try: - SearchParameters(query_text=" \t\n ") - assert False, "Should have raised ValidationError" - except ValidationError: - pass # Expected + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + + test_data = { + "id": test_id, + "name": "Empty Transcript", + "title": "Empty Meeting", + "status": "completed", + "locked": False, + "duration": 0.0, + "created_at": datetime.now(timezone.utc), + 
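+            # Note: title is the only searchable field set here; with webvtt and
+            # long_summary both None, a hit on "empty" can only come from the
+            # title, so the result is expected to carry no snippets and a
+            # total_match_count of 0.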
"short_summary": None, + "long_summary": None, + "topics": json.dumps([]), + "events": json.dumps([]), + "participants": json.dumps([]), + "source_language": "en", + "target_language": "en", + "reviewed": False, + "audio_location": "local", + "share_mode": "private", + "source_kind": "room", + "webvtt": None, + } + + await get_database().execute(transcripts.insert().values(**test_data)) + + params = SearchParameters(query_text="empty") + results, total = await search_controller.search_transcripts(params) + + assert total >= 1 + found = next((r for r in results if r.id == test_id), None) + assert found is not None, "Should find transcript by title match" + assert found.search_snippets == [] + assert found.total_match_count == 0 + + finally: + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + await get_database().disconnect() + + +@pytest.mark.asyncio +async def test_search_with_long_summary(): + """Test that long_summary content is searchable.""" + test_id = "test-long-summary-8a9f3c2d" + + try: + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + + test_data = { + "id": test_id, + "name": "Test Long Summary", + "title": "Regular Meeting", + "status": "completed", + "locked": False, + "duration": 1800.0, + "created_at": datetime.now(timezone.utc), + "short_summary": "Brief overview", + "long_summary": "Detailed discussion about quantum computing applications and blockchain technology integration", + "topics": json.dumps([]), + "events": json.dumps([]), + "participants": json.dumps([]), + "source_language": "en", + "target_language": "en", + "reviewed": False, + "audio_location": "local", + "share_mode": "private", + "source_kind": "room", + "webvtt": """WEBVTT + +00:00:00.000 --> 00:00:10.000 +Basic meeting content without special keywords.""", + } + + await get_database().execute(transcripts.insert().values(**test_data)) + + params = SearchParameters(query_text="quantum computing") + results, total = await search_controller.search_transcripts(params) + + assert total >= 1 + found = any(r.id == test_id for r in results) + assert found, "Should find transcript by long_summary content" + + test_result = next((r for r in results if r.id == test_id), None) + assert test_result + assert len(test_result.search_snippets) > 0 + assert "quantum computing" in test_result.search_snippets[0].lower() + + finally: + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + await get_database().disconnect() @pytest.mark.asyncio async def test_postgresql_search_with_data(): - # collision is improbable test_id = "test-search-e2e-7f3a9b2c" try: @@ -94,28 +195,24 @@ We need to implement PostgreSQL tsvector for better performance.""", await get_database().execute(transcripts.insert().values(**test_data)) - # Test 1: Search for a word in title params = SearchParameters(query_text="planning") results, total = await search_controller.search_transcripts(params) assert total >= 1 found = any(r.id == test_id for r in results) assert found, "Should find test transcript by title word" - # Test 2: Search for a word in webvtt content params = SearchParameters(query_text="tsvector") results, total = await search_controller.search_transcripts(params) assert total >= 1 found = any(r.id == test_id for r in results) assert found, "Should find test transcript by webvtt content" - # Test 3: Search with multiple words params = SearchParameters(query_text="engineering planning") results, total = await 
search_controller.search_transcripts(params) assert total >= 1 found = any(r.id == test_id for r in results) assert found, "Should find test transcript by multiple words" - # Test 4: Verify SearchResult structure test_result = next((r for r in results if r.id == test_id), None) if test_result: assert test_result.title == "Engineering Planning Meeting Q4 2024" @@ -123,14 +220,12 @@ We need to implement PostgreSQL tsvector for better performance.""", assert test_result.duration == 1800.0 assert 0 <= test_result.rank <= 1, "Rank should be normalized to 0-1" - # Test 5: Search with OR operator params = SearchParameters(query_text="tsvector OR nosuchword") results, total = await search_controller.search_transcripts(params) assert total >= 1 found = any(r.id == test_id for r in results) assert found, "Should find test transcript with OR query" - # Test 6: Quoted phrase search params = SearchParameters(query_text='"full-text search"') results, total = await search_controller.search_transcripts(params) assert total >= 1 @@ -142,3 +237,240 @@ We need to implement PostgreSQL tsvector for better performance.""", transcripts.delete().where(transcripts.c.id == test_id) ) await get_database().disconnect() + + +@pytest.fixture +def sample_search_params(): + """Create sample search parameters for testing.""" + return SearchParameters( + query_text="test query", + limit=20, + offset=0, + user_id="test-user", + room_id="room1", + ) + + +@pytest.fixture +def mock_db_result(): + """Create a mock database result.""" + return { + "id": "test-transcript-id", + "title": "Test Transcript", + "created_at": datetime(2024, 6, 15, tzinfo=timezone.utc), + "duration": 3600.0, + "status": "completed", + "user_id": "test-user", + "room_id": "room1", + "source_kind": SourceKind.LIVE, + "webvtt": "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\nThis is a test transcript", + "rank": 0.95, + } + + +class TestSearchParameters: + """Test SearchParameters model validation and functionality.""" + + def test_search_parameters_with_available_filters(self): + """Test creating SearchParameters with currently available filter options.""" + params = SearchParameters( + query_text="search term", + limit=50, + offset=10, + user_id="user123", + room_id="room1", + ) + + assert params.query_text == "search term" + assert params.limit == 50 + assert params.offset == 10 + assert params.user_id == "user123" + assert params.room_id == "room1" + + def test_search_parameters_defaults(self): + """Test SearchParameters with default values.""" + params = SearchParameters(query_text="test") + + assert params.query_text == "test" + assert params.limit == 20 + assert params.offset == 0 + assert params.user_id is None + assert params.room_id is None + + +class TestSearchControllerFilters: + """Test SearchController functionality with various filters.""" + + @pytest.mark.asyncio + async def test_search_with_source_kind_filter(self): + """Test search filtering by source_kind.""" + controller = SearchController() + with ( + patch("reflector.db.search.is_postgresql", return_value=True), + patch("reflector.db.search.get_database") as mock_db, + ): + mock_db.return_value.fetch_all = AsyncMock(return_value=[]) + mock_db.return_value.fetch_val = AsyncMock(return_value=0) + + params = SearchParameters(query_text="test", source_kind=SourceKind.LIVE) + + results, total = await controller.search_transcripts(params) + + assert results == [] + assert total == 0 + + mock_db.return_value.fetch_all.assert_called_once() + + @pytest.mark.asyncio + async def 
test_search_with_single_room_id(self): + """Test search filtering by single room ID (currently supported).""" + controller = SearchController() + with ( + patch("reflector.db.search.is_postgresql", return_value=True), + patch("reflector.db.search.get_database") as mock_db, + ): + mock_db.return_value.fetch_all = AsyncMock(return_value=[]) + mock_db.return_value.fetch_val = AsyncMock(return_value=0) + + params = SearchParameters( + query_text="test", + room_id="room1", + ) + + results, total = await controller.search_transcripts(params) + + assert results == [] + assert total == 0 + mock_db.return_value.fetch_all.assert_called_once() + + @pytest.mark.asyncio + async def test_search_result_includes_available_fields(self, mock_db_result): + """Test that search results include available fields like source_kind.""" + controller = SearchController() + with ( + patch("reflector.db.search.is_postgresql", return_value=True), + patch("reflector.db.search.get_database") as mock_db, + ): + + class MockRow: + def __init__(self, data): + self._data = data + self._mapping = data + + def __iter__(self): + return iter(self._data.items()) + + def __getitem__(self, key): + return self._data[key] + + def keys(self): + return self._data.keys() + + mock_row = MockRow(mock_db_result) + + mock_db.return_value.fetch_all = AsyncMock(return_value=[mock_row]) + mock_db.return_value.fetch_val = AsyncMock(return_value=1) + + params = SearchParameters(query_text="test") + + results, total = await controller.search_transcripts(params) + + assert total == 1 + assert len(results) == 1 + + result = results[0] + assert isinstance(result, SearchResult) + assert result.id == "test-transcript-id" + assert result.title == "Test Transcript" + assert result.rank == 0.95 + + +class TestSearchEndpointParsing: + """Test parameter parsing in the search endpoint.""" + + def test_parse_comma_separated_room_ids(self): + """Test parsing comma-separated room IDs.""" + room_ids_str = "room1,room2,room3" + parsed = [rid.strip() for rid in room_ids_str.split(",") if rid.strip()] + assert parsed == ["room1", "room2", "room3"] + + room_ids_str = "room1, room2 , room3" + parsed = [rid.strip() for rid in room_ids_str.split(",") if rid.strip()] + assert parsed == ["room1", "room2", "room3"] + + room_ids_str = "room1,,room3," + parsed = [rid.strip() for rid in room_ids_str.split(",") if rid.strip()] + assert parsed == ["room1", "room3"] + + def test_parse_source_kind(self): + """Test parsing source_kind values.""" + for kind_str in ["live", "file", "room"]: + parsed = SourceKind(kind_str) + assert parsed == SourceKind(kind_str) + + with pytest.raises(ValueError): + SourceKind("invalid_kind") + + +class TestSearchResultModel: + """Test SearchResult model and serialization.""" + + def test_search_result_with_available_fields(self): + """Test SearchResult model with currently available fields populated.""" + result = SearchResult( + id="test-id", + title="Test Title", + user_id="user-123", + room_id="room-456", + source_kind=SourceKind.ROOM, + created_at=datetime(2024, 6, 15, tzinfo=timezone.utc), + status="completed", + rank=0.85, + duration=1800.5, + search_snippets=["snippet 1", "snippet 2"], + ) + + assert result.id == "test-id" + assert result.title == "Test Title" + assert result.user_id == "user-123" + assert result.room_id == "room-456" + assert result.status == "completed" + assert result.rank == 0.85 + assert result.duration == 1800.5 + assert len(result.search_snippets) == 2 + + def test_search_result_with_optional_fields_none(self): + 
"""Test SearchResult model with optional fields as None.""" + result = SearchResult( + id="test-id", + source_kind=SourceKind.FILE, + created_at=datetime.now(timezone.utc), + status="processing", + rank=0.5, + search_snippets=[], + title=None, + user_id=None, + room_id=None, + duration=None, + ) + + assert result.title is None + assert result.user_id is None + assert result.room_id is None + assert result.duration is None + + def test_search_result_datetime_field(self): + """Test that SearchResult accepts datetime field.""" + result = SearchResult( + id="test-id", + source_kind=SourceKind.LIVE, + created_at=datetime(2024, 6, 15, 12, 30, 45, tzinfo=timezone.utc), + status="completed", + rank=0.9, + duration=None, + search_snippets=[], + ) + + assert result.created_at == datetime( + 2024, 6, 15, 12, 30, 45, tzinfo=timezone.utc + ) diff --git a/server/tests/test_search_long_summary.py b/server/tests/test_search_long_summary.py new file mode 100644 index 00000000..fe3e9305 --- /dev/null +++ b/server/tests/test_search_long_summary.py @@ -0,0 +1,164 @@ +"""Tests for long_summary in search functionality.""" + +import json +from datetime import datetime, timezone + +import pytest + +from reflector.db import get_database +from reflector.db.search import SearchParameters, search_controller +from reflector.db.transcripts import transcripts + + +@pytest.mark.asyncio +async def test_long_summary_snippet_prioritization(): + """Test that snippets from long_summary are prioritized over webvtt content.""" + test_id = "test-snippet-priority-3f9a2b8c" + + try: + # Clean up any existing test data + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + + test_data = { + "id": test_id, + "name": "Test Snippet Priority", + "title": "Meeting About Projects", + "status": "completed", + "locked": False, + "duration": 1800.0, + "created_at": datetime.now(timezone.utc), + "short_summary": "Project discussion", + "long_summary": ( + "The team discussed advanced robotics applications including " + "autonomous navigation systems and sensor fusion techniques. " + "Robotics development will focus on real-time processing." + ), + "topics": json.dumps([]), + "events": json.dumps([]), + "participants": json.dumps([]), + "source_language": "en", + "target_language": "en", + "reviewed": False, + "audio_location": "local", + "share_mode": "private", + "source_kind": "room", + "webvtt": """WEBVTT + +00:00:00.000 --> 00:00:10.000 +We talked about many different topics today. + +00:00:10.000 --> 00:00:20.000 +The robotics project is making good progress. + +00:00:20.000 --> 00:00:30.000 +We need to consider various implementation approaches.""", + } + + await get_database().execute(transcripts.insert().values(**test_data)) + + # Search for "robotics" which appears in both long_summary and webvtt + params = SearchParameters(query_text="robotics") + results, total = await search_controller.search_transcripts(params) + + assert total >= 1 + test_result = next((r for r in results if r.id == test_id), None) + assert test_result, "Should find the test transcript" + + snippets = test_result.search_snippets + assert len(snippets) > 0, "Should have at least one snippet" + + # The first snippets should be from long_summary (more detailed content) + first_snippet = snippets[0].lower() + assert ( + "advanced robotics" in first_snippet or "autonomous" in first_snippet + ), f"First snippet should be from long_summary with detailed content. 
Got: {snippets[0]}" + + # With max 3 snippets, we should get both from long_summary and webvtt + assert len(snippets) <= 3, "Should respect max snippets limit" + + # All snippets should contain the search term + for snippet in snippets: + assert ( + "robotics" in snippet.lower() + ), f"Snippet should contain search term: {snippet}" + + finally: + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + await get_database().disconnect() + + +@pytest.mark.asyncio +async def test_long_summary_only_search(): + """Test searching for content that only exists in long_summary.""" + test_id = "test-long-only-8b3c9f2a" + + try: + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + + test_data = { + "id": test_id, + "name": "Test Long Only", + "title": "Standard Meeting", + "status": "completed", + "locked": False, + "duration": 1800.0, + "created_at": datetime.now(timezone.utc), + "short_summary": "Team sync", + "long_summary": ( + "Detailed analysis of cryptocurrency market trends and " + "decentralized finance protocols. Discussion included " + "yield farming strategies and liquidity pool mechanics." + ), + "topics": json.dumps([]), + "events": json.dumps([]), + "participants": json.dumps([]), + "source_language": "en", + "target_language": "en", + "reviewed": False, + "audio_location": "local", + "share_mode": "private", + "source_kind": "room", + "webvtt": """WEBVTT + +00:00:00.000 --> 00:00:10.000 +Team meeting about general project updates. + +00:00:10.000 --> 00:00:20.000 +Discussion of timeline and deliverables.""", + } + + await get_database().execute(transcripts.insert().values(**test_data)) + + # Search for terms only in long_summary + params = SearchParameters(query_text="cryptocurrency") + results, total = await search_controller.search_transcripts(params) + + found = any(r.id == test_id for r in results) + assert found, "Should find transcript by long_summary-only content" + + test_result = next((r for r in results if r.id == test_id), None) + assert test_result + assert len(test_result.search_snippets) > 0 + + # Verify the snippet is about cryptocurrency + snippet = test_result.search_snippets[0].lower() + assert "cryptocurrency" in snippet, "Snippet should contain the search term" + + # Search for "yield farming" - a more specific term + params2 = SearchParameters(query_text="yield farming") + results2, total2 = await search_controller.search_transcripts(params2) + + found2 = any(r.id == test_id for r in results2) + assert found2, "Should find transcript by specific long_summary phrase" + + finally: + await get_database().execute( + transcripts.delete().where(transcripts.c.id == test_id) + ) + await get_database().disconnect() diff --git a/server/tests/test_search_snippets.py b/server/tests/test_search_snippets.py index 33597de1..72267a1b 100644 --- a/server/tests/test_search_snippets.py +++ b/server/tests/test_search_snippets.py @@ -1,6 +1,10 @@ """Unit tests for search snippet generation.""" -from reflector.db.search import SearchController +from reflector.db.search import ( + SnippetCandidate, + SnippetGenerator, + WebVTTProcessor, +) class TestExtractWebVTT: @@ -16,7 +20,7 @@ class TestExtractWebVTT: 00:00:10.000 --> 00:00:20.000 Indeed it is a test of WebVTT parsing. 
""" - result = SearchController._extract_webvtt_text(webvtt) + result = WebVTTProcessor.extract_text(webvtt) assert "Hello world, this is a test" in result assert "Indeed it is a test" in result assert "= 2 # At least 2 distinct snippets + snippets = SnippetGenerator.generate(text, "Python") + assert len(snippets) >= 2 - # Each snippet should contain "Python" for snippet in snippets: assert "python" in snippet.lower() def test_single_match(self): """Test single occurrence returns one snippet.""" text = "This document discusses artificial intelligence and its applications." - snippets = SearchController._generate_snippets(text, "artificial intelligence") + snippets = SnippetGenerator.generate(text, "artificial intelligence") assert len(snippets) == 1 assert "artificial intelligence" in snippets[0].lower() @@ -70,24 +70,22 @@ class TestGenerateSnippets: def test_no_matches(self): """Test no matches returns empty list.""" text = "This is some random text without the search term." - snippets = SearchController._generate_snippets(text, "machine learning") + snippets = SnippetGenerator.generate(text, "machine learning") assert snippets == [] def test_case_insensitive_search(self): """Test search is case insensitive.""" - # Add enough text between matches to get separate snippets text = ( "MACHINE LEARNING is important for modern applications. " - + "It requires lots of data and computational resources. " * 5 # Padding + + "It requires lots of data and computational resources. " * 5 + "Machine Learning rocks and transforms industries. " - + "Deep learning is a subset of it. " * 5 # More padding + + "Deep learning is a subset of it. " * 5 + "Finally, machine learning will shape our future." ) - snippets = SearchController._generate_snippets(text, "machine learning") + snippets = SnippetGenerator.generate(text, "machine learning") - # Should find at least 2 (might be 3 if text is long enough) assert len(snippets) >= 2 for snippet in snippets: assert "machine learning" in snippet.lower() @@ -95,61 +93,55 @@ class TestGenerateSnippets: def test_partial_match_fallback(self): """Test fallback to first word when exact phrase not found.""" text = "We use machine intelligence for processing." - snippets = SearchController._generate_snippets(text, "machine learning") + snippets = SnippetGenerator.generate(text, "machine learning") - # Should fall back to finding "machine" assert len(snippets) == 1 assert "machine" in snippets[0].lower() def test_snippet_ellipsis(self): """Test ellipsis added for truncated snippets.""" - # Long text where match is in the middle text = "a " * 100 + "TARGET_WORD special content here" + " b" * 100 - snippets = SearchController._generate_snippets(text, "TARGET_WORD") + snippets = SnippetGenerator.generate(text, "TARGET_WORD") assert len(snippets) == 1 - assert "..." in snippets[0] # Should have ellipsis + assert "..." 
in snippets[0] assert "TARGET_WORD" in snippets[0] def test_overlapping_snippets_deduplicated(self): """Test overlapping matches don't create duplicate snippets.""" - text = "test test test word" * 10 # Repeated pattern - snippets = SearchController._generate_snippets(text, "test") + text = "test test test word" * 10 + snippets = SnippetGenerator.generate(text, "test") - # Should get unique snippets, not duplicates assert len(snippets) <= 3 - assert len(snippets) == len(set(snippets)) # All unique + assert len(snippets) == len(set(snippets)) def test_empty_inputs(self): """Test empty text or search term returns empty list.""" - assert SearchController._generate_snippets("", "search") == [] - assert SearchController._generate_snippets("text", "") == [] - assert SearchController._generate_snippets("", "") == [] + assert SnippetGenerator.generate("", "search") == [] + assert SnippetGenerator.generate("text", "") == [] + assert SnippetGenerator.generate("", "") == [] def test_max_snippets_limit(self): """Test respects max_snippets parameter.""" - # Create text with well-separated occurrences - separator = " filler " * 50 # Ensure snippets don't overlap - text = ("Python is amazing" + separator) * 10 # 10 occurrences + separator = " filler " * 50 + text = ("Python is amazing" + separator) * 10 - # Test with different limits - snippets_1 = SearchController._generate_snippets(text, "Python", max_snippets=1) + snippets_1 = SnippetGenerator.generate(text, "Python", max_snippets=1) assert len(snippets_1) == 1 - snippets_2 = SearchController._generate_snippets(text, "Python", max_snippets=2) + snippets_2 = SnippetGenerator.generate(text, "Python", max_snippets=2) assert len(snippets_2) == 2 - snippets_5 = SearchController._generate_snippets(text, "Python", max_snippets=5) - assert len(snippets_5) == 5 # Should get exactly 5 with enough separation + snippets_5 = SnippetGenerator.generate(text, "Python", max_snippets=5) + assert len(snippets_5) == 5 def test_snippet_length(self): """Test snippet length is reasonable.""" - text = "word " * 200 # Long text - snippets = SearchController._generate_snippets(text, "word") + text = "word " * 200 + snippets = SnippetGenerator.generate(text, "word") for snippet in snippets: - # Default max_length is 150 + some context - assert len(snippet) <= 200 # Some buffer for ellipsis + assert len(snippet) <= 200 class TestFullPipeline: @@ -157,7 +149,6 @@ class TestFullPipeline: def test_webvtt_to_snippets_integration(self): """Test full pipeline from WebVTT to search snippets.""" - # Create WebVTT with well-separated content for multiple snippets webvtt = ( """WEBVTT @@ -182,17 +173,362 @@ class TestFullPipeline: """ ) - # Extract and generate snippets - plain_text = SearchController._extract_webvtt_text(webvtt) - snippets = SearchController._generate_snippets(plain_text, "machine learning") + plain_text = WebVTTProcessor.extract_text(webvtt) + snippets = SnippetGenerator.generate(plain_text, "machine learning") - # Should find at least 2 snippets (text might still be close together) - assert len(snippets) >= 1 # At minimum one snippet containing matches - assert len(snippets) <= 3 # At most 3 by default + assert len(snippets) >= 1 + assert len(snippets) <= 3 - # No WebVTT artifacts in snippets for snippet in snippets: assert "machine learning" in snippet.lower() assert "" not in snippet + + +class TestMultiWordQueryBehavior: + """Tests for multi-word query behavior and exact phrase matching.""" + + def test_multi_word_query_snippet_behavior(self): + """Test that 
multi-word queries generate snippets based on exact phrase matching.""" + sample_text = """This is a sample transcript where user Alice is talking. + Later in the conversation, jordan mentions something important. + The user jordan collaboration was successful. + Another user named Bob joins the discussion.""" + + user_snippets = SnippetGenerator.generate(sample_text, "user") + assert len(user_snippets) == 2, "Should find 2 snippets for 'user'" + + jordan_snippets = SnippetGenerator.generate(sample_text, "jordan") + assert len(jordan_snippets) >= 1, "Should find at least 1 snippet for 'jordan'" + + multi_word_snippets = SnippetGenerator.generate(sample_text, "user jordan") + assert len(multi_word_snippets) == 1, ( + "Should return exactly 1 snippet for 'user jordan' " + "(only the exact phrase match, not individual word occurrences)" + ) + + snippet = multi_word_snippets[0] + assert ( + "user jordan" in snippet.lower() + ), "The snippet should contain the exact phrase 'user jordan'" + + assert ( + "alice" not in snippet.lower() + ), "The snippet should not include the first standalone 'user' with Alice" + + def test_multi_word_query_without_exact_match(self): + """Test snippet generation when exact phrase is not found.""" + sample_text = """User Alice is here. Bob and jordan are talking. + Later jordan mentions something. The user is happy.""" + + snippets = SnippetGenerator.generate(sample_text, "user jordan") + + assert ( + len(snippets) >= 1 + ), "Should find at least 1 snippet when falling back to first word" + + all_snippets_text = " ".join(snippets).lower() + assert ( + "user" in all_snippets_text + ), "Snippets should contain 'user' (the first word)" + + def test_exact_phrase_at_text_boundaries(self): + """Test snippet generation when exact phrase appears at text boundaries.""" + + text_start = "user jordan started the meeting. Other content here." + snippets = SnippetGenerator.generate(text_start, "user jordan") + assert len(snippets) == 1 + assert "user jordan" in snippets[0].lower() + + text_end = "Other content here. The meeting ended with user jordan" + snippets = SnippetGenerator.generate(text_end, "user jordan") + assert len(snippets) == 1 + assert "user jordan" in snippets[0].lower() + + def test_multi_word_query_matches_words_appearing_separately_and_together(self): + """Test that multi-word queries prioritize exact phrase matches over individual word occurrences.""" + sample_text = """This is a sample transcript where user Alice is talking. + Later in the conversation, jordan mentions something important. + The user jordan collaboration was successful. + Another user named Bob joins the discussion.""" + + search_query = "user jordan" + snippets = SnippetGenerator.generate(sample_text, search_query) + + assert len(snippets) == 1, ( + f"Expected exactly 1 snippet for '{search_query}' when exact phrase exists, " + f"got {len(snippets)}. Should ignore individual word occurrences." + ) + + snippet = snippets[0] + + assert ( + search_query in snippet.lower() + ), f"Snippet should contain the exact phrase '{search_query}'. Got: {snippet}" + + assert ( + "jordan mentions" in snippet.lower() + ), f"Snippet should include context before the exact phrase match. Got: {snippet}" + + assert ( + "alice" not in snippet.lower() + ), f"Snippet should not include separate occurrences of individual words. Got: {snippet}" + + text_2 = """The alpha version was released. + Beta testing started yesterday. 
+ The alpha beta integration is complete.""" + + snippets_2 = SnippetGenerator.generate(text_2, "alpha beta") + assert len(snippets_2) == 1, "Should return 1 snippet for exact phrase match" + assert "alpha beta" in snippets_2[0].lower(), "Should contain exact phrase" + assert ( + "version" not in snippets_2[0].lower() + ), "Should not include first separate occurrence" + + +class TestSnippetGenerationEnhanced: + """Additional snippet generation tests from test_search_enhancements.py.""" + + def test_snippet_generation_from_webvtt(self): + """Test snippet generation from WebVTT content.""" + webvtt_content = """WEBVTT + +00:00:00.000 --> 00:00:05.000 +This is the beginning of the transcript + +00:00:05.000 --> 00:00:10.000 +The search term appears here in the middle + +00:00:10.000 --> 00:00:15.000 +And this is the end of the content""" + + plain_text = WebVTTProcessor.extract_text(webvtt_content) + snippets = SnippetGenerator.generate(plain_text, "search term") + + assert len(snippets) > 0 + assert any("search term" in snippet.lower() for snippet in snippets) + + def test_extract_webvtt_text_with_malformed_variations(self): + """Test WebVTT extraction with various malformed content.""" + malformed_vtt = "This is not valid WebVTT content" + result = WebVTTProcessor.extract_text(malformed_vtt) + assert result == "" + + partial_vtt = "WEBVTT\nNo timestamps here" + result = WebVTTProcessor.extract_text(partial_vtt) + assert result == "" or "No timestamps" not in result + + +class TestPureFunctions: + """Test the pure functions extracted for functional programming.""" + + def test_find_all_matches(self): + """Test finding all match positions in text.""" + text = "Python is great. Python is powerful. I love Python." + matches = list(SnippetGenerator.find_all_matches(text, "Python")) + assert matches == [0, 17, 44] + + matches = list(SnippetGenerator.find_all_matches(text, "python")) + assert matches == [0, 17, 44] + + matches = list(SnippetGenerator.find_all_matches(text, "Ruby")) + assert matches == [] + + matches = list(SnippetGenerator.find_all_matches("", "test")) + assert matches == [] + matches = list(SnippetGenerator.find_all_matches("test", "")) + assert matches == [] + + def test_create_snippet(self): + """Test creating a snippet from a match position.""" + text = "This is a long text with the word Python in the middle and more text after." 
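+        # For intuition: with SNIPPET_CONTEXT_LENGTH = 50 and max_length = 150,
+        # match_pos=35 gives a window of text[max(0, 35 - 50) : 35 + 150 - 50],
+        # i.e. text[0:135]. The sample text is shorter than 135 characters, so
+        # the candidate spans the whole string and text() adds no ellipses.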
+ + snippet = SnippetGenerator.create_snippet(text, 35, max_length=150) + assert "Python" in snippet.text() + assert snippet.start >= 0 + assert snippet.end <= len(text) + assert isinstance(snippet, SnippetCandidate) + + assert len(snippet.text()) > 0 + assert snippet.start <= snippet.end + + long_text = "A" * 200 + snippet = SnippetGenerator.create_snippet(long_text, 100, max_length=50) + assert snippet.text().startswith("...") + assert snippet.text().endswith("...") + + snippet = SnippetGenerator.create_snippet("short text", 0, max_length=100) + assert snippet.start == 0 + assert "short text" in snippet.text() + + def test_filter_non_overlapping(self): + """Test filtering overlapping snippets.""" + candidates = [ + SnippetCandidate(_text="First snippet", start=0, _original_text_length=100), + SnippetCandidate(_text="Overlapping", start=10, _original_text_length=100), + SnippetCandidate( + _text="Third snippet", start=40, _original_text_length=100 + ), + SnippetCandidate( + _text="Fourth snippet", start=65, _original_text_length=100 + ), + ] + + filtered = list(SnippetGenerator.filter_non_overlapping(iter(candidates))) + assert filtered == [ + "First snippet...", + "...Third snippet...", + "...Fourth snippet...", + ] + + filtered = list(SnippetGenerator.filter_non_overlapping(iter([]))) + assert filtered == [] + + def test_generate_integration(self): + """Test the main SnippetGenerator.generate function.""" + text = "Machine learning is amazing. Machine learning transforms data. Learn machine learning today." + + snippets = SnippetGenerator.generate(text, "machine learning") + assert len(snippets) <= 3 + assert all("machine learning" in s.lower() for s in snippets) + + snippets = SnippetGenerator.generate(text, "machine learning", max_snippets=2) + assert len(snippets) <= 2 + + snippets = SnippetGenerator.generate(text, "machine vision") + assert len(snippets) > 0 + assert any("machine" in s.lower() for s in snippets) + + def test_extract_webvtt_text_basic(self): + """Test WebVTT text extraction (basic test, full tests exist elsewhere).""" + webvtt = """WEBVTT + +00:00:00.000 --> 00:00:02.000 +Hello world + +00:00:02.000 --> 00:00:04.000 +This is a test""" + + result = WebVTTProcessor.extract_text(webvtt) + assert "Hello world" in result + assert "This is a test" in result + + # Test empty input + assert WebVTTProcessor.extract_text("") == "" + assert WebVTTProcessor.extract_text(None) == "" + + def test_generate_webvtt_snippets(self): + """Test generating snippets from WebVTT content.""" + webvtt = """WEBVTT + +00:00:00.000 --> 00:00:02.000 +Python programming is great + +00:00:02.000 --> 00:00:04.000 +Learn Python today""" + + snippets = WebVTTProcessor.generate_snippets(webvtt, "Python") + assert len(snippets) > 0 + assert any("Python" in s for s in snippets) + + snippets = WebVTTProcessor.generate_snippets("", "Python") + assert snippets == [] + + def test_from_summary(self): + """Test generating snippets from summary text.""" + summary = "This meeting discussed Python development and machine learning applications." + + snippets = SnippetGenerator.from_summary(summary, "Python") + assert len(snippets) > 0 + assert any("Python" in s for s in snippets) + + long_summary = "Python " * 20 + snippets = SnippetGenerator.from_summary(long_summary, "Python") + assert len(snippets) <= 2 + + def test_combine_sources(self): + """Test combining snippets from multiple sources.""" + summary = "Python is a great programming language." 
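+        # combine_sources draws snippets from the summary first, then fills
+        # the remaining slots from the WebVTT transcript.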
+ webvtt = """WEBVTT + +00:00:00.000 --> 00:00:02.000 +Learn Python programming + +00:00:02.000 --> 00:00:04.000 +Python is powerful""" + + snippets, total_count = SnippetGenerator.combine_sources( + summary, webvtt, "Python", max_total=3 + ) + assert len(snippets) <= 3 + assert len(snippets) > 0 + assert total_count > 0 + + snippets, total_count = SnippetGenerator.combine_sources( + summary, None, "Python", max_total=3 + ) + assert len(snippets) > 0 + assert all("Python" in s for s in snippets) + assert total_count == 1 + + snippets, total_count = SnippetGenerator.combine_sources( + None, webvtt, "Python", max_total=3 + ) + assert len(snippets) > 0 + assert total_count == 2 + + long_summary = "Python " * 10 + snippets, total_count = SnippetGenerator.combine_sources( + long_summary, webvtt, "Python", max_total=2 + ) + assert len(snippets) == 2 + assert total_count >= 10 + + def test_match_counting_sum_logic(self): + """Test that match counting correctly sums matches from both sources.""" + summary = "data science uses data analysis and data mining techniques" + webvtt = """WEBVTT + +00:00:00.000 --> 00:00:02.000 +Big data processing + +00:00:02.000 --> 00:00:04.000 +data visualization and data storage""" + + snippets, total_count = SnippetGenerator.combine_sources( + summary, webvtt, "data", max_total=3 + ) + assert total_count == 6 + assert len(snippets) <= 3 + + summary_snippets, summary_count = SnippetGenerator.combine_sources( + summary, None, "data", max_total=3 + ) + assert summary_count == 3 + + webvtt_snippets, webvtt_count = SnippetGenerator.combine_sources( + None, webvtt, "data", max_total=3 + ) + assert webvtt_count == 3 + + snippets_empty, count_empty = SnippetGenerator.combine_sources( + None, None, "data", max_total=3 + ) + assert snippets_empty == [] + assert count_empty == 0 + + def test_edge_cases(self): + """Test edge cases for the pure functions.""" + text = "Test with special: @#$%^&*() characters" + snippets = SnippetGenerator.generate(text, "@#$%") + assert len(snippets) > 0 + + long_query = "a" * 100 + snippets = SnippetGenerator.generate("Some text", long_query) + assert snippets == [] + + text = "Unicode test: café, naïve, 日本語" + snippets = SnippetGenerator.generate(text, "café") + assert len(snippets) > 0 + assert "café" in snippets[0] diff --git a/www/app/(app)/browse/_components/Pagination.tsx b/www/app/(app)/browse/_components/Pagination.tsx index 9997a6f3..06928aad 100644 --- a/www/app/(app)/browse/_components/Pagination.tsx +++ b/www/app/(app)/browse/_components/Pagination.tsx @@ -1,26 +1,67 @@ -import React from "react"; +import React, { useEffect } from "react"; import { Pagination, IconButton, ButtonGroup } from "@chakra-ui/react"; import { LuChevronLeft, LuChevronRight } from "react-icons/lu"; +// explicitly 1-based to prevent +/-1-confusion errors +export const FIRST_PAGE = 1 as PaginationPage; +export const parsePaginationPage = ( + page: number, +): + | { + value: PaginationPage; + } + | { + error: string; + } => { + if (page < FIRST_PAGE) + return { + error: "Page must be greater than 0", + }; + if (!Number.isInteger(page)) + return { + error: "Page must be an integer", + }; + return { + value: page as PaginationPage, + }; +}; +export type PaginationPage = number & { __brand: "PaginationPage" }; +export const PaginationPage = (page: number): PaginationPage => { + const v = parsePaginationPage(page); + if ("error" in v) throw new Error(v.error); + return v.value; +}; + +export const paginationPageTo0Based = (page: PaginationPage): number => + page - 
FIRST_PAGE;
+
 type PaginationProps = {
-  page: number;
-  setPage: (page: number) => void;
+  page: PaginationPage;
+  setPage: (page: PaginationPage) => void;
   total: number;
   size: number;
 };
 
+export const totalPages = (total: number, size: number) => {
+  return Math.ceil(total / size);
+};
+
 export default function PaginationComponent(props: PaginationProps) {
   const { page, setPage, total, size } = props;
-  const totalPages = Math.ceil(total / size);
-
-  if (totalPages <= 1) return null;
+  useEffect(() => {
+    if (page > totalPages(total, size)) {
+      console.error(
+        `Page number (${page}) is greater than total pages (${totalPages(total, size)}) in pagination`,
+      );
+    }
+  }, [page, totalPages(total, size)]);
 
   return (
 setPage(details.page)}
+      onPageChange={(details) => setPage(PaginationPage(details.page))}
       style={{ display: "flex", justifyContent: "center" }}
     >
diff --git a/www/app/(app)/browse/_components/SearchBar.tsx b/www/app/(app)/browse/_components/SearchBar.tsx
deleted file mode 100644
index 2ff64f32..00000000
--- a/www/app/(app)/browse/_components/SearchBar.tsx
+++ /dev/null
@@ -1,34 +0,0 @@
-import React, { useState } from "react";
-import { Flex, Input, Button } from "@chakra-ui/react";
-
-interface SearchBarProps {
-  onSearch: (searchTerm: string) => void;
-}
-
-export default function SearchBar({ onSearch }: SearchBarProps) {
-  const [searchInputValue, setSearchInputValue] = useState("");
-
-  const handleSearch = () => {
-    onSearch(searchInputValue);
-  };
-
-  const handleKeyDown = (event: React.KeyboardEvent) => {
-    if (event.key === "Enter") {
-      handleSearch();
-    }
-  };
-
-  return (
-
-      setSearchInputValue(e.target.value)}
-      onKeyDown={handleKeyDown}
-    />
-
-
-  );
-}
diff --git a/www/app/(app)/browse/_components/TranscriptActionsMenu.tsx b/www/app/(app)/browse/_components/TranscriptActionsMenu.tsx
index 77bbd647..1119a4b7 100644
--- a/www/app/(app)/browse/_components/TranscriptActionsMenu.tsx
+++ b/www/app/(app)/browse/_components/TranscriptActionsMenu.tsx
@@ -4,8 +4,8 @@ import { LuMenu, LuTrash, LuRotateCw } from "react-icons/lu";
 
 interface TranscriptActionsMenuProps {
   transcriptId: string;
-  onDelete: (transcriptId: string) => (e: any) => void;
-  onReprocess: (transcriptId: string) => (e: any) => void;
+  onDelete: (transcriptId: string) => void;
+  onReprocess: (transcriptId: string) => void;
 }
 
 export default function TranscriptActionsMenu({
@@ -24,11 +24,17 @@ export default function TranscriptActionsMenu({
 onReprocess(transcriptId)(e)}
+          onClick={() => onReprocess(transcriptId)}
         >
           Reprocess
-        onDelete(transcriptId)(e)}>
+        {
+            e.stopPropagation();
+            onDelete(transcriptId);
+          }}
+        >
           Delete
diff --git a/www/app/(app)/browse/_components/TranscriptCards.tsx b/www/app/(app)/browse/_components/TranscriptCards.tsx
index 54c78ec1..b67e71e7 100644
--- a/www/app/(app)/browse/_components/TranscriptCards.tsx
+++ b/www/app/(app)/browse/_components/TranscriptCards.tsx
@@ -1,27 +1,290 @@
-import React from "react";
-import { Box, Stack, Text, Flex, Link, Spinner } from "@chakra-ui/react";
+import React, { useState } from "react";
+import {
+  Box,
+  Stack,
+  Text,
+  Flex,
+  Link,
+  Spinner,
+  Badge,
+  HStack,
+  VStack,
+} from "@chakra-ui/react";
 import NextLink from "next/link";
-import { GetTranscriptMinimal } from "../../../api";
 import { formatTimeMs, formatLocalDate } from "../../../lib/time";
 import TranscriptStatusIcon from "./TranscriptStatusIcon";
 import TranscriptActionsMenu from "./TranscriptActionsMenu";
+import {
+  highlightMatches,
+  generateTextFragment,
+} from "../../../lib/textHighlight";
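+// Behaviour of the two helpers imported above, sketched from
+// lib/textHighlight.tsx (sample strings are illustrative only):
+//   highlightMatches("Weekly sync", "week sync")
+//     -> [{ match: "Week", index: 0 }, { match: "sync", index: 7 }]
+//   generateTextFragment("Weekly sync notes", "sync")
+//     -> { k: ":~:text", v: "sync" }  // rendered into the href as "#:~:text=sync"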
+import { SearchResult } from "../../../api"; interface TranscriptCardsProps { - transcripts: GetTranscriptMinimal[]; - onDelete: (transcriptId: string) => (e: any) => void; - onReprocess: (transcriptId: string) => (e: any) => void; - loading?: boolean; + results: SearchResult[]; + query: string; + isLoading?: boolean; + onDelete: (transcriptId: string) => void; + onReprocess: (transcriptId: string) => void; +} + +function highlightText(text: string, query: string): React.ReactNode { + if (!query) return text; + + const matches = highlightMatches(text, query); + + if (matches.length === 0) return text; + + // Sort matches by index to process them in order + const sortedMatches = [...matches].sort((a, b) => a.index - b.index); + + const parts: React.ReactNode[] = []; + let lastIndex = 0; + + sortedMatches.forEach((match, i) => { + // Add text before the match + if (match.index > lastIndex) { + parts.push( + + {text.slice(lastIndex, match.index)} + , + ); + } + + // Add the highlighted match + parts.push( + + {match.match} + , + ); + + lastIndex = match.index + match.match.length; + }); + + // Add remaining text after last match + if (lastIndex < text.length) { + parts.push( + + {text.slice(lastIndex)} + , + ); + } + + return parts; +} + +const transcriptHref = ( + transcriptId: string, + mainSnippet: string, + query: string, +): `/transcripts/${string}` => { + const urlTextFragment = mainSnippet + ? generateTextFragment(mainSnippet, query) + : null; + const urlTextFragmentWithHash = urlTextFragment + ? `#${urlTextFragment.k}=${encodeURIComponent(urlTextFragment.v)}` + : ""; + return `/transcripts/${transcriptId}${urlTextFragmentWithHash}`; +}; + +// note that it's strongly tied to search logic - in case you want to use it independently, refactor +function TranscriptCard({ + result, + query, + onDelete, + onReprocess, +}: { + result: SearchResult; + query: string; + onDelete: (transcriptId: string) => void; + onReprocess: (transcriptId: string) => void; +}) { + const [isExpanded, setIsExpanded] = useState(false); + + const mainSnippet = result.search_snippets[0]; + const additionalSnippets = result.search_snippets.slice(1); + const totalMatches = result.total_match_count || 0; + const snippetsShown = result.search_snippets.length; + const remainingMatches = totalMatches - snippetsShown; + const hasAdditionalSnippets = additionalSnippets.length > 0; + const resultTitle = result.title || "Unnamed Transcript"; + + const formattedDuration = result.duration + ? formatTimeMs(result.duration) + : "N/A"; + const formattedDate = formatLocalDate(result.created_at); + const source = + result.source_kind === "room" + ? result.room_name || result.room_id + : result.source_kind; + + const handleExpandClick = (e: React.MouseEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsExpanded(!isExpanded); + }; + + return ( + + + + + + + {/* Title with highlighting and text fragment for deep linking */} + + {highlightText(resultTitle, query)} + + + {/* Metadata - Horizontal on desktop, vertical on mobile */} + + + + Source: + + {source} + + + • + + + + Date: + + {formattedDate} + + + • + + + + Duration: + + {formattedDuration} + + + + {/* Search Results Section - only show when searching */} + {mainSnippet && ( + <> + {/* Main Snippet */} + + + {highlightText(mainSnippet, query)} + + + + {hasAdditionalSnippets && ( + <> + + + + {remainingMatches > 0 + ? 
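+                      // not every match has its own snippet here, so combine
+                      // the counts and flag the total with a trailing "+"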
`${additionalSnippets.length + remainingMatches}+` + : additionalSnippets.length} + + + more{" "} + {additionalSnippets.length + remainingMatches === 1 + ? "match" + : "matches"} + {remainingMatches > 0 && + ` (${additionalSnippets.length} shown)`} + + + + {isExpanded ? "▲" : "▼"} + + + + {/* Additional Snippets */} + {isExpanded && ( + + {additionalSnippets.map((snippet, index) => ( + + + {highlightText(snippet, query)} + + + ))} + + )} + + )} + + )} + + + + + ); } export default function TranscriptCards({ - transcripts, + results, + query, + isLoading, onDelete, onReprocess, - loading, }: TranscriptCardsProps) { return ( - - {loading && ( + + {isLoading && ( )} - - {transcripts.map((item) => ( - - - - - - - - {item.title || "Unnamed Transcript"} - - - Source:{" "} - {item.source_kind === "room" - ? item.room_name - : item.source_kind} - - Date: {formatLocalDate(item.created_at)} - Duration: {formatTimeMs(item.duration)} - - - - + + {results.map((result) => ( + ))} diff --git a/www/app/(app)/browse/_components/TranscriptTable.tsx b/www/app/(app)/browse/_components/TranscriptTable.tsx deleted file mode 100644 index 16bd791f..00000000 --- a/www/app/(app)/browse/_components/TranscriptTable.tsx +++ /dev/null @@ -1,99 +0,0 @@ -import React from "react"; -import { Box, Table, Link, Flex, Spinner } from "@chakra-ui/react"; -import NextLink from "next/link"; -import { GetTranscriptMinimal } from "../../../api"; -import { formatTimeMs, formatLocalDate } from "../../../lib/time"; -import TranscriptStatusIcon from "./TranscriptStatusIcon"; -import TranscriptActionsMenu from "./TranscriptActionsMenu"; - -interface TranscriptTableProps { - transcripts: GetTranscriptMinimal[]; - onDelete: (transcriptId: string) => (e: any) => void; - onReprocess: (transcriptId: string) => (e: any) => void; - loading?: boolean; -} - -export default function TranscriptTable({ - transcripts, - onDelete, - onReprocess, - loading, -}: TranscriptTableProps) { - return ( - - {loading && ( - - - - )} - - - - - - - Transcription Title - - - Source - - - Date - - - Duration - - - - - - {transcripts.map((item) => ( - - - - - - - {item.title || "Unnamed Transcript"} - - - - {item.source_kind === "room" - ? 
item.room_name - : item.source_kind} - - {formatLocalDate(item.created_at)} - {formatTimeMs(item.duration)} - - - - - ))} - - - - - ); -} diff --git a/www/app/(app)/browse/page.tsx b/www/app/(app)/browse/page.tsx index 97d876a1..e7522e14 100644 --- a/www/app/(app)/browse/page.tsx +++ b/www/app/(app)/browse/page.tsx @@ -1,33 +1,264 @@ "use client"; import React, { useState, useEffect } from "react"; -import { Flex, Spinner, Heading, Text, Link } from "@chakra-ui/react"; -import useTranscriptList from "../transcripts/useTranscriptList"; +import { + Flex, + Spinner, + Heading, + Text, + Link, + Box, + Stack, + Input, + Button, + IconButton, +} from "@chakra-ui/react"; +import { + useQueryState, + parseAsString, + parseAsInteger, + parseAsStringLiteral, +} from "nuqs"; +import { LuX } from "react-icons/lu"; +import { useSearchTranscripts } from "../transcripts/useSearchTranscripts"; import useSessionUser from "../../lib/useSessionUser"; -import { Room } from "../../api"; -import Pagination from "./_components/Pagination"; +import { Room, SourceKind, SearchResult, $SourceKind } from "../../api"; import useApi from "../../lib/useApi"; import { useError } from "../../(errors)/errorContext"; -import { SourceKind } from "../../api"; import FilterSidebar from "./_components/FilterSidebar"; -import SearchBar from "./_components/SearchBar"; -import TranscriptTable from "./_components/TranscriptTable"; +import Pagination, { + FIRST_PAGE, + PaginationPage, + parsePaginationPage, + totalPages as getTotalPages, +} from "./_components/Pagination"; import TranscriptCards from "./_components/TranscriptCards"; import DeleteTranscriptDialog from "./_components/DeleteTranscriptDialog"; import { formatLocalDate } from "../../lib/time"; +import { RECORD_A_MEETING_URL } from "../../api/urls"; + +const SEARCH_FORM_QUERY_INPUT_NAME = "query" as const; + +const usePrefetchRooms = (setRooms: (rooms: Room[]) => void): void => { + const { setError } = useError(); + const api = useApi(); + useEffect(() => { + if (!api) return; + api + .v1RoomsList({ page: 1 }) + .then((rooms) => setRooms(rooms.items)) + .catch((err) => setError(err, "There was an error fetching the rooms")); + }, [api, setError]); +}; + +const SearchForm: React.FC<{ + setPage: (page: PaginationPage) => void; + sourceKind: SourceKind | null; + roomId: string | null; + setSourceKind: (sourceKind: SourceKind | null) => void; + setRoomId: (roomId: string | null) => void; + rooms: Room[]; + searchQuery: string | null; + setSearchQuery: (query: string | null) => void; +}> = ({ + setPage, + sourceKind, + roomId, + setRoomId, + setSourceKind, + rooms, + searchQuery, + setSearchQuery, +}) => { + // to keep the search input controllable + more fine grained control (urlSearchQuery is updated on submits) + const [searchInputValue, setSearchInputValue] = useState(searchQuery || ""); + const handleSearchQuerySubmit = async (d: FormData) => { + await setSearchQuery((d.get(SEARCH_FORM_QUERY_INPUT_NAME) as string) || ""); + }; + + const handleClearSearch = () => { + setSearchInputValue(""); + setSearchQuery(null); + setPage(FIRST_PAGE); + }; + return ( + +
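+      // FormData-based action: the "q" URL param (and the actual search)
+      // updates only on submit, not on every keystroke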
+ + + setSearchInputValue(e.target.value)} + name={SEARCH_FORM_QUERY_INPUT_NAME} + pr={searchQuery ? "2.5rem" : undefined} + /> + {searchQuery && ( + + + + )} + + + +
+ +
+ ); +}; + +const UnderSearchFormFilterIndicators: React.FC<{ + sourceKind: SourceKind | null; + roomId: string | null; + setSourceKind: (sourceKind: SourceKind | null) => void; + setRoomId: (roomId: string | null) => void; + rooms: Room[]; +}> = ({ sourceKind, roomId, setRoomId, setSourceKind, rooms }) => { + return ( + <> + {(sourceKind || roomId) && ( + + + Active filters: + + {sourceKind && ( + + + {roomId + ? `Room: ${ + rooms.find((r) => r.id === roomId)?.name || roomId + }` + : `Source: ${sourceKind}`} + + + + )} + + )} + + ); +}; + +const EmptyResult: React.FC<{ + searchQuery: string; +}> = ({ searchQuery }) => { + return ( + + + {searchQuery + ? `No results found for "${searchQuery}". Try adjusting your search terms.` + : "No transcripts found, but you can "} + {!searchQuery && ( + <> + + record a meeting + + {" to get started."} + + )} + + + ); +}; export default function TranscriptBrowser() { - const [selectedSourceKind, setSelectedSourceKind] = - useState(null); - const [selectedRoomId, setSelectedRoomId] = useState(""); - const [rooms, setRooms] = useState([]); - const [page, setPage] = useState(1); - const [searchTerm, setSearchTerm] = useState(""); - const { loading, response, refetch } = useTranscriptList( - page, - selectedSourceKind, - selectedRoomId, - searchTerm, + const [urlSearchQuery, setUrlSearchQuery] = useQueryState( + "q", + parseAsString.withDefault("").withOptions({ shallow: false }), ); + + const [urlSourceKind, setUrlSourceKind] = useQueryState( + "source", + parseAsStringLiteral($SourceKind.enum).withOptions({ + shallow: false, + }), + ); + const [urlRoomId, setUrlRoomId] = useQueryState( + "room", + parseAsString.withDefault("").withOptions({ shallow: false }), + ); + + const [urlPage, setPage] = useQueryState( + "page", + parseAsInteger.withDefault(1).withOptions({ shallow: false }), + ); + + const [page, _setSafePage] = useState(FIRST_PAGE); + + // safety net + useEffect(() => { + const maybePage = parsePaginationPage(urlPage); + if ("error" in maybePage) { + setPage(FIRST_PAGE).then(() => { + /*may be called n times we dont care*/ + }); + return; + } + _setSafePage(maybePage.value); + }, [urlPage]); + + const [rooms, setRooms] = useState([]); + + const pageSize = 20; + const { + results, + totalCount: totalResults, + isLoading, + reload, + } = useSearchTranscripts( + urlSearchQuery, + { + roomIds: urlRoomId ? 
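+        // the search endpoint accepts a single room_id today; the hook
+        // forwards roomIds[0] (see useSearchTranscripts)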
[urlRoomId] : null, + sourceKind: urlSourceKind, + }, + { + pageSize, + page, + }, + ); + + const totalPages = getTotalPages(totalResults, pageSize); + const userName = useSessionUser().name; const [deletionLoading, setDeletionLoading] = useState(false); const api = useApi(); @@ -35,37 +266,73 @@ export default function TranscriptBrowser() { const cancelRef = React.useRef(null); const [transcriptToDeleteId, setTranscriptToDeleteId] = React.useState(); - const [deletedItemIds, setDeletedItemIds] = React.useState(); - useEffect(() => { - setDeletedItemIds([]); - }, [page, response]); - - useEffect(() => { - if (!api) return; - api - .v1RoomsList({ page: 1 }) - .then((rooms) => setRooms(rooms.items)) - .catch((err) => setError(err, "There was an error fetching the rooms")); - }, [api]); + usePrefetchRooms(setRooms); const handleFilterTranscripts = ( sourceKind: SourceKind | null, roomId: string, ) => { - setSelectedSourceKind(sourceKind); - setSelectedRoomId(roomId); + setUrlSourceKind(sourceKind); + setUrlRoomId(roomId); setPage(1); }; - const handleSearch = (searchTerm: string) => { - setPage(1); - setSearchTerm(searchTerm); - setSelectedSourceKind(null); - setSelectedRoomId(""); + const onCloseDeletion = () => setTranscriptToDeleteId(undefined); + + const confirmDeleteTranscript = (transcriptId: string) => { + if (!api || deletionLoading) return; + setDeletionLoading(true); + api + .v1TranscriptDelete({ transcriptId }) + .then(() => { + setDeletionLoading(false); + onCloseDeletion(); + reload(); + }) + .catch((err) => { + setDeletionLoading(false); + setError(err, "There was an error deleting the transcript"); + }); }; - if (loading && !response) + const handleProcessTranscript = (transcriptId: string) => { + if (!api) { + console.error("API not available on handleProcessTranscript"); + return; + } + api + .v1TranscriptProcess({ transcriptId }) + .then((result) => { + const status = + result && typeof result === "object" && "status" in result + ? (result as { status: string }).status + : undefined; + if (status === "already running") { + setError( + new Error("Processing is already running, please wait"), + "Processing is already running, please wait", + ); + } + }) + .catch((err) => { + setError(err, "There was an error processing the transcript"); + }); + }; + + const transcriptToDelete = results?.find( + (i) => i.id === transcriptToDeleteId, + ); + const dialogTitle = transcriptToDelete?.title || "Unnamed Transcript"; + const dialogDate = transcriptToDelete?.created_at + ? formatLocalDate(transcriptToDelete.created_at) + : undefined; + const dialogSource = + transcriptToDelete?.source_kind === "room" && transcriptToDelete?.room_id + ? transcriptToDelete.room_name || transcriptToDelete.room_id + : transcriptToDelete?.source_kind; + + if (isLoading && results.length === 0) { return ( ); - - if (!loading && !response) - return ( - - - No transcripts found, but you can  - - record a meeting - -  to get started. - - - ); - - const onCloseDeletion = () => setTranscriptToDeleteId(undefined); - - const confirmDeleteTranscript = (transcriptId: string) => { - if (!api || deletionLoading) return; - setDeletionLoading(true); - api - .v1TranscriptDelete({ transcriptId }) - .then(() => { - refetch(); - setDeletionLoading(false); - onCloseDeletion(); - setDeletedItemIds((prev) => - prev ? 
[...prev, transcriptId] : [transcriptId], - ); - }) - .catch((err) => { - setDeletionLoading(false); - setError(err, "There was an error deleting the transcript"); - }); - }; - - const handleDeleteTranscript = (transcriptId: string) => (e: any) => { - e?.stopPropagation?.(); - setTranscriptToDeleteId(transcriptId); - }; - - const handleProcessTranscript = (transcriptId) => (e) => { - if (api) { - api - .v1TranscriptProcess({ transcriptId }) - .then((result) => { - const status = (result as any).status; - if (status === "already running") { - setError( - new Error("Processing is already running, please wait"), - "Processing is already running, please wait", - ); - } - }) - .catch((err) => { - setError(err, "There was an error processing the transcript"); - }); - } - }; - - const transcriptToDelete = response?.items?.find( - (i) => i.id === transcriptToDeleteId, - ); - const dialogTitle = transcriptToDelete?.title || "Unnamed Transcript"; - const dialogDate = transcriptToDelete?.created_at - ? formatLocalDate(transcriptToDelete.created_at) - : undefined; - const dialogSource = transcriptToDelete - ? transcriptToDelete.source_kind === "room" - ? transcriptToDelete.room_name || undefined - : transcriptToDelete.source_kind - : undefined; + } return ( {userName ? `${userName}'s Transcriptions` : "Your Transcriptions"}{" "} - {loading || (deletionLoading && )} + {(isLoading || deletionLoading) && } @@ -188,25 +380,37 @@ export default function TranscriptBrowser() { gap={4} px={{ base: 0, md: 4 }} > - - - + + {totalPages > 1 ? ( + + ) : null} + + + {!isLoading && results.length === 0 && ( + + )}
diff --git a/www/app/(app)/layout.tsx b/www/app/(app)/layout.tsx index 46691147..5760e19d 100644 --- a/www/app/(app)/layout.tsx +++ b/www/app/(app)/layout.tsx @@ -5,6 +5,7 @@ import Image from "next/image"; import About from "../(aboutAndPrivacy)/about"; import Privacy from "../(aboutAndPrivacy)/privacy"; import UserInfo from "../(auth)/userInfo"; +import { RECORD_A_MEETING_URL } from "../api/urls"; export default async function AppLayout({ children, @@ -53,7 +54,7 @@ export default async function AppLayout({ {/* Text link on the right */} Create diff --git a/www/app/(app)/rooms/page.tsx b/www/app/(app)/rooms/page.tsx index 03a4858b..305087f9 100644 --- a/www/app/(app)/rooms/page.tsx +++ b/www/app/(app)/rooms/page.tsx @@ -19,6 +19,7 @@ import useApi from "../../lib/useApi"; import useRoomList from "./useRoomList"; import { ApiError, Room } from "../../api"; import { RoomList } from "./_components/RoomList"; +import { PaginationPage } from "../browse/_components/Pagination"; interface SelectOption { label: string; @@ -75,8 +76,9 @@ export default function RoomsList() { const [isEditing, setIsEditing] = useState(false); const [editRoomId, setEditRoomId] = useState(""); const api = useApi(); + // TODO seems to be no setPage calls const [page, setPage] = useState(1); - const { loading, response, refetch } = useRoomList(page); + const { loading, response, refetch } = useRoomList(PaginationPage(page)); const [streams, setStreams] = useState([]); const [topics, setTopics] = useState([]); const [nameError, setNameError] = useState(""); diff --git a/www/app/(app)/rooms/useRoomList.tsx b/www/app/(app)/rooms/useRoomList.tsx index d0aad727..fba146fe 100644 --- a/www/app/(app)/rooms/useRoomList.tsx +++ b/www/app/(app)/rooms/useRoomList.tsx @@ -2,6 +2,7 @@ import { useEffect, useState } from "react"; import { useError } from "../../(errors)/errorContext"; import useApi from "../../lib/useApi"; import { Page_Room_ } from "../../api"; +import { PaginationPage } from "../browse/_components/Pagination"; type RoomList = { response: Page_Room_ | null; @@ -11,7 +12,7 @@ type RoomList = { }; //always protected -const useRoomList = (page: number): RoomList => { +const useRoomList = (page: PaginationPage): RoomList => { const [response, setResponse] = useState(null); const [loading, setLoading] = useState(true); const [error, setErrorState] = useState(null); diff --git a/www/app/(app)/transcripts/mockTopics.json b/www/app/(app)/transcripts/mockTopics.json deleted file mode 100644 index ebe9c1cf..00000000 --- a/www/app/(app)/transcripts/mockTopics.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "id": "27c07e49-d7a3-4b86-905c-f1a047366f91", - "title": "Issue one", - "summary": "The team discusses the first issue in the list", - "timestamp": 0.0, - "transcript": "", - "duration": 33, - "segments": [ - { - "text": "Let's start with issue one, Alice you've been working on that, can you give an update ?", - "start": 0.0, - "speaker": 0 - }, - { - "text": "Yes, I've run into an issue with the task system but Bob helped me out and I have a POC ready, should I present it now ?", - "start": 0.38, - "speaker": 1 - }, - { - "text": "Yeah, I had to modify the task system because it didn't account for incoming blobs", - "start": 4.5, - "speaker": 2 - }, - { - "text": "Cool, yeah lets see it", - "start": 5.96, - "speaker": 0 - } - ] - } -] diff --git a/www/app/(app)/transcripts/recorder.tsx b/www/app/(app)/transcripts/recorder.tsx index 84f8b9b0..f57540d4 100644 --- a/www/app/(app)/transcripts/recorder.tsx +++ 
b/www/app/(app)/transcripts/recorder.tsx
@@ -11,6 +11,7 @@ import useWebRTC from "./useWebRTC";
 import useAudioDevice from "./useAudioDevice";
 import { Box, Flex, IconButton, Menu, RadioGroup } from "@chakra-ui/react";
 import { LuScreenShare, LuMic, LuPlay, LuCircleStop } from "react-icons/lu";
+import { RECORD_A_MEETING_URL } from "../../api/urls";
 
 type RecorderProps = {
   transcriptId: string;
@@ -46,7 +47,7 @@ export default function Recorder(props: RecorderProps) {
         location.href = "";
         break;
       case ",":
-        location.href = "/transcripts/new";
+        location.href = RECORD_A_MEETING_URL;
         break;
       case "!":
         if (record.isRecording()) return;
diff --git a/www/app/(app)/transcripts/useSearchTranscripts.ts b/www/app/(app)/transcripts/useSearchTranscripts.ts
new file mode 100644
index 00000000..2e6a7311
--- /dev/null
+++ b/www/app/(app)/transcripts/useSearchTranscripts.ts
@@ -0,0 +1,123 @@
+// TODO: this hand-rolled fetching hook should eventually be replaced with a
+// proper state-management / data-fetching solution rather than reinvented here
+
+import { useEffect, useRef, useState } from "react";
+import { SearchResult, SourceKind } from "../../api";
+import useApi from "../../lib/useApi";
+import {
+  PaginationPage,
+  paginationPageTo0Based,
+} from "../browse/_components/Pagination";
+
+interface SearchFilters {
+  roomIds: readonly string[] | null;
+  sourceKind: SourceKind | null;
+}
+
+const EMPTY_SEARCH_FILTERS: SearchFilters = {
+  roomIds: null,
+  sourceKind: null,
+};
+
+type UseSearchTranscriptsOptions = {
+  pageSize: number;
+  page: PaginationPage;
+};
+
+interface UseSearchTranscriptsReturn {
+  results: SearchResult[];
+  totalCount: number;
+  isLoading: boolean;
+  error: unknown;
+  reload: () => void;
+}
+
+function hashEffectFilters(filters: SearchFilters): string {
+  return JSON.stringify(filters);
+}
+
+export function useSearchTranscripts(
+  query: string = "",
+  filters: SearchFilters = EMPTY_SEARCH_FILTERS,
+  options: UseSearchTranscriptsOptions = {
+    pageSize: 20,
+    page: PaginationPage(1),
+  },
+): UseSearchTranscriptsReturn {
+  const { pageSize, page } = options;
+
+  const [reloadCount, setReloadCount] = useState(0);
+
+  const api = useApi();
+  const abortControllerRef = useRef<AbortController>();
+
+  const [data, setData] = useState<{ results: SearchResult[]; total: number }>({
+    results: [],
+    total: 0,
+  });
+  const [error, setError] = useState<unknown>();
+  const [isLoading, setIsLoading] = useState(false);
+
+  const filterHash = hashEffectFilters(filters);
+
+  useEffect(() => {
+    if (!api) {
+      setData({ results: [], total: 0 });
+      setError(undefined);
+      setIsLoading(false);
+      return;
+    }
+
+    if (abortControllerRef.current) {
+      abortControllerRef.current.abort();
+    }
+
+    // note: the signal is only used to discard stale responses below; the
+    // underlying request itself is not cancelled
+    const abortController = new AbortController();
+    abortControllerRef.current = abortController;
+
+    const performSearch = async () => {
+      setIsLoading(true);
+
+      try {
+        const response = await api.v1TranscriptsSearch({
+          q: query || "",
+          limit: pageSize,
+          offset: paginationPageTo0Based(page) * pageSize,
+          roomId: filters.roomIds?.[0],
+          sourceKind: filters.sourceKind || undefined,
+        });
+
+        if (abortController.signal.aborted) return;
+        setData(response);
+        setError(undefined);
+      } catch (err: unknown) {
+        if ((err as Error).name === "AbortError") {
+          return;
+        }
+        if (abortController.signal.aborted) {
+          console.error("Aborted search threw an unexpected error", err);
+          return;
+        }
+
+        setError(err);
+      } finally {
+        if (!abortController.signal.aborted) {
+          setIsLoading(false);
+        }
+      }
+    };
+
+    void performSearch();
+
+    return () => {
+      abortController.abort();
+    };
+  }, [api,
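+    // filterHash is a stable JSON encoding of `filters`, so a new object with
+    // identical values does not retrigger the search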
query, page, filterHash, pageSize, reloadCount]); + + return { + results: data.results, + totalCount: data.total, + isLoading, + error, + reload: () => setReloadCount(reloadCount + 1), + }; +} diff --git a/www/app/(app)/transcripts/useTranscriptList.ts b/www/app/(app)/transcripts/useTranscriptList.ts deleted file mode 100644 index 3b449685..00000000 --- a/www/app/(app)/transcripts/useTranscriptList.ts +++ /dev/null @@ -1,59 +0,0 @@ -import { useEffect, useState } from "react"; -import { useError } from "../../(errors)/errorContext"; -import useApi from "../../lib/useApi"; -import { Page_GetTranscriptMinimal_, SourceKind } from "../../api"; - -type TranscriptList = { - response: Page_GetTranscriptMinimal_ | null; - loading: boolean; - error: Error | null; - refetch: () => void; -}; - -const useTranscriptList = ( - page: number, - sourceKind: SourceKind | null, - roomId: string | null, - searchTerm: string | null, -): TranscriptList => { - const [response, setResponse] = useState( - null, - ); - const [loading, setLoading] = useState(true); - const [error, setErrorState] = useState(null); - const { setError } = useError(); - const api = useApi(); - const [refetchCount, setRefetchCount] = useState(0); - - const refetch = () => { - setLoading(true); - setRefetchCount(refetchCount + 1); - }; - - useEffect(() => { - if (!api) return; - setLoading(true); - api - .v1TranscriptsList({ - page, - sourceKind, - roomId, - searchTerm, - size: 10, - }) - .then((response) => { - setResponse(response); - setLoading(false); - }) - .catch((err) => { - setResponse(null); - setLoading(false); - setError(err); - setErrorState(err); - }); - }, [api, page, refetchCount, roomId, searchTerm, sourceKind]); - - return { response, loading, error, refetch }; -}; - -export default useTranscriptList; diff --git a/www/app/api/schemas.gen.ts b/www/app/api/schemas.gen.ts index ac5010d0..7439241a 100644 --- a/www/app/api/schemas.gen.ts +++ b/www/app/api/schemas.gen.ts @@ -1002,7 +1002,7 @@ export const $SearchResponse = { }, query: { type: "string", - minLength: 1, + minLength: 0, title: "Query", description: "Search query text", }, @@ -1065,6 +1065,20 @@ export const $SearchResult = { ], title: "Room Id", }, + room_name: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Room Name", + }, + source_kind: { + $ref: "#/components/schemas/SourceKind", + }, created_at: { type: "string", title: "Created At", @@ -1101,10 +1115,18 @@ export const $SearchResult = { title: "Search Snippets", description: "Text snippets around search matches", }, + total_match_count: { + type: "integer", + minimum: 0, + title: "Total Match Count", + description: "Total number of matches found in the transcript", + default: 0, + }, }, type: "object", required: [ "id", + "source_kind", "created_at", "status", "rank", diff --git a/www/app/api/services.gen.ts b/www/app/api/services.gen.ts index 8bfbe299..31ba098c 100644 --- a/www/app/api/services.gen.ts +++ b/www/app/api/services.gen.ts @@ -286,6 +286,7 @@ export class DefaultService { * @param data.limit Results per page * @param data.offset Number of results to skip * @param data.roomId + * @param data.sourceKind * @returns SearchResponse Successful Response * @throws ApiError */ @@ -300,6 +301,7 @@ export class DefaultService { limit: data.limit, offset: data.offset, room_id: data.roomId, + source_kind: data.sourceKind, }, errors: { 422: "Validation Error", diff --git a/www/app/api/types.gen.ts b/www/app/api/types.gen.ts index 421fa414..9eae96a0 100644 --- 
a/www/app/api/types.gen.ts
+++ b/www/app/api/types.gen.ts
@@ -209,6 +209,8 @@ export type SearchResult = {
   title?: string | null;
   user_id?: string | null;
   room_id?: string | null;
+  room_name?: string | null;
+  source_kind: SourceKind;
   created_at: string;
   status: string;
   rank: number;
@@ -220,6 +222,10 @@ export type SearchResult = {
    * Text snippets around search matches
    */
   search_snippets: Array<string>;
+  /**
+   * Total number of matches found in the transcript
+   */
+  total_match_count?: number;
 };
 
 export type SourceKind = "room" | "live" | "file";
@@ -407,6 +413,7 @@ export type V1TranscriptsSearchData = {
    */
   q: string;
   roomId?: string | null;
+  sourceKind?: SourceKind | null;
 };
 
 export type V1TranscriptsSearchResponse = SearchResponse;
diff --git a/www/app/api/urls.ts b/www/app/api/urls.ts
new file mode 100644
index 00000000..bd0a910c
--- /dev/null
+++ b/www/app/api/urls.ts
@@ -0,0 +1,2 @@
+// TODO better connection with generated schema; it's duplication
+export const RECORD_A_MEETING_URL = "/transcripts/new" as const;
diff --git a/www/app/lib/textHighlight.tsx b/www/app/lib/textHighlight.tsx
new file mode 100644
index 00000000..6d903062
--- /dev/null
+++ b/www/app/lib/textHighlight.tsx
@@ -0,0 +1,62 @@
+/**
+ * Text highlighting and text fragment generation utilities
+ * Used for search result highlighting and deep linking with Chrome Text Fragments
+ */
+
+import React from "react";
+
+export interface HighlightResult {
+  text: string;
+  matches: string[];
+}
+
+/**
+ * Escapes special regex characters in a string
+ */
+function escapeRegex(str: string): string {
+  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
+export const highlightMatches = (
+  text: string,
+  query: string,
+): { match: string; index: number }[] => {
+  if (!query || !text) {
+    return [];
+  }
+
+  const queryWords = query.trim().split(/\s+/);
+
+  const regex = new RegExp(
+    `(${queryWords.map((word) => escapeRegex(word)).join("|")})`,
+    "gi",
+  );
+
+  return Array.from(text.matchAll(regex)).map((result) => ({
+    match: result[0],
+    index: result.index!,
+  }));
+};
+
+export function findFirstHighlight(text: string, query: string): string | null {
+  const matches = highlightMatches(text, query);
+  if (matches.length === 0) {
+    return null;
+  }
+  return matches[0].match;
+}
+
+export function generateTextFragment(
+  text: string,
+  query: string,
+): {
+  k: ":~:text";
+  v: string;
+} | null {
+  const firstMatch = findFirstHighlight(text, query);
+  if (!firstMatch) return null;
+  return {
+    k: ":~:text",
+    v: firstMatch,
+  };
+}
diff --git a/www/app/lib/utils.ts b/www/app/lib/utils.ts
index 6fd14bdb..80d0d91b 100644
--- a/www/app/lib/utils.ts
+++ b/www/app/lib/utils.ts
@@ -136,3 +136,10 @@ export function extractDomain(url) {
     return null;
   }
 }
+
+export function assertExists<T>(value: T | null | undefined, err?: string): T {
+  if (value === null || value === undefined) {
+    throw new Error(`Assertion failed: ${err ??
"value is null or undefined"}`); + } + return value; +} diff --git a/www/app/page.tsx b/www/app/page.tsx index a7caf00c..225fe877 100644 --- a/www/app/page.tsx +++ b/www/app/page.tsx @@ -1,6 +1,7 @@ "use client"; import { redirect } from "next/navigation"; +import { RECORD_A_MEETING_URL } from "./api/urls"; export default function Index() { - redirect("/transcripts/new"); + redirect(RECORD_A_MEETING_URL); } diff --git a/www/app/providers.tsx b/www/app/providers.tsx index dbab9d29..f0f1ea52 100644 --- a/www/app/providers.tsx +++ b/www/app/providers.tsx @@ -5,14 +5,17 @@ import system from "./styles/theme"; import { WherebyProvider } from "@whereby.com/browser-sdk/react"; import { Toaster } from "./components/ui/toaster"; +import { NuqsAdapter } from "nuqs/adapters/next/app"; export function Providers({ children }: { children: React.ReactNode }) { return ( - - - {children} - - - + + + + {children} + + + + ); } diff --git a/www/package.json b/www/package.json index b9c0107c..482a29f6 100644 --- a/www/package.json +++ b/www/package.json @@ -31,6 +31,7 @@ "next": "^14.2.30", "next-auth": "^4.24.7", "next-themes": "^0.4.6", + "nuqs": "^2.4.3", "postcss": "8.4.31", "prop-types": "^15.8.1", "react": "^18.2.0", diff --git a/www/pnpm-lock.yaml b/www/pnpm-lock.yaml index 32f9fd95..55aef9c8 100644 --- a/www/pnpm-lock.yaml +++ b/www/pnpm-lock.yaml @@ -67,6 +67,9 @@ importers: next-themes: specifier: ^0.4.6 version: 0.4.6(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + nuqs: + specifier: ^2.4.3 + version: 2.4.3(next@14.2.31(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(sass@1.90.0))(react@18.3.1) postcss: specifier: 8.4.31 version: 8.4.31 @@ -5436,6 +5439,12 @@ packages: } engines: { node: ">= 8" } + mitt@3.0.1: + resolution: + { + integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==, + } + mkdirp@0.5.6: resolution: { @@ -5660,6 +5669,27 @@ packages: } deprecated: This package is no longer supported. + nuqs@2.4.3: + resolution: + { + integrity: sha512-BgtlYpvRwLYiJuWzxt34q2bXu/AIS66sLU1QePIMr2LWkb+XH0vKXdbLSgn9t6p7QKzwI7f38rX3Wl9llTXQ8Q==, + } + peerDependencies: + "@remix-run/react": ">=2" + next: ">=14.2.0" + react: ">=18.2.0 || ^19.0.0-0" + react-router: ^6 || ^7 + react-router-dom: ^6 || ^7 + peerDependenciesMeta: + "@remix-run/react": + optional: true + next: + optional: true + react-router: + optional: true + react-router-dom: + optional: true + nypm@0.5.4: resolution: { @@ -11553,6 +11583,8 @@ snapshots: minipass: 3.3.6 yallist: 4.0.0 + mitt@3.0.1: {} + mkdirp@0.5.6: dependencies: minimist: 1.2.8 @@ -11674,6 +11706,13 @@ snapshots: gauge: 3.0.2 set-blocking: 2.0.0 + nuqs@2.4.3(next@14.2.31(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(sass@1.90.0))(react@18.3.1): + dependencies: + mitt: 3.0.1 + react: 18.3.1 + optionalDependencies: + next: 14.2.31(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(sass@1.90.0) + nypm@0.5.4: dependencies: citty: 0.1.6