feat: search frontend (#551)

* feat: better highlight

* feat(search): add long_summary to search vector for improved search results

- Update search vector to include long_summary with weight B (between title A and webvtt C)
- Modify SearchController to fetch long_summary and prioritize its snippets
- Generate snippets from long_summary first (max 2), then from webvtt for remaining slots
- Add comprehensive tests for long_summary search functionality
- Create migration to update search_vector_en column in PostgreSQL

This improves search quality by including summarized content which often contains
key topics and themes that may not be explicitly mentioned in the transcript.

* fix: address code review feedback for search enhancements

- Fix test file inconsistencies by removing references to non-existent model fields
  - Comment out tests for unimplemented features (room_ids, status filters, date ranges)
  - Update tests to only use currently available fields (room_id singular, no room_name/processing_status)
  - Mark future functionality tests with @pytest.mark.skip

- Make snippet counts configurable
  - Add LONG_SUMMARY_MAX_SNIPPETS constant (default: 2)
  - Replace hardcoded value with configurable constant

- Improve error handling consistency in WebVTT parsing
  - Use different log levels for different error types (debug for malformed, warning for decode, error for unexpected)
  - Add catch-all exception handler for unexpected errors
  - Include stack trace for critical errors

All existing tests pass with these changes.

* fix: correct datetime test to include required duration field

* feat: better highlight

* feat: search room names

* feat: acknowledge deleted room

* feat: search filters fix and rank removal

* chore: minor refactoring

* feat: better matches frontend

* chore: self-review (vibe)

* chore: self-review WIP

* chore: self-review WIP

* chore: self-review WIP

* chore: self-review WIP

* chore: self-review WIP

* chore: self-review WIP

* chore: self-review WIP

* remove swc (vibe)

* search url query sync (vibe)

* search url query sync (vibe)

* better casts and cap while

* PR review + simplify frontend hook

* pr: remove search db timeouts

* cleanup tests

* tests cleanup

* frontend cleanup

* index declarations

* refactor frontend (self-review)

* fix search pagination

* clear "x" for search input

* pagination max pages fix

* chore: cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* lockfile

* pr review
This commit is contained in:
Igor Loskutov
2025-08-20 20:56:45 -04:00
committed by GitHub
parent fe5d344cff
commit 009590c080
32 changed files with 2311 additions and 618 deletions

View File

@@ -1,32 +0,0 @@
[
{
"id": "27c07e49-d7a3-4b86-905c-f1a047366f91",
"title": "Issue one",
"summary": "The team discusses the first issue in the list",
"timestamp": 0.0,
"transcript": "",
"duration": 33,
"segments": [
{
"text": "Let's start with issue one, Alice you've been working on that, can you give an update ?",
"start": 0.0,
"speaker": 0
},
{
"text": "Yes, I've run into an issue with the task system but Bob helped me out and I have a POC ready, should I present it now ?",
"start": 0.38,
"speaker": 1
},
{
"text": "Yeah, I had to modify the task system because it didn't account for incoming blobs",
"start": 4.5,
"speaker": 2
},
{
"text": "Cool, yeah lets see it",
"start": 5.96,
"speaker": 0
}
]
}
]

View File

@@ -11,6 +11,7 @@ import useWebRTC from "./useWebRTC";
import useAudioDevice from "./useAudioDevice";
import { Box, Flex, IconButton, Menu, RadioGroup } from "@chakra-ui/react";
import { LuScreenShare, LuMic, LuPlay, LuCircleStop } from "react-icons/lu";
import { RECORD_A_MEETING_URL } from "../../api/urls";
type RecorderProps = {
transcriptId: string;
@@ -46,7 +47,7 @@ export default function Recorder(props: RecorderProps) {
location.href = "";
break;
case ",":
location.href = "/transcripts/new";
location.href = RECORD_A_MEETING_URL;
break;
case "!":
if (record.isRecording()) return;

View File

@@ -0,0 +1,123 @@
// TODO: this hook is a stopgap. Replace it with an established state-management / data-fetching solution instead of maintaining a hand-rolled one.
import { useEffect, useRef, useState } from "react";
import { SearchResult, SourceKind } from "../../api";
import useApi from "../../lib/useApi";
import {
PaginationPage,
paginationPageTo0Based,
} from "../browse/_components/Pagination";
/** Optional filters applied to a transcript search by `useSearchTranscripts`. */
interface SearchFilters {
// Rooms to restrict the search to, or `null`.
// NOTE: `useSearchTranscripts` currently forwards only the first entry (`roomIds?.[0]`) to the API.
roomIds: readonly string[] | null;
// Source kind to restrict the search to; a falsy value is sent to the API as `undefined`.
sourceKind: SourceKind | null;
}
/** Filters with every field set to `null`; used as the default `filters` argument of `useSearchTranscripts`. */
const EMPTY_SEARCH_FILTERS: SearchFilters = {
roomIds: null,
sourceKind: null,
};
/** Pagination options accepted by `useSearchTranscripts`. */
type UseSearchTranscriptsOptions = {
// Number of results per request; sent to the API as `limit`.
pageSize: number;
// Page to fetch; converted with `paginationPageTo0Based` and multiplied by `pageSize` to get the request `offset`.
page: PaginationPage;
};
/** Value returned by `useSearchTranscripts`. */
interface UseSearchTranscriptsReturn {
// Results of the most recent successful search (initially an empty array).
results: SearchResult[];
// The `total` field of the most recent successful search response (initially 0).
totalCount: number;
// True while a search request that has not been superseded is in flight.
isLoading: boolean;
// Error from the most recent failed search; cleared after a successful one.
error: unknown;
// Re-runs the search with the current arguments.
reload: () => void;
}
/**
 * Builds a string key for a set of search filters so they can be used as a
 * `useEffect` dependency by value rather than by object identity.
 *
 * The fields are serialized in a fixed order, so two filter objects holding
 * the same values produce the same key regardless of the order in which
 * their properties were defined.
 *
 * @param filters - Filters to derive the key from.
 * @returns A string that is equal for filters with equal field values.
 */
function hashEffectFilters(filters: SearchFilters): string {
  // `?? null` folds a missing field into the same key as an explicit `null`.
  return JSON.stringify([filters.roomIds ?? null, filters.sourceKind ?? null]);
}
/**
 * React hook that runs a transcript search and exposes its state.
 *
 * A new search is issued whenever the API client, query, page, page size or
 * filter values change, or when `reload` is called. Responses belonging to a
 * superseded search are ignored.
 *
 * @param query - Free-text search query; defaults to an empty string.
 * @param filters - Filters to apply; only the first entry of `roomIds` is
 *   forwarded to the API.
 * @param options - Page size and page to request.
 * @returns The latest results, total count, loading flag, error and a
 *   `reload` callback.
 */
export function useSearchTranscripts(
  query: string = "",
  filters: SearchFilters = EMPTY_SEARCH_FILTERS,
  options: UseSearchTranscriptsOptions = {
    pageSize: 20,
    page: PaginationPage(1),
  },
): UseSearchTranscriptsReturn {
  const { pageSize, page } = options;
  const [reloadCount, setReloadCount] = useState(0);
  const api = useApi();
  const abortControllerRef = useRef<AbortController>();
  const [data, setData] = useState<{ results: SearchResult[]; total: number }>({
    results: [],
    total: 0,
  });
  // `unknown` matches the declared return contract and forces callers to narrow.
  const [error, setError] = useState<unknown>();
  const [isLoading, setIsLoading] = useState(false);
  // Filters are compared by value so a new object with equal contents does
  // not trigger another request.
  const filterHash = hashEffectFilters(filters);

  useEffect(() => {
    if (!api) {
      // No API client available: reset to the empty state.
      setData({ results: [], total: 0 });
      setError(undefined);
      setIsLoading(false);
      return;
    }

    // Mark any search that is still in flight as superseded.
    if (abortControllerRef.current) {
      abortControllerRef.current.abort();
    }
    const abortController = new AbortController();
    abortControllerRef.current = abortController;

    const performSearch = async (): Promise<void> => {
      setIsLoading(true);
      try {
        const response = await api.v1TranscriptsSearch({
          q: query || "",
          limit: pageSize,
          offset: paginationPageTo0Based(page) * pageSize,
          roomId: filters.roomIds?.[0],
          sourceKind: filters.sourceKind || undefined,
        });
        // Drop the response if a newer search has started in the meantime.
        if (abortController.signal.aborted) return;
        setData(response);
        setError(undefined);
      } catch (err: unknown) {
        // Optional chaining keeps a `null`/`undefined` rejection value from
        // throwing inside the catch block.
        const errorName = (err as { name?: unknown } | null | undefined)?.name;
        if (errorName === "AbortError") {
          return;
        }
        if (abortController.signal.aborted) {
          console.error("Aborted search but error", err);
          return;
        }
        setError(err);
      } finally {
        // Only the current search may clear the loading flag.
        if (!abortController.signal.aborted) {
          setIsLoading(false);
        }
      }
    };

    // `performSearch` handles its own errors, so the promise is intentionally
    // not awaited.
    void performSearch();

    return () => {
      abortController.abort();
    };
    // `filters` is tracked through `filterHash` (by value, not identity).
  }, [api, query, page, filterHash, pageSize, reloadCount]);

  return {
    results: data.results,
    totalCount: data.total,
    isLoading,
    error,
    // Functional update: a `reload` reference captured in an earlier render
    // still bumps the counter instead of re-setting a stale value.
    reload: () => setReloadCount((count) => count + 1),
  };
}

View File

@@ -1,59 +0,0 @@
import { useEffect, useState } from "react";
import { useError } from "../../(errors)/errorContext";
import useApi from "../../lib/useApi";
import { Page_GetTranscriptMinimal_, SourceKind } from "../../api";
/** State returned by `useTranscriptList`. */
type TranscriptList = {
// Last page returned by the API; `null` initially and after a failed request.
response: Page_GetTranscriptMinimal_ | null;
// True from the start of a request until it resolves or rejects (initially true).
loading: boolean;
// Error from a failed request, or `null` if none has been recorded.
error: Error | null;
// Re-issues the request with the current arguments.
refetch: () => void;
};
/**
 * React hook that loads one page of the transcript list.
 *
 * A request is issued whenever the API client or any argument changes, or
 * when `refetch` is called. Results of superseded requests are ignored so an
 * out-of-order response cannot overwrite newer state.
 *
 * @param page - Page number passed to the API.
 * @param sourceKind - Source kind filter, or `null`.
 * @param roomId - Room filter, or `null`.
 * @param searchTerm - Search term, or `null`.
 * @returns The current response, loading flag, last error and a `refetch`
 *   callback.
 */
const useTranscriptList = (
  page: number,
  sourceKind: SourceKind | null,
  roomId: string | null,
  searchTerm: string | null,
): TranscriptList => {
  const [response, setResponse] = useState<Page_GetTranscriptMinimal_ | null>(
    null,
  );
  const [loading, setLoading] = useState<boolean>(true);
  const [error, setErrorState] = useState<Error | null>(null);
  const { setError } = useError();
  const api = useApi();
  const [refetchCount, setRefetchCount] = useState(0);

  const refetch = () => {
    setLoading(true);
    // Functional update: a `refetch` reference from an earlier render still
    // bumps the counter instead of re-setting a stale value.
    setRefetchCount((count) => count + 1);
  };

  useEffect(() => {
    if (!api) return;

    // Flipped by the cleanup below once this request has been superseded.
    let isStale = false;
    setLoading(true);
    api
      .v1TranscriptsList({
        page,
        sourceKind,
        roomId,
        searchTerm,
        size: 10,
      })
      .then((result) => {
        if (isStale) return;
        setResponse(result);
        // Clear an error left over from an earlier failed request.
        setErrorState(null);
        setLoading(false);
      })
      .catch((err) => {
        if (isStale) return;
        setResponse(null);
        setLoading(false);
        // Report to the shared error context and keep a local copy.
        setError(err);
        setErrorState(err);
      });

    return () => {
      isStale = true;
    };
  }, [api, page, refetchCount, roomId, searchTerm, sourceKind]);

  return { response, loading, error, refetch };
};
export default useTranscriptList;