mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
feat: search backend (#537)
* docs: transient docs * chore: cleanup * webvtt WIP * webvtt field * chore: webvtt tests comments * chore: remove useless tests * feat: search TASK.md * feat: full text search by title/webvtt * chore: search api task * feat: search api * feat: search API * chore: rm task md * chore: roll back unnecessary validators * chore: pr review WIP * chore: pr review WIP * chore: pr review * chore: top imports * feat: better lint + ci * feat: better lint + ci * feat: better lint + ci * feat: better lint + ci * chore: lint * chore: lint * fix: db datetime definitions * fix: flush() params * fix: update transcript mutability expectation / test * fix: update transcript mutability expectation / test * chore: auto review * chore: new controller extraction * chore: new controller extraction * chore: cleanup * chore: review WIP * chore: pr WIP * chore: remove ci lint * chore: openapi regeneration * chore: openapi regeneration * chore: postgres test doc * fix: .dockerignore for arm binaries * fix: .dockerignore for arm binaries * fix: cap test loops * fix: cap test loops * fix: cap test loops * fix: get_transcript_topics * chore: remove flow.md docs and claude guidance * chore: remove claude.md db doc * chore: remove claude.md db doc * chore: remove claude.md db doc * chore: remove claude.md db doc
This commit is contained in:
234
server/tests/test_webvtt_integration.py
Normal file
234
server/tests/test_webvtt_integration.py
Normal file
@@ -0,0 +1,234 @@
|
||||
"""Integration tests for WebVTT auto-update functionality in Transcript model."""
|
||||
|
||||
import pytest
|
||||
|
||||
from reflector.db import database
|
||||
from reflector.db.transcripts import (
|
||||
SourceKind,
|
||||
TranscriptController,
|
||||
TranscriptTopic,
|
||||
transcripts,
|
||||
)
|
||||
from reflector.processors.types import Word
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestWebVTTAutoUpdate:
    """Check the stored ``webvtt`` column as a transcript gains topics.

    Each test adds its own transcript through ``TranscriptController`` and
    removes it again in a ``finally`` clause, so a failing assertion does not
    leave the row behind.
    """

    async def test_webvtt_not_updated_on_transcript_creation_without_topics(self):
        """A transcript added without topics is stored with a null ``webvtt``."""
        ctrl = TranscriptController()
        created = await ctrl.add(name="Test Transcript", source_kind=SourceKind.FILE)

        try:
            by_id = transcripts.select().where(transcripts.c.id == created.id)
            row = await database.fetch_one(by_id)

            assert row is not None
            assert row["webvtt"] is None
        finally:
            await ctrl.remove_by_id(created.id)

    async def test_webvtt_updated_on_upsert_topic(self):
        """``controller.upsert_topic`` must leave WebVTT text in the stored row."""
        ctrl = TranscriptController()
        created = await ctrl.add(name="Test Transcript", source_kind=SourceKind.FILE)

        try:
            spoken = [
                Word(text="Hello", start=0.0, end=0.5, speaker=0),
                Word(text=" world", start=0.5, end=1.0, speaker=0),
            ]
            topic = TranscriptTopic(
                id="topic1",
                title="Test Topic",
                summary="Test summary",
                timestamp=0.0,
                words=spoken,
            )

            await ctrl.upsert_topic(created, topic)

            # Read the raw row so the assertion sees what was persisted.
            by_id = transcripts.select().where(transcripts.c.id == created.id)
            row = await database.fetch_one(by_id)

            assert row is not None
            vtt = row["webvtt"]

            assert vtt is not None
            for fragment in ("WEBVTT", "Hello world", "<v Speaker0>"):
                assert fragment in vtt
        finally:
            await ctrl.remove_by_id(created.id)

    async def test_webvtt_updated_on_direct_topics_update(self):
        """Passing plain topic dicts to ``controller.update`` must store WebVTT."""
        ctrl = TranscriptController()
        created = await ctrl.add(name="Test Transcript", source_kind=SourceKind.FILE)

        try:
            spoken = [
                {"text": "First", "start": 0.0, "end": 0.5, "speaker": 0},
                {"text": " sentence", "start": 0.5, "end": 1.0, "speaker": 0},
            ]
            first_topic = {
                "id": "topic1",
                "title": "First Topic",
                "summary": "First sentence test",
                "timestamp": 0.0,
                "words": spoken,
            }

            await ctrl.update(created, {"topics": [first_topic]})

            # Read the raw row so the assertion sees what was persisted.
            by_id = transcripts.select().where(transcripts.c.id == created.id)
            row = await database.fetch_one(by_id)

            assert row is not None
            vtt = row["webvtt"]

            assert vtt is not None
            for fragment in ("WEBVTT", "First sentence"):
                assert fragment in vtt
        finally:
            await ctrl.remove_by_id(created.id)

    async def test_webvtt_updated_manually_with_handle_topics_update(self):
        """Persisting ``topics_dump()`` after an in-memory upsert must store WebVTT.

        The topic is first attached to the in-memory transcript with
        ``transcript.upsert_topic``; the dumped topics are then written
        through ``controller.update``.
        """
        ctrl = TranscriptController()
        created = await ctrl.add(name="Test Transcript", source_kind=SourceKind.FILE)

        try:
            spoken = [
                Word(text="Manual", start=0.0, end=0.5, speaker=0),
                Word(text=" test", start=0.5, end=1.0, speaker=0),
            ]
            topic = TranscriptTopic(
                id="topic1",
                title="Topic 1",
                summary="Manual test",
                timestamp=0.0,
                words=spoken,
            )

            created.upsert_topic(topic)
            payload = {"topics": created.topics_dump()}

            await ctrl.update(created, payload)

            # Read the raw row so the assertion sees what was persisted.
            by_id = transcripts.select().where(transcripts.c.id == created.id)
            row = await database.fetch_one(by_id)

            assert row is not None
            vtt = row["webvtt"]

            assert vtt is not None
            for fragment in ("WEBVTT", "Manual test", "<v Speaker0>"):
                assert fragment in vtt
        finally:
            await ctrl.remove_by_id(created.id)

    async def test_webvtt_update_with_non_sequential_topics_fails(self):
        """Out-of-order word timings must make ``_handle_topics_update`` assert."""
        ctrl = TranscriptController()
        created = await ctrl.add(name="Test Transcript", source_kind=SourceKind.FILE)

        try:
            # The second word starts before the first one: deliberately unordered.
            out_of_order = [
                Word(text="Second", start=2.0, end=2.5, speaker=0),
                Word(text="First", start=1.0, end=1.5, speaker=0),
            ]
            topic = TranscriptTopic(
                id="topic1",
                title="Bad Topic",
                summary="Bad order test",
                timestamp=1.0,
                words=out_of_order,
            )

            created.upsert_topic(topic)
            payload = {"topics": created.topics_dump()}

            with pytest.raises(AssertionError) as raised:
                TranscriptController._handle_topics_update(payload)

            assert "Words are not in sequence" in str(raised.value)
        finally:
            await ctrl.remove_by_id(created.id)

    async def test_multiple_speakers_in_webvtt(self):
        """Words from two speakers must yield a voice tag for each speaker."""
        ctrl = TranscriptController()
        created = await ctrl.add(name="Test Transcript", source_kind=SourceKind.FILE)

        try:
            spoken = [
                Word(text="Hello", start=0.0, end=0.5, speaker=0),
                Word(text="Hi", start=1.0, end=1.5, speaker=1),
                Word(text="Goodbye", start=2.0, end=2.5, speaker=0),
            ]
            topic = TranscriptTopic(
                id="topic1",
                title="Multi Speaker",
                summary="Multi speaker test",
                timestamp=0.0,
                words=spoken,
            )

            created.upsert_topic(topic)
            payload = {"topics": created.topics_dump()}

            await ctrl.update(created, payload)

            # Read the raw row so the assertion sees what was persisted.
            by_id = transcripts.select().where(transcripts.c.id == created.id)
            row = await database.fetch_one(by_id)

            assert row is not None
            vtt = row["webvtt"]

            assert vtt is not None
            expected_fragments = (
                "<v Speaker0>",
                "<v Speaker1>",
                "Hello",
                "Hi",
                "Goodbye",
            )
            for fragment in expected_fragments:
                assert fragment in vtt
        finally:
            await ctrl.remove_by_id(created.id)
|
||||
Reference in New Issue
Block a user