Files
reflector/server/tests/test_webvtt_integration.py
Igor Loskutov 6fb5cb21c2 feat: search backend (#537)
* docs: transient docs

* chore: cleanup

* webvtt WIP

* webvtt field

* chore: webvtt tests comments

* chore: remove useless tests

* feat: search TASK.md

* feat: full text search by title/webvtt

* chore: search api task

* feat: search api

* feat: search API

* chore: rm task md

* chore: roll back unnecessary validators

* chore: pr review WIP

* chore: pr review WIP

* chore: pr review

* chore: top imports

* feat: better lint + ci

* feat: better lint + ci

* feat: better lint + ci

* feat: better lint + ci

* chore: lint

* chore: lint

* fix: db datetime definitions

* fix: flush() params

* fix: update transcript mutability expectation / test

* fix: update transcript mutability expectation / test

* chore: auto review

* chore: new controller extraction

* chore: new controller extraction

* chore: cleanup

* chore: review WIP

* chore: pr WIP

* chore: remove ci lint

* chore: openapi regeneration

* chore: openapi regeneration

* chore: postgres test doc

* fix: .dockerignore for arm binaries

* fix: .dockerignore for arm binaries

* fix: cap test loops

* fix: cap test loops

* fix: cap test loops

* fix: get_transcript_topics

* chore: remove flow.md docs and claude guidance

* chore: remove claude.md db doc

* chore: remove claude.md db doc

* chore: remove claude.md db doc

* chore: remove claude.md db doc
2025-08-13 10:03:38 -04:00

235 lines
7.4 KiB
Python

"""Integration tests for WebVTT auto-update functionality in Transcript model."""
import pytest
from reflector.db import database
from reflector.db.transcripts import (
SourceKind,
TranscriptController,
TranscriptTopic,
transcripts,
)
from reflector.processors.types import Word
@pytest.mark.asyncio
class TestWebVTTAutoUpdate:
    """Integration tests for the ``webvtt`` column of stored transcripts.

    Each test creates a transcript through ``TranscriptController``, changes
    its topics (or leaves them untouched), reads the raw row back through
    ``database`` and checks the ``webvtt`` value.  The transcript is always
    removed in a ``finally`` block so a failing assertion does not leave the
    row behind.
    """

    @staticmethod
    async def _create_transcript(controller):
        """Create the throwaway transcript used by every test in this class."""
        return await controller.add(
            name="Test Transcript",
            source_kind=SourceKind.FILE,
        )

    @staticmethod
    async def _fetch_webvtt(transcript_id):
        """Return the stored ``webvtt`` value, asserting that the row exists."""
        row = await database.fetch_one(
            transcripts.select().where(transcripts.c.id == transcript_id)
        )
        assert row is not None
        return row["webvtt"]

    async def test_webvtt_not_updated_on_transcript_creation_without_topics(self):
        """WebVTT should be None when creating transcript without topics."""
        controller = TranscriptController()
        transcript = await self._create_transcript(controller)

        try:
            assert await self._fetch_webvtt(transcript.id) is None
        finally:
            await controller.remove_by_id(transcript.id)

    async def test_webvtt_updated_on_upsert_topic(self):
        """WebVTT should update when upserting topics via upsert_topic method."""
        controller = TranscriptController()
        transcript = await self._create_transcript(controller)

        try:
            topic = TranscriptTopic(
                id="topic1",
                title="Test Topic",
                summary="Test summary",
                timestamp=0.0,
                words=[
                    Word(text="Hello", start=0.0, end=0.5, speaker=0),
                    Word(text=" world", start=0.5, end=1.0, speaker=0),
                ],
            )
            await controller.upsert_topic(transcript, topic)

            webvtt = await self._fetch_webvtt(transcript.id)
            assert webvtt is not None
            assert "WEBVTT" in webvtt
            assert "Hello world" in webvtt
            assert "<v Speaker0>" in webvtt
        finally:
            await controller.remove_by_id(transcript.id)

    async def test_webvtt_updated_on_direct_topics_update(self):
        """WebVTT should update when updating topics field directly."""
        controller = TranscriptController()
        transcript = await self._create_transcript(controller)

        try:
            # Plain dicts rather than model objects: this exercises the path
            # where the caller hands update() already-serialised topics.
            topics_data = [
                {
                    "id": "topic1",
                    "title": "First Topic",
                    "summary": "First sentence test",
                    "timestamp": 0.0,
                    "words": [
                        {"text": "First", "start": 0.0, "end": 0.5, "speaker": 0},
                        {"text": " sentence", "start": 0.5, "end": 1.0, "speaker": 0},
                    ],
                }
            ]
            await controller.update(transcript, {"topics": topics_data})

            webvtt = await self._fetch_webvtt(transcript.id)
            assert webvtt is not None
            assert "WEBVTT" in webvtt
            assert "First sentence" in webvtt
        finally:
            await controller.remove_by_id(transcript.id)

    async def test_webvtt_updated_manually_with_handle_topics_update(self):
        """WebVTT should update when model topics are dumped and passed to update().

        The topic is added on the in-memory transcript first, then the dumped
        topics are persisted with ``controller.update``.
        NOTE(review): the test name suggests ``update`` goes through
        ``_handle_topics_update``; that routing is not visible from this file.
        """
        controller = TranscriptController()
        transcript = await self._create_transcript(controller)

        try:
            topic1 = TranscriptTopic(
                id="topic1",
                title="Topic 1",
                summary="Manual test",
                timestamp=0.0,
                words=[
                    Word(text="Manual", start=0.0, end=0.5, speaker=0),
                    Word(text=" test", start=0.5, end=1.0, speaker=0),
                ],
            )
            transcript.upsert_topic(topic1)
            values = {"topics": transcript.topics_dump()}
            await controller.update(transcript, values)

            webvtt = await self._fetch_webvtt(transcript.id)
            assert webvtt is not None
            assert "WEBVTT" in webvtt
            assert "Manual test" in webvtt
            assert "<v Speaker0>" in webvtt
        finally:
            await controller.remove_by_id(transcript.id)

    async def test_webvtt_update_with_non_sequential_topics_fails(self):
        """Test that non-sequential topics raise assertion error."""
        controller = TranscriptController()
        transcript = await self._create_transcript(controller)

        try:
            # The words are deliberately out of order: the first one starts
            # at 2.0, the second at 1.0.
            topic1 = TranscriptTopic(
                id="topic1",
                title="Bad Topic",
                summary="Bad order test",
                timestamp=1.0,
                words=[
                    Word(text="Second", start=2.0, end=2.5, speaker=0),
                    Word(text="First", start=1.0, end=1.5, speaker=0),
                ],
            )
            transcript.upsert_topic(topic1)
            values = {"topics": transcript.topics_dump()}

            with pytest.raises(AssertionError) as exc_info:
                TranscriptController._handle_topics_update(values)

            # Keep this outside the ``with`` block: anything placed after the
            # raising call inside the block would never execute.
            assert "Words are not in sequence" in str(exc_info.value)
        finally:
            await controller.remove_by_id(transcript.id)

    async def test_multiple_speakers_in_webvtt(self):
        """Test WebVTT generation with multiple speakers."""
        controller = TranscriptController()
        transcript = await self._create_transcript(controller)

        try:
            topic = TranscriptTopic(
                id="topic1",
                title="Multi Speaker",
                summary="Multi speaker test",
                timestamp=0.0,
                words=[
                    Word(text="Hello", start=0.0, end=0.5, speaker=0),
                    Word(text="Hi", start=1.0, end=1.5, speaker=1),
                    Word(text="Goodbye", start=2.0, end=2.5, speaker=0),
                ],
            )
            transcript.upsert_topic(topic)
            values = {"topics": transcript.topics_dump()}
            await controller.update(transcript, values)

            webvtt = await self._fetch_webvtt(transcript.id)
            assert webvtt is not None
            assert "<v Speaker0>" in webvtt
            assert "<v Speaker1>" in webvtt
            assert "Hello" in webvtt
            assert "Hi" in webvtt
            assert "Goodbye" in webvtt
        finally:
            await controller.remove_by_id(transcript.id)