Files
reflector/server/tests/test_cleanup.py
Igor Monadical 79f161436e chore: meeting user id removal and room id requirement (#635)
* chore: remove meeting user id and make meeting room id required

* meeting room_id optional

* orphaned meeting room ids DATA migration

* ci fix

* fix meeting_room_id_fkey downgrade

* fix migration rollback

* fix: put index back (meeting room id)

* fix: put index back (meeting room id)

* fix: put index back (meeting room id)

* remove noop migrations

---------

Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
2025-09-12 13:07:58 -04:00

286 lines
9.6 KiB
Python

from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, patch
import pytest
from reflector.db.recordings import Recording, recordings_controller
from reflector.db.transcripts import SourceKind, transcripts_controller
from reflector.worker.cleanup import cleanup_old_public_data
@pytest.mark.asyncio
async def test_cleanup_old_public_data_skips_when_not_public():
"""Test that cleanup is skipped when PUBLIC_MODE is False."""
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = False
result = await cleanup_old_public_data()
# Should return early without doing anything
assert result is None
@pytest.mark.asyncio
async def test_cleanup_old_public_data_deletes_old_anonymous_transcripts():
"""Test that old anonymous transcripts are deleted."""
# Create old and new anonymous transcripts
old_date = datetime.now(timezone.utc) - timedelta(days=8)
new_date = datetime.now(timezone.utc) - timedelta(days=2)
# Create old anonymous transcript (should be deleted)
old_transcript = await transcripts_controller.add(
name="Old Anonymous Transcript",
source_kind=SourceKind.FILE,
user_id=None, # Anonymous
)
# Manually update created_at to be old
from reflector.db import get_database
from reflector.db.transcripts import transcripts
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == old_transcript.id)
.values(created_at=old_date)
)
# Create new anonymous transcript (should NOT be deleted)
new_transcript = await transcripts_controller.add(
name="New Anonymous Transcript",
source_kind=SourceKind.FILE,
user_id=None, # Anonymous
)
# Create old transcript with user (should NOT be deleted)
old_user_transcript = await transcripts_controller.add(
name="Old User Transcript",
source_kind=SourceKind.FILE,
user_id="user123",
)
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == old_user_transcript.id)
.values(created_at=old_date)
)
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock the storage deletion
with patch("reflector.db.transcripts.get_transcripts_storage") as mock_storage:
mock_storage.return_value.delete_file = AsyncMock()
result = await cleanup_old_public_data()
# Check results
assert result["transcripts_deleted"] == 1
assert result["errors"] == []
# Verify old anonymous transcript was deleted
assert await transcripts_controller.get_by_id(old_transcript.id) is None
# Verify new anonymous transcript still exists
assert await transcripts_controller.get_by_id(new_transcript.id) is not None
# Verify user transcript still exists
assert await transcripts_controller.get_by_id(old_user_transcript.id) is not None
@pytest.mark.asyncio
async def test_cleanup_deletes_associated_meeting_and_recording():
"""Test that meetings and recordings associated with old transcripts are deleted."""
from reflector.db import get_database
from reflector.db.meetings import meetings
from reflector.db.transcripts import transcripts
old_date = datetime.now(timezone.utc) - timedelta(days=8)
# Create a meeting
meeting_id = "test-meeting-for-transcript"
await get_database().execute(
meetings.insert().values(
id=meeting_id,
room_name="Meeting with Transcript",
room_url="https://example.com/meeting",
host_room_url="https://example.com/meeting-host",
start_date=old_date,
end_date=old_date + timedelta(hours=1),
room_id=None,
)
)
# Create a recording
recording = await recordings_controller.create(
Recording(
bucket_name="test-bucket",
object_key="test-recording.mp4",
recorded_at=old_date,
)
)
# Create an old transcript with both meeting and recording
old_transcript = await transcripts_controller.add(
name="Old Transcript with Meeting and Recording",
source_kind=SourceKind.ROOM,
user_id=None,
meeting_id=meeting_id,
recording_id=recording.id,
)
# Update created_at to be old
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == old_transcript.id)
.values(created_at=old_date)
)
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock storage deletion
with patch("reflector.db.transcripts.get_transcripts_storage") as mock_storage:
mock_storage.return_value.delete_file = AsyncMock()
with patch(
"reflector.worker.cleanup.get_recordings_storage"
) as mock_rec_storage:
mock_rec_storage.return_value.delete_file = AsyncMock()
result = await cleanup_old_public_data()
# Check results
assert result["transcripts_deleted"] == 1
assert result["meetings_deleted"] == 1
assert result["recordings_deleted"] == 1
assert result["errors"] == []
# Verify transcript was deleted
assert await transcripts_controller.get_by_id(old_transcript.id) is None
# Verify meeting was deleted
query = meetings.select().where(meetings.c.id == meeting_id)
meeting_result = await get_database().fetch_one(query)
assert meeting_result is None
# Verify recording was deleted
assert await recordings_controller.get_by_id(recording.id) is None
@pytest.mark.asyncio
async def test_cleanup_handles_errors_gracefully():
"""Test that cleanup continues even when individual deletions fail."""
old_date = datetime.now(timezone.utc) - timedelta(days=8)
# Create multiple old transcripts
transcript1 = await transcripts_controller.add(
name="Transcript 1",
source_kind=SourceKind.FILE,
user_id=None,
)
transcript2 = await transcripts_controller.add(
name="Transcript 2",
source_kind=SourceKind.FILE,
user_id=None,
)
# Update created_at to be old
from reflector.db import get_database
from reflector.db.transcripts import transcripts
for t_id in [transcript1.id, transcript2.id]:
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == t_id)
.values(created_at=old_date)
)
with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock remove_by_id to fail for the first transcript
original_remove = transcripts_controller.remove_by_id
call_count = 0
async def mock_remove_by_id(transcript_id, user_id=None):
nonlocal call_count
call_count += 1
if call_count == 1:
raise Exception("Simulated deletion error")
return await original_remove(transcript_id, user_id)
with patch.object(
transcripts_controller, "remove_by_id", side_effect=mock_remove_by_id
):
result = await cleanup_old_public_data()
# Should have one successful deletion and one error
assert result["transcripts_deleted"] == 1
assert len(result["errors"]) == 1
assert "Failed to delete transcript" in result["errors"][0]
@pytest.mark.asyncio
async def test_meeting_consent_cascade_delete():
"""Test that meeting_consent records are automatically deleted when meeting is deleted."""
from reflector.db import get_database
from reflector.db.meetings import (
meeting_consent,
meeting_consent_controller,
meetings,
)
# Create a meeting
meeting_id = "test-cascade-meeting"
await get_database().execute(
meetings.insert().values(
id=meeting_id,
room_name="Test Meeting for CASCADE",
room_url="https://example.com/cascade-test",
host_room_url="https://example.com/cascade-test-host",
start_date=datetime.now(timezone.utc),
end_date=datetime.now(timezone.utc) + timedelta(hours=1),
room_id=None,
)
)
# Create consent records for this meeting
consent1_id = "consent-1"
consent2_id = "consent-2"
await get_database().execute(
meeting_consent.insert().values(
id=consent1_id,
meeting_id=meeting_id,
user_id="user1",
consent_given=True,
consent_timestamp=datetime.now(timezone.utc),
)
)
await get_database().execute(
meeting_consent.insert().values(
id=consent2_id,
meeting_id=meeting_id,
user_id="user2",
consent_given=False,
consent_timestamp=datetime.now(timezone.utc),
)
)
# Verify consent records exist
consents = await meeting_consent_controller.get_by_meeting_id(meeting_id)
assert len(consents) == 2
# Delete the meeting
await get_database().execute(meetings.delete().where(meetings.c.id == meeting_id))
# Verify meeting is deleted
query = meetings.select().where(meetings.c.id == meeting_id)
result = await get_database().fetch_one(query)
assert result is None
# Verify consent records are automatically deleted (CASCADE DELETE)
consents_after = await meeting_consent_controller.get_by_meeting_id(meeting_id)
assert len(consents_after) == 0