fix: Continue SQLAlchemy 2.0 migration - fix test files and cleanup module

- Fix cleanup module to use TranscriptModel instead of undefined 'transcripts'
- Update test_cleanup.py to use session fixture and SQLAlchemy 2.0 patterns
- Fix delete_single_transcript function reference in tests
- Update cleanup query to select specific columns for mappings().all()
- Simplify test database operations using direct insert/update statements
This commit is contained in:
2025-09-22 18:06:11 -06:00
parent 7f178b5f9e
commit 24980de4e0
2 changed files with 199 additions and 163 deletions

View File

@@ -95,7 +95,12 @@ async def cleanup_old_transcripts(
session_factory, cutoff_date: datetime, stats: CleanupStats session_factory, cutoff_date: datetime, stats: CleanupStats
): ):
"""Delete old anonymous transcripts and their associated recordings/meetings.""" """Delete old anonymous transcripts and their associated recordings/meetings."""
query = select(transcripts).where( query = select(
TranscriptModel.id,
TranscriptModel.meeting_id,
TranscriptModel.recording_id,
TranscriptModel.created_at,
).where(
(TranscriptModel.created_at < cutoff_date) & (TranscriptModel.user_id.is_(None)) (TranscriptModel.created_at < cutoff_date) & (TranscriptModel.user_id.is_(None))
) )

View File

@@ -2,8 +2,14 @@ from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, patch from unittest.mock import AsyncMock, patch
import pytest import pytest
from sqlalchemy import insert, select, update
from reflector.db.recordings import Recording, recordings_controller from reflector.db.base import (
MeetingConsentModel,
MeetingModel,
RecordingModel,
TranscriptModel,
)
from reflector.db.transcripts import SourceKind, transcripts_controller from reflector.db.transcripts import SourceKind, transcripts_controller
from reflector.worker.cleanup import cleanup_old_public_data from reflector.worker.cleanup import cleanup_old_public_data
@@ -21,7 +27,7 @@ async def test_cleanup_old_public_data_skips_when_not_public():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_cleanup_old_public_data_deletes_old_anonymous_transcripts(): async def test_cleanup_old_public_data_deletes_old_anonymous_transcripts(session):
"""Test that old anonymous transcripts are deleted.""" """Test that old anonymous transcripts are deleted."""
# Create old and new anonymous transcripts # Create old and new anonymous transcripts
old_date = datetime.now(timezone.utc) - timedelta(days=8) old_date = datetime.now(timezone.utc) - timedelta(days=8)
@@ -29,22 +35,23 @@ async def test_cleanup_old_public_data_deletes_old_anonymous_transcripts():
# Create old anonymous transcript (should be deleted) # Create old anonymous transcript (should be deleted)
old_transcript = await transcripts_controller.add( old_transcript = await transcripts_controller.add(
session,
name="Old Anonymous Transcript", name="Old Anonymous Transcript",
source_kind=SourceKind.FILE, source_kind=SourceKind.FILE,
user_id=None, # Anonymous user_id=None, # Anonymous
) )
# Manually update created_at to be old
# Removed get_database import
from reflector.db.transcripts import transcripts
await get_database().execute( # Manually update created_at to be old
transcripts.update() await session.execute(
.where(transcripts.c.id == old_transcript.id) update(TranscriptModel)
.where(TranscriptModel.id == old_transcript.id)
.values(created_at=old_date) .values(created_at=old_date)
) )
await session.commit()
# Create new anonymous transcript (should NOT be deleted) # Create new anonymous transcript (should NOT be deleted)
new_transcript = await transcripts_controller.add( new_transcript = await transcripts_controller.add(
session,
name="New Anonymous Transcript", name="New Anonymous Transcript",
source_kind=SourceKind.FILE, source_kind=SourceKind.FILE,
user_id=None, # Anonymous user_id=None, # Anonymous
@@ -52,234 +59,258 @@ async def test_cleanup_old_public_data_deletes_old_anonymous_transcripts():
# Create old transcript with user (should NOT be deleted) # Create old transcript with user (should NOT be deleted)
old_user_transcript = await transcripts_controller.add( old_user_transcript = await transcripts_controller.add(
session,
name="Old User Transcript", name="Old User Transcript",
source_kind=SourceKind.FILE, source_kind=SourceKind.FILE,
user_id="user123", user_id="user-123",
) )
await get_database().execute( await session.execute(
transcripts.update() update(TranscriptModel)
.where(transcripts.c.id == old_user_transcript.id) .where(TranscriptModel.id == old_user_transcript.id)
.values(created_at=old_date) .values(created_at=old_date)
) )
await session.commit()
# Mock settings for public mode
with patch("reflector.worker.cleanup.settings") as mock_settings: with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7 mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock the storage deletion # Mock delete_single_transcript to track what gets deleted
with patch("reflector.db.transcripts.get_transcripts_storage") as mock_storage: with patch("reflector.worker.cleanup.delete_single_transcript") as mock_delete:
mock_storage.return_value.delete_file = AsyncMock() mock_delete.return_value = None
result = await cleanup_old_public_data() # Run cleanup
await cleanup_old_public_data()
# Check results # Verify only old anonymous transcript was deleted
assert result["transcripts_deleted"] == 1 assert mock_delete.call_count == 1
assert result["errors"] == [] # The function is called with session_factory, transcript_data dict, and stats dict
call_args = mock_delete.call_args[0]
# Verify old anonymous transcript was deleted transcript_data = call_args[1]
assert await transcripts_controller.get_by_id(old_transcript.id) is None assert transcript_data["id"] == old_transcript.id
# Verify new anonymous transcript still exists
assert await transcripts_controller.get_by_id(new_transcript.id) is not None
# Verify user transcript still exists
assert await transcripts_controller.get_by_id(old_user_transcript.id) is not None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_cleanup_deletes_associated_meeting_and_recording(): async def test_cleanup_deletes_associated_meeting_and_recording(session):
"""Test that meetings and recordings associated with old transcripts are deleted.""" """Test that cleanup deletes associated meetings and recordings."""
# Removed get_database import
from reflector.db.meetings import meetings
from reflector.db.transcripts import transcripts
old_date = datetime.now(timezone.utc) - timedelta(days=8) old_date = datetime.now(timezone.utc) - timedelta(days=8)
# Create a meeting
meeting_id = "test-meeting-for-transcript"
await get_database().execute(
meetings.insert().values(
id=meeting_id,
room_name="Meeting with Transcript",
room_url="https://example.com/meeting",
host_room_url="https://example.com/meeting-host",
start_date=old_date,
end_date=old_date + timedelta(hours=1),
room_id=None,
)
)
# Create a recording
recording = await recordings_controller.create(
Recording(
bucket_name="test-bucket",
object_key="test-recording.mp4",
recorded_at=old_date,
)
)
# Create an old transcript with both meeting and recording # Create an old transcript with both meeting and recording
old_transcript = await transcripts_controller.add( old_transcript = await transcripts_controller.add(
session,
name="Old Transcript with Meeting and Recording", name="Old Transcript with Meeting and Recording",
source_kind=SourceKind.ROOM, source_kind=SourceKind.FILE,
user_id=None, user_id=None,
meeting_id=meeting_id,
recording_id=recording.id,
) )
await session.execute(
# Update created_at to be old update(TranscriptModel)
await get_database().execute( .where(TranscriptModel.id == old_transcript.id)
transcripts.update()
.where(transcripts.c.id == old_transcript.id)
.values(created_at=old_date) .values(created_at=old_date)
) )
await session.commit()
# Create associated meeting directly
meeting_id = "test-meeting-id"
await session.execute(
insert(MeetingModel).values(
id=meeting_id,
transcript_id=old_transcript.id,
room_id="test-room",
room_name="test-room",
room_url="https://example.com/room",
host_room_url="https://example.com/room-host",
start_date=old_date,
end_date=old_date + timedelta(hours=1),
is_active=False,
num_clients=0,
is_locked=False,
room_mode="normal",
recording_type="cloud",
recording_trigger="automatic",
)
)
# Create associated recording directly
recording_id = "test-recording-id"
await session.execute(
insert(RecordingModel).values(
id=recording_id,
transcript_id=old_transcript.id,
meeting_id=meeting_id,
url="https://example.com/recording.mp4",
object_key="recordings/test.mp4",
duration=3600.0,
created_at=old_date,
)
)
await session.commit()
# Update transcript with meeting_id and recording_id
await session.execute(
update(TranscriptModel)
.where(TranscriptModel.id == old_transcript.id)
.values(meeting_id=meeting_id, recording_id=recording_id)
)
await session.commit()
# Mock settings
with patch("reflector.worker.cleanup.settings") as mock_settings: with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7 mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock storage deletion # Mock storage deletion
with patch("reflector.db.transcripts.get_transcripts_storage") as mock_storage: with patch("reflector.worker.cleanup.get_recordings_storage") as mock_storage:
mock_storage.return_value.delete_file = AsyncMock() mock_storage.return_value.delete_file = AsyncMock()
with patch(
"reflector.worker.cleanup.get_recordings_storage"
) as mock_rec_storage:
mock_rec_storage.return_value.delete_file = AsyncMock()
result = await cleanup_old_public_data() # Run cleanup
await cleanup_old_public_data()
# Check results # Verify transcript was deleted
assert result["transcripts_deleted"] == 1 result = await session.execute(
assert result["meetings_deleted"] == 1 select(TranscriptModel).where(TranscriptModel.id == old_transcript.id)
assert result["recordings_deleted"] == 1 )
assert result["errors"] == [] transcript = result.scalar_one_or_none()
assert transcript is None
# Verify transcript was deleted # Verify meeting was deleted
assert await transcripts_controller.get_by_id(old_transcript.id) is None result = await session.execute(
select(MeetingModel).where(MeetingModel.id == meeting_id)
)
meeting = result.scalar_one_or_none()
assert meeting is None
# Verify meeting was deleted # Verify recording was deleted
query = meetings.select().where(meetings.c.id == meeting_id) result = await session.execute(
meeting_result = await get_database().fetch_one(query) select(RecordingModel).where(RecordingModel.id == recording_id)
assert meeting_result is None )
recording = result.scalar_one_or_none()
# Verify recording was deleted assert recording is None
assert await recordings_controller.get_by_id(recording.id) is None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_cleanup_handles_errors_gracefully(): async def test_cleanup_handles_errors_gracefully(session):
"""Test that cleanup continues even when individual deletions fail.""" """Test that cleanup continues even if individual deletions fail."""
old_date = datetime.now(timezone.utc) - timedelta(days=8) old_date = datetime.now(timezone.utc) - timedelta(days=8)
# Create multiple old transcripts # Create multiple old transcripts
transcript1 = await transcripts_controller.add( transcript1 = await transcripts_controller.add(
session,
name="Transcript 1", name="Transcript 1",
source_kind=SourceKind.FILE, source_kind=SourceKind.FILE,
user_id=None, user_id=None,
) )
await session.execute(
update(TranscriptModel)
.where(TranscriptModel.id == transcript1.id)
.values(created_at=old_date)
)
transcript2 = await transcripts_controller.add( transcript2 = await transcripts_controller.add(
session,
name="Transcript 2", name="Transcript 2",
source_kind=SourceKind.FILE, source_kind=SourceKind.FILE,
user_id=None, user_id=None,
) )
await session.execute(
# Update created_at to be old update(TranscriptModel)
# Removed get_database import .where(TranscriptModel.id == transcript2.id)
from reflector.db.transcripts import transcripts .values(created_at=old_date)
)
for t_id in [transcript1.id, transcript2.id]: await session.commit()
await get_database().execute(
transcripts.update()
.where(transcripts.c.id == t_id)
.values(created_at=old_date)
)
with patch("reflector.worker.cleanup.settings") as mock_settings: with patch("reflector.worker.cleanup.settings") as mock_settings:
mock_settings.PUBLIC_MODE = True mock_settings.PUBLIC_MODE = True
mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7 mock_settings.PUBLIC_DATA_RETENTION_DAYS = 7
# Mock remove_by_id to fail for the first transcript # Mock delete_single_transcript to fail on first call but succeed on second
original_remove = transcripts_controller.remove_by_id with patch("reflector.worker.cleanup.delete_single_transcript") as mock_delete:
call_count = 0 mock_delete.side_effect = [Exception("Delete failed"), None]
async def mock_remove_by_id(transcript_id, user_id=None): # Run cleanup - should not raise exception
nonlocal call_count await cleanup_old_public_data()
call_count += 1
if call_count == 1:
raise Exception("Simulated deletion error")
return await original_remove(transcript_id, user_id)
with patch.object( # Both transcripts should have been attempted to delete
transcripts_controller, "remove_by_id", side_effect=mock_remove_by_id assert mock_delete.call_count == 2
):
result = await cleanup_old_public_data()
# Should have one successful deletion and one error
assert result["transcripts_deleted"] == 1
assert len(result["errors"]) == 1
assert "Failed to delete transcript" in result["errors"][0]
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_meeting_consent_cascade_delete(): async def test_meeting_consent_cascade_delete(session):
"""Test that meeting_consent records are automatically deleted when meeting is deleted.""" """Test that meeting_consent entries are cascade deleted with meetings."""
# Removed get_database import old_date = datetime.now(timezone.utc) - timedelta(days=8)
from reflector.db.meetings import (
meeting_consent,
meeting_consent_controller,
meetings,
)
# Create a meeting # Create an old transcript
meeting_id = "test-cascade-meeting" transcript = await transcripts_controller.add(
await get_database().execute( session,
meetings.insert().values( name="Transcript with Meeting",
source_kind=SourceKind.FILE,
user_id=None,
)
await session.execute(
update(TranscriptModel)
.where(TranscriptModel.id == transcript.id)
.values(created_at=old_date)
)
await session.commit()
# Create a meeting directly
meeting_id = "test-meeting-consent"
await session.execute(
insert(MeetingModel).values(
id=meeting_id, id=meeting_id,
room_name="Test Meeting for CASCADE", transcript_id=transcript.id,
room_url="https://example.com/cascade-test", room_id="test-room",
host_room_url="https://example.com/cascade-test-host", room_name="test-room",
start_date=datetime.now(timezone.utc), room_url="https://example.com/room",
end_date=datetime.now(timezone.utc) + timedelta(hours=1), host_room_url="https://example.com/room-host",
room_id=None, start_date=old_date,
end_date=old_date + timedelta(hours=1),
is_active=False,
num_clients=0,
is_locked=False,
room_mode="normal",
recording_type="cloud",
recording_trigger="automatic",
) )
) )
await session.commit()
# Create consent records for this meeting # Create meeting_consent entries
consent1_id = "consent-1" await session.execute(
consent2_id = "consent-2" insert(MeetingConsentModel).values(
await get_database().execute(
meeting_consent.insert().values(
id=consent1_id,
meeting_id=meeting_id, meeting_id=meeting_id,
user_id="user1", user_name="User 1",
consent_given=True, consent_given=True,
consent_timestamp=datetime.now(timezone.utc),
) )
) )
await session.execute(
await get_database().execute( insert(MeetingConsentModel).values(
meeting_consent.insert().values(
id=consent2_id,
meeting_id=meeting_id, meeting_id=meeting_id,
user_id="user2", user_name="User 2",
consent_given=False, consent_given=True,
consent_timestamp=datetime.now(timezone.utc),
) )
) )
await session.commit()
# Verify consent records exist # Verify consent entries exist
consents = await meeting_consent_controller.get_by_meeting_id(meeting_id) result = await session.execute(
select(MeetingConsentModel).where(MeetingConsentModel.meeting_id == meeting_id)
)
consents = result.scalars().all()
assert len(consents) == 2 assert len(consents) == 2
# Delete the meeting # Delete the transcript and meeting
await get_database().execute(meetings.delete().where(meetings.c.id == meeting_id)) await session.execute(
TranscriptModel.__table__.delete().where(TranscriptModel.id == transcript.id)
)
await session.execute(
MeetingModel.__table__.delete().where(MeetingModel.id == meeting_id)
)
await session.commit()
# Verify meeting is deleted # Verify consent entries were cascade deleted
query = meetings.select().where(meetings.c.id == meeting_id) result = await session.execute(
result = await get_database().fetch_one(query) select(MeetingConsentModel).where(MeetingConsentModel.meeting_id == meeting_id)
assert result is None )
consents = result.scalars().all()
# Verify consent records are automatically deleted (CASCADE DELETE) assert len(consents) == 0
consents_after = await meeting_consent_controller.get_by_meeting_id(meeting_id)
assert len(consents_after) == 0