# Patch overview (free text ahead of the first file header; not part of any hunk).
#
# 1. .github/workflows/db_migrations.yml (new file)
#    Adds the "Test Database Migrations" workflow. Job `test-migrations` runs from ./server:
#    `alembic upgrade head`, then `alembic downgrade base`, then `alembic upgrade head` again.
#    Triggered on push / pull_request touching server/migrations/**, server/reflector/db/**,
#    server/alembic.ini, or the workflow file itself.
# 2. server/migrations/versions/62dea3db63a5_add_room_options.py
#    upgrade() now creates the "meeting" and "room" tables (including is_locked, room_mode,
#    recording_type, recording_trigger) instead of adding those four columns to each table;
#    downgrade() drops both tables instead of dropping the columns.
# 3. server/migrations/versions/99365b0cd87b_add_title_short_and_long_summary_and_.py
#    Quote/formatting normalisation; downgrade() now performs the long_summary -> summary
#    rename inside op.batch_alter_table("transcript").
# 4. server/migrations/versions/b3df9681cae9_*
#    The "add source and target language" file is deleted and replaced by "Add transcript table"
#    with the same revision id (b3df9681cae9) and down_revision (543ed284d69a). upgrade() creates
#    "transcript" with source_language / target_language included; downgrade() drops the table.
# 5. server/migrations/versions/d3ff3a39297f_add_recordings.py
#    upgrade() now creates the "recording" table and adds transcript.recording_id through
#    batch_alter_table. The helpers add_recordings_from_s3, link_transcripts_to_recordings and
#    delete_recordings are removed, together with the boto3 / reflector.* / uuid / datetime imports.
# 6. server/reflector/db/meetings.py
#    meeting_consent.meeting_id, consent_given and consent_timestamp gain nullable=False;
#    user_id drops its explicit nullable=True.
#
# NOTE(review): the previous d3ff3a39297f upgrade() created the unique constraint
#   "uq_recording_object_key" on (bucket_name, object_key); the new create_table("recording", ...)
#   declares no such constraint - confirm this is intentional or handled by another revision.
# NOTE(review): Boolean columns use server_default=sa.text("0"); presumably fine for the database
#   the CI job runs against - verify before targeting a backend with a strict boolean type.
# NOTE(review): the workflow step is named "Set up Python 3.x" but pins python-version "3.11".
# NOTE(review): the meeting_consent nullable=False changes are model-only in this view; no
#   migration altering that table is visible here - confirm one exists.
# NOTE(review): the Create Date docstring fields of the existing revisions 62dea3db63a5 and
#   d3ff3a39297f are rewritten to 2025-06-27 - confirm rewriting revision history is intended.
#
diff --git a/.github/workflows/db_migrations.yml b/.github/workflows/db_migrations.yml new file mode 100644 index 00000000..6044802a --- /dev/null +++ b/.github/workflows/db_migrations.yml @@ -0,0 +1,55 @@ +name: Test Database Migrations + +on: + push: + paths: + - "server/migrations/**" + - "server/reflector/db/**" + - "server/alembic.ini" + - ".github/workflows/db_migrations.yml" + pull_request: + paths: + - "server/migrations/**" + - "server/reflector/db/**" + - "server/alembic.ini" + - ".github/workflows/db_migrations.yml" + +jobs: + test-migrations: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install poetry + run: pipx install poetry + + - name: Set up Python 3.x + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: "poetry" + cache-dependency-path: "server/poetry.lock" + + - name: Install requirements + working-directory: ./server + run: | + poetry install --no-root + + - name: Test migrations from scratch + working-directory: ./server + run: | + echo "Testing migrations from clean database..." + poetry run alembic upgrade head + echo "✅ Fresh migration successful" + + - name: Test migration rollback and re-apply + working-directory: ./server + run: | + echo "Testing rollback to base..." + poetry run alembic downgrade base + echo "✅ Rollback successful" + + echo "Testing re-apply of all migrations..." 
+ poetry run alembic upgrade head + echo "✅ Re-apply successful" diff --git a/server/migrations/versions/62dea3db63a5_add_room_options.py b/server/migrations/versions/62dea3db63a5_add_room_options.py index 6739100e..aa47b98d 100644 --- a/server/migrations/versions/62dea3db63a5_add_room_options.py +++ b/server/migrations/versions/62dea3db63a5_add_room_options.py @@ -1,15 +1,16 @@ -"""Add room options +"""add room options Revision ID: 62dea3db63a5 Revises: 1340c04426b8 -Create Date: 2024-09-03 16:19:26.861027 +Create Date: 2025-06-27 09:04:21.006823 """ from typing import Sequence, Union -import sqlalchemy as sa from alembic import op +import sqlalchemy as sa + # revision identifiers, used by Alembic. revision: str = "62dea3db63a5" @@ -20,67 +21,63 @@ depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.add_column( + op.create_table( "meeting", + sa.Column("id", sa.String(), nullable=False), + sa.Column("room_name", sa.String(), nullable=True), + sa.Column("room_url", sa.String(), nullable=True), + sa.Column("host_room_url", sa.String(), nullable=True), + sa.Column("viewer_room_url", sa.String(), nullable=True), + sa.Column("start_date", sa.DateTime(), nullable=True), + sa.Column("end_date", sa.DateTime(), nullable=True), + sa.Column("user_id", sa.String(), nullable=True), + sa.Column("room_id", sa.String(), nullable=True), sa.Column( "is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False ), - ) - op.add_column( - "meeting", sa.Column("room_mode", sa.String(), server_default="normal", nullable=False), - ) - op.add_column( - "meeting", sa.Column( "recording_type", sa.String(), server_default="cloud", nullable=False ), - ) - op.add_column( - "meeting", sa.Column( "recording_trigger", sa.String(), server_default="automatic-2nd-participant", nullable=False, ), + sa.PrimaryKeyConstraint("id"), ) - op.add_column( + op.create_table( "room", + sa.Column("id", sa.String(), 
nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("user_id", sa.String(), nullable=False), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.Column( + "zulip_auto_post", sa.Boolean(), server_default=sa.text("0"), nullable=False + ), + sa.Column("zulip_stream", sa.String(), nullable=True), + sa.Column("zulip_topic", sa.String(), nullable=True), sa.Column( "is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False ), - ) - op.add_column( - "room", sa.Column("room_mode", sa.String(), server_default="normal", nullable=False), - ) - op.add_column( - "room", sa.Column( "recording_type", sa.String(), server_default="cloud", nullable=False ), - ) - op.add_column( - "room", sa.Column( "recording_trigger", sa.String(), server_default="automatic-2nd-participant", nullable=False, ), + sa.PrimaryKeyConstraint("id"), ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("room", "recording_trigger") - op.drop_column("room", "recording_type") - op.drop_column("room", "room_mode") - op.drop_column("room", "is_locked") - op.drop_column("meeting", "recording_trigger") - op.drop_column("meeting", "recording_type") - op.drop_column("meeting", "room_mode") - op.drop_column("meeting", "is_locked") + op.drop_table("room") + op.drop_table("meeting") # ### end Alembic commands ### diff --git a/server/migrations/versions/99365b0cd87b_add_title_short_and_long_summary_and_.py b/server/migrations/versions/99365b0cd87b_add_title_short_and_long_summary_and_.py index 5d7dc857..a1b67945 100644 --- a/server/migrations/versions/99365b0cd87b_add_title_short_and_long_summary_and_.py +++ b/server/migrations/versions/99365b0cd87b_add_title_short_and_long_summary_and_.py @@ -5,33 +5,39 @@ Revises: b3df9681cae9 Create Date: 2023-09-01 20:19:47.216334 """ + from typing import Sequence, Union from alembic import op import sqlalchemy as sa # revision identifiers, used by 
Alembic. -revision: str = '99365b0cd87b' -down_revision: Union[str, None] = 'b3df9681cae9' +revision: str = "99365b0cd87b" +down_revision: Union[str, None] = "b3df9681cae9" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.execute("UPDATE transcript SET events = " - "REPLACE(events, '\"event\": \"SUMMARY\"', '\"event\": \"LONG_SUMMARY\"');") - op.alter_column('transcript', 'summary', new_column_name='long_summary') - op.add_column('transcript', sa.Column('title', sa.String(), nullable=True)) - op.add_column('transcript', sa.Column('short_summary', sa.String(), nullable=True)) + op.execute( + "UPDATE transcript SET events = " + 'REPLACE(events, \'"event": "SUMMARY"\', \'"event": "LONG_SUMMARY"\');' + ) + op.alter_column("transcript", "summary", new_column_name="long_summary") + op.add_column("transcript", sa.Column("title", sa.String(), nullable=True)) + op.add_column("transcript", sa.Column("short_summary", sa.String(), nullable=True)) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.execute("UPDATE transcript SET events = " - "REPLACE(events, '\"event\": \"LONG_SUMMARY\"', '\"event\": \"SUMMARY\"');") - op.alter_column('transcript', 'long_summary', nullable=True, new_column_name='summary') - op.drop_column('transcript', 'title') - op.drop_column('transcript', 'short_summary') + op.execute( + "UPDATE transcript SET events = " + 'REPLACE(events, \'"event": "LONG_SUMMARY"\', \'"event": "SUMMARY"\');' + ) + with op.batch_alter_table("transcript", schema=None) as batch_op: + batch_op.alter_column("long_summary", nullable=True, new_column_name="summary") + op.drop_column("transcript", "title") + op.drop_column("transcript", "short_summary") # ### end Alembic commands ### diff --git a/server/migrations/versions/b3df9681cae9_add_source_and_target_language.py b/server/migrations/versions/b3df9681cae9_add_source_and_target_language.py deleted file mode 100644 index ed8a85b2..00000000 --- a/server/migrations/versions/b3df9681cae9_add_source_and_target_language.py +++ /dev/null @@ -1,32 +0,0 @@ -"""add source and target language - -Revision ID: b3df9681cae9 -Revises: 543ed284d69a -Create Date: 2023-08-29 10:55:37.690469 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = 'b3df9681cae9' -down_revision: Union[str, None] = '543ed284d69a' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('transcript', sa.Column('source_language', sa.String(), nullable=True)) - op.add_column('transcript', sa.Column('target_language', sa.String(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_column('transcript', 'target_language') - op.drop_column('transcript', 'source_language') - # ### end Alembic commands ### diff --git a/server/migrations/versions/b3df9681cae9_add_transcript_table.py b/server/migrations/versions/b3df9681cae9_add_transcript_table.py new file mode 100644 index 00000000..27e3493d --- /dev/null +++ b/server/migrations/versions/b3df9681cae9_add_transcript_table.py @@ -0,0 +1,46 @@ +"""Add transcript table + +Revision ID: b3df9681cae9 +Revises: 543ed284d69a +Create Date: 2025-06-27 08:57:16.306940 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = "b3df9681cae9" +down_revision: Union[str, None] = "543ed284d69a" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "transcript", + sa.Column("id", sa.String(), nullable=False), + sa.Column("name", sa.String(), nullable=True), + sa.Column("status", sa.String(), nullable=True), + sa.Column("locked", sa.Boolean(), nullable=True), + sa.Column("duration", sa.Integer(), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("summary", sa.String(), nullable=True), + sa.Column("topics", sa.JSON(), nullable=True), + sa.Column("events", sa.JSON(), nullable=True), + sa.Column("source_language", sa.String(), nullable=True), + sa.Column("target_language", sa.String(), nullable=True), + sa.Column("user_id", sa.String(), nullable=True), + sa.PrimaryKeyConstraint("id"), + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table("transcript") + # ### end Alembic commands ### diff --git a/server/migrations/versions/d3ff3a39297f_add_recordings.py b/server/migrations/versions/d3ff3a39297f_add_recordings.py index 92ceb29c..f99d9646 100644 --- a/server/migrations/versions/d3ff3a39297f_add_recordings.py +++ b/server/migrations/versions/d3ff3a39297f_add_recordings.py @@ -1,23 +1,16 @@ -"""Add recordings +"""add recordings Revision ID: d3ff3a39297f Revises: b0e5f7876032 -Create Date: 2025-03-10 14:38:53.504413 +Create Date: 2025-06-27 09:27:25.302152 """ -import uuid -from datetime import datetime from typing import Sequence, Union -import boto3 -import sqlalchemy as sa from alembic import op -from reflector.db.meetings import meetings -from reflector.db.recordings import Recording, recordings -from reflector.db.rooms import rooms -from reflector.db.transcripts import transcripts -from reflector.settings import settings +import sqlalchemy as sa + # revision identifiers, used by Alembic. revision: str = "d3ff3a39297f" @@ -26,107 +19,28 @@ branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None -def add_recordings_from_s3(): - bind = op.get_bind() - - s3 = boto3.client( - "s3", - region_name=settings.TRANSCRIPT_STORAGE_AWS_REGION, - aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID, - aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY, - ) - - bucket_name = settings.AWS_WHEREBY_S3_BUCKET - paginator = s3.get_paginator("list_objects_v2") - pages = paginator.paginate(Bucket=bucket_name) - - for page in pages: - if "Contents" not in page: - continue - - for obj in page["Contents"]: - object_key = obj["Key"] - - if not (object_key.endswith(".mp4")): - continue - - room_name = f"/{object_key[:36]}" - recorded_at = datetime.fromisoformat(object_key[37:57]) - - meeting = bind.execute( - meetings.select().where(meetings.c.room_name == room_name) - ).fetchone() - - recording = Recording( - 
id=str(uuid.uuid4()), - bucket_name=bucket_name, - object_key=object_key, - recorded_at=recorded_at, - meeting_id=meeting["id"], - ) - bind.execute(recordings.insert().values(recording.model_dump())) - - -def link_transcripts_to_recordings(): - bind = op.get_bind() - - room_transcripts = bind.execute( - transcripts.select() - .where(transcripts.c.meeting_id.isnot(None)) - .order_by(transcripts.c.meeting_id, transcripts.c.created_at) - ).fetchall() - - for transcript in room_transcripts: - transcript_recordings = bind.execute( - recordings.select() - .where( - recordings.c.meeting_id == transcript["meeting_id"], - ) - .order_by(recordings.c.recorded_at.desc()) - ).fetchall() - - if len(transcript_recordings) == 1: - bind.execute( - transcripts.update() - .where(transcripts.c.id == transcript["id"]) - .values(recording_id=transcript_recordings[0]["id"]) - ) - elif len(transcript_recordings) > 1: - matched_recording = next( - ( - r - for r in transcript_recordings - if r["recorded_at"] <= transcript["created_at"] - ), - None, - ) - bind.execute( - transcripts.update() - .where(transcripts.c.id == transcript["id"]) - .values(recording_id=matched_recording["id"]) - ) - - -def delete_recordings(): - bind = op.get_bind() - bind.execute(recordings.delete()) - - def upgrade() -> None: - with op.batch_alter_table("recording", schema=None) as batch_op: - batch_op.create_unique_constraint( - "uq_recording_object_key", - ["bucket_name", "object_key"], - ) - op.add_column("transcript", sa.Column("recording_id", sa.String(), nullable=True)) + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "recording", + sa.Column("id", sa.String(), nullable=False), + sa.Column("bucket_name", sa.String(), nullable=False), + sa.Column("object_key", sa.String(), nullable=False), + sa.Column("recorded_at", sa.DateTime(), nullable=False), + sa.Column("status", sa.String(), server_default="pending", nullable=False), + sa.Column("meeting_id", sa.String(), nullable=True), + sa.PrimaryKeyConstraint("id"), + ) + with op.batch_alter_table("transcript", schema=None) as batch_op: + batch_op.add_column(sa.Column("recording_id", sa.String(), nullable=True)) - add_recordings_from_s3() - link_transcripts_to_recordings() + # ### end Alembic commands ### def downgrade() -> None: - with op.batch_alter_table("recording", schema=None) as batch_op: - batch_op.drop_constraint("uq_recording_object_key", type_="unique") - op.drop_column("transcript", "recording_id") + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("transcript", schema=None) as batch_op: + batch_op.drop_column("recording_id") - delete_recordings() + op.drop_table("recording") + # ### end Alembic commands ### diff --git a/server/reflector/db/meetings.py b/server/reflector/db/meetings.py index e6830460..8117b3ed 100644 --- a/server/reflector/db/meetings.py +++ b/server/reflector/db/meetings.py @@ -46,10 +46,10 @@ meeting_consent = sa.Table( "meeting_consent", metadata, sa.Column("id", sa.String, primary_key=True), - sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id")), - sa.Column("user_id", sa.String, nullable=True), - sa.Column("consent_given", sa.Boolean), - sa.Column("consent_timestamp", sa.DateTime), + sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id"), nullable=False), + sa.Column("user_id", sa.String), + sa.Column("consent_given", sa.Boolean, nullable=False), + sa.Column("consent_timestamp", sa.DateTime, nullable=False), )