fix: alembic migrations (#470)

* fix: alembic migrations

This commit fixes all the migrations that were half-baked due to the
earlier auto-creation in the db init. The process was to check out the
commit where each migration was created, and use --autogenerate to
regenerate it at the state of that migration. 4 migrations were fixed.

It also includes a workflow to ensure migrations can be applied correctly.

* fix: db migration check

* fix: nullable on meeting_consent

* fix: try fixing tests
This commit is contained in:
2025-06-27 12:03:10 -06:00
committed by GitHub
parent 9f70f76557
commit 3d370336cc
7 changed files with 174 additions and 188 deletions

55
.github/workflows/db_migrations.yml vendored Normal file
View File

@@ -0,0 +1,55 @@
# CI workflow: verifies Alembic migrations apply cleanly from scratch,
# roll all the way back to base, and re-apply — guarding against
# half-applied or irreversible migration scripts.
# (Indentation reconstructed: the captured text had all YAML nesting flattened.)
name: Test Database Migrations

on:
  push:
    paths:
      - "server/migrations/**"
      - "server/reflector/db/**"
      - "server/alembic.ini"
      - ".github/workflows/db_migrations.yml"
  pull_request:
    paths:
      - "server/migrations/**"
      - "server/reflector/db/**"
      - "server/alembic.ini"
      - ".github/workflows/db_migrations.yml"

jobs:
  test-migrations:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install poetry
        run: pipx install poetry
      - name: Set up Python 3.x
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: "poetry"
          cache-dependency-path: "server/poetry.lock"
      - name: Install requirements
        working-directory: ./server
        run: |
          poetry install --no-root
      # NOTE(review): no database service container is configured here —
      # presumably alembic.ini defaults to a local file/SQLite URL; confirm.
      - name: Test migrations from scratch
        working-directory: ./server
        run: |
          echo "Testing migrations from clean database..."
          poetry run alembic upgrade head
          echo "✅ Fresh migration successful"
      - name: Test migration rollback and re-apply
        working-directory: ./server
        run: |
          echo "Testing rollback to base..."
          poetry run alembic downgrade base
          echo "✅ Rollback successful"
          echo "Testing re-apply of all migrations..."
          poetry run alembic upgrade head
          echo "✅ Re-apply successful"

View File

@@ -1,15 +1,16 @@
"""Add room options
"""add room options
Revision ID: 62dea3db63a5
Revises: 1340c04426b8
Create Date: 2024-09-03 16:19:26.861027
Create Date: 2025-06-27 09:04:21.006823
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "62dea3db63a5"
@@ -20,67 +21,63 @@ depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
op.create_table(
"meeting",
sa.Column("id", sa.String(), nullable=False),
sa.Column("room_name", sa.String(), nullable=True),
sa.Column("room_url", sa.String(), nullable=True),
sa.Column("host_room_url", sa.String(), nullable=True),
sa.Column("viewer_room_url", sa.String(), nullable=True),
sa.Column("start_date", sa.DateTime(), nullable=True),
sa.Column("end_date", sa.DateTime(), nullable=True),
sa.Column("user_id", sa.String(), nullable=True),
sa.Column("room_id", sa.String(), nullable=True),
sa.Column(
"is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False
),
)
op.add_column(
"meeting",
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
)
op.add_column(
"meeting",
sa.Column(
"recording_type", sa.String(), server_default="cloud", nullable=False
),
)
op.add_column(
"meeting",
sa.Column(
"recording_trigger",
sa.String(),
server_default="automatic-2nd-participant",
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.add_column(
op.create_table(
"room",
sa.Column("id", sa.String(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column("user_id", sa.String(), nullable=False),
sa.Column("created_at", sa.DateTime(), nullable=False),
sa.Column(
"zulip_auto_post", sa.Boolean(), server_default=sa.text("0"), nullable=False
),
sa.Column("zulip_stream", sa.String(), nullable=True),
sa.Column("zulip_topic", sa.String(), nullable=True),
sa.Column(
"is_locked", sa.Boolean(), server_default=sa.text("0"), nullable=False
),
)
op.add_column(
"room",
sa.Column("room_mode", sa.String(), server_default="normal", nullable=False),
)
op.add_column(
"room",
sa.Column(
"recording_type", sa.String(), server_default="cloud", nullable=False
),
)
op.add_column(
"room",
sa.Column(
"recording_trigger",
sa.String(),
server_default="automatic-2nd-participant",
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("room", "recording_trigger")
op.drop_column("room", "recording_type")
op.drop_column("room", "room_mode")
op.drop_column("room", "is_locked")
op.drop_column("meeting", "recording_trigger")
op.drop_column("meeting", "recording_type")
op.drop_column("meeting", "room_mode")
op.drop_column("meeting", "is_locked")
op.drop_table("room")
op.drop_table("meeting")
# ### end Alembic commands ###

View File

@@ -5,33 +5,39 @@ Revises: b3df9681cae9
Create Date: 2023-09-01 20:19:47.216334
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '99365b0cd87b'
down_revision: Union[str, None] = 'b3df9681cae9'
revision: str = "99365b0cd87b"
down_revision: Union[str, None] = "b3df9681cae9"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.execute("UPDATE transcript SET events = "
"REPLACE(events, '\"event\": \"SUMMARY\"', '\"event\": \"LONG_SUMMARY\"');")
op.alter_column('transcript', 'summary', new_column_name='long_summary')
op.add_column('transcript', sa.Column('title', sa.String(), nullable=True))
op.add_column('transcript', sa.Column('short_summary', sa.String(), nullable=True))
op.execute(
"UPDATE transcript SET events = "
'REPLACE(events, \'"event": "SUMMARY"\', \'"event": "LONG_SUMMARY"\');'
)
op.alter_column("transcript", "summary", new_column_name="long_summary")
op.add_column("transcript", sa.Column("title", sa.String(), nullable=True))
op.add_column("transcript", sa.Column("short_summary", sa.String(), nullable=True))
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.execute("UPDATE transcript SET events = "
"REPLACE(events, '\"event\": \"LONG_SUMMARY\"', '\"event\": \"SUMMARY\"');")
op.alter_column('transcript', 'long_summary', nullable=True, new_column_name='summary')
op.drop_column('transcript', 'title')
op.drop_column('transcript', 'short_summary')
op.execute(
"UPDATE transcript SET events = "
'REPLACE(events, \'"event": "LONG_SUMMARY"\', \'"event": "SUMMARY"\');'
)
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.alter_column("long_summary", nullable=True, new_column_name="summary")
op.drop_column("transcript", "title")
op.drop_column("transcript", "short_summary")
# ### end Alembic commands ###

View File

@@ -1,32 +0,0 @@
"""add source and target language
Revision ID: b3df9681cae9
Revises: 543ed284d69a
Create Date: 2023-08-29 10:55:37.690469
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'b3df9681cae9'
down_revision: Union[str, None] = '543ed284d69a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add nullable source/target language columns to the transcript table.

    (Indentation restored: the captured text had all Python indentation
    flattened, making the function body invalid.)
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('transcript', sa.Column('source_language', sa.String(), nullable=True))
    op.add_column('transcript', sa.Column('target_language', sa.String(), nullable=True))
    # ### end Alembic commands ###
def downgrade() -> None:
    """Drop the source/target language columns added by upgrade().

    Columns are dropped in reverse order of creation. (Indentation
    restored: the captured text had the function body flattened.)
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column('transcript', 'target_language')
    op.drop_column('transcript', 'source_language')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,46 @@
"""Add transcript table
Revision ID: b3df9681cae9
Revises: 543ed284d69a
Create Date: 2025-06-27 08:57:16.306940
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "b3df9681cae9"
down_revision: Union[str, None] = "543ed284d69a"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the initial ``transcript`` table.

    Only ``id`` (the primary key) is NOT NULL; every data column is
    nullable, matching the regenerated autogenerate output. (Indentation
    restored: the captured text had the function body flattened.)
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "transcript",
        sa.Column("id", sa.String(), nullable=False),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column("status", sa.String(), nullable=True),
        sa.Column("locked", sa.Boolean(), nullable=True),
        sa.Column("duration", sa.Integer(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("summary", sa.String(), nullable=True),
        sa.Column("topics", sa.JSON(), nullable=True),
        sa.Column("events", sa.JSON(), nullable=True),
        sa.Column("source_language", sa.String(), nullable=True),
        sa.Column("target_language", sa.String(), nullable=True),
        sa.Column("user_id", sa.String(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    # ### end Alembic commands ###
def downgrade() -> None:
    """Drop the ``transcript`` table created by upgrade().

    (Indentation restored: the captured text had the function body
    flattened.)
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("transcript")
    # ### end Alembic commands ###

View File

@@ -1,23 +1,16 @@
"""Add recordings
"""add recordings
Revision ID: d3ff3a39297f
Revises: b0e5f7876032
Create Date: 2025-03-10 14:38:53.504413
Create Date: 2025-06-27 09:27:25.302152
"""
import uuid
from datetime import datetime
from typing import Sequence, Union
import boto3
import sqlalchemy as sa
from alembic import op
from reflector.db.meetings import meetings
from reflector.db.recordings import Recording, recordings
from reflector.db.rooms import rooms
from reflector.db.transcripts import transcripts
from reflector.settings import settings
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "d3ff3a39297f"
@@ -26,107 +19,28 @@ branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def add_recordings_from_s3():
bind = op.get_bind()
s3 = boto3.client(
"s3",
region_name=settings.TRANSCRIPT_STORAGE_AWS_REGION,
aws_access_key_id=settings.TRANSCRIPT_STORAGE_AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.TRANSCRIPT_STORAGE_AWS_SECRET_ACCESS_KEY,
)
bucket_name = settings.AWS_WHEREBY_S3_BUCKET
paginator = s3.get_paginator("list_objects_v2")
pages = paginator.paginate(Bucket=bucket_name)
for page in pages:
if "Contents" not in page:
continue
for obj in page["Contents"]:
object_key = obj["Key"]
if not (object_key.endswith(".mp4")):
continue
room_name = f"/{object_key[:36]}"
recorded_at = datetime.fromisoformat(object_key[37:57])
meeting = bind.execute(
meetings.select().where(meetings.c.room_name == room_name)
).fetchone()
recording = Recording(
id=str(uuid.uuid4()),
bucket_name=bucket_name,
object_key=object_key,
recorded_at=recorded_at,
meeting_id=meeting["id"],
)
bind.execute(recordings.insert().values(recording.model_dump()))
def link_transcripts_to_recordings():
bind = op.get_bind()
room_transcripts = bind.execute(
transcripts.select()
.where(transcripts.c.meeting_id.isnot(None))
.order_by(transcripts.c.meeting_id, transcripts.c.created_at)
).fetchall()
for transcript in room_transcripts:
transcript_recordings = bind.execute(
recordings.select()
.where(
recordings.c.meeting_id == transcript["meeting_id"],
)
.order_by(recordings.c.recorded_at.desc())
).fetchall()
if len(transcript_recordings) == 1:
bind.execute(
transcripts.update()
.where(transcripts.c.id == transcript["id"])
.values(recording_id=transcript_recordings[0]["id"])
)
elif len(transcript_recordings) > 1:
matched_recording = next(
(
r
for r in transcript_recordings
if r["recorded_at"] <= transcript["created_at"]
),
None,
)
bind.execute(
transcripts.update()
.where(transcripts.c.id == transcript["id"])
.values(recording_id=matched_recording["id"])
)
def delete_recordings():
bind = op.get_bind()
bind.execute(recordings.delete())
def upgrade() -> None:
with op.batch_alter_table("recording", schema=None) as batch_op:
batch_op.create_unique_constraint(
"uq_recording_object_key",
["bucket_name", "object_key"],
)
op.add_column("transcript", sa.Column("recording_id", sa.String(), nullable=True))
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"recording",
sa.Column("id", sa.String(), nullable=False),
sa.Column("bucket_name", sa.String(), nullable=False),
sa.Column("object_key", sa.String(), nullable=False),
sa.Column("recorded_at", sa.DateTime(), nullable=False),
sa.Column("status", sa.String(), server_default="pending", nullable=False),
sa.Column("meeting_id", sa.String(), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.add_column(sa.Column("recording_id", sa.String(), nullable=True))
add_recordings_from_s3()
link_transcripts_to_recordings()
# ### end Alembic commands ###
def downgrade() -> None:
with op.batch_alter_table("recording", schema=None) as batch_op:
batch_op.drop_constraint("uq_recording_object_key", type_="unique")
op.drop_column("transcript", "recording_id")
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("transcript", schema=None) as batch_op:
batch_op.drop_column("recording_id")
delete_recordings()
op.drop_table("recording")
# ### end Alembic commands ###

View File

@@ -46,10 +46,10 @@ meeting_consent = sa.Table(
"meeting_consent",
metadata,
sa.Column("id", sa.String, primary_key=True),
sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id")),
sa.Column("user_id", sa.String, nullable=True),
sa.Column("consent_given", sa.Boolean),
sa.Column("consent_timestamp", sa.DateTime),
sa.Column("meeting_id", sa.String, sa.ForeignKey("meeting.id"), nullable=False),
sa.Column("user_id", sa.String),
sa.Column("consent_given", sa.Boolean, nullable=False),
sa.Column("consent_timestamp", sa.DateTime, nullable=False),
)