mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
* style: deactivate PLC0415 only on part that it's ok + re-run pre-commit run --all * ci: add pre-commit hook * build: move from yarn to pnpm * build: move from yarn to pnpm * build: fix node-version * ci: install pnpm prior node (?) * build: update deps and pnpm trying to fix vercel build * feat: docker www corepack * style: pre-commit --------- Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
107 lines
2.8 KiB
Python
107 lines
2.8 KiB
Python
"""populate_webvtt_from_topics
|
|
|
|
Revision ID: 8120ebc75366
|
|
Revises: 116b2f287eab
|
|
Create Date: 2025-08-11 19:11:01.316947
|
|
|
|
"""
|
|
|
|
import json
|
|
from typing import Sequence, Union
|
|
|
|
from alembic import op
|
|
from sqlalchemy import text
|
|
|
|
# revision identifiers, used by Alembic.
# `revision` is this migration's identifier; `down_revision` is the revision
# it revises (the "Revises:" entry in the module docstring).
revision: str = "8120ebc75366"
down_revision: Union[str, None] = "116b2f287eab"
# No branch labels or cross-revision dependencies are declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
|
|
|
|
|
def topics_to_webvtt(topics):
    """Convert topics list to WebVTT format string.

    Args:
        topics: Sequence of dicts. Each entry is read through its ``"start"``
            and ``"end"`` keys (passed to ``format_timestamp``) and its
            ``"text"`` key (the cue payload).

    Returns:
        ``None`` when ``topics`` is empty or ``None``. Otherwise a string
        starting with the ``WEBVTT`` header followed by one cue per usable
        topic. Topics lacking a start, an end, or non-blank string text are
        skipped, so the result may be the bare ``"WEBVTT"`` header.
    """
    if not topics:
        return None

    lines = ["WEBVTT", ""]

    for topic in topics:
        # The text may be missing, JSON null, or a non-string value; calling
        # .strip() on those would raise and discard the whole transcript.
        raw_text = topic.get("text")
        if not isinstance(raw_text, str):
            continue

        # A blank line terminates a WebVTT cue, so drop blank lines inside the
        # payload to keep the remaining text attached to its cue.
        cue_text = "\n".join(
            line for line in raw_text.strip().splitlines() if line.strip()
        )
        if not cue_text:
            continue

        start_time = format_timestamp(topic.get("start"))
        end_time = format_timestamp(topic.get("end"))
        if not (start_time and end_time):
            continue

        lines.extend((f"{start_time} --> {end_time}", cue_text, ""))

    # Strip the trailing blank separator left after the last cue.
    return "\n".join(lines).strip()
|
|
|
|
|
|
def format_timestamp(seconds):
    """Format seconds to WebVTT timestamp format (HH:MM:SS.mmm).

    Args:
        seconds: Offset in seconds (int or float), or ``None``.

    Returns:
        ``None`` when ``seconds`` is ``None``; otherwise a string such as
        ``"01:01:01.500"``. Hours are zero-padded to at least two digits and
        grow beyond two digits for long offsets.
    """
    if seconds is None:
        return None

    # Round to whole milliseconds *before* splitting into fields so that a
    # value like 59.9996 carries into the minutes ("00:01:00.000") instead of
    # being rendered as the invalid "00:00:60.000".
    total_ms = round(seconds * 1000)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)

    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
|
|
|
|
|
|
def upgrade() -> None:
    """Backfill the ``webvtt`` column of ``transcript`` from stored topics.

    Reads every row whose ``topics`` column is not NULL, decodes the value
    from JSON when it arrives as a string, converts it with
    ``topics_to_webvtt`` and writes the result back. A failure on one
    transcript is counted and printed instead of being raised, and a short
    summary is printed at the end.
    """
    bind = op.get_bind()

    select_stmt = text("SELECT id, topics FROM transcript WHERE topics IS NOT NULL")
    update_stmt = text("UPDATE transcript SET webvtt = :webvtt WHERE id = :id")

    rows = bind.execute(select_stmt).fetchall()
    print(f"Found {len(rows)} transcripts with topics")

    updated_count = 0
    error_count = 0

    for transcript_id, topics_data in rows:
        # Non-NULL but empty values (e.g. "" or []) have nothing to convert.
        if not topics_data:
            continue

        try:
            # Some rows hand the column back as raw JSON text.
            if isinstance(topics_data, str):
                topics_data = json.loads(topics_data)

            webvtt_content = topics_to_webvtt(topics_data)
            if not webvtt_content:
                continue

            bind.execute(
                update_stmt,
                {"webvtt": webvtt_content, "id": transcript_id},
            )
            updated_count += 1
            print(f"✓ Updated transcript {transcript_id}")
        except Exception as e:
            # Best effort: record the failure and move on to the next row.
            error_count += 1
            print(f"✗ Error updating transcript {transcript_id}: {e}")

    print("\nMigration complete!")
    print(f" Updated: {updated_count}")
    print(f" Errors: {error_count}")
|
|
|
|
|
|
def downgrade() -> None:
    """Reset the ``webvtt`` column to NULL on every ``transcript`` row."""
    clear_stmt = text("UPDATE transcript SET webvtt = NULL")
    op.execute(clear_stmt)
|