mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 20:59:05 +00:00
fix: automatically reprocess daily recordings (#797)
* Automatically reprocess recordings * Restore the comments * Remove redundant check * Fix indent * Add comment about cyclic import
This commit is contained in:
@@ -3,6 +3,7 @@ from typing import Literal
|
||||
|
||||
import sqlalchemy as sa
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy import or_
|
||||
|
||||
from reflector.db import get_database, metadata
|
||||
from reflector.utils import generate_uuid4
|
||||
@@ -79,5 +80,35 @@ class RecordingController:
|
||||
results = await get_database().fetch_all(query)
|
||||
return [Recording(**row) for row in results]
|
||||
|
||||
async def get_multitrack_needing_reprocessing(
|
||||
self, bucket_name: str
|
||||
) -> list[Recording]:
|
||||
"""
|
||||
Get multitrack recordings that need reprocessing:
|
||||
- Have track_keys (multitrack)
|
||||
- Either have no transcript OR transcript has error status
|
||||
|
||||
This is more efficient than fetching all recordings and filtering in Python.
|
||||
"""
|
||||
from reflector.db.transcripts import (
|
||||
transcripts, # noqa: PLC0415 cyclic import
|
||||
)
|
||||
|
||||
query = (
|
||||
recordings.select()
|
||||
.outerjoin(transcripts, recordings.c.id == transcripts.c.recording_id)
|
||||
.where(
|
||||
recordings.c.bucket_name == bucket_name,
|
||||
recordings.c.track_keys.isnot(None),
|
||||
or_(
|
||||
transcripts.c.id.is_(None),
|
||||
transcripts.c.status == "error",
|
||||
),
|
||||
)
|
||||
)
|
||||
results = await get_database().fetch_all(query)
|
||||
recordings_list = [Recording(**row) for row in results]
|
||||
return [r for r in recordings_list if r.is_multitrack]
|
||||
|
||||
|
||||
recordings_controller = RecordingController()
|
||||
|
||||
Reference in New Issue
Block a user