Link recorded meeting to a transcript

This commit is contained in:
2024-08-09 15:24:35 +02:00
parent b1527ad7b3
commit 2381428ae2
20 changed files with 1225 additions and 12 deletions

View File

@@ -0,0 +1,30 @@
"""add meeting
Revision ID: 1340c04426b8
Revises: b9348748bbbc
Create Date: 2024-07-31 16:41:29.415218
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '1340c04426b8'
down_revision: Union[str, None] = 'b9348748bbbc'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###

View File

@@ -11,6 +11,7 @@ from reflector.events import subscribers_shutdown, subscribers_startup
from reflector.logger import logger
from reflector.metrics import metrics_init
from reflector.settings import settings
from reflector.views.meetings import router as meetings_router
from reflector.views.rtc_offer import router as rtc_offer_router
from reflector.views.transcripts import router as transcripts_router
from reflector.views.transcripts_audio import router as transcripts_audio_router
@@ -68,6 +69,7 @@ metrics_init(app, instrumentator)
# register views
app.include_router(rtc_offer_router)
app.include_router(meetings_router, prefix="/v1")
app.include_router(transcripts_router, prefix="/v1")
app.include_router(transcripts_audio_router, prefix="/v1")
app.include_router(transcripts_participants_router, prefix="/v1")

View File

@@ -7,6 +7,7 @@ database = databases.Database(settings.DATABASE_URL)
metadata = sqlalchemy.MetaData()
# import models
import reflector.db.meetings # noqa
import reflector.db.transcripts # noqa
engine = sqlalchemy.create_engine(

View File

@@ -0,0 +1,94 @@
from datetime import datetime
import sqlalchemy
from fastapi import HTTPException
from pydantic import BaseModel
from reflector.db import database, metadata
meetings = sqlalchemy.Table(
"meeting",
metadata,
sqlalchemy.Column("id", sqlalchemy.String, primary_key=True),
sqlalchemy.Column("room_name", sqlalchemy.String),
sqlalchemy.Column("room_url", sqlalchemy.String),
sqlalchemy.Column("host_room_url", sqlalchemy.String),
sqlalchemy.Column("viewer_room_url", sqlalchemy.String),
sqlalchemy.Column("start_date", sqlalchemy.DateTime),
sqlalchemy.Column("end_date", sqlalchemy.DateTime),
sqlalchemy.Column("user_id", sqlalchemy.String),
)
class Meeting(BaseModel):
id: str
room_name: str
room_url: str
host_room_url: str
viewer_room_url: str
start_date: datetime
end_date: datetime
user_id: str
class MeetingController:
async def add(
self,
id: str,
room_name: str,
room_url: str,
host_room_url: str,
viewer_room_url: str,
start_date: datetime,
end_date: datetime,
user_id: str,
):
"""
Add a new meeting
"""
meeting = Meeting(
id=id,
room_name=room_name,
room_url=room_url,
host_room_url=host_room_url,
viewer_room_url=viewer_room_url,
start_date=start_date,
end_date=end_date,
user_id=user_id,
)
query = meetings.insert().values(**meeting.model_dump())
await database.execute(query)
return meeting
async def get_by_room_name(
self,
room_name: str,
) -> Meeting:
"""
Get a meeting by room name.
"""
query = meetings.select().where(meetings.c.room_name == room_name)
result = await database.fetch_one(query)
if not result:
return None
return Meeting(**result)
async def get_by_id_for_http(self, meeting_id: str, user_id: str | None) -> Meeting:
"""
Get a meeting by ID for HTTP request.
If not found, it will raise a 404 error.
"""
query = meetings.select().where(meetings.c.id == meeting_id)
result = await database.fetch_one(query)
if not result:
raise HTTPException(status_code=404, detail="Meeting not found")
meeting = Meeting(**result)
if result["user_id"] != user_id:
meeting.host_room_url = ""
return meeting
meetings_controller = MeetingController()

View File

@@ -50,6 +50,10 @@ transcripts = sqlalchemy.Table(
nullable=False,
server_default="private",
),
sqlalchemy.Column(
"meeting_id",
sqlalchemy.String,
),
)
@@ -145,6 +149,7 @@ class Transcript(BaseModel):
share_mode: Literal["private", "semi-private", "public"] = "private"
audio_location: str = "local"
reviewed: bool = False
meeting_id: str | None = None
def add_event(self, event: str, data: BaseModel) -> TranscriptEvent:
ev = TranscriptEvent(event=event, data=data.model_dump())
@@ -329,6 +334,18 @@ class TranscriptController:
return None
return Transcript(**result)
async def get_by_meeting_id(self, meeting_id: str, **kwargs) -> Transcript | None:
"""
Get a transcript by meeting_id
"""
query = transcripts.select().where(transcripts.c.meeting_id == meeting_id)
if "user_id" in kwargs:
query = query.where(transcripts.c.user_id == kwargs["user_id"])
result = await database.fetch_one(query)
if not result:
return None
return Transcript(**result)
async def get_by_id_for_http(
self,
transcript_id: str,
@@ -376,6 +393,8 @@ class TranscriptController:
source_language: str = "en",
target_language: str = "en",
user_id: str | None = None,
meeting_id: str | None = None,
share_mode: str = "private",
):
"""
Add a new transcript
@@ -385,6 +404,8 @@ class TranscriptController:
source_language=source_language,
target_language=target_language,
user_id=user_id,
meeting_id=meeting_id,
share_mode=share_mode,
)
query = transcripts.insert().values(**transcript.model_dump())
await database.execute(query)

View File

@@ -133,5 +133,9 @@ class Settings(BaseSettings):
AWS_PROCESS_RECORDING_QUEUE_URL: str | None = None
WHEREBY_API_URL: str = "https://api.whereby.dev/v1/meetings"
WHEREBY_API_KEY: str | None = None
settings = Settings()

View File

@@ -0,0 +1,28 @@
from datetime import datetime
from typing import Annotated, Optional
import reflector.auth as auth
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from reflector.db.meetings import meetings_controller
router = APIRouter()
class GetMeeting(BaseModel):
id: str
room_name: str
room_url: str
host_room_url: str
viewer_room_url: str
start_date: datetime
end_date: datetime
@router.get("/meetings/{meeting_id}", response_model=GetMeeting)
async def meeting_get(
meeting_id: str,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
user_id = user["sub"] if user else None
return await meetings_controller.get_by_id_for_http(meeting_id, user_id=user_id)

View File

@@ -1,4 +1,4 @@
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import Annotated, Literal, Optional
import reflector.auth as auth
@@ -7,6 +7,7 @@ from fastapi_pagination import Page
from fastapi_pagination.ext.databases import paginate
from jose import jwt
from pydantic import BaseModel, Field
from reflector.db.meetings import meetings_controller
from reflector.db.transcripts import (
TranscriptParticipant,
TranscriptTopic,
@@ -15,6 +16,7 @@ from reflector.db.transcripts import (
from reflector.processors.types import Transcript as ProcessorTranscript
from reflector.processors.types import Word
from reflector.settings import settings
from reflector.whereby import create_meeting
router = APIRouter()
@@ -51,6 +53,7 @@ class GetTranscript(BaseModel):
target_language: str | None
participants: list[TranscriptParticipant] | None
reviewed: bool
meeting_id: str | None
class CreateTranscript(BaseModel):
@@ -108,6 +111,37 @@ async def transcripts_create(
)
@router.post("/transcripts/meeting", response_model=GetTranscript)
async def transcripts_create_meeting(
info: CreateTranscript,
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
):
user_id = user["sub"] if user else None
start_date = datetime.now(timezone.utc)
end_date = start_date + timedelta(minutes=1)
meeting = await create_meeting("", start_date=start_date, end_date=end_date)
meeting = await meetings_controller.add(
id=meeting["meetingId"],
room_name=meeting["roomName"],
room_url=meeting["roomUrl"],
host_room_url=meeting["hostRoomUrl"],
viewer_room_url=meeting["viewerRoomUrl"],
start_date=datetime.fromisoformat(meeting["startDate"]),
end_date=datetime.fromisoformat(meeting["endDate"]),
user_id=user_id,
)
return await transcripts_controller.add(
info.name,
source_language=info.source_language,
target_language=info.target_language,
user_id=user_id,
meeting_id=meeting.id,
share_mode="public",
)
# ==============================================================
# Single transcript
# ==============================================================

View File

@@ -0,0 +1,29 @@
from datetime import datetime
import httpx
from reflector.settings import settings
async def create_meeting(
room_name_prefix: str, start_date: datetime, end_date: datetime
):
headers = {
"Content-Type": "application/json; charset=utf-8",
"Authorization": f"Bearer {settings.WHEREBY_API_KEY}",
}
data = {
"templateType": "viewerMode",
"isLocked": False,
"roomNamePrefix": room_name_prefix,
"roomNamePattern": "uuid",
"roomMode": "normal",
"startDate": start_date.isoformat(),
"endDate": end_date.isoformat(),
}
async with httpx.AsyncClient() as client:
response = await client.post(
settings.WHEREBY_API_URL, headers=headers, json=data, timeout=10
)
response.raise_for_status()
return response.json()

View File

@@ -7,6 +7,7 @@ import boto3
import structlog
from celery import shared_task
from celery.utils.log import get_task_logger
from reflector.db.meetings import meetings_controller
from reflector.db.transcripts import transcripts_controller
from reflector.pipelines.main_live_pipeline import asynctask, task_pipeline_process
from reflector.settings import settings
@@ -60,12 +61,11 @@ def process_messages():
async def process_recording(bucket_name: str, object_key: str):
logger.info("Processing recording: %s/%s", bucket_name, object_key)
transcript = await transcripts_controller.add(
"",
source_language="en",
target_language="en",
user_id=None,
)
# extract a guid from the object key
room_name = f"/{object_key[:36]}"
meeting = await meetings_controller.get_by_room_name(room_name)
transcript = await transcripts_controller.get_by_meeting_id(meeting.id)
_, extension = os.path.splitext(object_key)
upload_filename = transcript.data_path / f"upload{extension}"
upload_filename.parent.mkdir(parents=True, exist_ok=True)