mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
375 lines
11 KiB
Python
375 lines
11 KiB
Python
from datetime import datetime, timedelta
|
|
from typing import Annotated, Literal, Optional
|
|
|
|
import reflector.auth as auth
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from fastapi_pagination import Page
|
|
from fastapi_pagination.ext.databases import paginate
|
|
from jose import jwt
|
|
from pydantic import BaseModel, Field
|
|
from reflector.db.migrate_user import migrate_user
|
|
from reflector.db.transcripts import (
|
|
SourceKind,
|
|
TranscriptParticipant,
|
|
TranscriptTopic,
|
|
transcripts_controller,
|
|
)
|
|
from reflector.processors.types import Transcript as ProcessorTranscript
|
|
from reflector.processors.types import Word
|
|
from reflector.settings import settings
|
|
from reflector.zulip import (
|
|
InvalidMessageError,
|
|
get_zulip_message,
|
|
send_message_to_zulip,
|
|
update_zulip_message,
|
|
)
|
|
|
|
router = APIRouter()
|
|
|
|
ALGORITHM = "HS256"
|
|
DOWNLOAD_EXPIRE_MINUTES = 60
|
|
|
|
|
|
def create_access_token(data: dict, expires_delta: timedelta):
|
|
to_encode = data.copy()
|
|
expire = datetime.utcnow() + expires_delta
|
|
to_encode.update({"exp": expire})
|
|
encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=ALGORITHM)
|
|
return encoded_jwt
|
|
|
|
|
|
# ==============================================================
|
|
# Transcripts list
|
|
# ==============================================================
|
|
|
|
|
|
class GetTranscript(BaseModel):
|
|
id: str
|
|
user_id: str | None
|
|
name: str
|
|
status: str
|
|
locked: bool
|
|
duration: float
|
|
title: str | None
|
|
short_summary: str | None
|
|
long_summary: str | None
|
|
created_at: datetime
|
|
share_mode: str = Field("private")
|
|
source_language: str | None
|
|
target_language: str | None
|
|
participants: list[TranscriptParticipant] | None
|
|
reviewed: bool
|
|
meeting_id: str | None
|
|
source_kind: SourceKind
|
|
room_id: str | None = None
|
|
room_name: str | None = None
|
|
|
|
|
|
class CreateTranscript(BaseModel):
|
|
name: str
|
|
source_language: str = Field("en")
|
|
target_language: str = Field("en")
|
|
|
|
|
|
class UpdateTranscript(BaseModel):
|
|
name: Optional[str] = Field(None)
|
|
locked: Optional[bool] = Field(None)
|
|
title: Optional[str] = Field(None)
|
|
short_summary: Optional[str] = Field(None)
|
|
long_summary: Optional[str] = Field(None)
|
|
share_mode: Optional[Literal["public", "semi-private", "private"]] = Field(None)
|
|
participants: Optional[list[TranscriptParticipant]] = Field(None)
|
|
reviewed: Optional[bool] = Field(None)
|
|
|
|
|
|
class DeletionStatus(BaseModel):
|
|
status: str
|
|
|
|
|
|
@router.get("/transcripts", response_model=Page[GetTranscript])
|
|
async def transcripts_list(
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
source_kind: SourceKind | None = None,
|
|
room_id: str | None = None,
|
|
search_term: str | None = None,
|
|
):
|
|
from reflector.db import database
|
|
|
|
if not user and not settings.PUBLIC_MODE:
|
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
|
|
|
user_id = user["sub"] if user else None
|
|
|
|
# for fief to jwt migration, migrate user if needed
|
|
if user:
|
|
await migrate_user(email=user["email"], user_id=user["sub"])
|
|
|
|
return await paginate(
|
|
database,
|
|
await transcripts_controller.get_all(
|
|
user_id=user_id,
|
|
source_kind=SourceKind(source_kind) if source_kind else None,
|
|
room_id=room_id,
|
|
search_term=search_term,
|
|
order_by="-created_at",
|
|
return_query=True,
|
|
),
|
|
)
|
|
|
|
|
|
@router.post("/transcripts", response_model=GetTranscript)
|
|
async def transcripts_create(
|
|
info: CreateTranscript,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
return await transcripts_controller.add(
|
|
info.name,
|
|
source_kind=SourceKind.LIVE,
|
|
source_language=info.source_language,
|
|
target_language=info.target_language,
|
|
user_id=user_id,
|
|
)
|
|
|
|
|
|
# ==============================================================
|
|
# Single transcript
|
|
# ==============================================================
|
|
|
|
|
|
class GetTranscriptSegmentTopic(BaseModel):
|
|
text: str
|
|
start: float
|
|
speaker: int
|
|
|
|
|
|
class GetTranscriptTopic(BaseModel):
|
|
id: str
|
|
title: str
|
|
summary: str
|
|
timestamp: float
|
|
duration: float | None
|
|
transcript: str
|
|
segments: list[GetTranscriptSegmentTopic] = []
|
|
|
|
@classmethod
|
|
def from_transcript_topic(cls, topic: TranscriptTopic):
|
|
if not topic.words:
|
|
# In previous version, words were missing
|
|
# Just output a segment with speaker 0
|
|
text = topic.transcript
|
|
duration = None
|
|
segments = [
|
|
GetTranscriptSegmentTopic(
|
|
text=topic.transcript,
|
|
start=topic.timestamp,
|
|
speaker=0,
|
|
)
|
|
]
|
|
else:
|
|
# New versions include words
|
|
transcript = ProcessorTranscript(words=topic.words)
|
|
text = transcript.text
|
|
duration = transcript.duration
|
|
segments = [
|
|
GetTranscriptSegmentTopic(
|
|
text=segment.text,
|
|
start=segment.start,
|
|
speaker=segment.speaker,
|
|
)
|
|
for segment in transcript.as_segments()
|
|
]
|
|
return cls(
|
|
id=topic.id,
|
|
title=topic.title,
|
|
summary=topic.summary,
|
|
timestamp=topic.timestamp,
|
|
transcript=text,
|
|
segments=segments,
|
|
duration=duration,
|
|
)
|
|
|
|
|
|
class GetTranscriptTopicWithWords(GetTranscriptTopic):
|
|
words: list[Word] = []
|
|
|
|
@classmethod
|
|
def from_transcript_topic(cls, topic: TranscriptTopic):
|
|
instance = super().from_transcript_topic(topic)
|
|
if topic.words:
|
|
instance.words = topic.words
|
|
return instance
|
|
|
|
|
|
class SpeakerWords(BaseModel):
|
|
speaker: int
|
|
words: list[Word]
|
|
|
|
|
|
class GetTranscriptTopicWithWordsPerSpeaker(GetTranscriptTopic):
|
|
words_per_speaker: list[SpeakerWords] = []
|
|
|
|
@classmethod
|
|
def from_transcript_topic(cls, topic: TranscriptTopic):
|
|
instance = super().from_transcript_topic(topic)
|
|
if topic.words:
|
|
words_per_speakers = []
|
|
# group words by speaker
|
|
words = []
|
|
for word in topic.words:
|
|
if words and words[-1].speaker != word.speaker:
|
|
words_per_speakers.append(
|
|
SpeakerWords(
|
|
speaker=words[-1].speaker,
|
|
words=words,
|
|
)
|
|
)
|
|
words = []
|
|
words.append(word)
|
|
if words:
|
|
words_per_speakers.append(
|
|
SpeakerWords(
|
|
speaker=words[-1].speaker,
|
|
words=words,
|
|
)
|
|
)
|
|
|
|
instance.words_per_speaker = words_per_speakers
|
|
|
|
return instance
|
|
|
|
|
|
@router.get("/transcripts/{transcript_id}", response_model=GetTranscript)
|
|
async def transcript_get(
|
|
transcript_id: str,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
return await transcripts_controller.get_by_id_for_http(
|
|
transcript_id, user_id=user_id
|
|
)
|
|
|
|
|
|
@router.patch("/transcripts/{transcript_id}", response_model=GetTranscript)
|
|
async def transcript_update(
|
|
transcript_id: str,
|
|
info: UpdateTranscript,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
transcript = await transcripts_controller.get_by_id_for_http(
|
|
transcript_id, user_id=user_id
|
|
)
|
|
if not transcript:
|
|
raise HTTPException(status_code=404, detail="Transcript not found")
|
|
values = info.dict(exclude_unset=True)
|
|
await transcripts_controller.update(transcript, values)
|
|
return transcript
|
|
|
|
|
|
@router.delete("/transcripts/{transcript_id}", response_model=DeletionStatus)
|
|
async def transcript_delete(
|
|
transcript_id: str,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
transcript = await transcripts_controller.get_by_id(transcript_id, user_id=user_id)
|
|
if not transcript:
|
|
raise HTTPException(status_code=404, detail="Transcript not found")
|
|
await transcripts_controller.remove_by_id(transcript.id, user_id=user_id)
|
|
return DeletionStatus(status="ok")
|
|
|
|
|
|
@router.get(
|
|
"/transcripts/{transcript_id}/topics",
|
|
response_model=list[GetTranscriptTopic],
|
|
)
|
|
async def transcript_get_topics(
|
|
transcript_id: str,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
transcript = await transcripts_controller.get_by_id_for_http(
|
|
transcript_id, user_id=user_id
|
|
)
|
|
|
|
# convert to GetTranscriptTopic
|
|
return [
|
|
GetTranscriptTopic.from_transcript_topic(topic) for topic in transcript.topics
|
|
]
|
|
|
|
|
|
@router.get(
|
|
"/transcripts/{transcript_id}/topics/with-words",
|
|
response_model=list[GetTranscriptTopicWithWords],
|
|
)
|
|
async def transcript_get_topics_with_words(
|
|
transcript_id: str,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
transcript = await transcripts_controller.get_by_id_for_http(
|
|
transcript_id, user_id=user_id
|
|
)
|
|
|
|
# convert to GetTranscriptTopicWithWords
|
|
return [
|
|
GetTranscriptTopicWithWords.from_transcript_topic(topic)
|
|
for topic in transcript.topics
|
|
]
|
|
|
|
|
|
@router.get(
|
|
"/transcripts/{transcript_id}/topics/{topic_id}/words-per-speaker",
|
|
response_model=GetTranscriptTopicWithWordsPerSpeaker,
|
|
)
|
|
async def transcript_get_topics_with_words_per_speaker(
|
|
transcript_id: str,
|
|
topic_id: str,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
transcript = await transcripts_controller.get_by_id_for_http(
|
|
transcript_id, user_id=user_id
|
|
)
|
|
|
|
# get the topic from the transcript
|
|
topic = next((t for t in transcript.topics if t.id == topic_id), None)
|
|
if not topic:
|
|
raise HTTPException(status_code=404, detail="Topic not found")
|
|
|
|
# convert to GetTranscriptTopicWithWordsPerSpeaker
|
|
return GetTranscriptTopicWithWordsPerSpeaker.from_transcript_topic(topic)
|
|
|
|
|
|
@router.post("/transcripts/{transcript_id}/zulip")
|
|
async def transcript_post_to_zulip(
|
|
transcript_id: str,
|
|
stream: str,
|
|
topic: str,
|
|
include_topics: bool,
|
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
|
):
|
|
user_id = user["sub"] if user else None
|
|
transcript = await transcripts_controller.get_by_id_for_http(
|
|
transcript_id, user_id=user_id
|
|
)
|
|
if not transcript:
|
|
raise HTTPException(status_code=404, detail="Transcript not found")
|
|
|
|
content = get_zulip_message(transcript, include_topics)
|
|
|
|
message_updated = False
|
|
if transcript.zulip_message_id:
|
|
try:
|
|
update_zulip_message(transcript.zulip_message_id, stream, topic, content)
|
|
message_updated = True
|
|
except InvalidMessageError:
|
|
pass
|
|
|
|
if not message_updated:
|
|
response = send_message_to_zulip(stream, topic, content)
|
|
await transcripts_controller.update(
|
|
transcript, {"zulip_message_id": response["id"]}
|
|
)
|