mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
server: include transcript words in the database, but keep the API backward compatible
This commit is contained in:
@@ -17,6 +17,8 @@ from fastapi_pagination import Page, paginate
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from reflector.db import database, transcripts
|
from reflector.db import database, transcripts
|
||||||
from reflector.logger import logger
|
from reflector.logger import logger
|
||||||
|
from reflector.processors.types import Transcript as ProcessorTranscript
|
||||||
|
from reflector.processors.types import Word as ProcessorWord
|
||||||
from reflector.settings import settings
|
from reflector.settings import settings
|
||||||
from reflector.utils.audio_waveform import get_audio_waveform
|
from reflector.utils.audio_waveform import get_audio_waveform
|
||||||
from starlette.concurrency import run_in_threadpool
|
from starlette.concurrency import run_in_threadpool
|
||||||
@@ -60,7 +62,8 @@ class TranscriptTopic(BaseModel):
|
|||||||
title: str
|
title: str
|
||||||
summary: str
|
summary: str
|
||||||
timestamp: float
|
timestamp: float
|
||||||
segments: list[TranscriptSegmentTopic] = []
|
text: str | None = None
|
||||||
|
words: list[ProcessorWord] = []
|
||||||
|
|
||||||
|
|
||||||
class TranscriptFinalShortSummary(BaseModel):
|
class TranscriptFinalShortSummary(BaseModel):
|
||||||
@@ -304,6 +307,53 @@ async def transcripts_create(
|
|||||||
# ==============================================================
|
# ==============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class GetTranscriptSegmentTopic(BaseModel):
    # One segment of a topic as returned by the topics endpoint.
    # Plain comments are used instead of a class docstring so the
    # generated response schema stays exactly as it was.

    # Text of the segment.
    text: str
    # Start of the segment (taken from ``segment.start`` or, for topics
    # without words, from the topic timestamp).
    start: float
    # Speaker index; 0 is used when no speaker information exists.
    speaker: int
|
||||||
|
|
||||||
|
|
||||||
|
class GetTranscriptTopic(BaseModel):
    # Backward-compatible API view of a stored ``TranscriptTopic``:
    # the stored model keeps ``words``, the API keeps exposing
    # ``text`` and ``segments``.
    title: str
    summary: str
    timestamp: float
    text: str
    segments: list[GetTranscriptSegmentTopic] = []

    @classmethod
    def from_transcript_topic(cls, topic: TranscriptTopic):
        """Convert a stored ``TranscriptTopic`` into its API representation.

        Args:
            topic: The stored topic. ``topic.words`` may be empty and
                ``topic.text`` may be ``None``.

        Returns:
            A ``GetTranscriptTopic`` whose ``text`` and ``segments`` are
            derived from ``topic.words`` when present, otherwise from
            ``topic.text``.
        """
        if not topic.words:
            # In previous versions, words were missing: just output a
            # single segment attributed to speaker 0.
            # ``topic.text`` is optional on the stored model while ``text``
            # is a required ``str`` here, so fall back to an empty string
            # instead of failing validation on ``None``.
            text = topic.text or ""
            segments = [
                GetTranscriptSegmentTopic(
                    text=text,
                    start=topic.timestamp,
                    speaker=0,
                )
            ]
        else:
            # New versions include words: rebuild text and segments from them.
            transcript = ProcessorTranscript(words=topic.words)
            text = transcript.text
            segments = [
                GetTranscriptSegmentTopic(
                    text=segment.text,
                    start=segment.start,
                    speaker=segment.speaker,
                )
                for segment in transcript.as_segments()
            ]
        return cls(
            title=topic.title,
            summary=topic.summary,
            timestamp=topic.timestamp,
            text=text,
            segments=segments,
        )
|
||||||
|
|
||||||
|
|
||||||
@router.get("/transcripts/{transcript_id}", response_model=GetTranscript)
|
@router.get("/transcripts/{transcript_id}", response_model=GetTranscript)
|
||||||
async def transcript_get(
|
async def transcript_get(
|
||||||
transcript_id: str,
|
transcript_id: str,
|
||||||
@@ -412,7 +462,10 @@ async def transcript_get_audio_waveform(
|
|||||||
return transcript.audio_waveform
|
return transcript.audio_waveform
|
||||||
|
|
||||||
|
|
||||||
@router.get("/transcripts/{transcript_id}/topics", response_model=list[TranscriptTopic])
|
@router.get(
|
||||||
|
"/transcripts/{transcript_id}/topics",
|
||||||
|
response_model=list[GetTranscriptTopic],
|
||||||
|
)
|
||||||
async def transcript_get_topics(
|
async def transcript_get_topics(
|
||||||
transcript_id: str,
|
transcript_id: str,
|
||||||
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
user: Annotated[Optional[auth.UserInfo], Depends(auth.current_user_optional)],
|
||||||
@@ -421,7 +474,11 @@ async def transcript_get_topics(
|
|||||||
transcript = await transcripts_controller.get_by_id(transcript_id, user_id=user_id)
|
transcript = await transcripts_controller.get_by_id(transcript_id, user_id=user_id)
|
||||||
if not transcript:
|
if not transcript:
|
||||||
raise HTTPException(status_code=404, detail="Transcript not found")
|
raise HTTPException(status_code=404, detail="Transcript not found")
|
||||||
return transcript.topics
|
|
||||||
|
# convert to GetTranscriptTopic
|
||||||
|
return [
|
||||||
|
GetTranscriptTopic.from_transcript_topic(topic) for topic in transcript.topics
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@router.get("/transcripts/{transcript_id}/events")
|
@router.get("/transcripts/{transcript_id}/events")
|
||||||
@@ -498,6 +555,13 @@ async def transcript_events_websocket(
|
|||||||
|
|
||||||
|
|
||||||
async def handle_rtc_event(event: PipelineEvent, args, data):
    """Run ``handle_rtc_event_once`` and log any exception it raises.

    Returns whatever ``handle_rtc_event_once`` returns; when it raises,
    the exception is logged with its traceback and ``None`` is returned.
    """
    try:
        outcome = await handle_rtc_event_once(event, args, data)
    except Exception:
        # Best-effort boundary: record the failure rather than propagate it.
        logger.exception("Error handling RTC event")
        return None
    return outcome
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_rtc_event_once(event: PipelineEvent, args, data):
|
||||||
# OFC the current implementation is not good,
|
# OFC the current implementation is not good,
|
||||||
# but it's just a POC before persistence. It won't query the
|
# but it's just a POC before persistence. It won't query the
|
||||||
# transcript from the database for each event.
|
# transcript from the database for each event.
|
||||||
@@ -530,14 +594,8 @@ async def handle_rtc_event(event: PipelineEvent, args, data):
|
|||||||
title=data.title,
|
title=data.title,
|
||||||
summary=data.summary,
|
summary=data.summary,
|
||||||
timestamp=data.timestamp,
|
timestamp=data.timestamp,
|
||||||
segments=[
|
text=data.transcript.text,
|
||||||
TranscriptSegmentTopic(
|
words=data.transcript.words,
|
||||||
speaker=segment.speaker,
|
|
||||||
text=segment.text,
|
|
||||||
timestamp=segment.start,
|
|
||||||
)
|
|
||||||
for segment in data.transcript.as_segments()
|
|
||||||
],
|
|
||||||
)
|
)
|
||||||
resp = transcript.add_event(event=event, data=topic)
|
resp = transcript.add_event(event=event, data=topic)
|
||||||
transcript.upsert_topic(topic)
|
transcript.upsert_topic(topic)
|
||||||
|
|||||||
Reference in New Issue
Block a user