server: move profanity filter out to transcript, and implement segmentation

2025-12-21 12:49:06 +00:00 · 2023-10-19 21:05:13 +02:00
parent 0d9f66c097
commit b323254376
6 changed files with 78 additions and 19 deletions
--- a/server/reflector/views/transcripts.py
+++ b/server/reflector/views/transcripts.py
@@ -49,12 +49,18 @@ class TranscriptText(BaseModel):
    translation: str | None


+class TranscriptSegmentTopic(BaseModel):
+    speaker: int
+    text: str
+    timestamp: float
+
+
 class TranscriptTopic(BaseModel):
    id: str = Field(default_factory=generate_uuid4)
    title: str
    summary: str
-    transcript: str | None = None
    timestamp: float
+    segments: list[TranscriptSegmentTopic] = []


 class TranscriptFinalShortSummary(BaseModel):
@@ -523,8 +529,15 @@ async def handle_rtc_event(event: PipelineEvent, args, data):
        topic = TranscriptTopic(
            title=data.title,
            summary=data.summary,
-            transcript=data.transcript.text,
            timestamp=data.timestamp,
+            segments=[
+                TranscriptSegmentTopic(
+                    speaker=segment.speaker,
+                    text=segment.text,
+                    timestamp=segment.start,
+                )
+                for segment in data.transcript.as_segments()
+            ],
        )
        resp = transcript.add_event(event=event, data=topic)
        transcript.upsert_topic(topic)