From 9642d0fd1e201fc804d61a35cbeb0c5a5217e4ab Mon Sep 17 00:00:00 2001 From: Mathieu Virbel Date: Thu, 2 Nov 2023 19:40:45 +0100 Subject: [PATCH] hotfix/server: fix duplication of topics --- server/reflector/db/transcripts.py | 6 +++--- server/reflector/pipelines/main_live_pipeline.py | 9 +++++++-- server/reflector/processors/types.py | 6 +++++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/server/reflector/db/transcripts.py b/server/reflector/db/transcripts.py index 89025d53..5d190bdc 100644 --- a/server/reflector/db/transcripts.py +++ b/server/reflector/db/transcripts.py @@ -106,9 +106,9 @@ class Transcript(BaseModel): return ev def upsert_topic(self, topic: TranscriptTopic): - existing_topic = next((t for t in self.topics if t.id == topic.id), None) - if existing_topic: - existing_topic.update_from(topic) + index = next((i for i, t in enumerate(self.topics) if t.id == topic.id), None) + if index is not None: + self.topics[index] = topic else: self.topics.append(topic) diff --git a/server/reflector/pipelines/main_live_pipeline.py b/server/reflector/pipelines/main_live_pipeline.py index 88e1bffd..bf11bdf3 100644 --- a/server/reflector/pipelines/main_live_pipeline.py +++ b/server/reflector/pipelines/main_live_pipeline.py @@ -46,7 +46,9 @@ from reflector.processors import ( TranscriptTranslatorProcessor, ) from reflector.processors.types import AudioDiarizationInput -from reflector.processors.types import TitleSummary as TitleSummaryProcessorType +from reflector.processors.types import ( + TitleSummaryWithId as TitleSummaryWithIdProcessorType, +) from reflector.processors.types import Transcript as TranscriptProcessorType from reflector.settings import settings from reflector.ws_manager import WebsocketManager, get_ws_manager @@ -163,6 +165,8 @@ class PipelineMainBase(PipelineRunner): text=data.transcript.text, words=data.transcript.words, ) + if isinstance(data, TitleSummaryWithIdProcessorType): + topic.id = data.id async with self.transaction(): transcript = await self.get_transcript() await transcripts_controller.upsert_topic(transcript, topic) @@ -302,7 +306,8 @@ class PipelineMainDiarization(PipelineMainBase): # XXX translation is lost when converting our data model to the processor model transcript = await self.get_transcript() topics = [ - TitleSummaryProcessorType( + TitleSummaryWithIdProcessorType( + id=topic.id, title=topic.title, summary=topic.summary, timestamp=topic.timestamp, diff --git a/server/reflector/processors/types.py b/server/reflector/processors/types.py index b67f84b9..312f5433 100644 --- a/server/reflector/processors/types.py +++ b/server/reflector/processors/types.py @@ -167,6 +167,10 @@ class TitleSummary(BaseModel): return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}" +class TitleSummaryWithId(TitleSummary): + id: str + + class FinalLongSummary(BaseModel): long_summary: str duration: float @@ -386,4 +390,4 @@ class TranslationLanguages(BaseModel): class AudioDiarizationInput(BaseModel): audio_url: str - topics: list[TitleSummary] + topics: list[TitleSummaryWithId]