Move waveform generation from the API layer into the processing pipeline

This commit is contained in:
Sara
2023-11-15 20:30:00 +01:00
parent e98f1bf4bc
commit 1fc261a669
6 changed files with 72 additions and 23 deletions

View File

@@ -10,7 +10,6 @@ from pydantic import BaseModel, Field
from reflector.db import database, metadata from reflector.db import database, metadata
from reflector.processors.types import Word as ProcessorWord from reflector.processors.types import Word as ProcessorWord
from reflector.settings import settings from reflector.settings import settings
from reflector.utils.audio_waveform import get_audio_waveform
transcripts = sqlalchemy.Table( transcripts = sqlalchemy.Table(
"transcript", "transcript",
@@ -79,6 +78,14 @@ class TranscriptFinalTitle(BaseModel):
title: str title: str
class TranscriptDuration(BaseModel):
    """Event payload carrying the final audio duration."""

    # length of the recorded audio (presumably seconds — confirm against pipeline)
    duration: float
class TranscriptWaveform(BaseModel):
    """Event payload carrying the downsampled audio waveform."""

    # per-segment amplitude values produced by get_audio_waveform
    waveform: list[float]
class TranscriptEvent(BaseModel): class TranscriptEvent(BaseModel):
event: str event: str
data: dict data: dict
@@ -118,22 +125,6 @@ class Transcript(BaseModel):
def topics_dump(self, mode="json"):
    """Serialize every topic via pydantic ``model_dump``.

    mode: forwarded to ``model_dump`` ("json" yields JSON-safe values).
    """
    return [topic.model_dump(mode=mode) for topic in self.topics]
def convert_audio_to_waveform(self, segments_count=256):
    """Compute and cache the waveform JSON for this transcript's audio.

    Returns None without recomputing when the waveform file already
    exists; otherwise writes the waveform to disk and returns it.

    segments_count: number of amplitude segments to extract.
    """
    waveform_file = self.audio_waveform_filename
    if waveform_file.exists():
        return
    waveform = get_audio_waveform(
        path=self.audio_mp3_filename, segments_count=segments_count
    )
    try:
        with open(waveform_file, "w") as fd:
            json.dump(waveform, fd)
    except Exception:
        # a partial/corrupt file must not survive a failed write
        waveform_file.unlink(missing_ok=True)
        raise
    return waveform
def unlink(self):
    """Delete this transcript's data file, ignoring a missing file."""
    self.data_path.unlink(missing_ok=True)

View File

@@ -21,11 +21,13 @@ from pydantic import BaseModel
from reflector.app import app from reflector.app import app
from reflector.db.transcripts import ( from reflector.db.transcripts import (
Transcript, Transcript,
TranscriptDuration,
TranscriptFinalLongSummary, TranscriptFinalLongSummary,
TranscriptFinalShortSummary, TranscriptFinalShortSummary,
TranscriptFinalTitle, TranscriptFinalTitle,
TranscriptText, TranscriptText,
TranscriptTopic, TranscriptTopic,
TranscriptWaveform,
transcripts_controller, transcripts_controller,
) )
from reflector.logger import logger from reflector.logger import logger
@@ -45,6 +47,7 @@ from reflector.processors import (
TranscriptTopicDetectorProcessor, TranscriptTopicDetectorProcessor,
TranscriptTranslatorProcessor, TranscriptTranslatorProcessor,
) )
from reflector.processors.audio_waveform_processor import AudioWaveformProcessor
from reflector.processors.types import AudioDiarizationInput from reflector.processors.types import AudioDiarizationInput
from reflector.processors.types import ( from reflector.processors.types import (
TitleSummaryWithId as TitleSummaryWithIdProcessorType, TitleSummaryWithId as TitleSummaryWithIdProcessorType,
@@ -230,15 +233,29 @@ class PipelineMainBase(PipelineRunner):
data=final_short_summary, data=final_short_summary,
) )
async def on_duration(self, data):
    """Persist the audio duration and broadcast it as a DURATION event.

    data: raw duration value (float) from the pipeline — wrapped in
    TranscriptDuration before storage/broadcast.
    """
    async with self.transaction():
        duration = TranscriptDuration(duration=data)
        transcript = await self.get_transcript()
        await transcripts_controller.update(
            transcript,
            {
                "duration": duration.duration,
            },
        )
        return await transcripts_controller.append_event(
            transcript=transcript, event="DURATION", data=duration
        )
async def on_waveform(self, data):
    """Broadcast the computed waveform as a WAVEFORM transcript event."""
    transcript = await self.get_transcript()
    payload = TranscriptWaveform(waveform=data)
    return await transcripts_controller.append_event(
        transcript=transcript, event="WAVEFORM", data=payload
    )
class PipelineMainLive(PipelineMainBase): class PipelineMainLive(PipelineMainBase):
@@ -266,6 +283,11 @@ class PipelineMainLive(PipelineMainBase):
BroadcastProcessor( BroadcastProcessor(
processors=[ processors=[
TranscriptFinalTitleProcessor.as_threaded(callback=self.on_title), TranscriptFinalTitleProcessor.as_threaded(callback=self.on_title),
AudioWaveformProcessor(
audio_path=transcript.audio_mp3_filename,
waveform_path=transcript.audio_waveform_filename,
on_waveform=self.on_waveform,
),
] ]
), ),
] ]

View File

@@ -0,0 +1,33 @@
import json
from pathlib import Path
from reflector.processors.base import Processor
from reflector.processors.types import TitleSummary
from reflector.utils.audio_waveform import get_audio_waveform
class AudioWaveformProcessor(Processor):
    """
    Write the waveform for the final audio.

    On push, computes the waveform of ``audio_path``, persists it as JSON
    at ``waveform_path``, and emits it on the "waveform" channel.
    """

    INPUT_TYPE = TitleSummary

    def __init__(
        self,
        audio_path: Path | str,
        waveform_path: Path | str,
        segments_count: int = 255,
        **kwargs,
    ):
        """
        audio_path: source audio (.mp3 or .wav only).
        waveform_path: JSON output file; parent dirs created on demand.
        segments_count: number of waveform segments to extract.
        kwargs: forwarded to Processor (e.g. on_waveform callback).
        """
        super().__init__(**kwargs)
        if isinstance(audio_path, str):
            audio_path = Path(audio_path)
        # bug fix: waveform_path must also be a Path — _push dereferences
        # .parent, which would crash on a plain str
        if isinstance(waveform_path, str):
            waveform_path = Path(waveform_path)
        if audio_path.suffix not in (".mp3", ".wav"):
            raise ValueError("Only mp3 and wav files are supported")
        self.audio_path = audio_path
        self.waveform_path = waveform_path
        self.segments_count = segments_count

    async def _push(self, _data):
        """Compute, persist, and emit the waveform (input data unused)."""
        self.waveform_path.parent.mkdir(parents=True, exist_ok=True)
        self.logger.info("Waveform Processing Started")
        waveform = get_audio_waveform(
            path=self.audio_path, segments_count=self.segments_count
        )
        with open(self.waveform_path, "w") as fd:
            json.dump(waveform, fd)
        self.logger.info("Waveform Processing Finished")
        await self.emit(waveform, name="waveform")

View File

@@ -22,7 +22,6 @@ from reflector.db.transcripts import (
from reflector.processors.types import Transcript as ProcessorTranscript from reflector.processors.types import Transcript as ProcessorTranscript
from reflector.settings import settings from reflector.settings import settings
from reflector.ws_manager import get_ws_manager from reflector.ws_manager import get_ws_manager
from starlette.concurrency import run_in_threadpool
from ._range_requests_response import range_requests_response from ._range_requests_response import range_requests_response
from .rtc_offer import RtcOffer, rtc_offer_base from .rtc_offer import RtcOffer, rtc_offer_base
@@ -261,7 +260,7 @@ async def transcript_get_audio_waveform(
if not transcript.audio_mp3_filename.exists(): if not transcript.audio_mp3_filename.exists():
raise HTTPException(status_code=404, detail="Audio not found") raise HTTPException(status_code=404, detail="Audio not found")
await run_in_threadpool(transcript.convert_audio_to_waveform) # await run_in_threadpool(transcript.convert_audio_to_waveform)
return transcript.audio_waveform return transcript.audio_waveform

View File

@@ -182,6 +182,10 @@ async def test_transcript_rtc_and_websocket(
ev = events[eventnames.index("FINAL_TITLE")] ev = events[eventnames.index("FINAL_TITLE")]
assert ev["data"]["title"] == "LLM TITLE" assert ev["data"]["title"] == "LLM TITLE"
assert "WAVEFORM" in eventnames
ev = events[eventnames.index("FINAL_TITLE")]
assert ev["data"]["title"] == "LLM TITLE"
# check status order # check status order
statuses = [e["data"]["value"] for e in events if e["event"] == "STATUS"] statuses = [e["data"]["value"] for e in events if e["event"] == "STATUS"]
assert statuses.index("recording") < statuses.index("processing") assert statuses.index("recording") < statuses.index("processing")

View File

@@ -3,9 +3,9 @@ import { isDevelopment } from "./utils";
const localConfig = { const localConfig = {
features: { features: {
requireLogin: true, requireLogin: false,
privacy: true, privacy: true,
browse: true, browse: false,
}, },
api_url: "http://127.0.0.1:1250", api_url: "http://127.0.0.1:1250",
websocket_url: "ws://127.0.0.1:1250", websocket_url: "ws://127.0.0.1:1250",