server: keep audio for transcription in memory; write to disk only on demand

Closes #145
2026-02-04 18:06:48 +00:00 · 2023-08-15 12:29:14 +02:00
parent 98375d5c2c
commit a21a726eb1
3 changed files with 37 additions and 12 deletions
--- a/server/reflector/processors/audio_merge.py
+++ b/server/reflector/processors/audio_merge.py
@@ -1,6 +1,8 @@
 from reflector.processors.base import Processor
 from reflector.processors.types import AudioFile
-from pathlib import Path
+from time import monotonic_ns
 from uuid import uuid4
 import io
 import wave
 import av
@@ -24,12 +26,9 @@ class AudioMergeProcessor(Processor):
        sample_width = frame.format.bytes
        # create audio file
        from time import monotonic_ns
        from uuid import uuid4
        uu = uuid4().hex
-        path = Path(f"audio_{monotonic_ns()}_{uu}.wav")
+        fd = io.BytesIO()
-        with wave.open(path.as_posix(), "wb") as wf:
+        with wave.open(fd, "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(sample_rate)
@@ -38,7 +37,8 @@ class AudioMergeProcessor(Processor):
        # emit audio file
        audiofile = AudioFile(
-            path=path,
+            name=f"{monotonic_ns()}-{uu}.wav",
            fd=fd,
            sample_rate=sample_rate,
            channels=channels,
            sample_width=sample_width,
--- a/server/reflector/processors/audio_transcript_modal.py
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -48,9 +48,9 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
    async def _transcript(self, data: AudioFile):
        async with httpx.AsyncClient() as client:
-            self.logger.debug(f"Try to transcribe audio {data.path.name}")
+            self.logger.debug(f"Try to transcribe audio {data.name}")
            files = {
-                "file": (data.path.name, data.path.open("rb")),
+                "file": (data.name, data.fd),
            }
            response = await retry(client.post)(
                self.transcript_url,
--- a/server/reflector/processors/types.py
+++ b/server/reflector/processors/types.py
@@ -1,16 +1,41 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, PrivateAttr
 from pathlib import Path
 import tempfile
 import io
 class AudioFile(BaseModel):
-    path: Path
+    name: str
    sample_rate: int
    channels: int
    sample_width: int
    timestamp: float = 0.0
    _fd: io.BytesIO = PrivateAttr(None)
    _path: Path = PrivateAttr(None)
    def __init__(self, fd, **kwargs):
        super().__init__(**kwargs)
        self._fd = fd
    @property
    def fd(self):
        self._fd.seek(0)
        return self._fd
    @property
    def path(self):
        if self._path is None:
            # write down to disk
            filename = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
            self._path = Path(filename)
            with self._path.open("wb") as f:
                f.write(self._fd.getbuffer())
        return self._path
    def release(self):
-        self.path.unlink()
+        if self._path:
            self._path.unlink()
 class Word(BaseModel):