server: prevent storing audio for transcription unless wanted

Closes #145
2026-02-04 18:06:48 +00:00 · 2023-08-15 12:29:14 +02:00
parent 98375d5c2c
commit a21a726eb1
3 changed files with 37 additions and 12 deletions
--- a/server/reflector/processors/audio_merge.py
+++ b/server/reflector/processors/audio_merge.py
@@ -1,6 +1,8 @@
 from reflector.processors.base import Processor
 from reflector.processors.types import AudioFile
-from pathlib import Path
+from time import monotonic_ns
+from uuid import uuid4
+import io
 import wave
 import av

@@ -24,12 +26,9 @@ class AudioMergeProcessor(Processor):
        sample_width = frame.format.bytes

        # create audio file
-        from time import monotonic_ns
-        from uuid import uuid4
-
        uu = uuid4().hex
-        path = Path(f"audio_{monotonic_ns()}_{uu}.wav")
-        with wave.open(path.as_posix(), "wb") as wf:
+        fd = io.BytesIO()
+        with wave.open(fd, "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(sample_rate)
@@ -38,7 +37,8 @@ class AudioMergeProcessor(Processor):

        # emit audio file
        audiofile = AudioFile(
-            path=path,
+            name=f"{monotonic_ns()}-{uu}.wav",
+            fd=fd,
            sample_rate=sample_rate,
            channels=channels,
            sample_width=sample_width,
--- a/server/reflector/processors/audio_transcript_modal.py
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -48,9 +48,9 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):

    async def _transcript(self, data: AudioFile):
        async with httpx.AsyncClient() as client:
-            self.logger.debug(f"Try to transcribe audio {data.path.name}")
+            self.logger.debug(f"Try to transcribe audio {data.name}")
            files = {
-                "file": (data.path.name, data.path.open("rb")),
+                "file": (data.name, data.fd),
            }
            response = await retry(client.post)(
                self.transcript_url,
--- a/server/reflector/processors/types.py
+++ b/server/reflector/processors/types.py
@@ -1,16 +1,41 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, PrivateAttr
 from pathlib import Path
+import tempfile
+import io


 class AudioFile(BaseModel):
-    path: Path
+    name: str
    sample_rate: int
    channels: int
    sample_width: int
    timestamp: float = 0.0

+    _fd: io.BytesIO = PrivateAttr(None)
+    _path: Path = PrivateAttr(None)
+
+    def __init__(self, fd, **kwargs):
+        super().__init__(**kwargs)
+        self._fd = fd
+
+    @property
+    def fd(self):
+        self._fd.seek(0)
+        return self._fd
+
+    @property
+    def path(self):
+        if self._path is None:
+            # write down to disk
+            filename = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
+            self._path = Path(filename)
+            with self._path.open("wb") as f:
+                f.write(self._fd.getbuffer())
+        return self._path
+
    def release(self):
-        self.path.unlink()
+        if self._path:
+            self._path.unlink()


 class Word(BaseModel):