diff --git a/server/reflector/processors/audio_merge.py b/server/reflector/processors/audio_merge.py
index ac16676d..37734a53 100644
--- a/server/reflector/processors/audio_merge.py
+++ b/server/reflector/processors/audio_merge.py
@@ -1,6 +1,8 @@
 from reflector.processors.base import Processor
 from reflector.processors.types import AudioFile
-from pathlib import Path
+from time import monotonic_ns
+from uuid import uuid4
+import io
 import wave
 
 import av
@@ -24,12 +26,9 @@ class AudioMergeProcessor(Processor):
         sample_width = frame.format.bytes
 
         # create audio file
-        from time import monotonic_ns
-        from uuid import uuid4
-
         uu = uuid4().hex
-        path = Path(f"audio_{monotonic_ns()}_{uu}.wav")
-        with wave.open(path.as_posix(), "wb") as wf:
+        fd = io.BytesIO()
+        with wave.open(fd, "wb") as wf:
             wf.setnchannels(channels)
             wf.setsampwidth(sample_width)
             wf.setframerate(sample_rate)
@@ -38,7 +37,8 @@
 
         # emit audio file
         audiofile = AudioFile(
-            path=path,
+            name=f"{monotonic_ns()}-{uu}.wav",
+            fd=fd,
             sample_rate=sample_rate,
             channels=channels,
             sample_width=sample_width,
diff --git a/server/reflector/processors/audio_transcript_modal.py b/server/reflector/processors/audio_transcript_modal.py
index 4d1dac2d..1ed727d6 100644
--- a/server/reflector/processors/audio_transcript_modal.py
+++ b/server/reflector/processors/audio_transcript_modal.py
@@ -48,9 +48,9 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
 
     async def _transcript(self, data: AudioFile):
         async with httpx.AsyncClient() as client:
-            self.logger.debug(f"Try to transcribe audio {data.path.name}")
+            self.logger.debug(f"Try to transcribe audio {data.name}")
             files = {
-                "file": (data.path.name, data.path.open("rb")),
+                "file": (data.name, data.fd),
             }
             response = await retry(client.post)(
                 self.transcript_url,
diff --git a/server/reflector/processors/types.py b/server/reflector/processors/types.py
index 6b193882..0c7c48d4 100644
--- a/server/reflector/processors/types.py
+++ b/server/reflector/processors/types.py
@@ -1,16 +1,41 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, PrivateAttr
 from pathlib import Path
+import tempfile
+import io
 
 
 class AudioFile(BaseModel):
-    path: Path
+    name: str
     sample_rate: int
     channels: int
     sample_width: int
     timestamp: float = 0.0
 
+    _fd: io.BytesIO = PrivateAttr(None)
+    _path: Path = PrivateAttr(None)
+
+    def __init__(self, fd, **kwargs):
+        super().__init__(**kwargs)
+        self._fd = fd
+
+    @property
+    def fd(self):
+        self._fd.seek(0)
+        return self._fd
+
+    @property
+    def path(self):
+        if self._path is None:
+            # write down to disk
+            filename = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
+            self._path = Path(filename)
+            with self._path.open("wb") as f:
+                f.write(self._fd.getbuffer())
+        return self._path
+
     def release(self):
-        self.path.unlink()
+        if self._path:
+            self._path.unlink()
 
 
 class Word(BaseModel):
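
Reviewer note: a minimal usage sketch of the reworked in-memory AudioFile API introduced in types.py above. It assumes the repo's reflector package is importable; the one-second silent buffer and the "example.wav" name are illustrative only, not taken from the codebase.

    import io
    import wave

    from reflector.processors.types import AudioFile

    # Build a one-second silent mono 16-bit 16 kHz WAV entirely in memory,
    # mirroring what AudioMergeProcessor now does instead of writing a file.
    fd = io.BytesIO()
    with wave.open(fd, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(16000)
        wf.writeframes(b"\x00\x00" * 16000)

    audio = AudioFile(
        fd=fd,
        name="example.wav",
        sample_rate=16000,
        channels=1,
        sample_width=2,
    )

    # The .fd property rewinds the buffer before returning it, so consumers
    # such as AudioTranscriptModalProcessor can upload it without seeking.
    assert audio.fd.read(4) == b"RIFF"

    # The .path property lazily materializes a temp file only when a consumer
    # actually needs a filesystem path; release() then unlinks that file.
    print(audio.path)
    audio.release()

Note that wave.open() only closes files it opened itself, so the BytesIO remains usable after the with block, which is what allows audio_merge.py to hand fd to AudioFile.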