mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
server: prevent storing audio for transcription unless wanted
Closes #145
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
from reflector.processors.base import Processor
|
from reflector.processors.base import Processor
|
||||||
from reflector.processors.types import AudioFile
|
from reflector.processors.types import AudioFile
|
||||||
from pathlib import Path
|
from time import monotonic_ns
|
||||||
|
from uuid import uuid4
|
||||||
|
import io
|
||||||
import wave
|
import wave
|
||||||
import av
|
import av
|
||||||
|
|
||||||
@@ -24,12 +26,9 @@ class AudioMergeProcessor(Processor):
|
|||||||
sample_width = frame.format.bytes
|
sample_width = frame.format.bytes
|
||||||
|
|
||||||
# create audio file
|
# create audio file
|
||||||
from time import monotonic_ns
|
|
||||||
from uuid import uuid4
|
|
||||||
|
|
||||||
uu = uuid4().hex
|
uu = uuid4().hex
|
||||||
path = Path(f"audio_{monotonic_ns()}_{uu}.wav")
|
fd = io.BytesIO()
|
||||||
with wave.open(path.as_posix(), "wb") as wf:
|
with wave.open(fd, "wb") as wf:
|
||||||
wf.setnchannels(channels)
|
wf.setnchannels(channels)
|
||||||
wf.setsampwidth(sample_width)
|
wf.setsampwidth(sample_width)
|
||||||
wf.setframerate(sample_rate)
|
wf.setframerate(sample_rate)
|
||||||
@@ -38,7 +37,8 @@ class AudioMergeProcessor(Processor):
|
|||||||
|
|
||||||
# emit audio file
|
# emit audio file
|
||||||
audiofile = AudioFile(
|
audiofile = AudioFile(
|
||||||
path=path,
|
name=f"{monotonic_ns()}-{uu}.wav",
|
||||||
|
fd=fd,
|
||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
channels=channels,
|
channels=channels,
|
||||||
sample_width=sample_width,
|
sample_width=sample_width,
|
||||||
|
|||||||
@@ -48,9 +48,9 @@ class AudioTranscriptModalProcessor(AudioTranscriptProcessor):
|
|||||||
|
|
||||||
async def _transcript(self, data: AudioFile):
|
async def _transcript(self, data: AudioFile):
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
self.logger.debug(f"Try to transcribe audio {data.path.name}")
|
self.logger.debug(f"Try to transcribe audio {data.name}")
|
||||||
files = {
|
files = {
|
||||||
"file": (data.path.name, data.path.open("rb")),
|
"file": (data.name, data.fd),
|
||||||
}
|
}
|
||||||
response = await retry(client.post)(
|
response = await retry(client.post)(
|
||||||
self.transcript_url,
|
self.transcript_url,
|
||||||
|
|||||||
@@ -1,16 +1,41 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel, PrivateAttr
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
import io
|
||||||
|
|
||||||
|
|
||||||
class AudioFile(BaseModel):
    """Audio held in an in-memory buffer, with an on-disk copy made on demand.

    The audio bytes live in the buffer passed as ``fd``. Nothing is written
    to disk unless ``path`` is accessed, in which case the buffer is dumped
    to a temporary ``.wav`` file that ``release()`` removes afterwards.
    """

    name: str
    sample_rate: int
    channels: int
    sample_width: int
    timestamp: float = 0.0

    # In-memory buffer supplied to the constructor.
    _fd: io.BytesIO = PrivateAttr(None)
    # Location of the temporary on-disk copy; None until ``path`` is accessed.
    _path: Path = PrivateAttr(None)

    def __init__(self, fd, **kwargs):
        """Build the model from ``kwargs`` and keep ``fd`` as the audio buffer.

        Args:
            fd: Buffer holding the audio bytes (an ``io.BytesIO`` per the
                private attribute annotation).
            **kwargs: Model fields (``name``, ``sample_rate``, ``channels``,
                ``sample_width``, optionally ``timestamp``).
        """
        super().__init__(**kwargs)
        self._fd = fd

    @property
    def fd(self):
        """Return the audio buffer, rewound to its beginning."""
        self._fd.seek(0)
        return self._fd

    @property
    def path(self):
        """Return the path of an on-disk copy of the audio, creating it if needed.

        The copy is written once and reused on later accesses until
        ``release()`` is called.
        """
        if self._path is None:
            # delete=False keeps the file after the handle is closed; the
            # context manager closes the handle right after the write so no
            # descriptor is leaked and the file is not opened a second time.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp.write(self._fd.getbuffer())
            # Record the path only once the data has been fully written.
            self._path = Path(tmp.name)
        return self._path

    def release(self):
        """Delete the on-disk copy, if one was created.

        Safe to call several times; the in-memory buffer is left untouched.
        """
        if self._path is None:
            return
        # Forget the path first so a later ``path`` access writes a fresh
        # copy instead of returning a location that no longer exists.
        stale, self._path = self._path, None
        try:
            stale.unlink()
        except FileNotFoundError:
            # Already removed by someone else: nothing left to clean up.
            pass
|
||||||
|
|
||||||
|
|
||||||
class Word(BaseModel):
|
class Word(BaseModel):
|
||||||
|
|||||||
Reference in New Issue
Block a user