pull from main

This commit is contained in:
Gokul Mohanarangan
2023-08-17 09:38:35 +05:30
10 changed files with 158 additions and 670 deletions

View File

@@ -1,5 +1,6 @@
from .base import Processor, ThreadedProcessor, Pipeline # noqa: F401
from .types import AudioFile, Transcript, Word, TitleSummary, FinalSummary # noqa: F401
from .audio_file_writer import AudioFileWriterProcessor # noqa: F401
from .audio_chunker import AudioChunkerProcessor # noqa: F401
from .audio_merge import AudioMergeProcessor # noqa: F401
from .audio_transcript import AudioTranscriptProcessor # noqa: F401

View File

@@ -0,0 +1,39 @@
from reflector.processors.base import Processor
import av
from pathlib import Path
class AudioFileWriterProcessor(Processor):
    """
    Write audio frames to a file.

    Every frame pushed into the processor is encoded as 16-bit PCM
    (``pcm_s16le``) into a WAV container at ``path`` and then passed to
    ``self.emit`` as-is. The output file is opened lazily on the first
    frame, because the stream is created with that frame's sample rate.
    """

    INPUT_TYPE = av.AudioFrame
    OUTPUT_TYPE = av.AudioFrame

    def __init__(self, path: Path | str):
        """
        Args:
            path: Destination of the WAV file. A ``str`` is converted to a
                ``Path``. Missing parent directories are created when the
                first frame is written.
        """
        super().__init__()
        if isinstance(path, str):
            path = Path(path)
        self.path = path
        # Both stay None until the first frame arrives, and are reset to
        # None again by _flush().
        self.out_container = None
        self.out_stream = None

    async def _push(self, data: av.AudioFrame):
        """Encode ``data`` into the output file, then emit it."""
        if not self.out_container:
            self.path.parent.mkdir(parents=True, exist_ok=True)
            container = av.open(self.path.as_posix(), "w", format="wav")
            try:
                stream = container.add_stream("pcm_s16le", rate=data.sample_rate)
            except Exception:
                # Do not leave a half-initialised writer behind: without
                # this the container would stay open with no stream, and
                # every later push would fail on `None.encode`.
                container.close()
                raise
            # Publish the state only once both objects exist.
            self.out_container = container
            self.out_stream = stream
        for packet in self.out_stream.encode(data):
            self.out_container.mux(packet)
        await self.emit(data)

    async def _flush(self):
        """Drain the encoder and close the output file, if one was opened."""
        if not self.out_container:
            return
        container, stream = self.out_container, self.out_stream
        # Reset first so the processor never keeps a reference to a
        # container that is closed or failed while closing.
        self.out_container = None
        self.out_stream = None
        try:
            # encode(None) flushes the packets still buffered in the encoder.
            for packet in stream.encode(None):
                container.mux(packet)
        finally:
            # Always release the file handle, even if draining raised.
            container.close()

View File

@@ -3,7 +3,6 @@ from reflector.processors.types import AudioFile
from time import monotonic_ns
from uuid import uuid4
import io
import wave
import av
@@ -28,12 +27,16 @@ class AudioMergeProcessor(Processor):
# create audio file
uu = uuid4().hex
fd = io.BytesIO()
with wave.open(fd, "wb") as wf:
wf.setnchannels(channels)
wf.setsampwidth(sample_width)
wf.setframerate(sample_rate)
for frame in data:
wf.writeframes(frame.to_ndarray().tobytes())
out_container = av.open(fd, "w", format="wav")
out_stream = out_container.add_stream("pcm_s16le", rate=sample_rate)
for frame in data:
for packet in out_stream.encode(frame):
out_container.mux(packet)
for packet in out_stream.encode(None):
out_container.mux(packet)
out_container.close()
fd.seek(0)
# emit audio file
audiofile = AudioFile(
@@ -44,4 +47,5 @@ class AudioMergeProcessor(Processor):
sample_width=sample_width,
timestamp=data[0].pts * data[0].time_base,
)
await self.emit(audiofile)