mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
This commit restores the original behavior with frame cutting. While silero is used on our GPU for files, it looks like it's not working great on the live pipeline. To be investigated, but at the moment, what we keep is: - refactored to extract the downscale for further processing in the pipeline - removed any downscale implementation from audio_chunker and audio_merge - removed batching from audio_merge too for now
63 lines
1.6 KiB
Python
63 lines
1.6 KiB
Python
import io
|
|
from time import monotonic_ns
|
|
from uuid import uuid4
|
|
|
|
import av
|
|
|
|
from reflector.processors.base import Processor
|
|
from reflector.processors.types import AudioFile
|
|
|
|
|
|
class AudioMergeProcessor(Processor):
    """
    Merge audio frames into a single in-memory WAV file

    Receives a list of ``av.AudioFrame`` and emits one ``AudioFile`` whose
    ``fd`` is a ``BytesIO`` containing the frames encoded as ``pcm_s16le``
    in a WAV container. An empty list emits nothing.
    """

    INPUT_TYPE = list[av.AudioFrame]
    OUTPUT_TYPE = AudioFile

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    async def _push(self, data: list[av.AudioFrame]) -> None:
        """
        Encode `data` into a WAV buffer and emit it as an AudioFile

        The sample rate, channel layout and sample width are read from the
        first frame; the emitted timestamp is the first frame's
        ``pts * time_base``.
        """
        if not data:
            return

        # get audio information from first frame
        first_frame = data[0]
        output_channels = len(first_frame.layout.channels)
        output_sample_rate = first_frame.sample_rate
        # NOTE(review): this is the width of the *input* frame format, while
        # the stream below is encoded as pcm_s16le - confirm that consumers
        # of AudioFile.sample_width expect the input width.
        output_sample_width = first_frame.format.bytes

        # create audio file
        uu = uuid4().hex
        fd = io.BytesIO()

        # Use PyAV to write frames. The context manager guarantees the
        # container is closed even if encoding or muxing raises.
        with av.open(fd, "w", format="wav") as out_container:
            out_stream = out_container.add_stream(
                "pcm_s16le", rate=output_sample_rate
            )
            out_stream.layout = first_frame.layout.name

            # The trailing None flushes the encoder after the last frame
            for frame in (*data, None):
                for packet in out_stream.encode(frame):
                    out_container.mux(packet)

        # rewind so that consumers read the file from the beginning
        fd.seek(0)

        # emit audio file
        audiofile = AudioFile(
            name=f"{monotonic_ns()}-{uu}.wav",
            fd=fd,
            sample_rate=output_sample_rate,
            channels=output_channels,
            sample_width=output_sample_width,
            timestamp=first_frame.pts * first_frame.time_base,
        )

        await self.emit(audiofile)
|