Files
reflector/server/reflector/processors/audio_merge.py
Mathieu Virbel 9265d201b5 fix: restore previous behavior on live pipeline + audio downscaler (#561)
This commit restore the original behavior with frame cutting. While
silero is used on our gpu for files, look like it's not working great on
the live pipeline. To be investigated, but at the moment, what we keep
is:

- refactored to extract the downscale for further processing in the
pipeline
- remove any downscale implementation from audio_chunker and audio_merge
- removed batching from audio_merge too for now
2025-08-22 10:49:26 -06:00

63 lines
1.6 KiB
Python

import io
from time import monotonic_ns
from uuid import uuid4
import av
from reflector.processors.base import Processor
from reflector.processors.types import AudioFile
class AudioMergeProcessor(Processor):
"""
Merge audio frame into a single file
"""
INPUT_TYPE = list[av.AudioFrame]
OUTPUT_TYPE = AudioFile
def __init__(self, **kwargs):
super().__init__(**kwargs)
async def _push(self, data: list[av.AudioFrame]):
if not data:
return
# get audio information from first frame
frame = data[0]
output_channels = len(frame.layout.channels)
output_sample_rate = frame.sample_rate
output_sample_width = frame.format.bytes
# create audio file
uu = uuid4().hex
fd = io.BytesIO()
# Use PyAV to write frames
out_container = av.open(fd, "w", format="wav")
out_stream = out_container.add_stream("pcm_s16le", rate=output_sample_rate)
out_stream.layout = frame.layout.name
for frame in data:
for packet in out_stream.encode(frame):
out_container.mux(packet)
# Flush the encoder
for packet in out_stream.encode(None):
out_container.mux(packet)
out_container.close()
fd.seek(0)
# emit audio file
audiofile = AudioFile(
name=f"{monotonic_ns()}-{uu}.wav",
fd=fd,
sample_rate=output_sample_rate,
channels=output_channels,
sample_width=output_sample_width,
timestamp=data[0].pts * data[0].time_base,
)
await self.emit(audiofile)