Files
reflector/server/reflector/processors/audio_transcript_whisper.py
Juan Diego García a682846645 feat: 3-mode selfhosted refactoring (--gpu, --cpu, --hosted) + audio token auth fallback (#896)
* fix: local processing instead of http server for cpu

* add fallback token if service worker doesn't work

* chore: rename processors to keep processor pattern up to date and allow other processors to be created and used with env vars
2026-03-04 16:31:08 -05:00

51 lines
1.5 KiB
Python

from faster_whisper import WhisperModel
from reflector.processors.audio_transcript import AudioTranscriptProcessor
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
from reflector.processors.types import AudioFile, Transcript, Word
from reflector.settings import settings
class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):
    """Transcribe audio files locally with faster-whisper on CPU.

    Loads the model named by ``settings.WHISPER_CHUNK_MODEL`` once at
    construction time and reuses it for every transcription request.
    """

    def __init__(self):
        super().__init__()
        # CPU-only inference; float32 avoids quantization artifacts at the
        # cost of speed. num_workers allows parallel decoding workers.
        self.model = WhisperModel(
            settings.WHISPER_CHUNK_MODEL,
            device="cpu",
            compute_type="float32",
            num_workers=12,
        )

    async def _transcript(self, data: AudioFile) -> "Transcript | None":
        """Transcribe ``data`` and return a word-level ``Transcript``.

        Each word's start/end time is offset by ``data.timestamp`` (the
        audio chunk's position in the overall stream) and rounded to
        milliseconds. Returns ``None`` when no words were recognized.
        """
        segments, _ = self.model.transcribe(
            data.path.as_posix(),
            language="en",
            beam_size=5,
            word_timestamps=True,
            vad_filter=True,
            vad_parameters={"min_silence_duration_ms": 500},
        )
        # BUG FIX: `segments` is a lazy generator, so the original
        # `if not segments: return` guard was dead code (a generator object
        # is always truthy). Collect the words first, then decide.
        ts = data.timestamp
        words = [
            Word(
                text=word.word,
                start=round(ts + word.start, 3),
                end=round(ts + word.end, 3),
            )
            for segment in segments
            for word in segment.words
        ]
        if not words:
            # Nothing recognized (e.g. silence-only chunk after VAD).
            return None
        return Transcript(words=words)
# Make this backend selectable by name ("whisper") through the
# auto-processor registry, e.g. via an environment/settings variable.
AudioTranscriptAutoProcessor.register("whisper", AudioTranscriptWhisperProcessor)