reflector/server/reflector/processors/audio_transcript_whisper.py

from faster_whisper import WhisperModel

from reflector.processors.audio_transcript import AudioTranscriptProcessor
from reflector.processors.audio_transcript_auto import AudioTranscriptAutoProcessor
from reflector.processors.types import AudioFile, Transcript, Word


class AudioTranscriptWhisperProcessor(AudioTranscriptProcessor):
    """Transcribe audio locally with faster-whisper."""

    def __init__(self):
        super().__init__()
        self.model = WhisperModel(
            "tiny", device="cpu", compute_type="float32", num_workers=12
        )

    async def _transcript(self, data: AudioFile):
        segments, _ = self.model.transcribe(
            data.path.as_posix(),
            language="en",
            beam_size=5,
            # condition_on_previous_text=True,
            word_timestamps=True,
            vad_filter=True,
            vad_parameters={"min_silence_duration_ms": 500},
        )
        # transcribe() returns a lazy generator; materialize it before the
        # emptiness check, otherwise the check always passes (a generator is
        # truthy even when it yields nothing).
        segments = list(segments)
        if not segments:
            return
        transcript = Transcript(words=[])
        # Word timestamps are relative to the start of this audio file;
        # offset them by the file's timestamp in the stream to make them
        # absolute.
        ts = data.timestamp
        for segment in segments:
            # Accumulate the plain text alongside the word-level entries.
            transcript.text += segment.text
            for word in segment.words:
                transcript.words.append(
                    Word(
                        text=word.word,
                        start=round(ts + word.start, 3),
                        end=round(ts + word.end, 3),
                    )
                )
        return transcript


AudioTranscriptAutoProcessor.register("whisper", AudioTranscriptWhisperProcessor)
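
For reference, the faster_whisper call at the heart of this processor can be exercised standalone. The sketch below uses only documented faster_whisper API (WhisperModel, transcribe, segment.words) and prints word-level timestamps in the same shape the processor consumes; the "sample.wav" path is a placeholder, not a file shipped with the project.

    from faster_whisper import WhisperModel

    model = WhisperModel("tiny", device="cpu", compute_type="float32")
    segments, info = model.transcribe(
        "sample.wav",  # placeholder: any local audio file
        language="en",
        beam_size=5,
        word_timestamps=True,
        vad_filter=True,
        vad_parameters={"min_silence_duration_ms": 500},
    )
    # segments is a lazy generator; iterating it runs the actual decoding.
    for segment in segments:
        for word in segment.words:
            print(f"{word.start:.3f}-{word.end:.3f}: {word.word}")

The VAD filter drops silent stretches before decoding, which is why the processor above can receive an empty segment list and returns early in that case.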