mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 12:19:06 +00:00
fix: parakeet vad not getting the end timestamp (#728)
This commit is contained in:
@@ -81,9 +81,9 @@ image = (
|
|||||||
"cuda-python==12.8.0",
|
"cuda-python==12.8.0",
|
||||||
"fastapi==0.115.12",
|
"fastapi==0.115.12",
|
||||||
"numpy<2",
|
"numpy<2",
|
||||||
"librosa==0.10.1",
|
"librosa==0.11.0",
|
||||||
"requests",
|
"requests",
|
||||||
"silero-vad==5.1.0",
|
"silero-vad==6.2.0",
|
||||||
"torch",
|
"torch",
|
||||||
)
|
)
|
||||||
.entrypoint([]) # silence chatty logs by container on start
|
.entrypoint([]) # silence chatty logs by container on start
|
||||||
@@ -306,6 +306,7 @@ class TranscriberParakeetFile:
|
|||||||
) -> Generator[TimeSegment, None, None]:
|
) -> Generator[TimeSegment, None, None]:
|
||||||
"""Generate speech segments using VAD with start/end sample indices"""
|
"""Generate speech segments using VAD with start/end sample indices"""
|
||||||
vad_iterator = VADIterator(self.vad_model, sampling_rate=SAMPLERATE)
|
vad_iterator = VADIterator(self.vad_model, sampling_rate=SAMPLERATE)
|
||||||
|
audio_duration = len(audio_array) / float(SAMPLERATE)
|
||||||
window_size = VAD_CONFIG["window_size"]
|
window_size = VAD_CONFIG["window_size"]
|
||||||
start = None
|
start = None
|
||||||
|
|
||||||
@@ -332,6 +333,10 @@ class TranscriberParakeetFile:
|
|||||||
yield TimeSegment(start_time, end_time)
|
yield TimeSegment(start_time, end_time)
|
||||||
start = None
|
start = None
|
||||||
|
|
||||||
|
if start is not None:
|
||||||
|
start_time = start / float(SAMPLERATE)
|
||||||
|
yield TimeSegment(start_time, audio_duration)
|
||||||
|
|
||||||
vad_iterator.reset_states()
|
vad_iterator.reset_states()
|
||||||
|
|
||||||
def batch_speech_segments(
|
def batch_speech_segments(
|
||||||
|
|||||||
Reference in New Issue
Block a user