mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 12:49:06 +00:00
gpu self hosted setup guide (no-mistakes)
This commit is contained in:
@@ -90,6 +90,12 @@ image = (
|
||||
)
|
||||
|
||||
|
||||
# IMPORTANT: This function is duplicated in multiple files for deployment isolation.
# If you modify the audio format detection logic, you MUST update all copies:
# - gpu/self_hosted/app/utils.py
# - gpu/modal_deployments/reflector_transcriber.py (2 copies)
# - gpu/modal_deployments/reflector_transcriber_parakeet.py (this file)
# - gpu/modal_deployments/reflector_diarizer.py
|
||||
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
|
||||
parsed_url = urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
@@ -105,6 +111,8 @@ def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtens
|
||||
return AudioFileExtension("wav")
|
||||
if "audio/mp4" in content_type:
|
||||
return AudioFileExtension("mp4")
|
||||
if "audio/webm" in content_type or "video/webm" in content_type:
|
||||
return AudioFileExtension("webm")
|
||||
|
||||
raise ValueError(
|
||||
f"Unsupported audio format for URL: {url}. "
|
||||
@@ -301,6 +309,11 @@ class TranscriberParakeetFile:
|
||||
audio_array, sample_rate = librosa.load(file_path, sr=SAMPLERATE, mono=True)
|
||||
return audio_array
|
||||
|
||||
# IMPORTANT: This VAD segment logic is duplicated in multiple files for deployment isolation.
# If you modify this function, you MUST update all copies:
# - gpu/modal_deployments/reflector_transcriber.py
# - gpu/modal_deployments/reflector_transcriber_parakeet.py (this file)
# - gpu/self_hosted/app/services/transcriber.py
|
||||
def vad_segment_generator(
|
||||
audio_array,
|
||||
) -> Generator[TimeSegment, None, None]:
|
||||
|
||||
Reference in New Issue
Block a user