mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-02-04 09:56:47 +00:00
docs: docs website + installation (#778)
* feat: WIP doc (vibe started and iterated)
* install from scratch docs
* caddyfile.example
* gitignore
* authentik script
* authentik script
* authentik script
* llm doc
* authentik ongoing
* more daily setup logs
* doc website
* gpu self hosted setup guide (no-mistakes)
* doc review round
* doc review round
* doc review round
* update doc site sidebars
* feat(docs): add mermaid diagram support
* docs polishing
* live pipeline doc
* move pipeline dev docs to dev docs location
* doc pr review iteration
* dockerfile healthcheck
* docs/pr-comments
* remove jwt comment
* llm suggestion
* pr comments
* pr comments
* document auto migrations
* cleanup docs

---------

Co-authored-by: Mathieu Virbel <mat@meltingrocks.com>
Co-authored-by: Igor Loskutov <igor.loskutoff@gmail.com>
This commit is contained in:
150
gpu/modal_deployments/deploy-all.sh
Executable file
150
gpu/modal_deployments/deploy-all.sh
Executable file
@@ -0,0 +1,150 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# --- Usage ---
# Print the help text and exit.
# Arguments:
#   $1 - exit status to terminate with (optional; defaults to 0 so a bare
#        `usage` call keeps exiting successfully).
usage() {
    echo "Usage: $0 [OPTIONS]"
    echo ""
    echo "Options:"
    echo " --hf-token TOKEN HuggingFace token"
    echo " --help Show this help message"
    echo ""
    echo "Examples:"
    echo " $0 # Interactive mode"
    echo " $0 --hf-token hf_xxxxx # Non-interactive mode"
    echo ""
    exit "${1:-0}"
}

# --- Parse Arguments ---
HF_TOKEN=""

# Parse command-line options, storing the --hf-token value in HF_TOKEN.
# Exits 0 after printing help for --help; exits 1 (with the help text on
# stderr) for an unknown option or a --hf-token that has no value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case $1 in
            --hf-token)
                # Without this guard `shift 2` fails when the value is missing
                # and `set -e` terminates the script with no message at all.
                if [[ $# -lt 2 ]]; then
                    echo "Error: --hf-token requires a value" >&2
                    usage 1 >&2
                fi
                HF_TOKEN="$2"
                shift 2
                ;;
            --help)
                usage
                ;;
            *)
                # A bad invocation must not report success to the caller.
                echo "Unknown option: $1" >&2
                usage 1 >&2
                ;;
        esac
    done
}

parse_args "$@"
|
||||
|
||||
# Banner: three lines followed by one empty line.
printf '%s\n' \
    "==========================================" \
    "Reflector GPU Functions Deployment" \
    "==========================================" \
    ""

# --- Check Dependencies ---
# Each guard prints its hint and aborts with status 1 when the check fails.
command -v modal >/dev/null 2>&1 || {
    echo "Error: Modal CLI not installed."
    echo " Install with: pip install modal"
    exit 1
}

command -v openssl >/dev/null 2>&1 || {
    echo "Error: openssl not found."
    echo " Mac: brew install openssl"
    echo " Ubuntu: sudo apt-get install openssl"
    exit 1
}

# Check Modal authentication
modal profile current >/dev/null 2>&1 || {
    echo "Error: Not authenticated with Modal."
    echo " Run: modal setup"
    exit 1
}
|
||||
|
||||
# --- HuggingFace Token Setup ---
# Prompt for the token only when it was not supplied on the command line.
if [ -z "$HF_TOKEN" ]; then
    echo "HuggingFace token required for Pyannote diarization model."
    echo "1. Create account at https://huggingface.co"
    echo "2. Accept license at https://huggingface.co/pyannote/speaker-diarization-3.1"
    echo "3. Generate token at https://huggingface.co/settings/tokens"
    echo ""
    # -r keeps backslashes literal; -s stops the secret from being echoed to
    # the terminal. `|| true` prevents `set -e` from aborting silently when
    # read hits EOF, so the explicit empty-token error below is shown instead.
    read -r -s -p "Enter your HuggingFace token: " HF_TOKEN || true
    # -s suppresses the newline the user typed; emit one to end the prompt line.
    echo ""
fi

if [ -z "$HF_TOKEN" ]; then
    echo "Error: HuggingFace token is required for diarization"
    exit 1
fi

# Basic token format validation
if [[ ! "$HF_TOKEN" =~ ^hf_ ]]; then
    echo "Warning: HuggingFace tokens usually start with 'hf_'"
    if [ -t 0 ]; then
        # stdin is a terminal: ask for confirmation. A failed read (EOF) is
        # treated as "no" rather than letting `set -e` exit without a message.
        read -r -p "Continue anyway? (y/n): " confirm || confirm=""
        if [ "$confirm" != "y" ]; then
            exit 1
        fi
    else
        echo "Non-interactive mode: proceeding anyway"
    fi
fi
|
||||
|
||||
# --- Auto-generate reflector<->GPU API Key ---
echo ""
echo "Generating API key for GPU services..."
API_KEY=$(openssl rand -hex 32)

# --- Create Modal Secrets ---
echo "Creating Modal secrets..."

# Create a Modal secret, deleting any listed secret that matches the name first.
# Arguments:
#   $1 - secret name
#   $2 - KEY=value pair stored in the secret
recreate_secret() {
    local secret_name="$1"
    local key_value="$2"

    if modal secret list 2>/dev/null | grep -q "$secret_name"; then
        echo " -> Recreating secret: $secret_name"
        # Best-effort delete: a failure here is ignored and creation proceeds.
        modal secret delete "$secret_name" --yes 2>/dev/null || true
    fi
    echo " -> Creating secret: $secret_name"
    modal secret create "$secret_name" "$key_value"
}

recreate_secret hf_token HF_TOKEN="$HF_TOKEN"
recreate_secret reflector-gpu REFLECTOR_GPU_APIKEY="$API_KEY"
|
||||
|
||||
# --- Deploy Functions ---

# Deploy one Modal app and print its web endpoint URL on stdout.
# Arguments:
#   $1 - path of the Modal app script to deploy
# Returns non-zero, after replaying the captured `modal deploy` output on
# stderr, when the deploy command fails or no endpoint URL is found in its
# output. Previously that output was discarded, hiding the failure reason.
deploy_and_get_url() {
    local app_script="$1"
    local deploy_output endpoint_url

    if ! deploy_output=$(modal deploy "$app_script" 2>&1); then
        echo "$deploy_output" >&2
        return 1
    fi

    # Dots are escaped so only a literal "web.modal.run" suffix matches.
    endpoint_url=$(printf '%s\n' "$deploy_output" | grep -o 'https://[^ ]*web\.modal\.run' | head -1)
    if [ -z "$endpoint_url" ]; then
        echo "$deploy_output" >&2
        return 1
    fi

    echo "$endpoint_url"
}

echo ""
echo "Deploying transcriber (Whisper)..."
if ! TRANSCRIBER_URL=$(deploy_and_get_url reflector_transcriber.py); then
    echo "Error: Failed to deploy transcriber. Check Modal dashboard for details."
    exit 1
fi
echo " -> $TRANSCRIBER_URL"

echo ""
echo "Deploying diarizer (Pyannote)..."
if ! DIARIZER_URL=$(deploy_and_get_url reflector_diarizer.py); then
    echo "Error: Failed to deploy diarizer. Check Modal dashboard for details."
    exit 1
fi
echo " -> $DIARIZER_URL"
|
||||
|
||||
# --- Output Configuration ---
# Print the closing summary as a single here-document. The delimiter is left
# unquoted so the endpoint URLs and the API key are expanded into the text.
cat <<EOF

==========================================
Deployment complete!
==========================================

Copy these values to your server's server/.env file:

# --- Modal GPU Configuration ---
TRANSCRIPT_BACKEND=modal
TRANSCRIPT_URL=$TRANSCRIBER_URL
TRANSCRIPT_MODAL_API_KEY=$API_KEY

DIARIZATION_BACKEND=modal
DIARIZATION_URL=$DIARIZER_URL
DIARIZATION_MODAL_API_KEY=$API_KEY
# --- End Modal Configuration ---
EOF
|
||||
@@ -24,6 +24,12 @@ app = modal.App(name="reflector-diarizer")
|
||||
upload_volume = modal.Volume.from_name("diarizer-uploads", create_if_missing=True)
|
||||
|
||||
|
||||
# IMPORTANT: This function is duplicated in multiple files for deployment isolation.
|
||||
# If you modify the audio format detection logic, you MUST update all copies:
|
||||
# - gpu/self_hosted/app/utils.py
|
||||
# - gpu/modal_deployments/reflector_transcriber.py (2 copies)
|
||||
# - gpu/modal_deployments/reflector_transcriber_parakeet.py
|
||||
# - gpu/modal_deployments/reflector_diarizer.py (this file)
|
||||
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
|
||||
parsed_url = urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
@@ -39,6 +45,8 @@ def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtens
|
||||
return AudioFileExtension("wav")
|
||||
if "audio/mp4" in content_type:
|
||||
return AudioFileExtension("mp4")
|
||||
if "audio/webm" in content_type or "video/webm" in content_type:
|
||||
return AudioFileExtension("webm")
|
||||
|
||||
raise ValueError(
|
||||
f"Unsupported audio format for URL: {url}. "
|
||||
@@ -105,7 +113,7 @@ def download_pyannote_audio():
|
||||
|
||||
|
||||
diarizer_image = (
|
||||
modal.Image.debian_slim(python_version="3.10.8")
|
||||
modal.Image.debian_slim(python_version="3.10")
|
||||
.pip_install(
|
||||
"pyannote.audio==3.1.0",
|
||||
"requests",
|
||||
@@ -116,7 +124,7 @@ diarizer_image = (
|
||||
"transformers==4.34.0",
|
||||
"sentencepiece",
|
||||
"protobuf",
|
||||
"numpy",
|
||||
"numpy<2",
|
||||
"huggingface_hub",
|
||||
"hf-transfer",
|
||||
)
|
||||
|
||||
@@ -89,6 +89,7 @@ image = (
|
||||
"torch==2.5.1",
|
||||
"faster-whisper==1.1.1",
|
||||
"fastapi==0.115.12",
|
||||
"python-multipart",
|
||||
"requests",
|
||||
"librosa==0.10.1",
|
||||
"numpy<2",
|
||||
@@ -98,6 +99,12 @@ image = (
|
||||
)
|
||||
|
||||
|
||||
# IMPORTANT: This function is duplicated in multiple files for deployment isolation.
|
||||
# If you modify the audio format detection logic, you MUST update all copies:
|
||||
# - gpu/self_hosted/app/utils.py
|
||||
# - gpu/modal_deployments/reflector_transcriber.py (this file - 2 copies!)
|
||||
# - gpu/modal_deployments/reflector_transcriber_parakeet.py
|
||||
# - gpu/modal_deployments/reflector_diarizer.py
|
||||
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
|
||||
parsed_url = urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
@@ -113,6 +120,8 @@ def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtens
|
||||
return AudioFileExtension("wav")
|
||||
if "audio/mp4" in content_type:
|
||||
return AudioFileExtension("mp4")
|
||||
if "audio/webm" in content_type or "video/webm" in content_type:
|
||||
return AudioFileExtension("webm")
|
||||
|
||||
raise ValueError(
|
||||
f"Unsupported audio format for URL: {url}. "
|
||||
@@ -315,6 +324,11 @@ class TranscriberWhisperFile:
|
||||
import numpy as np
|
||||
from silero_vad import VADIterator
|
||||
|
||||
# IMPORTANT: This VAD segment logic is duplicated in multiple files for deployment isolation.
|
||||
# If you modify this function, you MUST update all copies:
|
||||
# - gpu/modal_deployments/reflector_transcriber.py (this file)
|
||||
# - gpu/modal_deployments/reflector_transcriber_parakeet.py
|
||||
# - gpu/self_hosted/app/services/transcriber.py
|
||||
def vad_segments(
|
||||
audio_array,
|
||||
sample_rate: int = SAMPLERATE,
|
||||
@@ -322,6 +336,7 @@ class TranscriberWhisperFile:
|
||||
) -> Generator[TimeSegment, None, None]:
|
||||
"""Generate speech segments as TimeSegment using Silero VAD."""
|
||||
iterator = VADIterator(self.vad_model, sampling_rate=sample_rate)
|
||||
audio_duration = len(audio_array) / float(SAMPLERATE)
|
||||
start = None
|
||||
for i in range(0, len(audio_array), window_size):
|
||||
chunk = audio_array[i : i + window_size]
|
||||
@@ -341,6 +356,9 @@ class TranscriberWhisperFile:
|
||||
start / float(SAMPLERATE), end / float(SAMPLERATE)
|
||||
)
|
||||
start = None
|
||||
# Handle case where audio ends while speech is still active
|
||||
if start is not None:
|
||||
yield TimeSegment(start / float(SAMPLERATE), audio_duration)
|
||||
iterator.reset_states()
|
||||
|
||||
upload_volume.reload()
|
||||
@@ -406,6 +424,12 @@ class TranscriberWhisperFile:
|
||||
return {"text": " ".join(all_text), "words": all_words}
|
||||
|
||||
|
||||
# IMPORTANT: This function is duplicated in multiple files for deployment isolation.
|
||||
# If you modify the audio format detection logic, you MUST update all copies:
|
||||
# - gpu/self_hosted/app/utils.py
|
||||
# - gpu/modal_deployments/reflector_transcriber.py (this file - 2 copies!)
|
||||
# - gpu/modal_deployments/reflector_transcriber_parakeet.py
|
||||
# - gpu/modal_deployments/reflector_diarizer.py
|
||||
def detect_audio_format(url: str, headers: dict) -> str:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
@@ -423,6 +447,8 @@ def detect_audio_format(url: str, headers: dict) -> str:
|
||||
return "wav"
|
||||
if "audio/mp4" in content_type:
|
||||
return "mp4"
|
||||
if "audio/webm" in content_type or "video/webm" in content_type:
|
||||
return "webm"
|
||||
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
|
||||
@@ -90,6 +90,12 @@ image = (
|
||||
)
|
||||
|
||||
|
||||
# IMPORTANT: This function is duplicated in multiple files for deployment isolation.
|
||||
# If you modify the audio format detection logic, you MUST update all copies:
|
||||
# - gpu/self_hosted/app/utils.py
|
||||
# - gpu/modal_deployments/reflector_transcriber.py (2 copies)
|
||||
# - gpu/modal_deployments/reflector_transcriber_parakeet.py (this file)
|
||||
# - gpu/modal_deployments/reflector_diarizer.py
|
||||
def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtension:
|
||||
parsed_url = urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
@@ -105,6 +111,8 @@ def detect_audio_format(url: str, headers: Mapping[str, str]) -> AudioFileExtens
|
||||
return AudioFileExtension("wav")
|
||||
if "audio/mp4" in content_type:
|
||||
return AudioFileExtension("mp4")
|
||||
if "audio/webm" in content_type or "video/webm" in content_type:
|
||||
return AudioFileExtension("webm")
|
||||
|
||||
raise ValueError(
|
||||
f"Unsupported audio format for URL: {url}. "
|
||||
@@ -301,6 +309,11 @@ class TranscriberParakeetFile:
|
||||
audio_array, sample_rate = librosa.load(file_path, sr=SAMPLERATE, mono=True)
|
||||
return audio_array
|
||||
|
||||
# IMPORTANT: This VAD segment logic is duplicated in multiple files for deployment isolation.
|
||||
# If you modify this function, you MUST update all copies:
|
||||
# - gpu/modal_deployments/reflector_transcriber.py
|
||||
# - gpu/modal_deployments/reflector_transcriber_parakeet.py (this file)
|
||||
# - gpu/self_hosted/app/services/transcriber.py
|
||||
def vad_segment_generator(
|
||||
audio_array,
|
||||
) -> Generator[TimeSegment, None, None]:
|
||||
|
||||
@@ -103,7 +103,7 @@ def configure_seamless_m4t():
|
||||
|
||||
|
||||
transcriber_image = (
|
||||
Image.debian_slim(python_version="3.10.8")
|
||||
Image.debian_slim(python_version="3.10")
|
||||
.apt_install("git")
|
||||
.apt_install("wget")
|
||||
.apt_install("libsndfile-dev")
|
||||
@@ -119,6 +119,7 @@ transcriber_image = (
|
||||
"fairseq2",
|
||||
"pyyaml",
|
||||
"hf-transfer~=0.1",
|
||||
"pydantic",
|
||||
)
|
||||
.run_function(install_seamless_communication)
|
||||
.run_function(download_seamlessm4t_model)
|
||||
|
||||
Reference in New Issue
Block a user