Mirror of https://github.com/Monadical-SAS/reflector.git (synced 2026-03-31 11:26:47 +00:00).
feat: mixdown modal services + processor pattern (#936)
* allow memory flags and per service config * feat: mixdown modal services + processor pattern
This commit is contained in:
committed by
GitHub
parent
12bf0c2d77
commit
d164e486cc
@@ -4,13 +4,21 @@
|
||||
# Single script to configure and launch everything on one server.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/setup-selfhosted.sh <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--custom-ca PATH] [--password PASSWORD] [--build]
|
||||
# ./scripts/setup-selfhosted.sh <--gpu|--cpu|--hosted> [options] [--transcript BACKEND] [--diarization BACKEND] [--translation BACKEND] [--padding BACKEND] [--mixdown BACKEND]
|
||||
# ./scripts/setup-selfhosted.sh (re-run with saved config from last run)
|
||||
#
|
||||
# ML processing modes (pick ONE — required):
|
||||
# ML processing modes (pick ONE — required on first run):
|
||||
# --gpu NVIDIA GPU container for transcription/diarization/translation
|
||||
# --cpu In-process CPU processing (no ML container, slower)
|
||||
# --hosted Remote GPU service URL (no ML container)
|
||||
#
|
||||
# Per-service backend overrides (optional — override individual services from the base mode):
|
||||
# --transcript BACKEND whisper | modal (default: whisper for --cpu, modal for --gpu/--hosted)
|
||||
# --diarization BACKEND pyannote | modal (default: pyannote for --cpu, modal for --gpu/--hosted)
|
||||
# --translation BACKEND marian | modal | passthrough (default: marian for --cpu, modal for --gpu/--hosted)
|
||||
# --padding BACKEND pyav | modal (default: pyav for --cpu, modal for --gpu/--hosted)
|
||||
# --mixdown BACKEND pyav | modal (default: pyav for --cpu, modal for --gpu/--hosted)
|
||||
#
|
||||
# Local LLM (optional — for summarization & topic detection):
|
||||
# --ollama-gpu Local Ollama with NVIDIA GPU acceleration
|
||||
# --ollama-cpu Local Ollama on CPU only
|
||||
@@ -38,12 +46,17 @@
|
||||
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --garage --caddy --domain reflector.example.com
|
||||
# ./scripts/setup-selfhosted.sh --cpu --ollama-cpu --garage --caddy
|
||||
# ./scripts/setup-selfhosted.sh --hosted --garage --caddy
|
||||
# ./scripts/setup-selfhosted.sh --cpu --padding modal --garage --caddy
|
||||
# ./scripts/setup-selfhosted.sh --gpu --translation passthrough --garage --caddy
|
||||
# ./scripts/setup-selfhosted.sh --cpu --diarization modal --translation modal --garage
|
||||
# ./scripts/setup-selfhosted.sh --gpu --ollama-gpu --llm-model mistral --garage --caddy
|
||||
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy --password mysecretpass
|
||||
# ./scripts/setup-selfhosted.sh --gpu --garage --caddy
|
||||
# ./scripts/setup-selfhosted.sh --cpu
|
||||
# ./scripts/setup-selfhosted.sh --gpu --caddy --domain reflector.local --custom-ca certs/
|
||||
# ./scripts/setup-selfhosted.sh --hosted --custom-ca /path/to/corporate-ca.crt
|
||||
# ./scripts/setup-selfhosted.sh # re-run with saved config
|
||||
#
|
||||
# Config memory: after a successful run, flags are saved to data/.selfhosted-last-args.
|
||||
# Re-running with no arguments replays the saved configuration automatically.
|
||||
#
|
||||
# The script auto-detects Daily.co (DAILY_API_KEY) and Whereby (WHEREBY_API_KEY)
|
||||
# from server/.env. If Daily.co is configured, Hatchet workflow services are
|
||||
@@ -59,6 +72,7 @@ ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
COMPOSE_FILE="$ROOT_DIR/docker-compose.selfhosted.yml"
|
||||
SERVER_ENV="$ROOT_DIR/server/.env"
|
||||
WWW_ENV="$ROOT_DIR/www/.env"
|
||||
LAST_ARGS_FILE="$ROOT_DIR/data/.selfhosted-last-args"
|
||||
|
||||
OLLAMA_MODEL="qwen2.5:14b"
|
||||
OS="$(uname -s)"
|
||||
@@ -178,6 +192,17 @@ compose_garage_cmd() {
|
||||
docker compose $files --profile garage "$@"
|
||||
}
|
||||
|
||||
# --- Config memory: replay last args if none provided ---
# When the script is invoked with no arguments, re-populate $@ from the
# flag string saved by the previous successful run (written with printf %q,
# so every saved word is already shell-quoted and safe to eval).
if [[ $# -eq 0 && -f "$LAST_ARGS_FILE" ]]; then
  SAVED_ARGS=$(<"$LAST_ARGS_FILE")
  if [[ -n "$SAVED_ARGS" ]]; then
    info "No flags provided — replaying saved configuration:"
    info " $SAVED_ARGS"
    echo ""
    # Restore the saved words as separate positional parameters.
    eval "set -- $SAVED_ARGS"
  fi
fi
|
||||
|
||||
# --- Parse arguments ---
|
||||
MODEL_MODE="" # gpu or cpu (required, mutually exclusive)
|
||||
OLLAMA_MODE="" # ollama-gpu or ollama-cpu (optional)
|
||||
@@ -189,6 +214,19 @@ ADMIN_PASSWORD="" # optional admin password for password auth
|
||||
CUSTOM_CA="" # --custom-ca: path to dir or CA cert file
|
||||
USE_CUSTOM_CA=false # derived flag: true when --custom-ca is provided
|
||||
EXTRA_CA_FILES=() # --extra-ca: additional CA certs to trust (can be repeated)
|
||||
OVERRIDE_TRANSCRIPT="" # per-service override: whisper | modal
|
||||
OVERRIDE_DIARIZATION="" # per-service override: pyannote | modal
|
||||
OVERRIDE_TRANSLATION="" # per-service override: marian | modal | passthrough
|
||||
OVERRIDE_PADDING="" # per-service override: pyav | modal
|
||||
OVERRIDE_MIXDOWN="" # per-service override: pyav | modal
|
||||
|
||||
# Validate per-service backend override values
# Usage: validate_backend SERVICE VALUE ALLOWED...
# Returns 0 when VALUE is one of the ALLOWED backends; otherwise reports
# the problem via err (naming the offending --SERVICE flag) and terminates
# the whole script with exit status 1.
validate_backend() {
  local service="$1" value="$2"
  shift 2
  local valid=("$@")
  local candidate
  for candidate in "${valid[@]}"; do
    if [[ "$candidate" == "$value" ]]; then
      return 0
    fi
  done
  err "--$service value '$value' is not valid. Choose one of: ${valid[*]}"
  exit 1
}
|
||||
|
||||
SKIP_NEXT=false
|
||||
ARGS=("$@")
|
||||
@@ -265,14 +303,65 @@ for i in "${!ARGS[@]}"; do
|
||||
EXTRA_CA_FILES+=("$extra_ca_file")
|
||||
USE_CUSTOM_CA=true
|
||||
SKIP_NEXT=true ;;
|
||||
--transcript)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--transcript requires a backend (whisper | modal)"
|
||||
exit 1
|
||||
fi
|
||||
validate_backend "transcript" "${ARGS[$next_i]}" whisper modal
|
||||
OVERRIDE_TRANSCRIPT="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--diarization)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--diarization requires a backend (pyannote | modal)"
|
||||
exit 1
|
||||
fi
|
||||
validate_backend "diarization" "${ARGS[$next_i]}" pyannote modal
|
||||
OVERRIDE_DIARIZATION="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--translation)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--translation requires a backend (marian | modal | passthrough)"
|
||||
exit 1
|
||||
fi
|
||||
validate_backend "translation" "${ARGS[$next_i]}" marian modal passthrough
|
||||
OVERRIDE_TRANSLATION="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--padding)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--padding requires a backend (pyav | modal)"
|
||||
exit 1
|
||||
fi
|
||||
validate_backend "padding" "${ARGS[$next_i]}" pyav modal
|
||||
OVERRIDE_PADDING="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
--mixdown)
|
||||
next_i=$((i + 1))
|
||||
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
|
||||
err "--mixdown requires a backend (pyav | modal)"
|
||||
exit 1
|
||||
fi
|
||||
validate_backend "mixdown" "${ARGS[$next_i]}" pyav modal
|
||||
OVERRIDE_MIXDOWN="${ARGS[$next_i]}"
|
||||
SKIP_NEXT=true ;;
|
||||
*)
|
||||
err "Unknown argument: $arg"
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--custom-ca PATH] [--password PASS] [--build]"
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [options] [--transcript BACKEND] [--diarization BACKEND] [--translation BACKEND] [--padding BACKEND] [--mixdown BACKEND]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# --- Save CLI args for config memory (re-run without flags) ---
# Persist this invocation's flags so a later bare re-run can replay them.
# %q shell-quotes each argument, making the saved line safe to eval later.
if (( $# > 0 )); then
  mkdir -p "$ROOT_DIR/data"
  printf '%q ' "$@" > "$LAST_ARGS_FILE"
fi
|
||||
|
||||
# --- Resolve --custom-ca flag ---
|
||||
CA_CERT_PATH="" # resolved path to CA certificate
|
||||
TLS_CERT_PATH="" # resolved path to server cert (optional, for Caddy TLS)
|
||||
@@ -330,13 +419,20 @@ fi
|
||||
if [[ -z "$MODEL_MODE" ]]; then
|
||||
err "No model mode specified. You must choose --gpu, --cpu, or --hosted."
|
||||
err ""
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [--ollama-gpu|--ollama-cpu] [--llm-model MODEL] [--garage] [--caddy] [--domain DOMAIN] [--custom-ca PATH] [--password PASS] [--build]"
|
||||
err "Usage: $0 <--gpu|--cpu|--hosted> [options] [--transcript BACKEND] [--diarization BACKEND] [--translation BACKEND] [--padding BACKEND] [--mixdown BACKEND]"
|
||||
err ""
|
||||
err "ML processing modes (required):"
|
||||
err " --gpu NVIDIA GPU container for transcription/diarization/translation"
|
||||
err " --cpu In-process CPU processing (no ML container, slower)"
|
||||
err " --hosted Remote GPU service URL (no ML container)"
|
||||
err ""
|
||||
err "Per-service backend overrides (optional — override individual services):"
|
||||
err " --transcript BACKEND whisper | modal (default: whisper for --cpu, modal for --gpu/--hosted)"
|
||||
err " --diarization BACKEND pyannote | modal (default: pyannote for --cpu, modal for --gpu/--hosted)"
|
||||
err " --translation BACKEND marian | modal | passthrough (default: marian for --cpu, modal for --gpu/--hosted)"
|
||||
err " --padding BACKEND pyav | modal (default: pyav for --cpu, modal for --gpu/--hosted)"
|
||||
err " --mixdown BACKEND pyav | modal (default: pyav for --cpu, modal for --gpu/--hosted)"
|
||||
err ""
|
||||
err "Local LLM (optional):"
|
||||
err " --ollama-gpu Local Ollama with GPU (for summarization/topics)"
|
||||
err " --ollama-cpu Local Ollama on CPU (for summarization/topics)"
|
||||
@@ -351,6 +447,8 @@ if [[ -z "$MODEL_MODE" ]]; then
|
||||
err " --extra-ca FILE Additional CA cert to trust (repeatable for multiple CAs)"
|
||||
err " --password PASS Enable password auth (admin@localhost) instead of public mode"
|
||||
err " --build Build backend/frontend images from source instead of pulling"
|
||||
err ""
|
||||
err "Tip: After your first run, re-run with no flags to reuse the same configuration."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -374,9 +472,38 @@ OLLAMA_SVC=""
|
||||
[[ "$OLLAMA_MODE" == "ollama-gpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama"
|
||||
[[ "$OLLAMA_MODE" == "ollama-cpu" ]] && USES_OLLAMA=true && OLLAMA_SVC="ollama-cpu"
|
||||
|
||||
# Resolve effective backend per service (override wins over base mode default)
# cpu defaults to the in-process backends; gpu/hosted default everything to
# the modal (HTTP service) backend. An explicit --<service> flag always wins.
if [[ "$MODEL_MODE" == "cpu" ]]; then
  EFF_TRANSCRIPT="${OVERRIDE_TRANSCRIPT:-whisper}"
  EFF_DIARIZATION="${OVERRIDE_DIARIZATION:-pyannote}"
  EFF_TRANSLATION="${OVERRIDE_TRANSLATION:-marian}"
  EFF_PADDING="${OVERRIDE_PADDING:-pyav}"
  EFF_MIXDOWN="${OVERRIDE_MIXDOWN:-pyav}"
elif [[ "$MODEL_MODE" == "gpu" || "$MODEL_MODE" == "hosted" ]]; then
  EFF_TRANSCRIPT="${OVERRIDE_TRANSCRIPT:-modal}"
  EFF_DIARIZATION="${OVERRIDE_DIARIZATION:-modal}"
  EFF_TRANSLATION="${OVERRIDE_TRANSLATION:-modal}"
  EFF_PADDING="${OVERRIDE_PADDING:-modal}"
  EFF_MIXDOWN="${OVERRIDE_MIXDOWN:-modal}"
fi

# Check if any per-service overrides were provided
# (true when at least one OVERRIDE_* variable is non-empty)
HAS_OVERRIDES=false
if [[ -n "${OVERRIDE_TRANSCRIPT}${OVERRIDE_DIARIZATION}${OVERRIDE_TRANSLATION}${OVERRIDE_PADDING}${OVERRIDE_MIXDOWN}" ]]; then
  HAS_OVERRIDES=true
fi

# Human-readable mode string for display
MODE_DISPLAY="$MODEL_MODE"
[[ -n "$OLLAMA_MODE" ]] && MODE_DISPLAY="$MODEL_MODE + $OLLAMA_MODE"
if [[ "$HAS_OVERRIDES" == "true" ]]; then
  MODE_DISPLAY="$MODE_DISPLAY (overrides: transcript=$EFF_TRANSCRIPT, diarization=$EFF_DIARIZATION, translation=$EFF_TRANSLATION, padding=$EFF_PADDING, mixdown=$EFF_MIXDOWN)"
fi
|
||||
|
||||
# =========================================================
|
||||
# Step 0: Prerequisites
|
||||
@@ -623,54 +750,30 @@ step_server_env() {
|
||||
env_set "$SERVER_ENV" "WEBRTC_HOST" "$PRIMARY_IP"
|
||||
fi
|
||||
|
||||
# Specialized models — backend configuration per mode
|
||||
# Specialized models — backend configuration per service
|
||||
env_set "$SERVER_ENV" "DIARIZATION_ENABLED" "true"
|
||||
|
||||
# Resolve the URL for modal backends
|
||||
local modal_url=""
|
||||
case "$MODEL_MODE" in
|
||||
gpu)
|
||||
# GPU container aliased as "transcription" on docker network
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_URL" "http://transcription:8000"
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "selfhosted"
|
||||
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "DIARIZATION_URL" "http://transcription:8000"
|
||||
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "TRANSLATE_URL" "http://transcription:8000"
|
||||
env_set "$SERVER_ENV" "PADDING_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "PADDING_URL" "http://transcription:8000"
|
||||
ok "ML backends: GPU container (modal)"
|
||||
;;
|
||||
cpu)
|
||||
# In-process backends — no ML service container needed
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "whisper"
|
||||
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "pyannote"
|
||||
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "marian"
|
||||
env_set "$SERVER_ENV" "PADDING_BACKEND" "pyav"
|
||||
ok "ML backends: in-process CPU (whisper/pyannote/marian/pyav)"
|
||||
modal_url="http://transcription:8000"
|
||||
;;
|
||||
hosted)
|
||||
# Remote GPU service — user provides URL
|
||||
local gpu_url=""
|
||||
if env_has_key "$SERVER_ENV" "TRANSCRIPT_URL"; then
|
||||
gpu_url=$(env_get "$SERVER_ENV" "TRANSCRIPT_URL")
|
||||
modal_url=$(env_get "$SERVER_ENV" "TRANSCRIPT_URL")
|
||||
fi
|
||||
if [[ -z "$gpu_url" ]] && [[ -t 0 ]]; then
|
||||
if [[ -z "$modal_url" ]] && [[ -t 0 ]]; then
|
||||
echo ""
|
||||
info "Enter the URL of your remote GPU service (e.g. https://gpu.example.com)"
|
||||
read -rp " GPU service URL: " gpu_url
|
||||
read -rp " GPU service URL: " modal_url
|
||||
fi
|
||||
if [[ -z "$gpu_url" ]]; then
|
||||
if [[ -z "$modal_url" ]]; then
|
||||
err "GPU service URL required for --hosted mode."
|
||||
err "Set TRANSCRIPT_URL in server/.env or provide it interactively."
|
||||
exit 1
|
||||
fi
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_URL" "$gpu_url"
|
||||
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "DIARIZATION_URL" "$gpu_url"
|
||||
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "TRANSLATE_URL" "$gpu_url"
|
||||
env_set "$SERVER_ENV" "PADDING_BACKEND" "modal"
|
||||
env_set "$SERVER_ENV" "PADDING_URL" "$gpu_url"
|
||||
# API key for remote service
|
||||
local gpu_api_key=""
|
||||
if env_has_key "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY"; then
|
||||
@@ -682,15 +785,106 @@ step_server_env() {
|
||||
if [[ -n "$gpu_api_key" ]]; then
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "$gpu_api_key"
|
||||
fi
|
||||
ok "ML backends: remote hosted ($gpu_url)"
|
||||
;;
|
||||
cpu)
|
||||
# CPU mode: modal_url stays empty. If services are overridden to modal,
|
||||
# the user must configure the URL (TRANSCRIPT_URL etc.) in server/.env manually.
|
||||
# We intentionally do NOT read from existing env here to avoid overwriting
|
||||
# per-service URLs with a stale TRANSCRIPT_URL from a previous --gpu run.
|
||||
;;
|
||||
esac
|
||||
|
||||
# Set each service backend independently using effective backends
|
||||
# Transcript
|
||||
case "$EFF_TRANSCRIPT" in
|
||||
modal)
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "modal"
|
||||
if [[ -n "$modal_url" ]]; then
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_URL" "$modal_url"
|
||||
fi
|
||||
[[ "$MODEL_MODE" == "gpu" ]] && env_set "$SERVER_ENV" "TRANSCRIPT_MODAL_API_KEY" "selfhosted"
|
||||
;;
|
||||
whisper)
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_BACKEND" "whisper"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Diarization
|
||||
case "$EFF_DIARIZATION" in
|
||||
modal)
|
||||
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "modal"
|
||||
if [[ -n "$modal_url" ]]; then
|
||||
env_set "$SERVER_ENV" "DIARIZATION_URL" "$modal_url"
|
||||
fi
|
||||
;;
|
||||
pyannote)
|
||||
env_set "$SERVER_ENV" "DIARIZATION_BACKEND" "pyannote"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Translation
|
||||
case "$EFF_TRANSLATION" in
|
||||
modal)
|
||||
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "modal"
|
||||
if [[ -n "$modal_url" ]]; then
|
||||
env_set "$SERVER_ENV" "TRANSLATE_URL" "$modal_url"
|
||||
fi
|
||||
;;
|
||||
marian)
|
||||
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "marian"
|
||||
;;
|
||||
passthrough)
|
||||
env_set "$SERVER_ENV" "TRANSLATION_BACKEND" "passthrough"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Padding
|
||||
case "$EFF_PADDING" in
|
||||
modal)
|
||||
env_set "$SERVER_ENV" "PADDING_BACKEND" "modal"
|
||||
if [[ -n "$modal_url" ]]; then
|
||||
env_set "$SERVER_ENV" "PADDING_URL" "$modal_url"
|
||||
fi
|
||||
;;
|
||||
pyav)
|
||||
env_set "$SERVER_ENV" "PADDING_BACKEND" "pyav"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Mixdown
|
||||
case "$EFF_MIXDOWN" in
|
||||
modal)
|
||||
env_set "$SERVER_ENV" "MIXDOWN_BACKEND" "modal"
|
||||
if [[ -n "$modal_url" ]]; then
|
||||
env_set "$SERVER_ENV" "MIXDOWN_URL" "$modal_url"
|
||||
fi
|
||||
;;
|
||||
pyav)
|
||||
env_set "$SERVER_ENV" "MIXDOWN_BACKEND" "pyav"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Warn about modal overrides in CPU mode that need URL configuration
|
||||
if [[ "$MODEL_MODE" == "cpu" ]] && [[ -z "$modal_url" ]]; then
|
||||
local needs_url=false
|
||||
[[ "$EFF_TRANSCRIPT" == "modal" ]] && needs_url=true
|
||||
[[ "$EFF_DIARIZATION" == "modal" ]] && needs_url=true
|
||||
[[ "$EFF_TRANSLATION" == "modal" ]] && needs_url=true
|
||||
[[ "$EFF_PADDING" == "modal" ]] && needs_url=true
|
||||
[[ "$EFF_MIXDOWN" == "modal" ]] && needs_url=true
|
||||
if [[ "$needs_url" == "true" ]]; then
|
||||
warn "One or more services are set to 'modal' but no service URL is configured."
|
||||
warn "Set TRANSCRIPT_URL (and optionally TRANSCRIPT_MODAL_API_KEY) in server/.env"
|
||||
warn "to point to your GPU service, then re-run this script."
|
||||
fi
|
||||
fi
|
||||
|
||||
ok "ML backends: transcript=$EFF_TRANSCRIPT, diarization=$EFF_DIARIZATION, translation=$EFF_TRANSLATION, padding=$EFF_PADDING, mixdown=$EFF_MIXDOWN"
|
||||
|
||||
# HuggingFace token for gated models (pyannote diarization)
|
||||
# --gpu: written to root .env (docker compose passes to GPU container)
|
||||
# --cpu: written to both root .env and server/.env (in-process pyannote needs it)
|
||||
# --hosted: not needed (remote service handles its own auth)
|
||||
if [[ "$MODEL_MODE" != "hosted" ]]; then
|
||||
# Needed when: GPU container is running (MODEL_MODE=gpu), or diarization uses pyannote in-process
|
||||
# Not needed when: all modal services point to a remote hosted URL with its own auth
|
||||
if [[ "$MODEL_MODE" == "gpu" ]] || [[ "$EFF_DIARIZATION" == "pyannote" ]]; then
|
||||
local root_env="$ROOT_DIR/.env"
|
||||
local current_hf_token="${HF_TOKEN:-}"
|
||||
if [[ -f "$root_env" ]] && env_has_key "$root_env" "HF_TOKEN"; then
|
||||
@@ -709,8 +903,8 @@ step_server_env() {
|
||||
touch "$root_env"
|
||||
env_set "$root_env" "HF_TOKEN" "$current_hf_token"
|
||||
export HF_TOKEN="$current_hf_token"
|
||||
# In CPU mode, server process needs HF_TOKEN directly
|
||||
if [[ "$MODEL_MODE" == "cpu" ]]; then
|
||||
# When diarization runs in-process (pyannote), server process needs HF_TOKEN directly
|
||||
if [[ "$EFF_DIARIZATION" == "pyannote" ]]; then
|
||||
env_set "$SERVER_ENV" "HF_TOKEN" "$current_hf_token"
|
||||
fi
|
||||
ok "HF_TOKEN configured"
|
||||
@@ -743,11 +937,15 @@ step_server_env() {
|
||||
fi
|
||||
fi
|
||||
|
||||
# CPU mode: increase file processing timeouts (default 600s is too short for long audio on CPU)
|
||||
if [[ "$MODEL_MODE" == "cpu" ]]; then
|
||||
# Increase file processing timeouts for CPU backends (default 600s is too short for long audio on CPU)
|
||||
if [[ "$EFF_TRANSCRIPT" == "whisper" ]]; then
|
||||
env_set "$SERVER_ENV" "TRANSCRIPT_FILE_TIMEOUT" "3600"
|
||||
fi
|
||||
if [[ "$EFF_DIARIZATION" == "pyannote" ]]; then
|
||||
env_set "$SERVER_ENV" "DIARIZATION_FILE_TIMEOUT" "3600"
|
||||
ok "CPU mode — file processing timeouts set to 3600s (1 hour)"
|
||||
fi
|
||||
if [[ "$EFF_TRANSCRIPT" == "whisper" ]] || [[ "$EFF_DIARIZATION" == "pyannote" ]]; then
|
||||
ok "CPU backend(s) detected — file processing timeouts set to 3600s (1 hour)"
|
||||
fi
|
||||
|
||||
# Hatchet is always required (file, live, and multitrack pipelines all use it)
|
||||
@@ -1175,9 +1373,9 @@ step_health() {
|
||||
warn "Check with: docker compose -f docker-compose.selfhosted.yml logs gpu"
|
||||
fi
|
||||
elif [[ "$MODEL_MODE" == "cpu" ]]; then
|
||||
ok "CPU mode — ML processing runs in-process on server/worker (no separate service)"
|
||||
ok "CPU mode — in-process backends run on server/worker (transcript=$EFF_TRANSCRIPT, diarization=$EFF_DIARIZATION, translation=$EFF_TRANSLATION, padding=$EFF_PADDING, mixdown=$EFF_MIXDOWN)"
|
||||
elif [[ "$MODEL_MODE" == "hosted" ]]; then
|
||||
ok "Hosted mode — ML processing via remote GPU service (no local health check)"
|
||||
ok "Hosted mode — ML processing via remote GPU service (transcript=$EFF_TRANSCRIPT, diarization=$EFF_DIARIZATION, translation=$EFF_TRANSLATION, padding=$EFF_PADDING, mixdown=$EFF_MIXDOWN)"
|
||||
fi
|
||||
|
||||
# Ollama (if applicable)
|
||||
@@ -1375,6 +1573,10 @@ main() {
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
echo " Models: $MODEL_MODE"
|
||||
if [[ "$HAS_OVERRIDES" == "true" ]]; then
|
||||
echo " transcript=$EFF_TRANSCRIPT, diarization=$EFF_DIARIZATION"
|
||||
echo " translation=$EFF_TRANSLATION, padding=$EFF_PADDING, mixdown=$EFF_MIXDOWN"
|
||||
fi
|
||||
echo " LLM: ${OLLAMA_MODE:-external}"
|
||||
echo " Garage: $USE_GARAGE"
|
||||
echo " Caddy: $USE_CADDY"
|
||||
@@ -1487,7 +1689,13 @@ EOF
|
||||
echo " API: server:1250 (or localhost:1250 from host)"
|
||||
fi
|
||||
echo ""
|
||||
echo " Models: $MODEL_MODE (transcription/diarization/translation)"
|
||||
if [[ "$HAS_OVERRIDES" == "true" ]]; then
|
||||
echo " Models: $MODEL_MODE base + overrides"
|
||||
echo " transcript=$EFF_TRANSCRIPT, diarization=$EFF_DIARIZATION"
|
||||
echo " translation=$EFF_TRANSLATION, padding=$EFF_PADDING, mixdown=$EFF_MIXDOWN"
|
||||
else
|
||||
echo " Models: $MODEL_MODE (transcription/diarization/translation/padding)"
|
||||
fi
|
||||
[[ "$USE_GARAGE" == "true" ]] && echo " Storage: Garage (local S3)"
|
||||
[[ "$USE_GARAGE" != "true" ]] && echo " Storage: External S3"
|
||||
[[ "$USES_OLLAMA" == "true" ]] && echo " LLM: Ollama ($OLLAMA_MODEL) for summarization/topics"
|
||||
@@ -1507,7 +1715,8 @@ EOF
|
||||
echo ""
|
||||
fi
|
||||
echo " To stop: docker compose -f docker-compose.selfhosted.yml down"
|
||||
echo " To re-run: ./scripts/setup-selfhosted.sh $*"
|
||||
echo " To re-run: ./scripts/setup-selfhosted.sh (replays saved config)"
|
||||
echo " Last args: $*"
|
||||
echo ""
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user