Files
reflector/scripts/setup-gpu-host.sh
2026-03-26 15:44:36 -05:00

497 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Standalone GPU service setup for Reflector.
# Deploys ONLY the GPU transcription/diarization/translation service on a dedicated machine.
# The main Reflector instance connects to this machine over HTTPS.
#
# Usage:
# ./scripts/setup-gpu-host.sh [--domain DOMAIN] [--custom-ca PATH] [--extra-ca FILE] [--api-key KEY] [--cpu] [--build]
#
# Options:
# --domain DOMAIN Domain name for this GPU host (e.g., gpu.example.com)
# With --custom-ca: uses custom TLS cert. Without: uses Let's Encrypt.
# --custom-ca PATH Custom CA certificate (dir with ca.crt + server.pem + server-key.pem, or single PEM file)
# --extra-ca FILE Additional CA cert to trust (repeatable)
# --api-key KEY API key to protect the GPU service (recommended for internet-facing deployments)
# --cpu Use CPU-only Dockerfile (no NVIDIA GPU required)
# --build Build image from source (default: build, since no pre-built GPU image is published)
# --port PORT Host port to expose (default: 443 with Caddy, 8000 without)
#
# Examples:
# # GPU on LAN with custom CA
# ./scripts/generate-certs.sh gpu.local
# ./scripts/setup-gpu-host.sh --domain gpu.local --custom-ca certs/ --api-key my-secret-key
#
# # GPU on public internet with Let's Encrypt
# ./scripts/setup-gpu-host.sh --domain gpu.example.com --api-key my-secret-key
#
# # GPU on LAN, IP access only (self-signed cert)
# ./scripts/setup-gpu-host.sh --api-key my-secret-key
#
# # CPU-only mode (no NVIDIA GPU)
# ./scripts/setup-gpu-host.sh --cpu --api-key my-secret-key
#
# After setup, configure the main Reflector instance to use this GPU:
# In server/.env on the Reflector machine:
# TRANSCRIPT_BACKEND=modal
# TRANSCRIPT_URL=https://gpu.example.com
# TRANSCRIPT_MODAL_API_KEY=my-secret-key
# DIARIZATION_BACKEND=modal
# DIARIZATION_URL=https://gpu.example.com
# DIARIZATION_MODAL_API_KEY=my-secret-key
# TRANSLATION_BACKEND=modal
# TRANSLATE_URL=https://gpu.example.com
# TRANSLATION_MODAL_API_KEY=my-secret-key
#
# DNS Resolution:
# - Public domain: Create a DNS A record pointing to this machine's public IP.
# - Internal domain (e.g., gpu.local): Add to /etc/hosts on both machines:
# <GPU_MACHINE_IP> gpu.local
# - IP-only: Use the machine's IP directly in TRANSCRIPT_URL/DIARIZATION_URL.
# The Reflector backend must trust the CA or accept self-signed certs.
#
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
GPU_DIR="$ROOT_DIR/gpu/self_hosted"
OS="$(uname -s)"
# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
info() { echo -e "${CYAN}==>${NC} $*"; }
ok() { echo -e "${GREEN}${NC} $*"; }
warn() { echo -e "${YELLOW} !${NC} $*"; }
err() { echo -e "${RED}${NC} $*" >&2; }
# --- Parse arguments ---
CUSTOM_DOMAIN=""
CUSTOM_CA=""
EXTRA_CA_FILES=()
API_KEY=""
USE_CPU=false
HOST_PORT=""
SKIP_NEXT=false
ARGS=("$@")
for i in "${!ARGS[@]}"; do
if [[ "$SKIP_NEXT" == "true" ]]; then
SKIP_NEXT=false
continue
fi
arg="${ARGS[$i]}"
case "$arg" in
--domain)
next_i=$((i + 1))
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
err "--domain requires a domain name"
exit 1
fi
CUSTOM_DOMAIN="${ARGS[$next_i]}"
SKIP_NEXT=true ;;
--custom-ca)
next_i=$((i + 1))
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
err "--custom-ca requires a path to a directory or PEM certificate file"
exit 1
fi
CUSTOM_CA="${ARGS[$next_i]}"
SKIP_NEXT=true ;;
--extra-ca)
next_i=$((i + 1))
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
err "--extra-ca requires a path to a PEM certificate file"
exit 1
fi
if [[ ! -f "${ARGS[$next_i]}" ]]; then
err "--extra-ca file not found: ${ARGS[$next_i]}"
exit 1
fi
EXTRA_CA_FILES+=("${ARGS[$next_i]}")
SKIP_NEXT=true ;;
--api-key)
next_i=$((i + 1))
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
err "--api-key requires a key value"
exit 1
fi
API_KEY="${ARGS[$next_i]}"
SKIP_NEXT=true ;;
--cpu)
USE_CPU=true ;;
--port)
next_i=$((i + 1))
if [[ $next_i -ge ${#ARGS[@]} ]] || [[ "${ARGS[$next_i]}" == --* ]]; then
err "--port requires a port number"
exit 1
fi
HOST_PORT="${ARGS[$next_i]}"
SKIP_NEXT=true ;;
--build)
;; # Always build from source for GPU, flag accepted for compatibility
*)
err "Unknown argument: $arg"
err "Usage: $0 [--domain DOMAIN] [--custom-ca PATH] [--extra-ca FILE] [--api-key KEY] [--cpu] [--port PORT]"
exit 1
;;
esac
done
# --- Resolve CA paths ---
CA_CERT_PATH=""
TLS_CERT_PATH=""
TLS_KEY_PATH=""
USE_CUSTOM_CA=false
USE_CADDY=false
if [[ -n "$CUSTOM_CA" ]] || [[ -n "${EXTRA_CA_FILES[0]+x}" ]]; then
USE_CUSTOM_CA=true
fi
if [[ -n "$CUSTOM_CA" ]]; then
CUSTOM_CA="${CUSTOM_CA%/}"
if [[ -d "$CUSTOM_CA" ]]; then
[[ -f "$CUSTOM_CA/ca.crt" ]] || { err "$CUSTOM_CA/ca.crt not found"; exit 1; }
CA_CERT_PATH="$CUSTOM_CA/ca.crt"
if [[ -f "$CUSTOM_CA/server.pem" ]] && [[ -f "$CUSTOM_CA/server-key.pem" ]]; then
TLS_CERT_PATH="$CUSTOM_CA/server.pem"
TLS_KEY_PATH="$CUSTOM_CA/server-key.pem"
elif [[ -f "$CUSTOM_CA/server.pem" ]] || [[ -f "$CUSTOM_CA/server-key.pem" ]]; then
warn "Found only one of server.pem/server-key.pem — both needed for TLS. Skipping."
fi
elif [[ -f "$CUSTOM_CA" ]]; then
CA_CERT_PATH="$CUSTOM_CA"
else
err "--custom-ca path not found: $CUSTOM_CA"
exit 1
fi
elif [[ -n "${EXTRA_CA_FILES[0]+x}" ]]; then
CA_CERT_PATH="${EXTRA_CA_FILES[0]}"
unset 'EXTRA_CA_FILES[0]'
EXTRA_CA_FILES=("${EXTRA_CA_FILES[@]+"${EXTRA_CA_FILES[@]}"}")
fi
# Caddy if we have a domain or TLS certs
if [[ -n "$CUSTOM_DOMAIN" ]] || [[ -n "$TLS_CERT_PATH" ]]; then
USE_CADDY=true
fi
# Default port
if [[ -z "$HOST_PORT" ]]; then
if [[ "$USE_CADDY" == "true" ]]; then
HOST_PORT="443"
else
HOST_PORT="8000"
fi
fi
# Detect primary IP
PRIMARY_IP=""
if [[ "$OS" == "Linux" ]]; then
PRIMARY_IP=$(hostname -I 2>/dev/null | awk '{print $1}' || true)
if [[ "$PRIMARY_IP" == "127."* ]] || [[ -z "$PRIMARY_IP" ]]; then
PRIMARY_IP=$(ip -4 route get 1 2>/dev/null | sed -n 's/.*src \([0-9.]*\).*/\1/p' || true)
fi
fi
# --- Display config ---
echo ""
echo "=========================================="
echo " Reflector — Standalone GPU Host Setup"
echo "=========================================="
echo ""
echo " Mode: $(if [[ "$USE_CPU" == "true" ]]; then echo "CPU-only"; else echo "NVIDIA GPU"; fi)"
echo " Caddy: $USE_CADDY"
[[ -n "$CUSTOM_DOMAIN" ]] && echo " Domain: $CUSTOM_DOMAIN"
[[ "$USE_CUSTOM_CA" == "true" ]] && echo " CA: Custom"
[[ -n "$TLS_CERT_PATH" ]] && echo " TLS: Custom cert"
[[ -n "$API_KEY" ]] && echo " Auth: API key protected"
[[ -z "$API_KEY" ]] && echo " Auth: NONE (open access — use --api-key for production!)"
echo " Port: $HOST_PORT"
echo ""
# --- Prerequisites ---
info "Checking prerequisites"
if ! command -v docker &>/dev/null; then
err "Docker not found. Install Docker first."
exit 1
fi
ok "Docker available"
if ! docker compose version &>/dev/null; then
err "Docker Compose V2 not found."
exit 1
fi
ok "Docker Compose V2 available"
if [[ "$USE_CPU" != "true" ]]; then
if ! docker info 2>/dev/null | grep -qi nvidia; then
warn "NVIDIA runtime not detected in Docker. GPU mode may fail."
warn "Install nvidia-container-toolkit if you have an NVIDIA GPU."
else
ok "NVIDIA Docker runtime available"
fi
fi
# --- Stage certificates ---
CERTS_DIR="$ROOT_DIR/certs"
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
info "Staging certificates"
mkdir -p "$CERTS_DIR"
if [[ -n "$CA_CERT_PATH" ]]; then
local_ca_dest="$CERTS_DIR/ca.crt"
src_id=$(ls -i "$CA_CERT_PATH" 2>/dev/null | awk '{print $1}')
dst_id=$(ls -i "$local_ca_dest" 2>/dev/null | awk '{print $1}')
if [[ "$src_id" != "$dst_id" ]] || [[ -z "$dst_id" ]]; then
cp "$CA_CERT_PATH" "$local_ca_dest"
fi
chmod 644 "$local_ca_dest"
ok "CA certificate staged"
# Append extra CAs
for extra_ca in "${EXTRA_CA_FILES[@]+"${EXTRA_CA_FILES[@]}"}"; do
echo "" >> "$local_ca_dest"
cat "$extra_ca" >> "$local_ca_dest"
ok "Appended extra CA: $extra_ca"
done
fi
if [[ -n "$TLS_CERT_PATH" ]]; then
cert_dest="$CERTS_DIR/server.pem"
key_dest="$CERTS_DIR/server-key.pem"
src_id=$(ls -i "$TLS_CERT_PATH" 2>/dev/null | awk '{print $1}')
dst_id=$(ls -i "$cert_dest" 2>/dev/null | awk '{print $1}')
if [[ "$src_id" != "$dst_id" ]] || [[ -z "$dst_id" ]]; then
cp "$TLS_CERT_PATH" "$cert_dest"
cp "$TLS_KEY_PATH" "$key_dest"
fi
chmod 644 "$cert_dest"
chmod 600 "$key_dest"
ok "TLS cert/key staged"
fi
fi
# --- Build profiles and compose command ---
COMPOSE_FILE="$ROOT_DIR/docker-compose.gpu-host.yml"
COMPOSE_PROFILES=()
GPU_SERVICE="gpu"
if [[ "$USE_CPU" == "true" ]]; then
COMPOSE_PROFILES+=("cpu")
GPU_SERVICE="cpu"
else
COMPOSE_PROFILES+=("gpu")
fi
if [[ "$USE_CADDY" == "true" ]]; then
COMPOSE_PROFILES+=("caddy")
fi
# Compose command helper
compose_cmd() {
local profiles="" files="-f $COMPOSE_FILE"
if [[ "$USE_CUSTOM_CA" == "true" ]] && [[ -f "$ROOT_DIR/docker-compose.gpu-ca.yml" ]]; then
files="$files -f $ROOT_DIR/docker-compose.gpu-ca.yml"
fi
for p in "${COMPOSE_PROFILES[@]}"; do
profiles="$profiles --profile $p"
done
docker compose $files $profiles "$@"
}
# Generate CA compose override if needed (mounts certs into containers)
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
info "Generating docker-compose.gpu-ca.yml override"
ca_override="$ROOT_DIR/docker-compose.gpu-ca.yml"
cat > "$ca_override" << 'CAEOF'
# Generated by setup-gpu-host.sh — custom CA trust.
# Do not edit manually; re-run setup-gpu-host.sh with --custom-ca to regenerate.
services:
gpu:
volumes:
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
cpu:
volumes:
- ./certs/ca.crt:/usr/local/share/ca-certificates/custom-ca.crt:ro
CAEOF
if [[ -n "$TLS_CERT_PATH" ]]; then
cat >> "$ca_override" << 'CADDYCAEOF'
caddy:
volumes:
- ./certs:/etc/caddy/certs:ro
CADDYCAEOF
fi
ok "Generated docker-compose.gpu-ca.yml"
else
rm -f "$ROOT_DIR/docker-compose.gpu-ca.yml"
fi
# --- Generate Caddyfile ---
if [[ "$USE_CADDY" == "true" ]]; then
info "Generating Caddyfile.gpu-host"
CADDYFILE="$ROOT_DIR/Caddyfile.gpu-host"
if [[ -n "$TLS_CERT_PATH" ]] && [[ -n "$CUSTOM_DOMAIN" ]]; then
cat > "$CADDYFILE" << CADDYEOF
# Generated by setup-gpu-host.sh — Custom TLS cert for $CUSTOM_DOMAIN
$CUSTOM_DOMAIN {
tls /etc/caddy/certs/server.pem /etc/caddy/certs/server-key.pem
reverse_proxy transcription:8000
}
CADDYEOF
ok "Caddyfile: custom TLS for $CUSTOM_DOMAIN"
elif [[ -n "$CUSTOM_DOMAIN" ]]; then
cat > "$CADDYFILE" << CADDYEOF
# Generated by setup-gpu-host.sh — Let's Encrypt for $CUSTOM_DOMAIN
$CUSTOM_DOMAIN {
reverse_proxy transcription:8000
}
CADDYEOF
ok "Caddyfile: Let's Encrypt for $CUSTOM_DOMAIN"
else
cat > "$CADDYFILE" << 'CADDYEOF'
# Generated by setup-gpu-host.sh — self-signed cert for IP access
:443 {
tls internal
reverse_proxy transcription:8000
}
CADDYEOF
ok "Caddyfile: self-signed cert for IP access"
fi
fi
# --- Generate .env ---
info "Generating GPU service .env"
GPU_ENV="$ROOT_DIR/.env.gpu-host"
cat > "$GPU_ENV" << EOF
# Generated by setup-gpu-host.sh
# HuggingFace token for pyannote diarization models
HF_TOKEN=${HF_TOKEN:-}
# API key to protect the GPU service (set via --api-key)
REFLECTOR_GPU_APIKEY=${API_KEY:-}
# Port configuration
GPU_HOST_PORT=${HOST_PORT}
CADDY_HTTPS_PORT=${HOST_PORT}
EOF
if [[ -z "${HF_TOKEN:-}" ]]; then
warn "HF_TOKEN not set. Diarization requires a HuggingFace token."
warn "Set it: export HF_TOKEN=your-token-here and re-run, or edit .env.gpu-host"
fi
ok "Generated .env.gpu-host"
# --- Build and start ---
info "Building $GPU_SERVICE image (first build downloads ML models — may take a while)..."
compose_cmd --env-file "$GPU_ENV" build "$GPU_SERVICE"
ok "$GPU_SERVICE image built"
info "Starting services..."
compose_cmd --env-file "$GPU_ENV" up -d
ok "Services started"
# --- Wait for health ---
info "Waiting for GPU service to be healthy (model loading takes 1-2 minutes)..."
local_url="http://localhost:8000"
for i in $(seq 1 40); do
if curl -sf "$local_url/docs" >/dev/null 2>&1; then
ok "GPU service is healthy!"
break
fi
if [[ $i -eq 40 ]]; then
err "GPU service did not become healthy after 5 minutes."
err "Check logs: docker compose -f docker-compose.gpu-host.yml logs gpu"
exit 1
fi
sleep 8
done
# --- Summary ---
echo ""
echo "=========================================="
echo -e " ${GREEN}GPU service is running!${NC}"
echo "=========================================="
echo ""
if [[ "$USE_CADDY" == "true" ]]; then
if [[ -n "$CUSTOM_DOMAIN" ]]; then
echo " URL: https://$CUSTOM_DOMAIN"
elif [[ -n "$PRIMARY_IP" ]]; then
echo " URL: https://$PRIMARY_IP"
else
echo " URL: https://localhost"
fi
else
if [[ -n "$PRIMARY_IP" ]]; then
echo " URL: http://$PRIMARY_IP:$HOST_PORT"
else
echo " URL: http://localhost:$HOST_PORT"
fi
fi
echo " Health: curl \$(URL)/docs"
[[ -n "$API_KEY" ]] && echo " API key: $API_KEY"
echo ""
echo " Configure the main Reflector instance (in server/.env):"
echo ""
local_gpu_url=""
if [[ "$USE_CADDY" == "true" ]]; then
if [[ -n "$CUSTOM_DOMAIN" ]]; then
local_gpu_url="https://$CUSTOM_DOMAIN"
elif [[ -n "$PRIMARY_IP" ]]; then
local_gpu_url="https://$PRIMARY_IP"
else
local_gpu_url="https://localhost"
fi
else
if [[ -n "$PRIMARY_IP" ]]; then
local_gpu_url="http://$PRIMARY_IP:$HOST_PORT"
else
local_gpu_url="http://localhost:$HOST_PORT"
fi
fi
echo " TRANSCRIPT_BACKEND=modal"
echo " TRANSCRIPT_URL=$local_gpu_url"
[[ -n "$API_KEY" ]] && echo " TRANSCRIPT_MODAL_API_KEY=$API_KEY"
echo " DIARIZATION_BACKEND=modal"
echo " DIARIZATION_URL=$local_gpu_url"
[[ -n "$API_KEY" ]] && echo " DIARIZATION_MODAL_API_KEY=$API_KEY"
echo " TRANSLATION_BACKEND=modal"
echo " TRANSLATE_URL=$local_gpu_url"
[[ -n "$API_KEY" ]] && echo " TRANSLATION_MODAL_API_KEY=$API_KEY"
echo ""
if [[ "$USE_CUSTOM_CA" == "true" ]]; then
echo " The Reflector instance must also trust this CA."
echo " On the Reflector machine, run setup-selfhosted.sh with:"
echo " --extra-ca /path/to/this-machines-ca.crt"
echo ""
fi
echo " DNS Resolution:"
if [[ -n "$CUSTOM_DOMAIN" ]]; then
echo " Ensure '$CUSTOM_DOMAIN' resolves to this machine's IP."
echo " Public: Create a DNS A record."
echo " Internal: Add to /etc/hosts on the Reflector machine:"
echo " ${PRIMARY_IP:-<GPU_IP>} $CUSTOM_DOMAIN"
else
echo " Use this machine's IP directly in TRANSCRIPT_URL/DIARIZATION_URL."
fi
echo ""
echo " To stop: docker compose -f docker-compose.gpu-host.yml down"
echo " To re-run: ./scripts/setup-gpu-host.sh $*"
echo " Logs: docker compose -f docker-compose.gpu-host.yml logs -f gpu"
echo ""