Mirror of https://github.com/Monadical-SAS/reflector.git, synced 2026-03-28 18:06:47 +00:00
feat: local LLM via Ollama + structured output response_format
- Add setup script (scripts/setup-local-llm.sh) for one-command Ollama setup. Mac: native Metal GPU; Linux: containerized via docker-compose profiles
- Add ollama-gpu and ollama-cpu docker-compose profiles for Linux
- Add extra_hosts to server/hatchet-worker-llm for host.docker.internal
- Pass response_format JSON schema in StructuredOutputWorkflow.extract(), enabling grammar-based constrained decoding on Ollama/llama.cpp/vLLM/OpenAI
- Update .env.example with Ollama as the default LLM option
- Add Ollama PRD and local dev setup docs
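For context on the response_format change: against an OpenAI-compatible endpoint such as Ollama's /v1 route, constrained decoding is requested by attaching a JSON schema to the chat completion call. A minimal curl sketch follows; the "topics" schema and prompt are illustrative placeholders, not the exact payload StructuredOutputWorkflow.extract() builds, and json_schema support depends on the backend and its version.

# Illustrative only: request schema-constrained JSON from a local Ollama server
# through the OpenAI-compatible chat completions endpoint.
curl -s http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen2.5:14b",
    "messages": [{"role": "user", "content": "List the main topics discussed."}],
    "response_format": {
      "type": "json_schema",
      "json_schema": {
        "name": "topics",
        "schema": {
          "type": "object",
          "properties": {"topics": {"type": "array", "items": {"type": "string"}}},
          "required": ["topics"]
        }
      }
    }
  }'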
scripts/setup-local-llm.sh (new executable file, 100 lines added)
@@ -0,0 +1,100 @@
#!/usr/bin/env bash
set -euo pipefail

MODEL="${LLM_MODEL:-qwen2.5:14b}"
OLLAMA_PORT="${OLLAMA_PORT:-11434}"

wait_for_ollama() {
    local url="$1"
    local retries=30
    for i in $(seq 1 "$retries"); do
        if curl -sf "$url/api/tags" > /dev/null 2>&1; then
            return 0
        fi
        echo "  Waiting for Ollama... ($i/$retries)"
        sleep 2
    done
    echo "ERROR: Ollama not responding at $url after $retries attempts"
    return 1
}

OS="$(uname -s)"

case "$OS" in
    Darwin)
        echo "macOS detected -- Ollama must run natively for Metal GPU acceleration."
        echo ""

        if ! command -v ollama &> /dev/null; then
            echo "Ollama not found. Install it first:"
            echo "  brew install ollama"
            echo "  # or download from https://ollama.com/download"
            exit 1
        fi

        # Start Ollama if not already running
        if ! curl -sf "http://localhost:$OLLAMA_PORT/api/tags" > /dev/null 2>&1; then
            echo "Starting Ollama..."
            ollama serve &
            disown
        else
            echo "Ollama already running."
        fi

        wait_for_ollama "http://localhost:$OLLAMA_PORT"

        echo "Pulling model $MODEL..."
        ollama pull "$MODEL"

        echo ""
        echo "Done. Add to server/.env:"
        echo "  LLM_URL=http://host.docker.internal:$OLLAMA_PORT/v1"
        echo "  LLM_MODEL=$MODEL"
        echo "  LLM_API_KEY=not-needed"
        echo ""
        echo "Then: docker compose up -d"
        ;;

    Linux)
        echo "Linux detected."
        echo ""

        if command -v nvidia-smi &> /dev/null && nvidia-smi > /dev/null 2>&1; then
            echo "NVIDIA GPU detected -- using ollama-gpu profile."
            PROFILE="ollama-gpu"
            LLM_URL="http://ollama:$OLLAMA_PORT/v1"
        else
            echo "No NVIDIA GPU -- using ollama-cpu profile."
            PROFILE="ollama-cpu"
            LLM_URL="http://ollama-cpu:$OLLAMA_PORT/v1"
        fi

        echo "Starting Ollama container..."
        docker compose --profile "$PROFILE" up -d

        # Determine container name
        if [ "$PROFILE" = "ollama-gpu" ]; then
            SVC="ollama"
        else
            SVC="ollama-cpu"
        fi

        wait_for_ollama "http://localhost:$OLLAMA_PORT"

        echo "Pulling model $MODEL..."
        docker compose exec "$SVC" ollama pull "$MODEL"

        echo ""
        echo "Done. Add to server/.env:"
        echo "  LLM_URL=$LLM_URL"
        echo "  LLM_MODEL=$MODEL"
        echo "  LLM_API_KEY=not-needed"
        echo ""
        echo "Then: docker compose --profile $PROFILE up -d"
        ;;

    *)
        echo "Unsupported OS: $OS"
        exit 1
        ;;
esac
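Usage sketch for the setup script, assuming it is run from the repo root (the model tag below is just an example; the defaults are qwen2.5:14b and port 11434):

# Default run
./scripts/setup-local-llm.sh

# Override the model via the environment variable read by the script
LLM_MODEL=llama3.1:8b ./scripts/setup-local-llm.sh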