feat: local LLM via Ollama + structured output response_format

- Add setup script (scripts/setup-local-llm.sh) for one-command Ollama setup
  Mac: native Metal GPU, Linux: containerized via docker-compose profiles
- Add ollama-gpu and ollama-cpu docker-compose profiles for Linux
- Add extra_hosts to server/hatchet-worker-llm for host.docker.internal
- Pass response_format JSON schema in StructuredOutputWorkflow.extract()
  enabling grammar-based constrained decoding on Ollama/llama.cpp/vLLM/OpenAI
- Update .env.example with Ollama as default LLM option
- Add Ollama PRD and local dev setup docs
This commit is contained in:
Igor Loskutov
2026-02-10 15:55:21 -05:00
parent cd2255cfbc
commit 663345ece6
7 changed files with 653 additions and 7 deletions

View File

@@ -11,6 +11,8 @@ services:
- ./server/.env
environment:
ENTRYPOINT: server
extra_hosts:
- "host.docker.internal:host-gateway"
worker:
build:
@@ -57,6 +59,8 @@ services:
- ./server/.env
environment:
ENTRYPOINT: hatchet-worker-llm
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on:
hatchet:
condition: service_healthy
@@ -128,6 +132,44 @@ services:
retries: 5
start_period: 30s
# Ollama LLM server (NVIDIA GPU) — opt-in via `docker compose --profile ollama-gpu up`.
ollama:
  image: ollama/ollama:latest
  profiles: ["ollama-gpu"]
  ports:
    - "11434:11434"  # default Ollama API port (quoted to avoid YAML number parsing)
  volumes:
    - ollama_data:/root/.ollama  # persist pulled models across container restarts
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: all  # reserve every visible GPU for the runner
            capabilities: [gpu]
  restart: unless-stopped
  healthcheck:
    # The ollama/ollama image does not ship curl, so a curl-based check
    # always fails and the service never reaches "healthy". Use the
    # bundled `ollama` CLI, which exits non-zero when the API is down.
    test: ["CMD-SHELL", "ollama list || exit 1"]
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 30s  # model server warm-up; matches the file's other healthchecks
# CPU-only Ollama variant — opt-in via `docker compose --profile ollama-cpu up`.
# Mutually exclusive with the GPU profile (same host port and volume).
ollama-cpu:
  image: ollama/ollama:latest
  profiles: ["ollama-cpu"]
  ports:
    - "11434:11434"  # same host port as the GPU profile; only run one profile at a time
  volumes:
    - ollama_data:/root/.ollama  # shared model cache with the GPU profile
  restart: unless-stopped
  healthcheck:
    # curl is not present in the ollama/ollama image, so the original
    # curl-based check could never succeed; probe with the `ollama` CLI.
    test: ["CMD-SHELL", "ollama list || exit 1"]
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 30s  # allow time for the server to start before counting failures
volumes:
  # Named volume holding downloaded Ollama models, shared by both
  # the ollama-gpu and ollama-cpu profiles.
  ollama_data:

networks:
  default:
    # Allow standalone `docker run --network` containers to attach
    # to the compose network (e.g. ad-hoc debugging containers).
    attachable: true