feat: local LLM via Ollama + structured output response_format

- Add setup script (scripts/setup-local-llm.sh) for one-command Ollama setup
  Mac: native Metal GPU, Linux: containerized via docker-compose profiles
- Add ollama-gpu and ollama-cpu docker-compose profiles for Linux
- Add extra_hosts to server/hatchet-worker-llm for host.docker.internal
- Pass response_format JSON schema in StructuredOutputWorkflow.extract()
  enabling grammar-based constrained decoding on Ollama/llama.cpp/vLLM/OpenAI
- Update .env.example with Ollama as default LLM option
- Add Ollama PRD and local dev setup docs
This commit is contained in:
Igor Loskutov
2026-02-10 15:55:21 -05:00
parent cd2255cfbc
commit 663345ece6
7 changed files with 653 additions and 7 deletions

View File

@@ -11,6 +11,8 @@ services:
- ./server/.env
environment:
ENTRYPOINT: server
extra_hosts:
- "host.docker.internal:host-gateway"
worker:
build:
@@ -57,6 +59,8 @@ services:
- ./server/.env
environment:
ENTRYPOINT: hatchet-worker-llm
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on:
hatchet:
condition: service_healthy
@@ -128,6 +132,44 @@ services:
retries: 5
start_period: 30s
# Ollama LLM server (NVIDIA GPU) — opt-in via `docker compose --profile ollama-gpu up`.
ollama:
  image: ollama/ollama:latest
  profiles: ["ollama-gpu"]
  ports:
    - "11434:11434"  # default Ollama API port (quoted to avoid YAML number parsing)
  volumes:
    - ollama_data:/root/.ollama  # persist pulled models across container restarts
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: all  # reserve every visible GPU for the runner
            capabilities: [gpu]
  restart: unless-stopped
  healthcheck:
    # The ollama/ollama image does not ship curl, so a curl-based check
    # always fails and the service never reaches "healthy". Use the
    # bundled `ollama` CLI, which exits non-zero when the API is down.
    test: ["CMD-SHELL", "ollama list || exit 1"]
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 30s  # model server warm-up; matches the file's other healthchecks
# CPU-only Ollama variant — opt-in via `docker compose --profile ollama-cpu up`.
# Mutually exclusive with the GPU profile (same host port and volume).
ollama-cpu:
  image: ollama/ollama:latest
  profiles: ["ollama-cpu"]
  ports:
    - "11434:11434"  # same host port as the GPU profile; only run one profile at a time
  volumes:
    - ollama_data:/root/.ollama  # shared model cache with the GPU profile
  restart: unless-stopped
  healthcheck:
    # curl is not present in the ollama/ollama image, so the original
    # curl-based check could never succeed; probe with the `ollama` CLI.
    test: ["CMD-SHELL", "ollama list || exit 1"]
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 30s  # allow time for the server to start before counting failures
volumes:
  # Named volume holding downloaded Ollama models, shared by both
  # the ollama-gpu and ollama-cpu profiles.
  ollama_data:

networks:
  default:
    # Allow standalone `docker run --network` containers to attach
    # to the compose network (e.g. ad-hoc debugging containers).
    attachable: true