mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-06 13:56:48 +00:00
feat: local LLM via Ollama + structured output response_format
- Add setup script (scripts/setup-local-llm.sh) for one-command Ollama setup Mac: native Metal GPU, Linux: containerized via docker-compose profiles - Add ollama-gpu and ollama-cpu docker-compose profiles for Linux - Add extra_hosts to server/hatchet-worker-llm for host.docker.internal - Pass response_format JSON schema in StructuredOutputWorkflow.extract() enabling grammar-based constrained decoding on Ollama/llama.cpp/vLLM/OpenAI - Update .env.example with Ollama as default LLM option - Add Ollama PRD and local dev setup docs
This commit is contained in:
@@ -11,6 +11,8 @@ services:
|
||||
- ./server/.env
|
||||
environment:
|
||||
ENTRYPOINT: server
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
||||
worker:
|
||||
build:
|
||||
@@ -57,6 +59,8 @@ services:
|
||||
- ./server/.env
|
||||
environment:
|
||||
ENTRYPOINT: hatchet-worker-llm
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
depends_on:
|
||||
hatchet:
|
||||
condition: service_healthy
|
||||
@@ -128,6 +132,44 @@ services:
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
profiles: ["ollama-gpu"]
|
||||
ports:
|
||||
- "11434:11434"
|
||||
volumes:
|
||||
- ollama_data:/root/.ollama
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
ollama-cpu:
|
||||
image: ollama/ollama:latest
|
||||
profiles: ["ollama-cpu"]
|
||||
ports:
|
||||
- "11434:11434"
|
||||
volumes:
|
||||
- ollama_data:/root/.ollama
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
volumes:
|
||||
ollama_data:
|
||||
|
||||
networks:
|
||||
default:
|
||||
attachable: true
|
||||
|
||||
Reference in New Issue
Block a user