diff --git a/docker-compose.standalone.yml b/docker-compose.standalone.yml
new file mode 100644
index 00000000..dc24f673
--- /dev/null
+++ b/docker-compose.standalone.yml
@@ -0,0 +1,47 @@
+# Standalone services for fully local deployment (no external dependencies).
+# Usage: docker compose -f docker-compose.yml -f docker-compose.standalone.yml up -d
+#
+# On Linux with NVIDIA GPU, also pass: --profile ollama-gpu
+# On Linux without GPU: --profile ollama-cpu
+# On Mac: Ollama runs natively (Metal GPU) — no profile needed, services here unused.
+
+services:
+  ollama:
+    image: ollama/ollama:latest
+    profiles: ["ollama-gpu"]
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+    healthcheck:
+      # Probe with the bundled CLI: the ollama/ollama image does not ship curl.
+      test: ["CMD", "ollama", "list"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  ollama-cpu:
+    image: ollama/ollama:latest
+    profiles: ["ollama-cpu"]
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama
+    restart: unless-stopped
+    healthcheck:
+      # Probe with the bundled CLI: the ollama/ollama image does not ship curl.
+      test: ["CMD", "ollama", "list"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  ollama_data:
diff --git a/docker-compose.yml b/docker-compose.yml
index a86d4547..b695f82d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -132,44 +132,6 @@ services:
       retries: 5
       start_period: 30s
 
-  ollama:
-    image: ollama/ollama:latest
-    profiles: ["ollama-gpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  ollama-cpu:
-    image: ollama/ollama:latest
-    profiles: ["ollama-cpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-volumes:
-  ollama_data:
-
 networks:
   default:
     attachable: true
diff --git a/docs/01_ollama.prd.md b/docs/01_ollama.prd.md
index 68fef80b..dcd28466 100644
--- a/docs/01_ollama.prd.md
+++ b/docs/01_ollama.prd.md
@@ -190,53 +190,31 @@ LLM_API_KEY=not-needed
 LLM_CONTEXT_WINDOW=16000
 ```
 
-### Docker Compose additions
+### Docker Compose changes
 
+**`docker-compose.yml`** — `extra_hosts` added to `server` and `hatchet-worker-llm` so containers can reach host Ollama on Mac:
 ```yaml
+  hatchet-worker-llm:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+```
+
+**`docker-compose.standalone.yml`** — Ollama services for Linux (not in main compose, only used with `-f`):
+```yaml
+# Usage: docker compose -f docker-compose.yml -f docker-compose.standalone.yml --profile ollama-gpu up -d
 services:
   ollama:
     image: ollama/ollama:latest
     profiles: ["ollama-gpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
+    # ... NVIDIA GPU passthrough
   ollama-cpu:
     image: ollama/ollama:latest
     profiles: ["ollama-cpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  hatchet-worker-llm:
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-
-volumes:
-  ollama_data:
+    # ... CPU-only fallback
 ```
 
+Mac devs never touch `docker-compose.standalone.yml` — Ollama runs natively. The standalone file is for Linux deployment and will grow to include other local-only services (e.g. MinIO for S3) as the standalone story expands.
+
 ### Known gotchas
 
 1. **OrbStack `host.docker.internal`**: OrbStack uses `host.internal` by default, but also supports `host.docker.internal` with `extra_hosts: host-gateway`.
diff --git a/docs/docs/installation/local-dev-setup.md b/docs/docs/installation/local-dev-setup.md
index 1fbb6eba..9dd3ff9f 100644
--- a/docs/docs/installation/local-dev-setup.md
+++ b/docs/docs/installation/local-dev-setup.md
@@ -27,7 +27,7 @@ The script is idempotent — safe to re-run at any time. It detects what's alrea
 
 **Mac**: starts Ollama natively (Metal GPU acceleration). Pulls the LLM model. Docker containers reach it via `host.docker.internal:11434`.
 
-**Linux**: starts containerized Ollama via docker-compose profile (`ollama-gpu` with NVIDIA, `ollama-cpu` without). Pulls model inside the container.
+**Linux**: starts containerized Ollama via `docker-compose.standalone.yml` profile (`ollama-gpu` with NVIDIA, `ollama-cpu` without). Pulls model inside the container.
 
 Configures `server/.env`:
 ```
diff --git a/scripts/setup-local-llm.sh b/scripts/setup-local-llm.sh
index aae6ad3f..b5c7734e 100755
--- a/scripts/setup-local-llm.sh
+++ b/scripts/setup-local-llm.sh
@@ -69,8 +69,10 @@ case "$OS" in
       LLM_URL="http://ollama-cpu:$OLLAMA_PORT/v1"
     fi
 
+    COMPOSE="docker compose -f docker-compose.yml -f docker-compose.standalone.yml"
+
     echo "Starting Ollama container..."
-    docker compose --profile "$PROFILE" up -d
+    $COMPOSE --profile "$PROFILE" up -d
 
     # Determine container name
     if [ "$PROFILE" = "ollama-gpu" ]; then
@@ -82,7 +84,7 @@ case "$OS" in
     wait_for_ollama "http://localhost:$OLLAMA_PORT"
 
     echo "Pulling model $MODEL..."
-    docker compose exec "$SVC" ollama pull "$MODEL"
+    $COMPOSE exec "$SVC" ollama pull "$MODEL"
 
     echo ""
     echo "Done. Add to server/.env:"
@@ -90,7 +92,7 @@ case "$OS" in
     echo " LLM_MODEL=$MODEL"
     echo " LLM_API_KEY=not-needed"
     echo ""
-    echo "Then: docker compose --profile $PROFILE up -d"
+    echo "Then: $COMPOSE --profile $PROFILE up -d"
     ;;
 
   *)