diff --git a/docker-compose.standalone.yml b/docker-compose.standalone.yml
new file mode 100644
index 00000000..dc24f673
--- /dev/null
+++ b/docker-compose.standalone.yml
@@ -0,0 +1,45 @@
+# Standalone services for fully local deployment (no external dependencies).
+# Usage: docker compose -f docker-compose.yml -f docker-compose.standalone.yml --profile <profile> up -d
+#
+# On Linux with NVIDIA GPU, use: --profile ollama-gpu
+# On Linux without GPU, use:     --profile ollama-cpu
+# On Mac: Ollama runs natively (Metal GPU) — no profile needed; the services here are unused.
+
+services:
+  ollama:  # GPU variant; publishes the same host port as ollama-cpu, so enable only one profile
+    image: ollama/ollama:latest
+    profiles: ["ollama-gpu"]  # started only when --profile ollama-gpu is passed
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama  # named volume keeps /root/.ollama across container recreation
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all  # reserve every NVIDIA GPU visible to Docker
+              capabilities: [gpu]
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "ollama", "list"]  # image ships no curl; the bundled CLI queries the local server
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  ollama-cpu:  # CPU-only variant; publishes the same host port as ollama, so enable only one profile
+    image: ollama/ollama:latest
+    profiles: ["ollama-cpu"]  # started only when --profile ollama-cpu is passed
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama  # same named volume as the GPU variant
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "ollama", "list"]  # image ships no curl; the bundled CLI queries the local server
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  ollama_data:
diff --git a/docker-compose.yml b/docker-compose.yml
index a86d4547..b695f82d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -132,44 +132,6 @@ services:
       retries: 5
       start_period: 30s
 
-  ollama:
-    image: ollama/ollama:latest
-    profiles: ["ollama-gpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  ollama-cpu:
-    image: ollama/ollama:latest
-    profiles: ["ollama-cpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-volumes:
-  ollama_data:
-
 networks:
   default:
     attachable: true
diff --git a/docs/01_ollama.prd.md b/docs/01_ollama.prd.md
index 68fef80b..dcd28466 100644
--- a/docs/01_ollama.prd.md
+++ b/docs/01_ollama.prd.md
@@ -190,53 +190,31 @@ LLM_API_KEY=not-needed
 LLM_CONTEXT_WINDOW=16000
 ```
 
-### Docker Compose additions
+### Docker Compose changes
 
+**`docker-compose.yml`** — `extra_hosts` added to `server` and `hatchet-worker-llm` so containers can reach host Ollama on Mac:
 ```yaml
+  hatchet-worker-llm:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+```
+
+**`docker-compose.standalone.yml`** — Ollama services for Linux (kept out of the main compose file; loaded only when passed explicitly with `-f`):
+```yaml
+# Usage: docker compose -f docker-compose.yml -f docker-compose.standalone.yml --profile ollama-gpu up -d
 services:
   ollama:
     image: ollama/ollama:latest
     profiles: ["ollama-gpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
+    # ... NVIDIA GPU passthrough
   ollama-cpu:
     image: ollama/ollama:latest
     profiles: ["ollama-cpu"]
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    restart: unless-stopped
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-
-  hatchet-worker-llm:
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-
-volumes:
-  ollama_data:
+    # ... CPU-only fallback
 ```
 
+Mac devs never touch `docker-compose.standalone.yml` — Ollama runs natively. The standalone file is for Linux deployment and will grow to include other local-only services (e.g. MinIO for S3) as the standalone story expands.
+
 ### Known gotchas
 
 1. **OrbStack `host.docker.internal`**: OrbStack uses `host.internal` by default, but also supports `host.docker.internal` with `extra_hosts: host-gateway`.
diff --git a/docs/docs/installation/local-dev-setup.md b/docs/docs/installation/local-dev-setup.md
index 1fbb6eba..9dd3ff9f 100644
--- a/docs/docs/installation/local-dev-setup.md
+++ b/docs/docs/installation/local-dev-setup.md
@@ -27,7 +27,7 @@ The script is idempotent — safe to re-run at any time. It detects what's alrea
 
 **Mac**: starts Ollama natively (Metal GPU acceleration). Pulls the LLM model. Docker containers reach it via `host.docker.internal:11434`.
 
-**Linux**: starts containerized Ollama via docker-compose profile (`ollama-gpu` with NVIDIA, `ollama-cpu` without). Pulls model inside the container.
+**Linux**: starts containerized Ollama via `docker-compose.standalone.yml` profile (`ollama-gpu` with NVIDIA, `ollama-cpu` without). Pulls model inside the container.
 
 Configures `server/.env`:
 ```
diff --git a/scripts/setup-local-llm.sh b/scripts/setup-local-llm.sh
index aae6ad3f..b5c7734e 100755
--- a/scripts/setup-local-llm.sh
+++ b/scripts/setup-local-llm.sh
@@ -69,8 +69,10 @@ case "$OS" in
             LLM_URL="http://ollama-cpu:$OLLAMA_PORT/v1"
         fi
 
+        COMPOSE="docker compose -f docker-compose.yml -f docker-compose.standalone.yml"
+
         echo "Starting Ollama container..."
-        docker compose --profile "$PROFILE" up -d
+        $COMPOSE --profile "$PROFILE" up -d
 
         # Determine container name
         if [ "$PROFILE" = "ollama-gpu" ]; then
@@ -82,7 +84,7 @@ case "$OS" in
         wait_for_ollama "http://localhost:$OLLAMA_PORT"
 
         echo "Pulling model $MODEL..."
-        docker compose exec "$SVC" ollama pull "$MODEL"
+        $COMPOSE exec "$SVC" ollama pull "$MODEL"
 
         echo ""
         echo "Done. Add to server/.env:"
@@ -90,7 +92,7 @@ case "$OS" in
         echo "  LLM_MODEL=$MODEL"
         echo "  LLM_API_KEY=not-needed"
         echo ""
-        echo "Then: docker compose --profile $PROFILE up -d"
+        echo "Then: $COMPOSE --profile $PROFILE up -d"
         ;;
 
     *)