diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..e0bb0fe --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,243 @@ +name: Benchmarks + +on: + workflow_dispatch: + inputs: + min_runs: + description: "Minimum benchmark runs" + required: false + default: "30" + quick: + description: "Quick mode (fewer runs)" + required: false + default: "false" + type: boolean + + # Run on PRs that potentially touch performance-sensitive code + # pull_request: + # branches: [main] + # paths: + # - "internal/sandbox/**" + # - "internal/proxy/**" + # - "cmd/fence/**" + +permissions: + contents: read + +jobs: + benchmark-linux: + name: Benchmark (Linux) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Download dependencies + run: go mod download + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + bubblewrap \ + socat \ + uidmap \ + curl \ + netcat-openbsd \ + ripgrep \ + hyperfine \ + jq \ + bc + # Configure subuid/subgid + echo "$(whoami):100000:65536" | sudo tee -a /etc/subuid + echo "$(whoami):100000:65536" | sudo tee -a /etc/subgid + sudo chmod u+s $(which bwrap) + + - name: Install benchstat + run: go install golang.org/x/perf/cmd/benchstat@latest + + - name: Build fence + run: make build-ci + + - name: Run Go microbenchmarks + run: | + mkdir -p benchmarks + go test -run=^$ -bench=. -benchmem -count=10 ./internal/sandbox/... 
| tee benchmarks/go-bench-linux.txt + + - name: Run CLI benchmarks + run: | + MIN_RUNS="${{ github.event.inputs.min_runs || '30' }}" + QUICK="${{ github.event.inputs.quick || 'false' }}" + + if [[ "$QUICK" == "true" ]]; then + ./scripts/benchmark.sh -q -o benchmarks + else + ./scripts/benchmark.sh -n "$MIN_RUNS" -o benchmarks + fi + + - name: Upload benchmark results + uses: actions/upload-artifact@v4 + with: + name: benchmark-results-linux + path: benchmarks/ + retention-days: 30 + + - name: Summary + run: | + echo "## Linux Benchmark Results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + for f in benchmarks/*.md; do + [[ -f "$f" ]] && cat "$f" >> $GITHUB_STEP_SUMMARY + done + + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Go Microbenchmarks" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + grep -E '^Benchmark|^ok|^PASS' benchmarks/go-bench-linux.txt | head -50 >> $GITHUB_STEP_SUMMARY || true + echo '```' >> $GITHUB_STEP_SUMMARY + + benchmark-macos: + name: Benchmark (macOS) + runs-on: macos-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Download dependencies + run: go mod download + + - name: Install dependencies + run: | + brew install hyperfine ripgrep coreutils jq + + - name: Install benchstat + run: go install golang.org/x/perf/cmd/benchstat@latest + + - name: Build fence + run: make build-ci + + - name: Run Go microbenchmarks + run: | + mkdir -p benchmarks + go test -run=^$ -bench=. -benchmem -count=10 ./internal/sandbox/... 
| tee benchmarks/go-bench-macos.txt + + - name: Run CLI benchmarks + run: | + MIN_RUNS="${{ github.event.inputs.min_runs || '30' }}" + QUICK="${{ github.event.inputs.quick || 'false' }}" + + if [[ "$QUICK" == "true" ]]; then + ./scripts/benchmark.sh -q -o benchmarks + else + ./scripts/benchmark.sh -n "$MIN_RUNS" -o benchmarks + fi + + - name: Upload benchmark results + uses: actions/upload-artifact@v4 + with: + name: benchmark-results-macos + path: benchmarks/ + retention-days: 30 + + - name: Summary + run: | + echo "## macOS Benchmark Results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + for f in benchmarks/*.md; do + [[ -f "$f" ]] && cat "$f" >> $GITHUB_STEP_SUMMARY + done + + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Go Microbenchmarks" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + grep -E '^Benchmark|^ok|^PASS' benchmarks/go-bench-macos.txt | head -50 >> $GITHUB_STEP_SUMMARY || true + echo '```' >> $GITHUB_STEP_SUMMARY + + compare: + name: Compare Results + needs: [benchmark-linux, benchmark-macos] + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + + - name: Install benchstat + run: go install golang.org/x/perf/cmd/benchstat@latest + + - name: Download Linux results + uses: actions/download-artifact@v4 + with: + name: benchmark-results-linux + path: linux-results/ + + - name: Download macOS results + uses: actions/download-artifact@v4 + with: + name: benchmark-results-macos + path: macos-results/ + + - name: Compare Go benchmarks + run: | + echo "## Cross-Platform Comparison" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + if [[ -f linux-results/go-bench-linux.txt && -f macos-results/go-bench-macos.txt ]]; then + echo "### Go Microbenchmark Comparison (Linux vs macOS)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + benchstat 
linux-results/go-bench-linux.txt macos-results/go-bench-macos.txt 2>&1 | head -100 >> $GITHUB_STEP_SUMMARY || echo "benchstat comparison failed" + echo '```' >> $GITHUB_STEP_SUMMARY + fi + + - name: Display results + run: | + echo "=== Linux Results ===" + ls -la linux-results/ + echo "" + echo "=== macOS Results ===" + ls -la macos-results/ + echo "" + if [[ -f linux-results/go-bench-linux.txt && -f macos-results/go-bench-macos.txt ]]; then + echo "=== Benchstat Comparison ===" + benchstat linux-results/go-bench-linux.txt macos-results/go-bench-macos.txt || true + fi diff --git a/.gitignore b/.gitignore index d84377c..c4cca2f 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,9 @@ coverage.out # GoReleaser /dist/ +# Benchmark results +/benchmarks/ +*.prof +cpu.out +mem.out + diff --git a/docs/README.md b/docs/README.md index c89b888..49c04f5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -23,6 +23,7 @@ Fence is a sandboxing tool that restricts network and filesystem access for arbi - [Security model](security-model.md) - Threat model, guarantees, and limitations - [Linux security features](linux-security-features.md) - Landlock, seccomp, eBPF details and fallback behavior - [Testing](testing.md) - How to run tests and write new ones +- [Benchmarking](benchmarking.md) - Performance overhead and profiling ## Examples diff --git a/docs/benchmarking.md b/docs/benchmarking.md new file mode 100644 index 0000000..337991a --- /dev/null +++ b/docs/benchmarking.md @@ -0,0 +1,329 @@ +# Benchmarking + +This document describes how to run, interpret, and compare sandbox performance benchmarks for Fence. + +## Quick Start + +```bash +# Install dependencies +brew install hyperfine # macOS +# apt install hyperfine # Linux + +go install golang.org/x/perf/cmd/benchstat@latest + +# Run CLI benchmarks +./scripts/benchmark.sh + +# Run Go microbenchmarks +go test -run=^$ -bench=. -benchmem ./internal/sandbox/... +``` + +## Goals + +1. 
Quantify sandbox overhead on each platform (`sandboxed / unsandboxed` ratio) +2. Compare macOS (Seatbelt) vs Linux (bwrap+Landlock) overhead fairly +3. Attribute overhead to specific components (proxy startup, bridge setup, wrap generation) +4. Track regressions over time + +## Benchmark Types + +### Layer 1: CLI Benchmarks (`scripts/benchmark.sh`) + +**What it measures**: Real-world agent cost - full `fence` invocation including proxy startup, socat bridges (Linux), and sandbox-exec/bwrap setup. + +This is the most realistic benchmark for understanding the cost of running agent commands through Fence. + +```bash +# Full benchmark suite +./scripts/benchmark.sh + +# Quick mode (fewer runs) +./scripts/benchmark.sh -q + +# Custom output directory +./scripts/benchmark.sh -o ./my-results + +# Include network benchmarks (requires local server) +./scripts/benchmark.sh --network +``` + +#### Options + +| Option | Description | +|--------|-------------| +| `-b, --binary PATH` | Path to fence binary (default: ./fence) | +| `-o, --output DIR` | Output directory (default: ./benchmarks) | +| `-n, --runs N` | Minimum runs per benchmark (default: 30) | +| `-q, --quick` | Quick mode: fewer runs, skip slow benchmarks | +| `--network` | Include network benchmarks | + +### Layer 2: Go Microbenchmarks (`internal/sandbox/benchmark_test.go`) + +**What it measures**: Component-level overhead - isolates Manager initialization, WrapCommand generation, and execution. + +```bash +# Run all benchmarks +go test -run=^$ -bench=. -benchmem ./internal/sandbox/... + +# Run specific benchmark +go test -run=^$ -bench=BenchmarkWarmSandbox -benchmem ./internal/sandbox/... + +# Multiple runs for statistical analysis +go test -run=^$ -bench=. -benchmem -count=10 ./internal/sandbox/... 
> bench.txt +benchstat bench.txt +``` + +#### Available Benchmarks + +| Benchmark | Description | +|-----------|-------------| +| `BenchmarkBaseline_*` | Unsandboxed command execution | +| `BenchmarkManagerInitialize` | Cold initialization (proxies + bridges) | +| `BenchmarkWrapCommand` | Command string construction only | +| `BenchmarkColdSandbox_*` | Full init + wrap + exec per iteration | +| `BenchmarkWarmSandbox_*` | Pre-initialized manager, just exec | +| `BenchmarkOverhead` | Grouped comparison of baseline vs sandbox | + +### Layer 3: OS-Level Profiling + +**What it measures**: Kernel/system overhead - context switches, syscalls, page faults. + +#### Linux + +```bash +# Quick syscall cost breakdown +strace -f -c ./fence -- true + +# Context switches, page faults +perf stat -- ./fence -- true + +# Full profiling (flamegraph-ready) +perf record -F 99 -g -- ./fence -- git status +perf report +``` + +#### macOS + +```bash +# Time Profiler via Instruments +xcrun xctrace record --template 'Time Profiler' --launch -- ./fence -- true + +# Quick call-stack snapshot +./fence -- sleep 5 & +sample $! 5 -file sample.txt +``` + +## Interpreting Results + +### Key Metric: Overhead Factor + +```text +Overhead Factor = time(sandboxed) / time(unsandboxed) +``` + +Compare overhead factors across platforms, not absolute times, because hardware differences swamp absolute timings. 
+ +### Example Output + +```text +Benchmark Unsandboxed Sandboxed Overhead +true 1.2 ms 45 ms 37.5x +git status 15 ms 62 ms 4.1x +python -c 'pass' 25 ms 73 ms 2.9x +``` + +### What to Expect + +| Workload | Linux Overhead | macOS Overhead | Notes | +|----------|----------------|----------------|-------| +| `true` | 180-360x | 8-10x | Dominated by cold start | +| `echo` | 150-300x | 6-8x | Similar to true | +| `python3 -c 'pass'` | 10-12x | 2-3x | Interpreter startup dominates | +| `git status` | 50-60x | 4-5x | Real I/O helps amortize | +| `rg` | 40-50x | 3-4x | Search I/O helps amortize | + +The overhead factor decreases as the actual workload increases (because sandbox setup is fixed cost). Linux overhead is significantly higher due to bwrap/socat setup. + +## Cross-Platform Comparison + +### Fair Comparison Approach + +1. Run benchmarks on each platform independently +2. Compare overhead factors, not absolute times +3. Use the same fence version and workloads + +```bash +# On macOS +go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > bench_macos.txt + +# On Linux +go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > bench_linux.txt + +# Compare +benchstat bench_macos.txt bench_linux.txt +``` + +### Caveats + +- macOS uses Seatbelt (sandbox-exec) - built-in, lightweight kernel sandbox +- Linux uses bwrap + Landlock, this creates socat bridges for network, incurring significant setup cost +- Linux cold start is ~10x slower than macOS due to bwrap/socat bridge setup +- Linux warm path is still ~5x slower than macOS - bwrap execution itself has overhead +- For long-running agents, this difference is negligible (one-time startup cost) + +> [!TIP] +> Running Linux benchmarks inside a VM (Colima, Docker Desktop, etc.) inflates overhead due to virtualization. Use native Linux (bare metal or CI) for fair cross-platform comparison. 
+ +## GitHub Actions + +Benchmarks can be run in CI via the workflow at `.github/workflows/benchmark.yml`: + +```bash +# Trigger manually from GitHub UI: Actions > Benchmarks > Run workflow + +# Or via gh CLI +gh workflow run benchmark.yml +``` + +Results are uploaded as artifacts and summarized in the workflow summary. + +## Tips + +### Reducing Variance + +- Run with `--min-runs 50` or higher +- Close other applications +- Pin CPU frequency if possible (Linux: `cpupower frequency-set --governor performance`) +- Run multiple times and use benchstat for statistical analysis + +### Profiling Hotspots + +```bash +# CPU profile +go test -run=^$ -bench=BenchmarkWarmSandbox -cpuprofile=cpu.out ./internal/sandbox/... +go tool pprof -http=:8080 cpu.out + +# Memory profile +go test -run=^$ -bench=BenchmarkWarmSandbox -memprofile=mem.out ./internal/sandbox/... +go tool pprof -http=:8080 mem.out +``` + +### Tracking Regressions + +1. Run benchmarks before and after changes +2. Save results to files +3. Compare with benchstat + +```bash +# Before +go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > before.txt + +# Make changes... + +# After +go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > after.txt + +# Compare +benchstat before.txt after.txt +``` + +## Workload Categories + +| Category | Commands | What it Stresses | +|----------|----------|------------------| +| **Spawn-only** | `true`, `echo` | Process spawn, wrapper overhead | +| **Interpreter** | `python3 -c`, `node -e` | Runtime startup under sandbox | +| **FS-heavy** | file creation, `rg` | Landlock/Seatbelt FS rules | +| **Network (local)** | `curl localhost` | Proxy forwarding overhead | +| **Real tools** | `git status` | Practical agent workloads | + +## Benchmark Findings (12/28/2025) + +Results from GitHub Actions CI runners (Linux: AMD EPYC 7763, macOS: Apple M1 Virtual). 
+ +### Manager Initialization + +| Platform | `Manager.Initialize()` | +|----------|------------------------| +| Linux | 101.9 ms | +| macOS | 27.5 µs | + +Linux initialization is ~3,700x slower because it must: + +- Start HTTP + SOCKS proxies +- Create Unix socket bridges for socat +- Set up bwrap namespace configuration + +macOS only generates a Seatbelt profile string (very cheap). + +### Cold Start Overhead (one `fence` invocation per command) + +| Workload | Linux | macOS | +|----------|-------|-------| +| `true` | 215 ms | 22 ms | +| Python | 124 ms | 33 ms | +| Git status | 114 ms | 25 ms | + +This is the realistic cost for scripts running `fence -c "command"` repeatedly. + +### Warm Path Overhead (pre-initialized manager) + +| Workload | Linux | macOS | +|----------|-------|-------| +| `true` | 112 ms | 20 ms | +| Python | 124 ms | 33 ms | +| Git status | 114 ms | 25 ms | + +Even with proxies already running, Linux bwrap execution adds ~110ms overhead per command. + +### Overhead Factors + +| Workload | Linux Overhead | macOS Overhead | +|----------|----------------|----------------| +| `true` (cold) | ~360x | ~10x | +| `true` (warm) | ~187x | ~8x | +| Python (warm) | ~11x | ~2x | +| Git status (warm) | ~54x | ~4x | + +Overhead decreases as the actual workload increases (sandbox setup is fixed cost). + +## Impact on Agent Usage + +### Long-Running Agents (`fence claude`, `fence codex`) + +For agents that run as a child process under fence: + +| Phase | Cost | +|-------|------| +| Startup (once) | Linux: ~215ms, macOS: ~22ms | +| Per tool call | Negligible (baseline fork+exec only) | + +Child processes inherit the sandbox - no re-initialization, no WrapCommand overhead. 
The per-command cost is just normal process spawning:
+
+| Command | Linux | macOS |
+|---------|-------|-------|
+| `true` | 0.6 ms | 2.3 ms |
+| `git status` | 2.1 ms | 5.9 ms |
+| Python script | 11 ms | 15 ms |
+
+**Bottom line**: For `fence <agent>` usage, sandbox overhead is a one-time startup cost. Tool calls inside the agent run at native speed.
+
+### Per-Command Invocation (`fence -c "command"`)
+
+For scripts or CI running fence per command:
+
+| Session | Linux Cost | macOS Cost |
+|---------|------------|------------|
+| 1 command | 215 ms | 22 ms |
+| 10 commands | 2.15 s | 220 ms |
+| 50 commands | 10.75 s | 1.1 s |
+
+Consider keeping the manager alive (daemon mode) or batching commands to reduce overhead.
+
+## Additional Notes
+
+- `Manager.Initialize()` starts HTTP + SOCKS proxies; on Linux also creates socat bridges
+- Cold start includes all initialization; hot path is just `WrapCommand + exec`
+- `-m` (monitor mode) spawns additional monitoring processes, so we'll have to benchmark separately
+- Keep workloads under the repo - avoid `/tmp` since Linux bwrap does `--tmpfs /tmp`
+- `debug` mode changes logging, so always benchmark with debug off
diff --git a/internal/sandbox/benchmark_test.go b/internal/sandbox/benchmark_test.go
new file mode 100644
index 0000000..b940a14
--- /dev/null
+++ b/internal/sandbox/benchmark_test.go
@@ -0,0 +1,369 @@
+package sandbox
+
+import (
+	"bytes"
+	"context"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"testing"
+	"time"
+
+	"github.com/Use-Tusk/fence/internal/config"
+)
+
+// ============================================================================
+// Baseline Benchmarks (unsandboxed)
+// ============================================================================
+
+// BenchmarkBaseline_True measures the cost of spawning a minimal process.
+func BenchmarkBaseline_True(b *testing.B) { + for i := 0; i < b.N; i++ { + cmd := exec.Command("true") + _ = cmd.Run() + } +} + +// BenchmarkBaseline_Echo measures echo command without sandbox. +func BenchmarkBaseline_Echo(b *testing.B) { + for i := 0; i < b.N; i++ { + cmd := exec.Command("sh", "-c", "echo hello") + _ = cmd.Run() + } +} + +// BenchmarkBaseline_Python measures Python startup without sandbox. +func BenchmarkBaseline_Python(b *testing.B) { + if _, err := exec.LookPath("python3"); err != nil { + b.Skip("python3 not found") + } + for i := 0; i < b.N; i++ { + cmd := exec.Command("python3", "-c", "pass") + _ = cmd.Run() + } +} + +// BenchmarkBaseline_Node measures Node.js startup without sandbox. +func BenchmarkBaseline_Node(b *testing.B) { + if _, err := exec.LookPath("node"); err != nil { + b.Skip("node not found") + } + for i := 0; i < b.N; i++ { + cmd := exec.Command("node", "-e", "") + _ = cmd.Run() + } +} + +// BenchmarkBaseline_GitStatus measures git status without sandbox. +func BenchmarkBaseline_GitStatus(b *testing.B) { + if _, err := exec.LookPath("git"); err != nil { + b.Skip("git not found") + } + // Find a git repo to run in + repoDir := findGitRepo() + if repoDir == "" { + b.Skip("no git repo found") + } + + for i := 0; i < b.N; i++ { + cmd := exec.Command("git", "status", "--porcelain") + cmd.Dir = repoDir + cmd.Stdout = nil // discard + _ = cmd.Run() + } +} + +// ============================================================================ +// Component Benchmarks (isolate overhead sources) +// ============================================================================ + +// BenchmarkManagerInitialize measures cold initialization cost (proxies + bridges). 
+func BenchmarkManagerInitialize(b *testing.B) { + skipBenchIfSandboxed(b) + + workspace := b.TempDir() + cfg := benchConfig(workspace) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("failed to initialize: %v", err) + } + manager.Cleanup() + } +} + +// BenchmarkWrapCommand measures the cost of command wrapping (string construction only). +func BenchmarkWrapCommand(b *testing.B) { + skipBenchIfSandboxed(b) + + workspace := b.TempDir() + cfg := benchConfig(workspace) + + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("failed to initialize: %v", err) + } + defer manager.Cleanup() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := manager.WrapCommand("echo hello") + if err != nil { + b.Fatalf("wrap failed: %v", err) + } + } +} + +// ============================================================================ +// Cold Sandbox Benchmarks (full init + wrap + exec each iteration) +// ============================================================================ + +// BenchmarkColdSandbox_True measures full cold-start sandbox cost. 
+func BenchmarkColdSandbox_True(b *testing.B) { + skipBenchIfSandboxed(b) + + workspace := b.TempDir() + cfg := benchConfig(workspace) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("init failed: %v", err) + } + + wrappedCmd, err := manager.WrapCommand("true") + if err != nil { + manager.Cleanup() + b.Fatalf("wrap failed: %v", err) + } + + execBenchCommand(b, wrappedCmd, workspace) + manager.Cleanup() + } +} + +// ============================================================================ +// Warm Sandbox Benchmarks (Manager.Initialize once, repeat WrapCommand + exec) +// ============================================================================ + +// BenchmarkWarmSandbox_True measures sandbox cost with pre-initialized manager. +func BenchmarkWarmSandbox_True(b *testing.B) { + skipBenchIfSandboxed(b) + + workspace := b.TempDir() + cfg := benchConfig(workspace) + + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("init failed: %v", err) + } + defer manager.Cleanup() + + wrappedCmd, err := manager.WrapCommand("true") + if err != nil { + b.Fatalf("wrap failed: %v", err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + execBenchCommand(b, wrappedCmd, workspace) + } +} + +// BenchmarkWarmSandbox_Echo measures echo command with pre-initialized manager. 
+func BenchmarkWarmSandbox_Echo(b *testing.B) { + skipBenchIfSandboxed(b) + + workspace := b.TempDir() + cfg := benchConfig(workspace) + + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("init failed: %v", err) + } + defer manager.Cleanup() + + wrappedCmd, err := manager.WrapCommand("echo hello") + if err != nil { + b.Fatalf("wrap failed: %v", err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + execBenchCommand(b, wrappedCmd, workspace) + } +} + +// BenchmarkWarmSandbox_Python measures Python startup with pre-initialized manager. +func BenchmarkWarmSandbox_Python(b *testing.B) { + skipBenchIfSandboxed(b) + if _, err := exec.LookPath("python3"); err != nil { + b.Skip("python3 not found") + } + + workspace := b.TempDir() + cfg := benchConfig(workspace) + + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("init failed: %v", err) + } + defer manager.Cleanup() + + wrappedCmd, err := manager.WrapCommand("python3 -c 'pass'") + if err != nil { + b.Fatalf("wrap failed: %v", err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + execBenchCommand(b, wrappedCmd, workspace) + } +} + +// BenchmarkWarmSandbox_FileWrite measures file write with pre-initialized manager. +func BenchmarkWarmSandbox_FileWrite(b *testing.B) { + skipBenchIfSandboxed(b) + + workspace := b.TempDir() + cfg := benchConfig(workspace) + + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("init failed: %v", err) + } + defer manager.Cleanup() + + testFile := filepath.Join(workspace, "bench.txt") + wrappedCmd, err := manager.WrapCommand("echo 'benchmark data' > " + testFile) + if err != nil { + b.Fatalf("wrap failed: %v", err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + execBenchCommand(b, wrappedCmd, workspace) + _ = os.Remove(testFile) + } +} + +// BenchmarkWarmSandbox_GitStatus measures git status with pre-initialized manager. 
+func BenchmarkWarmSandbox_GitStatus(b *testing.B) { + skipBenchIfSandboxed(b) + if _, err := exec.LookPath("git"); err != nil { + b.Skip("git not found") + } + + repoDir := findGitRepo() + if repoDir == "" { + b.Skip("no git repo found") + } + + cfg := benchConfig(repoDir) + + manager := NewManager(cfg, false, false) + if err := manager.Initialize(); err != nil { + b.Fatalf("init failed: %v", err) + } + defer manager.Cleanup() + + wrappedCmd, err := manager.WrapCommand("git status --porcelain") + if err != nil { + b.Fatalf("wrap failed: %v", err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + execBenchCommand(b, wrappedCmd, repoDir) + } +} + +// ============================================================================ +// Comparison Sub-benchmarks +// ============================================================================ + +// BenchmarkOverhead runs baseline vs sandbox comparisons for easy diffing. +func BenchmarkOverhead(b *testing.B) { + b.Run("Baseline/True", BenchmarkBaseline_True) + b.Run("Baseline/Echo", BenchmarkBaseline_Echo) + b.Run("Baseline/Python", BenchmarkBaseline_Python) + + b.Run("Warm/True", BenchmarkWarmSandbox_True) + b.Run("Warm/Echo", BenchmarkWarmSandbox_Echo) + b.Run("Warm/Python", BenchmarkWarmSandbox_Python) + + b.Run("Cold/True", BenchmarkColdSandbox_True) +} + +// ============================================================================ +// Helpers +// ============================================================================ + +func skipBenchIfSandboxed(b *testing.B) { + b.Helper() + if os.Getenv("FENCE_SANDBOX") == "1" { + b.Skip("already running inside Fence sandbox") + } +} + +func benchConfig(workspace string) *config.Config { + return &config.Config{ + Network: config.NetworkConfig{ + AllowedDomains: []string{}, + }, + Filesystem: config.FilesystemConfig{ + AllowWrite: []string{workspace}, + }, + Command: config.CommandConfig{ + UseDefaults: boolPtr(false), + }, + } +} + +func execBenchCommand(b *testing.B, 
command string, workDir string) { + b.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + shell := "/bin/sh" + if runtime.GOOS == "darwin" { + shell = "/bin/bash" + } + + cmd := exec.CommandContext(ctx, shell, "-c", command) + cmd.Dir = workDir + cmd.Stdout = &bytes.Buffer{} + cmd.Stderr = &bytes.Buffer{} + + if err := cmd.Run(); err != nil { + // Don't fail on command errors - we're measuring timing, not correctness + // (e.g., git status might fail if not in a repo) + _ = err + } +} + +func findGitRepo() string { + // Try current directory and parents + dir, err := os.Getwd() + if err != nil { + return "" + } + + for { + if _, err := os.Stat(filepath.Join(dir, ".git")); err == nil { + return dir + } + parent := filepath.Dir(dir) + if parent == dir { + break + } + dir = parent + } + + return "" +} diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh new file mode 100755 index 0000000..3ac64c8 --- /dev/null +++ b/scripts/benchmark.sh @@ -0,0 +1,396 @@ +#!/bin/bash +# benchmark.sh - Comprehensive sandbox benchmarking +# +# This script compares sandbox overhead between: +# - Unsandboxed (baseline) +# - Sandboxed (default mode) +# - Sandboxed with monitor (-m) +# +# Usage: +# ./scripts/benchmark.sh [options] +# +# Options: +# -b, --binary PATH Path to fence binary (default: ./fence or builds one) +# -o, --output DIR Output directory for results (default: ./benchmarks) +# -n, --runs N Minimum runs per benchmark (default: 30) +# -q, --quick Quick mode: fewer runs, skip slow benchmarks +# --network Include network benchmarks (requires local server) +# -h, --help Show this help +# +# Requirements: +# - hyperfine (brew install hyperfine / apt install hyperfine) +# - go (for building fence if needed) +# - Optional: python3 (for local-server.py network benchmarks) + +set -euo pipefail + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +# Defaults +FENCE_BIN="" 
+OUTPUT_DIR="./benchmarks" +MIN_RUNS=30 +WARMUP=3 +QUICK=false +NETWORK=false + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + -b|--binary) + FENCE_BIN="$2" + shift 2 + ;; + -o|--output) + OUTPUT_DIR="$2" + shift 2 + ;; + -n|--runs) + MIN_RUNS="$2" + shift 2 + ;; + -q|--quick) + QUICK=true + MIN_RUNS=10 + WARMUP=1 + shift + ;; + --network) + NETWORK=true + shift + ;; + -h|--help) + head -30 "$0" | tail -28 + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +# Find or build fence binary +if [[ -z "$FENCE_BIN" ]]; then + if [[ -x "./fence" ]]; then + FENCE_BIN="./fence" + elif [[ -x "./dist/fence" ]]; then + FENCE_BIN="./dist/fence" + else + echo -e "${BLUE}Building fence...${NC}" + go build -o ./fence ./cmd/fence + FENCE_BIN="./fence" + fi +fi + +if [[ ! -x "$FENCE_BIN" ]]; then + echo -e "${RED}Error: fence binary not found at $FENCE_BIN${NC}" + exit 1 +fi + +# Check for hyperfine +if ! command -v hyperfine &> /dev/null; then + echo -e "${RED}Error: hyperfine not found. Install with:${NC}" + echo " brew install hyperfine # macOS" + echo " apt install hyperfine # Linux" + exit 1 +fi + +# Create output directory +mkdir -p "$OUTPUT_DIR" + +# Create workspace in current directory (not /tmp, which bwrap overlays) +WORKSPACE=$(mktemp -d -p .) 
+trap 'rm -rf "$WORKSPACE"' EXIT
+
+# Create settings file for sandbox
+SETTINGS_FILE="$WORKSPACE/fence.json"
+cat > "$SETTINGS_FILE" << EOF
+{
+  "filesystem": {
+    "allowWrite": ["$WORKSPACE", "."]
+  }
+}
+EOF
+
+# Platform info
+OS=$(uname -s)
+ARCH=$(uname -m)
+KERNEL=$(uname -r)
+DATE=$(date +%Y-%m-%d)
+TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+
+# Results file
+# NOTE: avoid ${OS,,} here - it needs bash >= 4, but macOS /bin/bash is 3.2
+RESULTS_JSON="$OUTPUT_DIR/$(echo "$OS" | tr '[:upper:]' '[:lower:]')-${ARCH}-${TIMESTAMP}.json"
+RESULTS_MD="$OUTPUT_DIR/$(echo "$OS" | tr '[:upper:]' '[:lower:]')-${ARCH}-${TIMESTAMP}.md"
+
+echo ""
+echo -e "${BLUE}==========================================${NC}"
+echo -e "${BLUE}Fence Sandbox Benchmarks${NC}"
+echo -e "${BLUE}==========================================${NC}"
+echo ""
+echo "Platform: $OS $ARCH"
+echo "Kernel: $KERNEL"
+echo "Date: $DATE"
+echo "Fence: $FENCE_BIN"
+echo "Output: $OUTPUT_DIR"
+echo "Min runs: $MIN_RUNS"
+echo ""
+
+# Helper to run hyperfine with consistent options
+run_bench() {
+  local name="$1"
+  shift
+  local json_file="$WORKSPACE/${name}.json"
+
+  echo -e "${GREEN}Benchmarking: $name${NC}"
+
+  hyperfine \
+    --warmup "$WARMUP" \
+    --min-runs "$MIN_RUNS" \
+    --export-json "$json_file" \
+    --style basic \
+    "$@"
+
+  echo ""
+}
+
+# ============================================================================
+# Spawn-only benchmarks (minimal process overhead)
+# ============================================================================
+
+echo -e "${YELLOW}=== Spawn-Only Benchmarks ===${NC}"
+echo ""
+
+run_bench "true" \
+  --command-name "unsandboxed" "true" \
+  --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -- true"
+
+run_bench "echo" \
+  --command-name "unsandboxed" "echo hello >/dev/null" \
+  --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c 'echo hello' >/dev/null"
+
+# ============================================================================
+# Tool compatibility benchmarks
+# ============================================================================
+
+echo -e "${YELLOW}=== Tool Compatibility Benchmarks ===${NC}"
+echo "" + +if command -v python3 &> /dev/null; then + run_bench "python" \ + --command-name "unsandboxed" "python3 -c 'pass'" \ + --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"python3 -c 'pass'\"" +else + echo -e "${YELLOW}Skipping python3 (not found)${NC}" +fi + +if command -v node &> /dev/null && [[ "$QUICK" == "false" ]]; then + run_bench "node" \ + --command-name "unsandboxed" "node -e ''" \ + --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"node -e ''\"" +else + echo -e "${YELLOW}Skipping node (not found or quick mode)${NC}" +fi + +# ============================================================================ +# Real workload benchmarks +# ============================================================================ + +echo -e "${YELLOW}=== Real Workload Benchmarks ===${NC}" +echo "" + +if command -v git &> /dev/null && [[ -d .git ]]; then + run_bench "git-status" \ + --command-name "unsandboxed" "git status --porcelain >/dev/null" \ + --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -- git status --porcelain >/dev/null" +else + echo -e "${YELLOW}Skipping git status (not in a git repo)${NC}" +fi + +if command -v rg &> /dev/null && [[ "$QUICK" == "false" ]]; then + run_bench "ripgrep" \ + --command-name "unsandboxed" "rg -n 'package' -S . >/dev/null 2>&1 || true" \ + --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"rg -n 'package' -S . 
>/dev/null 2>&1\" || true" +else + echo -e "${YELLOW}Skipping ripgrep (not found or quick mode)${NC}" +fi + +# ============================================================================ +# File I/O benchmarks +# ============================================================================ + +echo -e "${YELLOW}=== File I/O Benchmarks ===${NC}" +echo "" + +run_bench "file-write" \ + --command-name "unsandboxed" "echo 'test' > $WORKSPACE/test.txt" \ + --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"echo 'test' > $WORKSPACE/test.txt\"" + +run_bench "file-read" \ + --command-name "unsandboxed" "cat $WORKSPACE/test.txt >/dev/null" \ + --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c 'cat $WORKSPACE/test.txt' >/dev/null" + +# ============================================================================ +# Monitor mode benchmarks (optional) +# ============================================================================ + +if [[ "$QUICK" == "false" ]]; then + echo -e "${YELLOW}=== Monitor Mode Benchmarks ===${NC}" + echo "" + + run_bench "monitor-true" \ + --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -- true" \ + --command-name "sandboxed+monitor" "$FENCE_BIN -m -s $SETTINGS_FILE -- true" +fi + +# ============================================================================ +# Network benchmarks (optional, requires local server) +# ============================================================================ + +if [[ "$NETWORK" == "true" ]]; then + echo -e "${YELLOW}=== Network Benchmarks ===${NC}" + echo "" + + # Start local server + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + if [[ -f "$SCRIPT_DIR/local-server.py" ]]; then + python3 "$SCRIPT_DIR/local-server.py" & + SERVER_PID=$! 
+        # Re-arm the EXIT trap so the server is killed even on early exit.
+        trap 'kill $SERVER_PID 2>/dev/null || true; rm -rf "$WORKSPACE"' EXIT
+        sleep 1
+
+        # Create network settings
+        NET_SETTINGS="$WORKSPACE/fence-net.json"
+        cat > "$NET_SETTINGS" << EOF
+{
+  "network": {
+    "allowedDomains": ["127.0.0.1", "localhost"]
+  },
+  "filesystem": {
+    "allowWrite": ["$WORKSPACE"]
+  }
+}
+EOF
+
+        if command -v curl &> /dev/null; then
+            run_bench "network-curl" \
+                --command-name "unsandboxed" "curl -s http://127.0.0.1:8765/ >/dev/null" \
+                --command-name "sandboxed" "$FENCE_BIN -s $NET_SETTINGS -c 'curl -s http://127.0.0.1:8765/' >/dev/null"
+        fi
+
+        # Stop the server now that the network runs are done; the EXIT trap
+        # above remains as a backstop for early-exit paths.
+        kill $SERVER_PID 2>/dev/null || true
+    else
+        echo -e "${YELLOW}Skipping network benchmarks (local-server.py not found)${NC}"
+    fi
+fi
+
+# ============================================================================
+# Combine results and generate report
+# ============================================================================
+
+echo -e "${YELLOW}=== Generating Report ===${NC}"
+echo ""
+
+# Combine all per-benchmark hyperfine JSON exports into one JSON document.
+echo "{" > "$RESULTS_JSON"
+echo "  \"platform\": \"$OS\"," >> "$RESULTS_JSON"
+echo "  \"arch\": \"$ARCH\"," >> "$RESULTS_JSON"
+echo "  \"kernel\": \"$KERNEL\"," >> "$RESULTS_JSON"
+echo "  \"date\": \"$DATE\"," >> "$RESULTS_JSON"
+echo "  \"fence_version\": \"$($FENCE_BIN --version 2>/dev/null || echo unknown)\"," >> "$RESULTS_JSON"
+echo "  \"benchmarks\": {" >> "$RESULTS_JSON"
+
+first=true
+for json_file in "$WORKSPACE"/*.json; do
+    [[ -f "$json_file" ]] || continue
+    # The sandbox settings files (fence.json / fence-net.json) also live in
+    # $WORKSPACE; they are not hyperfine results, so skip them.
+    case "$(basename "$json_file")" in fence*.json) continue ;; esac
+    name=$(basename "$json_file" .json)
+    if [[ "$first" == "true" ]]; then
+        first=false
+    else
+        echo "," >> "$RESULTS_JSON"
+    fi
+    echo "  \"$name\": $(cat "$json_file")" >> "$RESULTS_JSON"
+done
+
+echo "" >> "$RESULTS_JSON"
+echo "  }" >> "$RESULTS_JSON"
+echo "}" >> "$RESULTS_JSON"
+
+# Generate Markdown report
+cat > "$RESULTS_MD" << EOF
+# Fence Benchmark Results
+
+**Platform:** $OS $ARCH
+**Kernel:** $KERNEL
+**Date:** $DATE
+**Fence:** $($FENCE_BIN --version 2>/dev/null || echo unknown)
+
+## Summary
+
+| Benchmark | Unsandboxed | Sandboxed | Overhead |
+|-----------|-------------|-----------|----------|
+EOF
+
+# Parse results and add to markdown. Every extraction is defensive so one
+# malformed file cannot abort the whole report.
+if command -v jq &> /dev/null; then
+    for json_file in "$WORKSPACE"/*.json; do
+        [[ -f "$json_file" ]] || continue
+        # Skip sandbox settings files; they are not hyperfine results.
+        case "$(basename "$json_file")" in fence*.json) continue ;; esac
+        name=$(basename "$json_file" .json)
+
+        # Extract mean times, defaulting to empty if not found
+        unsandboxed=$(jq -r '.results[] | select(.command == "unsandboxed") | .mean // empty' "$json_file" 2>/dev/null) || true
+        sandboxed=$(jq -r '.results[] | select(.command == "sandboxed") | .mean // empty' "$json_file" 2>/dev/null) || true
+
+        # Skip if values are missing or null
+        if [[ -z "$unsandboxed" || -z "$sandboxed" || "$unsandboxed" == "null" || "$sandboxed" == "null" ]]; then
+            continue
+        fi
+
+        # Calculate values, catching any bc errors
+        overhead=$(echo "scale=1; $sandboxed / $unsandboxed" | bc 2>/dev/null) || continue
+        unsandboxed_ms=$(echo "scale=2; $unsandboxed * 1000" | bc 2>/dev/null) || continue
+        sandboxed_ms=$(echo "scale=2; $sandboxed * 1000" | bc 2>/dev/null) || continue
+
+        if [[ -n "$overhead" && -n "$unsandboxed_ms" && -n "$sandboxed_ms" ]]; then
+            echo "| $name | ${unsandboxed_ms}ms | ${sandboxed_ms}ms | ${overhead}x |" >> "$RESULTS_MD"
+        fi
+    done
+fi
+
+echo ""
+echo -e "${GREEN}Results saved to:${NC}"
+echo "  JSON: $RESULTS_JSON"
+echo "  Markdown: $RESULTS_MD"
+echo ""
+
+# Print quick summary (errors in this section should not fail the script)
+if command -v jq &> /dev/null; then
+    echo -e "${BLUE}Quick Summary (overhead factors):${NC}"
+    for json_file in "$WORKSPACE"/*.json; do
+        (
+            [[ -f "$json_file" ]] || exit 0
+            # Skip sandbox settings files; they are not hyperfine results.
+            case "$(basename "$json_file")" in fence*.json) exit 0 ;; esac
+            name=$(basename "$json_file" .json)
+
+            # Extract values, defaulting to empty if not found
+            unsandboxed=$(jq -r '.results[] | select(.command == "unsandboxed") | .mean // empty' "$json_file" 2>/dev/null) || exit 0
+            sandboxed=$(jq -r '.results[] | select(.command == "sandboxed") | .mean // empty' "$json_file" 2>/dev/null) || exit 0
+
+            # Skip if either value is missing or null
+            [[ -z "$unsandboxed" || -z "$sandboxed" || "$unsandboxed" == "null" || "$sandboxed" == "null" ]] && exit 0
+
+            # Calculate overhead, catching any bc errors
+            overhead=$(echo "scale=1; $sandboxed / $unsandboxed" | bc 2>/dev/null) || exit 0
+
+            [[ -n "$overhead" ]] && printf "  %-15s %sx\n" "$name:" "$overhead"
+        ) || true # Ignore errors from subshell
+    done
+fi
+
+echo ""
+echo -e "${GREEN}Done!${NC}"
diff --git a/scripts/local-server.py b/scripts/local-server.py
new file mode 100755
index 0000000..84ad84d
--- /dev/null
+++ b/scripts/local-server.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+"""
+Simple HTTP server for network benchmarking.
+
+Runs on port 8765 and responds to all requests with a minimal JSON response.
+Used by benchmark.sh to measure proxy overhead without internet variability.
+
+Usage:
+    python3 scripts/local-server.py
+    # Server runs on http://127.0.0.1:8765/
+
+    # In another terminal:
+    curl http://127.0.0.1:8765/
+"""
+
+import http.server
+import json
+import socketserver
+import sys
+
+PORT = 8765
+
+
+class BenchmarkHandler(http.server.BaseHTTPRequestHandler):
+    """Minimal HTTP handler for benchmarking."""
+
+    def _send_json(self, payload: dict) -> None:
+        """Serialize *payload* and send it as a 200 application/json response."""
+        body = json.dumps(payload).encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        # Content-Length lets clients read the exact body size instead of
+        # relying on connection close to delimit the response.
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def do_GET(self):
+        """Handle GET requests with minimal response."""
+        self._send_json({"status": "ok", "path": self.path})
+
+    def do_POST(self):
+        """Handle POST requests with minimal response."""
+        content_length = int(self.headers.get("Content-Length", 0))
+        _ = self.rfile.read(content_length)  # Read and discard body
+        self._send_json({"status": "ok", "method": "POST"})
+
+    def log_message(self, format, *args):
+        """Suppress request logging for cleaner benchmark output."""
+        pass
+
+
+def main():
+    # allow_reuse_address avoids "Address already in use" on quick restarts.
+    socketserver.TCPServer.allow_reuse_address = True
+    with socketserver.TCPServer(("127.0.0.1", PORT), BenchmarkHandler) as httpd:
+        print(f"Benchmark server running on http://127.0.0.1:{PORT}/", file=sys.stderr)
+        print("Press Ctrl+C to stop", file=sys.stderr)
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            print("\nShutting down...", file=sys.stderr)
+            httpd.shutdown()
+
+
+if __name__ == "__main__":
+    main()