perf: add benchmarks (#5)
This commit is contained in:
243
.github/workflows/benchmark.yml
vendored
Normal file
243
.github/workflows/benchmark.yml
vendored
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
name: Benchmarks
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
min_runs:
|
||||||
|
description: "Minimum benchmark runs"
|
||||||
|
required: false
|
||||||
|
default: "30"
|
||||||
|
quick:
|
||||||
|
description: "Quick mode (fewer runs)"
|
||||||
|
required: false
|
||||||
|
default: "false"
|
||||||
|
type: boolean
|
||||||
|
|
||||||
|
# Run on PRs that potentially touch performance-sensitive code
|
||||||
|
# pull_request:
|
||||||
|
# branches: [main]
|
||||||
|
# paths:
|
||||||
|
# - "internal/sandbox/**"
|
||||||
|
# - "internal/proxy/**"
|
||||||
|
# - "cmd/fence/**"
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
benchmark-linux:
|
||||||
|
name: Benchmark (Linux)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version-file: go.mod
|
||||||
|
cache: true
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
|
||||||
|
- name: Set up Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
|
|
||||||
|
- name: Download dependencies
|
||||||
|
run: go mod download
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y \
|
||||||
|
bubblewrap \
|
||||||
|
socat \
|
||||||
|
uidmap \
|
||||||
|
curl \
|
||||||
|
netcat-openbsd \
|
||||||
|
ripgrep \
|
||||||
|
hyperfine \
|
||||||
|
jq \
|
||||||
|
bc
|
||||||
|
# Configure subuid/subgid
|
||||||
|
echo "$(whoami):100000:65536" | sudo tee -a /etc/subuid
|
||||||
|
echo "$(whoami):100000:65536" | sudo tee -a /etc/subgid
|
||||||
|
sudo chmod u+s $(which bwrap)
|
||||||
|
|
||||||
|
- name: Install benchstat
|
||||||
|
run: go install golang.org/x/perf/cmd/benchstat@latest
|
||||||
|
|
||||||
|
- name: Build fence
|
||||||
|
run: make build-ci
|
||||||
|
|
||||||
|
- name: Run Go microbenchmarks
|
||||||
|
run: |
|
||||||
|
mkdir -p benchmarks
|
||||||
|
go test -run=^$ -bench=. -benchmem -count=10 ./internal/sandbox/... | tee benchmarks/go-bench-linux.txt
|
||||||
|
|
||||||
|
- name: Run CLI benchmarks
|
||||||
|
run: |
|
||||||
|
MIN_RUNS="${{ github.event.inputs.min_runs || '30' }}"
|
||||||
|
QUICK="${{ github.event.inputs.quick || 'false' }}"
|
||||||
|
|
||||||
|
if [[ "$QUICK" == "true" ]]; then
|
||||||
|
./scripts/benchmark.sh -q -o benchmarks
|
||||||
|
else
|
||||||
|
./scripts/benchmark.sh -n "$MIN_RUNS" -o benchmarks
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Upload benchmark results
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: benchmark-results-linux
|
||||||
|
path: benchmarks/
|
||||||
|
retention-days: 30
|
||||||
|
|
||||||
|
- name: Summary
|
||||||
|
run: |
|
||||||
|
echo "## Linux Benchmark Results" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
|
for f in benchmarks/*.md; do
|
||||||
|
[[ -f "$f" ]] && cat "$f" >> $GITHUB_STEP_SUMMARY
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### Go Microbenchmarks" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
grep -E '^Benchmark|^ok|^PASS' benchmarks/go-bench-linux.txt | head -50 >> $GITHUB_STEP_SUMMARY || true
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
|
benchmark-macos:
|
||||||
|
name: Benchmark (macOS)
|
||||||
|
runs-on: macos-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version-file: go.mod
|
||||||
|
cache: true
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
|
||||||
|
- name: Set up Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
|
|
||||||
|
- name: Download dependencies
|
||||||
|
run: go mod download
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
brew install hyperfine ripgrep coreutils jq
|
||||||
|
|
||||||
|
- name: Install benchstat
|
||||||
|
run: go install golang.org/x/perf/cmd/benchstat@latest
|
||||||
|
|
||||||
|
- name: Build fence
|
||||||
|
run: make build-ci
|
||||||
|
|
||||||
|
- name: Run Go microbenchmarks
|
||||||
|
run: |
|
||||||
|
mkdir -p benchmarks
|
||||||
|
go test -run=^$ -bench=. -benchmem -count=10 ./internal/sandbox/... | tee benchmarks/go-bench-macos.txt
|
||||||
|
|
||||||
|
- name: Run CLI benchmarks
|
||||||
|
run: |
|
||||||
|
MIN_RUNS="${{ github.event.inputs.min_runs || '30' }}"
|
||||||
|
QUICK="${{ github.event.inputs.quick || 'false' }}"
|
||||||
|
|
||||||
|
if [[ "$QUICK" == "true" ]]; then
|
||||||
|
./scripts/benchmark.sh -q -o benchmarks
|
||||||
|
else
|
||||||
|
./scripts/benchmark.sh -n "$MIN_RUNS" -o benchmarks
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Upload benchmark results
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: benchmark-results-macos
|
||||||
|
path: benchmarks/
|
||||||
|
retention-days: 30
|
||||||
|
|
||||||
|
- name: Summary
|
||||||
|
run: |
|
||||||
|
echo "## macOS Benchmark Results" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
|
for f in benchmarks/*.md; do
|
||||||
|
[[ -f "$f" ]] && cat "$f" >> $GITHUB_STEP_SUMMARY
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "### Go Microbenchmarks" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
grep -E '^Benchmark|^ok|^PASS' benchmarks/go-bench-macos.txt | head -50 >> $GITHUB_STEP_SUMMARY || true
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
|
compare:
|
||||||
|
name: Compare Results
|
||||||
|
needs: [benchmark-linux, benchmark-macos]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version-file: go.mod
|
||||||
|
cache: true
|
||||||
|
|
||||||
|
- name: Install benchstat
|
||||||
|
run: go install golang.org/x/perf/cmd/benchstat@latest
|
||||||
|
|
||||||
|
- name: Download Linux results
|
||||||
|
uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
name: benchmark-results-linux
|
||||||
|
path: linux-results/
|
||||||
|
|
||||||
|
- name: Download macOS results
|
||||||
|
uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
name: benchmark-results-macos
|
||||||
|
path: macos-results/
|
||||||
|
|
||||||
|
- name: Compare Go benchmarks
|
||||||
|
run: |
|
||||||
|
echo "## Cross-Platform Comparison" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
|
if [[ -f linux-results/go-bench-linux.txt && -f macos-results/go-bench-macos.txt ]]; then
|
||||||
|
echo "### Go Microbenchmark Comparison (Linux vs macOS)" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
benchstat linux-results/go-bench-linux.txt macos-results/go-bench-macos.txt 2>&1 | head -100 >> $GITHUB_STEP_SUMMARY || echo "benchstat comparison failed"
|
||||||
|
echo '```' >> $GITHUB_STEP_SUMMARY
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Display results
|
||||||
|
run: |
|
||||||
|
echo "=== Linux Results ==="
|
||||||
|
ls -la linux-results/
|
||||||
|
echo ""
|
||||||
|
echo "=== macOS Results ==="
|
||||||
|
ls -la macos-results/
|
||||||
|
echo ""
|
||||||
|
if [[ -f linux-results/go-bench-linux.txt && -f macos-results/go-bench-macos.txt ]]; then
|
||||||
|
echo "=== Benchstat Comparison ==="
|
||||||
|
benchstat linux-results/go-bench-linux.txt macos-results/go-bench-macos.txt || true
|
||||||
|
fi
|
||||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -23,3 +23,9 @@ coverage.out
|
|||||||
# GoReleaser
|
# GoReleaser
|
||||||
/dist/
|
/dist/
|
||||||
|
|
||||||
|
# Benchmark results
|
||||||
|
/benchmarks/
|
||||||
|
*.prof
|
||||||
|
cpu.out
|
||||||
|
mem.out
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ Fence is a sandboxing tool that restricts network and filesystem access for arbi
|
|||||||
- [Security model](security-model.md) - Threat model, guarantees, and limitations
|
- [Security model](security-model.md) - Threat model, guarantees, and limitations
|
||||||
- [Linux security features](linux-security-features.md) - Landlock, seccomp, eBPF details and fallback behavior
|
- [Linux security features](linux-security-features.md) - Landlock, seccomp, eBPF details and fallback behavior
|
||||||
- [Testing](testing.md) - How to run tests and write new ones
|
- [Testing](testing.md) - How to run tests and write new ones
|
||||||
|
- [Benchmarking](benchmarking.md) - Performance overhead and profiling
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
|
|||||||
329
docs/benchmarking.md
Normal file
329
docs/benchmarking.md
Normal file
@@ -0,0 +1,329 @@
|
|||||||
|
# Benchmarking
|
||||||
|
|
||||||
|
This document describes how to run, interpret, and compare sandbox performance benchmarks for Fence.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install dependencies
|
||||||
|
brew install hyperfine # macOS
|
||||||
|
# apt install hyperfine # Linux
|
||||||
|
|
||||||
|
go install golang.org/x/perf/cmd/benchstat@latest
|
||||||
|
|
||||||
|
# Run CLI benchmarks
|
||||||
|
./scripts/benchmark.sh
|
||||||
|
|
||||||
|
# Run Go microbenchmarks
|
||||||
|
go test -run=^$ -bench=. -benchmem ./internal/sandbox/...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
|
||||||
|
1. Quantify sandbox overhead on each platform (`sandboxed / unsandboxed` ratio)
|
||||||
|
2. Compare macOS (Seatbelt) vs Linux (bwrap+Landlock) overhead fairly
|
||||||
|
3. Attribute overhead to specific components (proxy startup, bridge setup, wrap generation)
|
||||||
|
4. Track regressions over time
|
||||||
|
|
||||||
|
## Benchmark Types
|
||||||
|
|
||||||
|
### Layer 1: CLI Benchmarks (`scripts/benchmark.sh`)
|
||||||
|
|
||||||
|
**What it measures**: Real-world agent cost - full `fence` invocation including proxy startup, socat bridges (Linux), and sandbox-exec/bwrap setup.
|
||||||
|
|
||||||
|
This is the most realistic benchmark for understanding the cost of running agent commands through Fence.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full benchmark suite
|
||||||
|
./scripts/benchmark.sh
|
||||||
|
|
||||||
|
# Quick mode (fewer runs)
|
||||||
|
./scripts/benchmark.sh -q
|
||||||
|
|
||||||
|
# Custom output directory
|
||||||
|
./scripts/benchmark.sh -o ./my-results
|
||||||
|
|
||||||
|
# Include network benchmarks (requires local server)
|
||||||
|
./scripts/benchmark.sh --network
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Options
|
||||||
|
|
||||||
|
| Option | Description |
|
||||||
|
|--------|-------------|
|
||||||
|
| `-b, --binary PATH` | Path to fence binary (default: ./fence) |
|
||||||
|
| `-o, --output DIR` | Output directory (default: ./benchmarks) |
|
||||||
|
| `-n, --runs N` | Minimum runs per benchmark (default: 30) |
|
||||||
|
| `-q, --quick` | Quick mode: fewer runs, skip slow benchmarks |
|
||||||
|
| `--network` | Include network benchmarks |
|
||||||
|
|
||||||
|
### Layer 2: Go Microbenchmarks (`internal/sandbox/benchmark_test.go`)
|
||||||
|
|
||||||
|
**What it measures**: Component-level overhead - isolates Manager initialization, WrapCommand generation, and execution.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run all benchmarks
|
||||||
|
go test -run=^$ -bench=. -benchmem ./internal/sandbox/...
|
||||||
|
|
||||||
|
# Run specific benchmark
|
||||||
|
go test -run=^$ -bench=BenchmarkWarmSandbox -benchmem ./internal/sandbox/...
|
||||||
|
|
||||||
|
# Multiple runs for statistical analysis
|
||||||
|
go test -run=^$ -bench=. -benchmem -count=10 ./internal/sandbox/... > bench.txt
|
||||||
|
benchstat bench.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Available Benchmarks
|
||||||
|
|
||||||
|
| Benchmark | Description |
|
||||||
|
|-----------|-------------|
|
||||||
|
| `BenchmarkBaseline_*` | Unsandboxed command execution |
|
||||||
|
| `BenchmarkManagerInitialize` | Cold initialization (proxies + bridges) |
|
||||||
|
| `BenchmarkWrapCommand` | Command string construction only |
|
||||||
|
| `BenchmarkColdSandbox_*` | Full init + wrap + exec per iteration |
|
||||||
|
| `BenchmarkWarmSandbox_*` | Pre-initialized manager, just exec |
|
||||||
|
| `BenchmarkOverhead` | Grouped comparison of baseline vs sandbox |
|
||||||
|
|
||||||
|
### Layer 3: OS-Level Profiling
|
||||||
|
|
||||||
|
**What it measures**: Kernel/system overhead - context switches, syscalls, page faults.
|
||||||
|
|
||||||
|
#### Linux
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Quick syscall cost breakdown
|
||||||
|
strace -f -c ./fence -- true
|
||||||
|
|
||||||
|
# Context switches, page faults
|
||||||
|
perf stat -- ./fence -- true
|
||||||
|
|
||||||
|
# Full profiling (flamegraph-ready)
|
||||||
|
perf record -F 99 -g -- ./fence -- git status
|
||||||
|
perf report
|
||||||
|
```
|
||||||
|
|
||||||
|
#### macOS
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Time Profiler via Instruments
|
||||||
|
xcrun xctrace record --template 'Time Profiler' --launch -- ./fence -- true
|
||||||
|
|
||||||
|
# Quick call-stack snapshot
|
||||||
|
./fence -- sleep 5 &
|
||||||
|
sample $! 5 -file sample.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Interpreting Results
|
||||||
|
|
||||||
|
### Key Metric: Overhead Factor
|
||||||
|
|
||||||
|
```text
|
||||||
|
Overhead Factor = time(sandboxed) / time(unsandboxed)
|
||||||
|
```
|
||||||
|
|
||||||
|
Compare overhead factors across platforms, not absolute times, because hardware differences swamp absolute timings.
|
||||||
|
|
||||||
|
### Example Output
|
||||||
|
|
||||||
|
```text
|
||||||
|
Benchmark Unsandboxed Sandboxed Overhead
|
||||||
|
true 1.2 ms 45 ms 37.5x
|
||||||
|
git status 15 ms 62 ms 4.1x
|
||||||
|
python -c 'pass' 25 ms 73 ms 2.9x
|
||||||
|
```
|
||||||
|
|
||||||
|
### What to Expect
|
||||||
|
|
||||||
|
| Workload | Linux Overhead | macOS Overhead | Notes |
|
||||||
|
|----------|----------------|----------------|-------|
|
||||||
|
| `true` | 180-360x | 8-10x | Dominated by cold start |
|
||||||
|
| `echo` | 150-300x | 6-8x | Similar to true |
|
||||||
|
| `python3 -c 'pass'` | 10-12x | 2-3x | Interpreter startup dominates |
|
||||||
|
| `git status` | 50-60x | 4-5x | Real I/O helps amortize |
|
||||||
|
| `rg` | 40-50x | 3-4x | Search I/O helps amortize |
|
||||||
|
|
||||||
|
The overhead factor decreases as the actual workload increases (because sandbox setup is fixed cost). Linux overhead is significantly higher due to bwrap/socat setup.
|
||||||
|
|
||||||
|
## Cross-Platform Comparison
|
||||||
|
|
||||||
|
### Fair Comparison Approach
|
||||||
|
|
||||||
|
1. Run benchmarks on each platform independently
|
||||||
|
2. Compare overhead factors, not absolute times
|
||||||
|
3. Use the same fence version and workloads
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On macOS
|
||||||
|
go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > bench_macos.txt
|
||||||
|
|
||||||
|
# On Linux
|
||||||
|
go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > bench_linux.txt
|
||||||
|
|
||||||
|
# Compare
|
||||||
|
benchstat bench_macos.txt bench_linux.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Caveats
|
||||||
|
|
||||||
|
- macOS uses Seatbelt (sandbox-exec) - built-in, lightweight kernel sandbox
|
||||||
|
- Linux uses bwrap + Landlock, this creates socat bridges for network, incurring significant setup cost
|
||||||
|
- Linux cold start is ~10x slower than macOS due to bwrap/socat bridge setup
|
||||||
|
- Linux warm path is still ~5x slower than macOS - bwrap execution itself has overhead
|
||||||
|
- For long-running agents, this difference is negligible (one-time startup cost)
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> Running Linux benchmarks inside a VM (Colima, Docker Desktop, etc.) inflates overhead due to virtualization. Use native Linux (bare metal or CI) for fair cross-platform comparison.
|
||||||
|
|
||||||
|
## GitHub Actions
|
||||||
|
|
||||||
|
Benchmarks can be run in CI via the workflow at `.github/workflows/benchmark.yml`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Trigger manually from GitHub UI: Actions > Benchmarks > Run workflow
|
||||||
|
|
||||||
|
# Or via gh CLI
|
||||||
|
gh workflow run benchmark.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
Results are uploaded as artifacts and summarized in the workflow summary.
|
||||||
|
|
||||||
|
## Tips
|
||||||
|
|
||||||
|
### Reducing Variance
|
||||||
|
|
||||||
|
- Run with `--min-runs 50` or higher
|
||||||
|
- Close other applications
|
||||||
|
- Pin CPU frequency if possible (Linux: `cpupower frequency-set --governor performance`)
|
||||||
|
- Run multiple times and use benchstat for statistical analysis
|
||||||
|
|
||||||
|
### Profiling Hotspots
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# CPU profile
|
||||||
|
go test -run=^$ -bench=BenchmarkWarmSandbox -cpuprofile=cpu.out ./internal/sandbox/...
|
||||||
|
go tool pprof -http=:8080 cpu.out
|
||||||
|
|
||||||
|
# Memory profile
|
||||||
|
go test -run=^$ -bench=BenchmarkWarmSandbox -memprofile=mem.out ./internal/sandbox/...
|
||||||
|
go tool pprof -http=:8080 mem.out
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tracking Regressions
|
||||||
|
|
||||||
|
1. Run benchmarks before and after changes
|
||||||
|
2. Save results to files
|
||||||
|
3. Compare with benchstat
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Before
|
||||||
|
go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > before.txt
|
||||||
|
|
||||||
|
# Make changes...
|
||||||
|
|
||||||
|
# After
|
||||||
|
go test -run=^$ -bench=. -count=10 ./internal/sandbox/... > after.txt
|
||||||
|
|
||||||
|
# Compare
|
||||||
|
benchstat before.txt after.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workload Categories
|
||||||
|
|
||||||
|
| Category | Commands | What it Stresses |
|
||||||
|
|----------|----------|------------------|
|
||||||
|
| **Spawn-only** | `true`, `echo` | Process spawn, wrapper overhead |
|
||||||
|
| **Interpreter** | `python3 -c`, `node -e` | Runtime startup under sandbox |
|
||||||
|
| **FS-heavy** | file creation, `rg` | Landlock/Seatbelt FS rules |
|
||||||
|
| **Network (local)** | `curl localhost` | Proxy forwarding overhead |
|
||||||
|
| **Real tools** | `git status` | Practical agent workloads |
|
||||||
|
|
||||||
|
## Benchmark Findings (12/28/2025)
|
||||||
|
|
||||||
|
Results from GitHub Actions CI runners (Linux: AMD EPYC 7763, macOS: Apple M1 Virtual).
|
||||||
|
|
||||||
|
### Manager Initialization
|
||||||
|
|
||||||
|
| Platform | `Manager.Initialize()` |
|
||||||
|
|----------|------------------------|
|
||||||
|
| Linux | 101.9 ms |
|
||||||
|
| macOS | 27.5 µs |
|
||||||
|
|
||||||
|
Linux initialization is ~3,700x slower because it must:
|
||||||
|
|
||||||
|
- Start HTTP + SOCKS proxies
|
||||||
|
- Create Unix socket bridges for socat
|
||||||
|
- Set up bwrap namespace configuration
|
||||||
|
|
||||||
|
macOS only generates a Seatbelt profile string (very cheap).
|
||||||
|
|
||||||
|
### Cold Start Overhead (one `fence` invocation per command)
|
||||||
|
|
||||||
|
| Workload | Linux | macOS |
|
||||||
|
|----------|-------|-------|
|
||||||
|
| `true` | 215 ms | 22 ms |
|
||||||
|
| Python | 124 ms | 33 ms |
|
||||||
|
| Git status | 114 ms | 25 ms |
|
||||||
|
|
||||||
|
This is the realistic cost for scripts running `fence -c "command"` repeatedly.
|
||||||
|
|
||||||
|
### Warm Path Overhead (pre-initialized manager)
|
||||||
|
|
||||||
|
| Workload | Linux | macOS |
|
||||||
|
|----------|-------|-------|
|
||||||
|
| `true` | 112 ms | 20 ms |
|
||||||
|
| Python | 124 ms | 33 ms |
|
||||||
|
| Git status | 114 ms | 25 ms |
|
||||||
|
|
||||||
|
Even with proxies already running, Linux bwrap execution adds ~110ms overhead per command.
|
||||||
|
|
||||||
|
### Overhead Factors
|
||||||
|
|
||||||
|
| Workload | Linux Overhead | macOS Overhead |
|
||||||
|
|----------|----------------|----------------|
|
||||||
|
| `true` (cold) | ~360x | ~10x |
|
||||||
|
| `true` (warm) | ~187x | ~8x |
|
||||||
|
| Python (warm) | ~11x | ~2x |
|
||||||
|
| Git status (warm) | ~54x | ~4x |
|
||||||
|
|
||||||
|
Overhead decreases as the actual workload increases (sandbox setup is fixed cost).
|
||||||
|
|
||||||
|
## Impact on Agent Usage
|
||||||
|
|
||||||
|
### Long-Running Agents (`fence claude`, `fence codex`)
|
||||||
|
|
||||||
|
For agents that run as a child process under fence:
|
||||||
|
|
||||||
|
| Phase | Cost |
|
||||||
|
|-------|------|
|
||||||
|
| Startup (once) | Linux: ~215ms, macOS: ~22ms |
|
||||||
|
| Per tool call | Negligible (baseline fork+exec only) |
|
||||||
|
|
||||||
|
Child processes inherit the sandbox - no re-initialization, no WrapCommand overhead. The per-command cost is just normal process spawning:
|
||||||
|
|
||||||
|
| Command | Linux | macOS |
|
||||||
|
|---------|-------|-------|
|
||||||
|
| `true` | 0.6 ms | 2.3 ms |
|
||||||
|
| `git status` | 2.1 ms | 5.9 ms |
|
||||||
|
| Python script | 11 ms | 15 ms |
|
||||||
|
|
||||||
|
**Bottom line**: For `fence <agent>` usage, sandbox overhead is a one-time startup cost. Tool calls inside the agent run at native speed.
|
||||||
|
|
||||||
|
### Per-Command Invocation (`fence -c "command"`)
|
||||||
|
|
||||||
|
For scripts or CI running fence per command:
|
||||||
|
|
||||||
|
| Session | Linux Cost | macOS Cost |
|
||||||
|
|---------|------------|------------|
|
||||||
|
| 1 command | 215 ms | 22 ms |
|
||||||
|
| 10 commands | 2.15 s | 220 ms |
|
||||||
|
| 50 commands | 10.75 s | 1.1 s |
|
||||||
|
|
||||||
|
Consider keeping the manager alive (daemon mode) or batching commands to reduce overhead.
|
||||||
|
|
||||||
|
## Additional Notes
|
||||||
|
|
||||||
|
- `Manager.Initialize()` starts HTTP + SOCKS proxies; on Linux also creates socat bridges
|
||||||
|
- Cold start includes all initialization; hot path is just `WrapCommand + exec`
|
||||||
|
- `-m` (monitor mode) spawns additional monitoring processes, so we'll have to benchmark separately
|
||||||
|
- Keep workloads under the repo - avoid `/tmp` since Linux bwrap does `--tmpfs /tmp`
|
||||||
|
- `debug` mode changes logging, so always benchmark with debug off
|
||||||
369
internal/sandbox/benchmark_test.go
Normal file
369
internal/sandbox/benchmark_test.go
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
package sandbox
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/Use-Tusk/fence/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Baseline Benchmarks (unsandboxed)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// BenchmarkBaseline_True measures the cost of spawning a minimal process.
|
||||||
|
func BenchmarkBaseline_True(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
cmd := exec.Command("true")
|
||||||
|
_ = cmd.Run()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkBaseline_Echo measures echo command without sandbox.
|
||||||
|
func BenchmarkBaseline_Echo(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
cmd := exec.Command("sh", "-c", "echo hello")
|
||||||
|
_ = cmd.Run()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkBaseline_Python measures Python startup without sandbox.
|
||||||
|
func BenchmarkBaseline_Python(b *testing.B) {
|
||||||
|
if _, err := exec.LookPath("python3"); err != nil {
|
||||||
|
b.Skip("python3 not found")
|
||||||
|
}
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
cmd := exec.Command("python3", "-c", "pass")
|
||||||
|
_ = cmd.Run()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkBaseline_Node measures Node.js startup without sandbox.
|
||||||
|
func BenchmarkBaseline_Node(b *testing.B) {
|
||||||
|
if _, err := exec.LookPath("node"); err != nil {
|
||||||
|
b.Skip("node not found")
|
||||||
|
}
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
cmd := exec.Command("node", "-e", "")
|
||||||
|
_ = cmd.Run()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkBaseline_GitStatus measures git status without sandbox.
|
||||||
|
func BenchmarkBaseline_GitStatus(b *testing.B) {
|
||||||
|
if _, err := exec.LookPath("git"); err != nil {
|
||||||
|
b.Skip("git not found")
|
||||||
|
}
|
||||||
|
// Find a git repo to run in
|
||||||
|
repoDir := findGitRepo()
|
||||||
|
if repoDir == "" {
|
||||||
|
b.Skip("no git repo found")
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
cmd := exec.Command("git", "status", "--porcelain")
|
||||||
|
cmd.Dir = repoDir
|
||||||
|
cmd.Stdout = nil // discard
|
||||||
|
_ = cmd.Run()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Component Benchmarks (isolate overhead sources)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// BenchmarkManagerInitialize measures cold initialization cost (proxies + bridges).
|
||||||
|
func BenchmarkManagerInitialize(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
|
||||||
|
workspace := b.TempDir()
|
||||||
|
cfg := benchConfig(workspace)
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("failed to initialize: %v", err)
|
||||||
|
}
|
||||||
|
manager.Cleanup()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkWrapCommand measures the cost of command wrapping (string construction only).
|
||||||
|
func BenchmarkWrapCommand(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
|
||||||
|
workspace := b.TempDir()
|
||||||
|
cfg := benchConfig(workspace)
|
||||||
|
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("failed to initialize: %v", err)
|
||||||
|
}
|
||||||
|
defer manager.Cleanup()
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_, err := manager.WrapCommand("echo hello")
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("wrap failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Cold Sandbox Benchmarks (full init + wrap + exec each iteration)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// BenchmarkColdSandbox_True measures full cold-start sandbox cost.
|
||||||
|
func BenchmarkColdSandbox_True(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
|
||||||
|
workspace := b.TempDir()
|
||||||
|
cfg := benchConfig(workspace)
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("init failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
wrappedCmd, err := manager.WrapCommand("true")
|
||||||
|
if err != nil {
|
||||||
|
manager.Cleanup()
|
||||||
|
b.Fatalf("wrap failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
execBenchCommand(b, wrappedCmd, workspace)
|
||||||
|
manager.Cleanup()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Warm Sandbox Benchmarks (Manager.Initialize once, repeat WrapCommand + exec)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// BenchmarkWarmSandbox_True measures sandbox cost with pre-initialized manager.
|
||||||
|
func BenchmarkWarmSandbox_True(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
|
||||||
|
workspace := b.TempDir()
|
||||||
|
cfg := benchConfig(workspace)
|
||||||
|
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("init failed: %v", err)
|
||||||
|
}
|
||||||
|
defer manager.Cleanup()
|
||||||
|
|
||||||
|
wrappedCmd, err := manager.WrapCommand("true")
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("wrap failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
execBenchCommand(b, wrappedCmd, workspace)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkWarmSandbox_Echo measures echo command with pre-initialized manager.
|
||||||
|
func BenchmarkWarmSandbox_Echo(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
|
||||||
|
workspace := b.TempDir()
|
||||||
|
cfg := benchConfig(workspace)
|
||||||
|
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("init failed: %v", err)
|
||||||
|
}
|
||||||
|
defer manager.Cleanup()
|
||||||
|
|
||||||
|
wrappedCmd, err := manager.WrapCommand("echo hello")
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("wrap failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
execBenchCommand(b, wrappedCmd, workspace)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkWarmSandbox_Python measures Python startup with pre-initialized manager.
|
||||||
|
func BenchmarkWarmSandbox_Python(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
if _, err := exec.LookPath("python3"); err != nil {
|
||||||
|
b.Skip("python3 not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
workspace := b.TempDir()
|
||||||
|
cfg := benchConfig(workspace)
|
||||||
|
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("init failed: %v", err)
|
||||||
|
}
|
||||||
|
defer manager.Cleanup()
|
||||||
|
|
||||||
|
wrappedCmd, err := manager.WrapCommand("python3 -c 'pass'")
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("wrap failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
execBenchCommand(b, wrappedCmd, workspace)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkWarmSandbox_FileWrite measures file write with pre-initialized manager.
|
||||||
|
func BenchmarkWarmSandbox_FileWrite(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
|
||||||
|
workspace := b.TempDir()
|
||||||
|
cfg := benchConfig(workspace)
|
||||||
|
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("init failed: %v", err)
|
||||||
|
}
|
||||||
|
defer manager.Cleanup()
|
||||||
|
|
||||||
|
testFile := filepath.Join(workspace, "bench.txt")
|
||||||
|
wrappedCmd, err := manager.WrapCommand("echo 'benchmark data' > " + testFile)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("wrap failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
execBenchCommand(b, wrappedCmd, workspace)
|
||||||
|
_ = os.Remove(testFile)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkWarmSandbox_GitStatus measures git status with pre-initialized manager.
|
||||||
|
func BenchmarkWarmSandbox_GitStatus(b *testing.B) {
|
||||||
|
skipBenchIfSandboxed(b)
|
||||||
|
if _, err := exec.LookPath("git"); err != nil {
|
||||||
|
b.Skip("git not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
repoDir := findGitRepo()
|
||||||
|
if repoDir == "" {
|
||||||
|
b.Skip("no git repo found")
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := benchConfig(repoDir)
|
||||||
|
|
||||||
|
manager := NewManager(cfg, false, false)
|
||||||
|
if err := manager.Initialize(); err != nil {
|
||||||
|
b.Fatalf("init failed: %v", err)
|
||||||
|
}
|
||||||
|
defer manager.Cleanup()
|
||||||
|
|
||||||
|
wrappedCmd, err := manager.WrapCommand("git status --porcelain")
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("wrap failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
execBenchCommand(b, wrappedCmd, repoDir)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Comparison Sub-benchmarks
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// BenchmarkOverhead runs baseline vs sandbox comparisons for easy diffing.
|
||||||
|
func BenchmarkOverhead(b *testing.B) {
|
||||||
|
b.Run("Baseline/True", BenchmarkBaseline_True)
|
||||||
|
b.Run("Baseline/Echo", BenchmarkBaseline_Echo)
|
||||||
|
b.Run("Baseline/Python", BenchmarkBaseline_Python)
|
||||||
|
|
||||||
|
b.Run("Warm/True", BenchmarkWarmSandbox_True)
|
||||||
|
b.Run("Warm/Echo", BenchmarkWarmSandbox_Echo)
|
||||||
|
b.Run("Warm/Python", BenchmarkWarmSandbox_Python)
|
||||||
|
|
||||||
|
b.Run("Cold/True", BenchmarkColdSandbox_True)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Helpers
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// skipBenchIfSandboxed skips the benchmark when it is itself running inside
// a Fence sandbox (detected via the FENCE_SANDBOX environment variable),
// since nested sandboxing would distort the timings.
func skipBenchIfSandboxed(b *testing.B) {
	b.Helper()
	if os.Getenv("FENCE_SANDBOX") != "1" {
		return
	}
	b.Skip("already running inside Fence sandbox")
}
|
||||||
|
|
||||||
|
func benchConfig(workspace string) *config.Config {
|
||||||
|
return &config.Config{
|
||||||
|
Network: config.NetworkConfig{
|
||||||
|
AllowedDomains: []string{},
|
||||||
|
},
|
||||||
|
Filesystem: config.FilesystemConfig{
|
||||||
|
AllowWrite: []string{workspace},
|
||||||
|
},
|
||||||
|
Command: config.CommandConfig{
|
||||||
|
UseDefaults: boolPtr(false),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// execBenchCommand runs command through the platform shell in workDir,
// bounded by a 30-second timeout. Command failures are deliberately ignored:
// the benchmarks measure timing, not correctness (e.g. git status may fail
// when not run inside a repository).
func execBenchCommand(b *testing.B, command string, workDir string) {
	b.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// darwin gets /bin/bash, every other platform /bin/sh — keep this in
	// sync with how the wrapped commands are invoked elsewhere.
	shell := "/bin/sh"
	if runtime.GOOS == "darwin" {
		shell = "/bin/bash"
	}

	var stdout, stderr bytes.Buffer
	cmd := exec.CommandContext(ctx, shell, "-c", command)
	cmd.Dir = workDir
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	// Intentionally discard the error; see the function comment.
	_ = cmd.Run()
}
|
||||||
|
|
||||||
|
// findGitRepo walks upward from the current working directory and returns
// the first ancestor that contains a .git entry, or "" if none is found
// (or the working directory cannot be determined).
func findGitRepo() string {
	dir, err := os.Getwd()
	if err != nil {
		return ""
	}
	for {
		if _, statErr := os.Stat(filepath.Join(dir, ".git")); statErr == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			// Reached the filesystem root without finding a repository.
			return ""
		}
		dir = parent
	}
}
|
||||||
396
scripts/benchmark.sh
Executable file
396
scripts/benchmark.sh
Executable file
@@ -0,0 +1,396 @@
|
|||||||
|
#!/bin/bash
# benchmark.sh - Comprehensive sandbox benchmarking
#
# This script compares sandbox overhead between:
# - Unsandboxed (baseline)
# - Sandboxed (default mode)
# - Sandboxed with monitor (-m)
#
# Usage:
#   ./scripts/benchmark.sh [options]
#
# Options:
#   -b, --binary PATH   Path to fence binary (default: ./fence or builds one)
#   -o, --output DIR    Output directory for results (default: ./benchmarks)
#   -n, --runs N        Minimum runs per benchmark (default: 30)
#   -q, --quick         Quick mode: fewer runs, skip slow benchmarks
#   --network           Include network benchmarks (requires local server)
#   -h, --help          Show this help
#
# Requirements:
#   - hyperfine (brew install hyperfine / apt install hyperfine)
#   - go (for building fence if needed)
#   - Optional: python3 (for local-server.py network benchmarks)

set -euo pipefail

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Defaults
FENCE_BIN=""
OUTPUT_DIR="./benchmarks"
MIN_RUNS=30
WARMUP=3
QUICK=false
NETWORK=false

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        -b|--binary)
            FENCE_BIN="$2"
            shift 2
            ;;
        -o|--output)
            OUTPUT_DIR="$2"
            shift 2
            ;;
        -n|--runs)
            MIN_RUNS="$2"
            shift 2
            ;;
        -q|--quick)
            QUICK=true
            MIN_RUNS=10
            WARMUP=1
            shift
            ;;
        --network)
            NETWORK=true
            shift
            ;;
        -h|--help)
            head -30 "$0" | tail -28
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

# Find or build fence binary
if [[ -z "$FENCE_BIN" ]]; then
    if [[ -x "./fence" ]]; then
        FENCE_BIN="./fence"
    elif [[ -x "./dist/fence" ]]; then
        FENCE_BIN="./dist/fence"
    else
        echo -e "${BLUE}Building fence...${NC}"
        go build -o ./fence ./cmd/fence
        FENCE_BIN="./fence"
    fi
fi

if [[ ! -x "$FENCE_BIN" ]]; then
    echo -e "${RED}Error: fence binary not found at $FENCE_BIN${NC}"
    exit 1
fi

# Check for hyperfine
if ! command -v hyperfine &> /dev/null; then
    echo -e "${RED}Error: hyperfine not found. Install with:${NC}"
    echo "  brew install hyperfine   # macOS"
    echo "  apt install hyperfine    # Linux"
    exit 1
fi

# Create output directory
mkdir -p "$OUTPUT_DIR"

# Create workspace in current directory (not /tmp, which bwrap overlays).
# Fix: use the portable template form of mktemp; BSD/macOS mktemp has no -p.
WORKSPACE=$(mktemp -d "$PWD/fence-bench.XXXXXX")
trap 'rm -rf "$WORKSPACE"' EXIT

# Create settings file for sandbox
SETTINGS_FILE="$WORKSPACE/fence.json"
cat > "$SETTINGS_FILE" << EOF
{
  "filesystem": {
    "allowWrite": ["$WORKSPACE", "."]
  }
}
EOF

# Platform info
OS=$(uname -s)
ARCH=$(uname -m)
KERNEL=$(uname -r)
DATE=$(date +%Y-%m-%d)
TIMESTAMP=$(date +%Y%m%d-%H%M%S)

# Results file.
# Fix: lowercase via tr instead of ${OS,,} — that expansion is bash 4+
# only and aborts the script (set -e) under macOS's /bin/bash 3.2.
OS_LOWER=$(printf '%s' "$OS" | tr '[:upper:]' '[:lower:]')
RESULTS_JSON="$OUTPUT_DIR/${OS_LOWER}-${ARCH}-${TIMESTAMP}.json"
RESULTS_MD="$OUTPUT_DIR/${OS_LOWER}-${ARCH}-${TIMESTAMP}.md"

echo ""
echo -e "${BLUE}==========================================${NC}"
echo -e "${BLUE}Fence Sandbox Benchmarks${NC}"
echo -e "${BLUE}==========================================${NC}"
echo ""
echo "Platform: $OS $ARCH"
echo "Kernel:   $KERNEL"
echo "Date:     $DATE"
echo "Fence:    $FENCE_BIN"
echo "Output:   $OUTPUT_DIR"
echo "Min runs: $MIN_RUNS"
echo ""

# Helper to run hyperfine with consistent options
run_bench() {
    local name="$1"
    shift
    local json_file="$WORKSPACE/${name}.json"

    echo -e "${GREEN}Benchmarking: $name${NC}"

    hyperfine \
        --warmup "$WARMUP" \
        --min-runs "$MIN_RUNS" \
        --export-json "$json_file" \
        --style basic \
        "$@"

    echo ""
}

# ============================================================================
# Spawn-only benchmarks (minimal process overhead)
# ============================================================================

echo -e "${YELLOW}=== Spawn-Only Benchmarks ===${NC}"
echo ""

run_bench "true" \
    --command-name "unsandboxed" "true" \
    --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -- true"

run_bench "echo" \
    --command-name "unsandboxed" "echo hello >/dev/null" \
    --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c 'echo hello' >/dev/null"

# ============================================================================
# Tool compatibility benchmarks
# ============================================================================

echo -e "${YELLOW}=== Tool Compatibility Benchmarks ===${NC}"
echo ""

if command -v python3 &> /dev/null; then
    run_bench "python" \
        --command-name "unsandboxed" "python3 -c 'pass'" \
        --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"python3 -c 'pass'\""
else
    echo -e "${YELLOW}Skipping python3 (not found)${NC}"
fi

if command -v node &> /dev/null && [[ "$QUICK" == "false" ]]; then
    run_bench "node" \
        --command-name "unsandboxed" "node -e ''" \
        --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"node -e ''\""
else
    echo -e "${YELLOW}Skipping node (not found or quick mode)${NC}"
fi

# ============================================================================
# Real workload benchmarks
# ============================================================================

echo -e "${YELLOW}=== Real Workload Benchmarks ===${NC}"
echo ""

if command -v git &> /dev/null && [[ -d .git ]]; then
    run_bench "git-status" \
        --command-name "unsandboxed" "git status --porcelain >/dev/null" \
        --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -- git status --porcelain >/dev/null"
else
    echo -e "${YELLOW}Skipping git status (not in a git repo)${NC}"
fi

if command -v rg &> /dev/null && [[ "$QUICK" == "false" ]]; then
    run_bench "ripgrep" \
        --command-name "unsandboxed" "rg -n 'package' -S . >/dev/null 2>&1 || true" \
        --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"rg -n 'package' -S . >/dev/null 2>&1\" || true"
else
    echo -e "${YELLOW}Skipping ripgrep (not found or quick mode)${NC}"
fi

# ============================================================================
# File I/O benchmarks
# ============================================================================

echo -e "${YELLOW}=== File I/O Benchmarks ===${NC}"
echo ""

run_bench "file-write" \
    --command-name "unsandboxed" "echo 'test' > $WORKSPACE/test.txt" \
    --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c \"echo 'test' > $WORKSPACE/test.txt\""

run_bench "file-read" \
    --command-name "unsandboxed" "cat $WORKSPACE/test.txt >/dev/null" \
    --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -c 'cat $WORKSPACE/test.txt' >/dev/null"

# ============================================================================
# Monitor mode benchmarks (optional)
# ============================================================================

if [[ "$QUICK" == "false" ]]; then
    echo -e "${YELLOW}=== Monitor Mode Benchmarks ===${NC}"
    echo ""

    run_bench "monitor-true" \
        --command-name "sandboxed" "$FENCE_BIN -s $SETTINGS_FILE -- true" \
        --command-name "sandboxed+monitor" "$FENCE_BIN -m -s $SETTINGS_FILE -- true"
fi

# ============================================================================
# Network benchmarks (optional, requires local server)
# ============================================================================

if [[ "$NETWORK" == "true" ]]; then
    echo -e "${YELLOW}=== Network Benchmarks ===${NC}"
    echo ""

    # Start local server
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    if [[ -f "$SCRIPT_DIR/local-server.py" ]]; then
        python3 "$SCRIPT_DIR/local-server.py" &
        SERVER_PID=$!
        trap 'kill $SERVER_PID 2>/dev/null || true; rm -rf "$WORKSPACE"' EXIT
        sleep 1

        # Create network settings
        NET_SETTINGS="$WORKSPACE/fence-net.json"
        cat > "$NET_SETTINGS" << EOF
{
  "network": {
    "allowedDomains": ["127.0.0.1", "localhost"]
  },
  "filesystem": {
    "allowWrite": ["$WORKSPACE"]
  }
}
EOF

        if command -v curl &> /dev/null; then
            run_bench "network-curl" \
                --command-name "unsandboxed" "curl -s http://127.0.0.1:8765/ >/dev/null" \
                --command-name "sandboxed" "$FENCE_BIN -s $NET_SETTINGS -c 'curl -s http://127.0.0.1:8765/' >/dev/null"
        fi

        kill $SERVER_PID 2>/dev/null || true
    else
        echo -e "${YELLOW}Skipping network benchmarks (local-server.py not found)${NC}"
    fi
fi

# ============================================================================
# Combine results and generate report
# ============================================================================

echo -e "${YELLOW}=== Generating Report ===${NC}"
echo ""

# Combine all JSON results
echo "{" > "$RESULTS_JSON"
echo "  \"platform\": \"$OS\"," >> "$RESULTS_JSON"
echo "  \"arch\": \"$ARCH\"," >> "$RESULTS_JSON"
echo "  \"kernel\": \"$KERNEL\"," >> "$RESULTS_JSON"
echo "  \"date\": \"$DATE\"," >> "$RESULTS_JSON"
echo "  \"fence_version\": \"$($FENCE_BIN --version 2>/dev/null || echo unknown)\"," >> "$RESULTS_JSON"
echo "  \"benchmarks\": {" >> "$RESULTS_JSON"

first=true
for json_file in "$WORKSPACE"/*.json; do
    [[ -f "$json_file" ]] || continue
    name=$(basename "$json_file" .json)
    # Fix: the sandbox settings files also live in $WORKSPACE as *.json;
    # skip them so only hyperfine exports end up in the combined results.
    case "$name" in fence|fence-net) continue ;; esac
    if [[ "$first" == "true" ]]; then
        first=false
    else
        echo "," >> "$RESULTS_JSON"
    fi
    echo "    \"$name\": $(cat "$json_file")" >> "$RESULTS_JSON"
done

echo "" >> "$RESULTS_JSON"
echo "  }" >> "$RESULTS_JSON"
echo "}" >> "$RESULTS_JSON"

# Generate Markdown report
cat > "$RESULTS_MD" << EOF
# Fence Benchmark Results

**Platform:** $OS $ARCH
**Kernel:** $KERNEL
**Date:** $DATE
**Fence:** $($FENCE_BIN --version 2>/dev/null || echo unknown)

## Summary

| Benchmark | Unsandboxed | Sandboxed | Overhead |
|-----------|-------------|-----------|----------|
EOF

# Parse results and add to markdown (failures here must not stop the script)
if command -v jq &> /dev/null; then
    for json_file in "$WORKSPACE"/*.json; do
        [[ -f "$json_file" ]] || continue
        name=$(basename "$json_file" .json)
        # Skip the sandbox settings files (see combine loop above).
        case "$name" in fence|fence-net) continue ;; esac

        # Extract mean times, defaulting to empty if not found
        unsandboxed=$(jq -r '.results[] | select(.command == "unsandboxed") | .mean // empty' "$json_file" 2>/dev/null) || true
        sandboxed=$(jq -r '.results[] | select(.command == "sandboxed") | .mean // empty' "$json_file" 2>/dev/null) || true

        # Skip if values are missing or null
        if [[ -z "$unsandboxed" || -z "$sandboxed" || "$unsandboxed" == "null" || "$sandboxed" == "null" ]]; then
            continue
        fi

        # Calculate values, catching any bc errors
        overhead=$(echo "scale=1; $sandboxed / $unsandboxed" | bc 2>/dev/null) || continue
        unsandboxed_ms=$(echo "scale=2; $unsandboxed * 1000" | bc 2>/dev/null) || continue
        sandboxed_ms=$(echo "scale=2; $sandboxed * 1000" | bc 2>/dev/null) || continue

        if [[ -n "$overhead" && -n "$unsandboxed_ms" && -n "$sandboxed_ms" ]]; then
            echo "| $name | ${unsandboxed_ms}ms | ${sandboxed_ms}ms | ${overhead}x |" >> "$RESULTS_MD"
        fi
    done
fi

echo ""
echo -e "${GREEN}Results saved to:${NC}"
echo "  JSON:     $RESULTS_JSON"
echo "  Markdown: $RESULTS_MD"
echo ""

# Print quick summary (errors in this section should not fail the script)
if command -v jq &> /dev/null; then
    echo -e "${BLUE}Quick Summary (overhead factors):${NC}"
    for json_file in "$WORKSPACE"/*.json; do
        (
            [[ -f "$json_file" ]] || exit 0
            name=$(basename "$json_file" .json)
            # Skip the sandbox settings files (see combine loop above).
            case "$name" in fence|fence-net) exit 0 ;; esac

            # Extract values, defaulting to empty if not found
            unsandboxed=$(jq -r '.results[] | select(.command == "unsandboxed") | .mean // empty' "$json_file" 2>/dev/null) || exit 0
            sandboxed=$(jq -r '.results[] | select(.command == "sandboxed") | .mean // empty' "$json_file" 2>/dev/null) || exit 0

            # Skip if either value is missing or null
            [[ -z "$unsandboxed" || -z "$sandboxed" || "$unsandboxed" == "null" || "$sandboxed" == "null" ]] && exit 0

            # Calculate overhead, catching any bc errors
            overhead=$(echo "scale=1; $sandboxed / $unsandboxed" | bc 2>/dev/null) || exit 0

            [[ -n "$overhead" ]] && printf "  %-15s %sx\n" "$name:" "$overhead"
        ) || true  # Ignore errors from subshell
    done
fi

echo ""
echo -e "${GREEN}Done!${NC}"
|
||||||
63
scripts/local-server.py
Executable file
63
scripts/local-server.py
Executable file
@@ -0,0 +1,63 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple HTTP server for network benchmarking.
|
||||||
|
|
||||||
|
Runs on port 8765 and responds to all requests with a minimal JSON response.
|
||||||
|
Used by benchmark.sh to measure proxy overhead without internet variability.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/local-server.py
|
||||||
|
# Server runs on http://127.0.0.1:8765/
|
||||||
|
|
||||||
|
# In another terminal:
|
||||||
|
curl http://127.0.0.1:8765/
|
||||||
|
"""
|
||||||
|
|
||||||
|
import http.server
|
||||||
|
import json
|
||||||
|
import socketserver
|
||||||
|
import sys
|
||||||
|
|
||||||
|
PORT = 8765
|
||||||
|
|
||||||
|
|
||||||
|
class BenchmarkHandler(http.server.BaseHTTPRequestHandler):
    """Minimal HTTP handler for benchmarking."""

    def _send_json(self, payload: dict) -> None:
        """Write *payload* as a 200 JSON response."""
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps(payload).encode())

    def do_GET(self):
        """Handle GET requests with minimal response."""
        self._send_json({"status": "ok", "path": self.path})

    def do_POST(self):
        """Handle POST requests with minimal response."""
        body_length = int(self.headers.get("Content-Length", 0))
        _ = self.rfile.read(body_length)  # read and discard the body
        self._send_json({"status": "ok", "method": "POST"})

    def log_message(self, format, *args):
        """Suppress request logging for cleaner benchmark output."""
|
||||||
|
|
||||||
|
|
||||||
|
def main(port: int = PORT) -> None:
    """Serve benchmark requests on 127.0.0.1 until interrupted.

    Args:
        port: TCP port to bind (generalized from the hard-coded module
            constant; defaults to ``PORT`` so existing callers and the
            script entry point are unchanged).
    """
    # Allow quick restarts during repeated benchmark runs (avoids
    # "address already in use" while the old socket is in TIME_WAIT).
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("127.0.0.1", port), BenchmarkHandler) as httpd:
        # Status goes to stderr so stdout stays clean for tooling.
        print(f"Benchmark server running on http://127.0.0.1:{port}/", file=sys.stderr)
        print("Press Ctrl+C to stop", file=sys.stderr)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nShutting down...", file=sys.stderr)
            httpd.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
# Entry point when executed directly (e.g. `python3 scripts/local-server.py`).
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user