This repository has been archived on 2026-03-13. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
greywall/internal/daemon/tun.go
Mathieu Virbel cfe29d2c0b feat: switch macOS daemon from user-based to group-based pf routing
Sandboxed commands previously ran as `sudo -u _greywall`, breaking user
identity (home dir, SSH keys, git config). Now uses `sudo -u #<uid> -g
_greywall` so the process keeps the real user's identity while pf matches
on EGID for traffic routing.

Key changes:
- pf rules use `group <GID>` instead of `user _greywall`
- GID resolved dynamically at daemon startup (not hardcoded, since macOS
  system groups like com.apple.access_ssh may claim preferred IDs)
- Sudoers rule installed at /etc/sudoers.d/greywall (validated with visudo)
- Invoking user added to _greywall group via dscl (not dseditgroup, which
  clobbers group attributes)
- tun2socks device discovery scans both stdout and stderr (fixes 10s
  timeout caused by STACK message going to stdout)
- Always-on daemon logging for session create/destroy events
2026-02-26 09:56:15 -06:00

571 lines
18 KiB
Go

//go:build darwin
package daemon
import (
	"bufio"
	"fmt"
	"io"
	"os"
	"os/exec"
	"regexp"
	"strings"
	"sync"
	"syscall"
	"time"
)
// Network addresses and pf identifiers used by the tunnel manager.
const (
// tunIP is assigned to both ends of the utun point-to-point link and is
// the next hop named in the pf route-to rule for sandboxed TCP traffic.
tunIP = "198.18.0.1"
// dnsRelayIP is the lo0 alias address that sandboxed DNS is redirected to.
dnsRelayIP = "127.0.0.2"
dnsRelayPort = "15353" // high port to avoid conflicts with system DNS (mDNSResponder, Docker/Lima)
// pfAnchorName is the pf anchor that holds the greywall rules.
pfAnchorName = "co.greyhaven.greywall"
// tun2socksStopGracePeriod is the time to wait for tun2socks to exit
// after the termination signal before sending SIGKILL.
tun2socksStopGracePeriod = 5 * time.Second
)
// utunDevicePattern matches a "utunN" device name anywhere in a line of text,
// such as a tun2socks log line containing "tun://utun7".
var utunDevicePattern = regexp.MustCompile(`(utun\d+)`)
// TunManager handles utun device creation via tun2socks, tun2socks process
// lifecycle, and pf (packet filter) rule management for routing sandboxed
// traffic through the tunnel on macOS.
//
// The exported methods (Start, Stop, TunDevice, LoadPFRules, UnloadPFRules)
// acquire mu before reading or modifying the fields.
type TunManager struct {
tunDevice string // discovered utun device name, e.g., "utun7"; empty until discovered
tun2socksPath string // path to tun2socks binary
tun2socksCmd *exec.Cmd // running tun2socks process; nil when none is tracked
proxyURL string // SOCKS5 proxy URL for tun2socks
pfAnchor string // pf anchor name
debug bool // when true, logDebug writes to stderr
done chan struct{} // closed by Stop so the monitor treats the tun2socks exit as expected
mu sync.Mutex // held by the exported methods while they use the fields above
}
// NewTunManager returns a TunManager bound to the given tun2socks binary and
// SOCKS5 proxy URL. The pf anchor is fixed to pfAnchorName
// ("co.greyhaven.greywall"). Nothing is launched here; the returned manager
// is idle until Start is called.
func NewTunManager(tun2socksPath string, proxyURL string, debug bool) *TunManager {
	mgr := new(TunManager)
	mgr.tun2socksPath = tun2socksPath
	mgr.proxyURL = proxyURL
	mgr.pfAnchor = pfAnchorName
	mgr.debug = debug
	mgr.done = make(chan struct{})
	return mgr
}
// Start brings up the tunnel stack:
//  1. Start tun2socks with "-device utun" and discover the utunN device it
//     created.
//  2. Configure the utun interface with a point-to-point IP.
//  3. Add a loopback alias for the DNS relay.
//
// pf rules are not loaded here; call LoadPFRules explicitly once Start has
// succeeded.
//
// It returns an error if the manager is already started or if any step
// fails. On failure everything started so far is rolled back: tun2socks is
// stopped and the recorded device name is cleared, so the manager can be
// started again and LoadPFRules keeps refusing to run.
func (t *TunManager) Start() error {
	t.mu.Lock()
	defer t.mu.Unlock()

	if t.tun2socksCmd != nil {
		return fmt.Errorf("tun manager already started")
	}

	// rollback returns the manager to its idle state after a partial start.
	// stopTun2Socks is a no-op when no process is tracked.
	rollback := func() {
		_ = t.stopTun2Socks() // best effort; the step error is what the caller needs
		t.tunDevice = ""
	}

	// Step 1: Start tun2socks. It creates the utun device automatically.
	if err := t.startTun2Socks(); err != nil {
		rollback()
		return fmt.Errorf("failed to start tun2socks: %w", err)
	}

	// Step 2: Configure the utun interface with a point-to-point IP.
	if err := t.configureInterface(); err != nil {
		device := t.tunDevice // capture before rollback clears it
		rollback()
		return fmt.Errorf("failed to configure interface %s: %w", device, err)
	}

	// Step 3: Add a loopback alias for the DNS relay address.
	if err := t.addLoopbackAlias(); err != nil {
		rollback()
		return fmt.Errorf("failed to add loopback alias: %w", err)
	}

	t.logDebug("Tunnel stack started: device=%s proxy=%s", t.tunDevice, t.proxyURL)
	return nil
}
// Stop tears down the tunnel stack in reverse order:
//  1. Unload pf rules
//  2. Stop tun2socks (the utun device is destroyed automatically when
//     tun2socks exits)
//  3. Remove loopback alias
//
// Every step is attempted even if an earlier one fails; the failures are
// collected and returned together in a single error. After Stop the manager
// no longer reports a tunnel device, so TunDevice returns "" and LoadPFRules
// refuses to run until Start is called again.
func (t *TunManager) Stop() error {
	t.mu.Lock()
	defer t.mu.Unlock()

	var errs []string

	// Signal the monitoring goroutine that the upcoming exit is expected.
	// The select guards against closing an already-closed channel when Stop
	// is called more than once.
	select {
	case <-t.done:
		// Already closed.
	default:
		close(t.done)
	}

	// Step 1: Unload pf rules (best effort).
	if err := t.unloadPFRulesLocked(); err != nil {
		errs = append(errs, fmt.Sprintf("unload pf rules: %v", err))
	}

	// Step 2: Stop tun2socks.
	if err := t.stopTun2Socks(); err != nil {
		errs = append(errs, fmt.Sprintf("stop tun2socks: %v", err))
	}
	// The utun device goes away with tun2socks; forget its name so it is
	// not handed out or used for pf rules after shutdown.
	t.tunDevice = ""

	// Step 3: Remove loopback alias (best effort).
	if err := t.removeLoopbackAlias(); err != nil {
		errs = append(errs, fmt.Sprintf("remove loopback alias: %v", err))
	}

	if len(errs) > 0 {
		return fmt.Errorf("stop errors: %s", strings.Join(errs, "; "))
	}

	t.logDebug("Tunnel stack stopped")
	return nil
}
// TunDevice reports the utun device name currently recorded by the manager
// (e.g., "utun7"). The result is the empty string when no device has been
// recorded, which is the case before the tunnel has been started.
func (t *TunManager) TunDevice() string {
	t.mu.Lock()
	defer t.mu.Unlock()

	name := t.tunDevice
	return name
}
// LoadPFRules loads pf anchor rules that route traffic from the given sandbox
// group through the utun device. The rules:
//   - Route all TCP from the sandbox group through the utun interface
//   - Redirect DNS (UDP port 53) from the sandbox group to the local DNS relay
//
// sandboxGroup is inserted verbatim after the pf "group" keyword, so it must
// be non-blank and must not contain line breaks.
//
// It returns an error if sandboxGroup is invalid, if the tunnel has not been
// started, if the anchor cannot be referenced from pf.conf, or if pfctl
// rejects the rules. A failure to enable pf afterwards is only logged.
//
// This requires root privileges and an active pf firewall.
func (t *TunManager) LoadPFRules(sandboxGroup string) error {
	// A blank group yields a syntactically broken rule and a line break
	// would split the rule; fail with a clear message instead of a pfctl
	// syntax error.
	if strings.TrimSpace(sandboxGroup) == "" || strings.ContainsAny(sandboxGroup, "\r\n") {
		return fmt.Errorf("invalid sandbox group %q", sandboxGroup)
	}

	t.mu.Lock()
	defer t.mu.Unlock()

	if t.tunDevice == "" {
		return fmt.Errorf("tunnel not started, no device available")
	}

	// Ensure the anchor reference exists in the main pf.conf.
	if err := t.ensureAnchorInPFConf(); err != nil {
		return fmt.Errorf("failed to ensure pf anchor: %w", err)
	}

	// Build the anchor rules. pf requires strict ordering:
	// translation (rdr) before filtering (pass).
	// Note: macOS pf does not support "group" in rdr rules, so DNS
	// redirection uses a two-step approach:
	//  1. rdr on lo0 — redirects DNS arriving on loopback to our relay
	//  2. pass out route-to lo0 — sends sandbox group's DNS to loopback
	//  3. pass out route-to utun — sends sandbox group's TCP through tunnel
	rules := fmt.Sprintf(
		"rdr on lo0 proto udp from any to any port 53 -> %s port %s\n"+
			"pass out on !lo0 route-to (lo0 127.0.0.1) proto udp from any to any port 53 group %s\n"+
			"pass out route-to (%s %s) proto tcp from any to any group %s\n",
		dnsRelayIP, dnsRelayPort,
		sandboxGroup,
		t.tunDevice, tunIP, sandboxGroup,
	)
	t.logDebug("Loading pf rules into anchor %s:\n%s", t.pfAnchor, rules)

	// Load the rules into the anchor. pfctl's stderr is still forwarded to
	// the daemon's stderr, and additionally captured so that the returned
	// error carries pfctl's own diagnostics.
	var stderr strings.Builder
	//nolint:gosec // arguments are controlled internal constants, not user input
	cmd := exec.Command("pfctl", "-a", t.pfAnchor, "-f", "-")
	cmd.Stdin = strings.NewReader(rules)
	cmd.Stderr = io.MultiWriter(os.Stderr, &stderr)
	if output, err := cmd.Output(); err != nil {
		combined := strings.TrimSpace(string(output) + stderr.String())
		return fmt.Errorf("pfctl load rules failed: %w (output: %s)", err, combined)
	}

	// Enable pf if it is not already enabled.
	if err := t.enablePF(); err != nil {
		// Non-fatal: pf may already be enabled.
		t.logDebug("Warning: failed to enable pf (may already be active): %v", err)
	}

	t.logDebug("pf rules loaded for group %s on %s", sandboxGroup, t.tunDevice)
	return nil
}
// UnloadPFRules flushes the rules held in the manager's pf anchor. It takes
// the manager lock and delegates to unloadPFRulesLocked.
func (t *TunManager) UnloadPFRules() error {
	t.mu.Lock()
	defer t.mu.Unlock()

	err := t.unloadPFRulesLocked()
	return err
}
// startTun2Socks launches the tun2socks process with "-device utun" so that it
// auto-creates a utun device, then discovers the device name by scanning both
// the stdout and stderr of tun2socks for a utunN identifier.
//
// On success t.tun2socksCmd and t.tunDevice are set and a background monitor
// is started. On failure the process (if it was started) is killed and
// reaped and t.tun2socksCmd is left nil, so a later start attempt is not
// rejected as "already started".
//
// Discovery outcomes:
//   - a utunN name appears in the output: that device is used;
//   - both output streams end without a name: tun2socks is treated as having
//     exited early and an error is returned;
//   - nothing is reported within 10 seconds: the ifconfig fallback is tried.
func (t *TunManager) startTun2Socks() error {
	//nolint:gosec // tun2socksPath is an internal path, not user input
	cmd := exec.Command(t.tun2socksPath, "-device", "utun", "-proxy", t.proxyURL)

	// Capture both stdout and stderr to discover the device name.
	// tun2socks may log the device name on either stream depending on version.
	stderrPipe, err := cmd.StderrPipe()
	if err != nil {
		return fmt.Errorf("failed to create stderr pipe: %w", err)
	}
	stdoutPipe, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("failed to create stdout pipe: %w", err)
	}

	if err := cmd.Start(); err != nil {
		return fmt.Errorf("failed to start tun2socks: %w", err)
	}
	t.tun2socksCmd = cmd

	// abort kills and reaps the process and forgets it. Wait also closes our
	// ends of the pipes, which makes the scanner goroutines below finish.
	abort := func() {
		_ = cmd.Process.Kill() // may fail if the process already exited
		_ = cmd.Wait()         // reap; the exit status is irrelevant here
		t.tun2socksCmd = nil
	}

	// tun2socks logs the device name shortly after startup
	// (e.g., "level=INFO msg=[STACK] tun://utun7 <-> ...").
	deviceCh := make(chan string, 2) // buffered for both goroutines
	stderrLines := make(chan string, 100)

	var scanners sync.WaitGroup

	// scanPipe echoes every line of a pipe to stderr, reports the first utun
	// device name it sees, and forwards lines to stderrLines. Both sends are
	// non-blocking so a slow or absent consumer never stalls tun2socks.
	scanPipe := func(pipe io.Reader, label string) {
		defer scanners.Done()
		scanner := bufio.NewScanner(pipe)
		for scanner.Scan() {
			line := scanner.Text()
			fmt.Fprintf(os.Stderr, "[greywall:tun] tun2socks(%s): %s\n", label, line) //nolint:gosec // logging tun2socks output
			if match := utunDevicePattern.FindString(line); match != "" {
				select {
				case deviceCh <- match:
				default:
					// Already found by the other pipe.
				}
			}
			select {
			case stderrLines <- line:
			default:
			}
		}
		if err := scanner.Err(); err != nil {
			// The scanner gave up (e.g. an over-long line). Keep draining
			// so tun2socks cannot block on a full pipe.
			t.logDebug("tun2socks(%s) output scan stopped: %v", label, err)
			_, _ = io.Copy(io.Discard, pipe)
		}
	}
	scanners.Add(2)
	go scanPipe(stderrPipe, "stderr")
	go scanPipe(stdoutPipe, "stdout")

	// Once both streams are exhausted nothing more will ever be sent, so
	// close the channels: the discovery select below can then tell "output
	// ended" from "still waiting", and consumers ranging over stderrLines
	// terminate instead of leaking.
	go func() {
		scanners.Wait()
		close(deviceCh)
		close(stderrLines)
	}()

	// Wait for the device name with a timeout.
	select {
	case device, ok := <-deviceCh:
		if !ok {
			// A buffered name would have been received before the close was
			// observed, so this means the output ended with no device name.
			abort()
			return fmt.Errorf("tun2socks exited before reporting a utun device")
		}
		t.tunDevice = device
	case <-time.After(10 * time.Second):
		// Timeout: try ifconfig fallback.
		t.logDebug("Timeout waiting for tun2socks device name, trying ifconfig")
		device, err := t.discoverUtunFromIfconfig()
		if err != nil {
			abort()
			return fmt.Errorf("tun2socks did not report device name within timeout: %w", err)
		}
		t.tunDevice = device
	}

	t.logDebug("tun2socks started (pid=%d, device=%s)", cmd.Process.Pid, t.tunDevice)

	// Monitor tun2socks in the background.
	go t.monitorTun2Socks(stderrLines)
	return nil
}
// discoverUtunFromIfconfig is the fallback used when the device name could
// not be parsed from tun2socks output. It runs ifconfig and returns the name
// of the last interface header line of the form "utunN:" in that output.
// An error is returned if ifconfig fails or lists no utun interface.
func (t *TunManager) discoverUtunFromIfconfig() (string, error) {
	raw, err := exec.Command("ifconfig").Output()
	if err != nil {
		return "", fmt.Errorf("ifconfig failed: %w", err)
	}

	header := regexp.MustCompile(`^(utun\d+):`)
	rows := strings.Split(string(raw), "\n")

	// Walk the output backwards: the first header found this way is the
	// last utun interface that ifconfig listed.
	for i := len(rows) - 1; i >= 0; i-- {
		if groups := header.FindStringSubmatch(rows[i]); groups != nil {
			return groups[1], nil
		}
	}
	return "", fmt.Errorf("no utun device found in ifconfig output")
}
// monitorTun2Socks waits for the tun2socks process to exit and reports the
// exit on stderr unless Stop has already signalled (by closing t.done) that
// the shutdown is expected. Lines arriving on stderrLines are discarded.
//
// It is meant to run in its own goroutine and acquires t.mu briefly, so it
// must not be called synchronously by a caller that holds t.mu.
func (t *TunManager) monitorTun2Socks(stderrLines <-chan string) {
	// Take one snapshot of the command under the lock. stopTun2Socks sets
	// t.tun2socksCmd to nil; re-reading the field between the nil check and
	// Wait could dereference nil.
	t.mu.Lock()
	cmd := t.tun2socksCmd
	t.mu.Unlock()

	if cmd == nil || cmd.Process == nil {
		return
	}

	// Drain any remaining stderr lines.
	go func() {
		for range stderrLines {
			// Already logged in the scanner goroutine.
		}
	}()

	err := cmd.Wait()

	select {
	case <-t.done:
		// Expected shutdown.
		t.logDebug("tun2socks exited (expected shutdown)")
	default:
		// Unexpected exit.
		fmt.Fprintf(os.Stderr, "[greywall:tun] ERROR: tun2socks exited unexpectedly: %v\n", err)
	}
}
// stopTun2Socks sends SIGTERM to the tun2socks process and waits for it to
// exit. If it does not exit within tun2socksStopGracePeriod, SIGKILL is sent
// and the exit is awaited for one more grace period.
//
// It is a no-op when no process is tracked. Once a shutdown has been
// attempted t.tun2socksCmd is reset to nil. The function always returns nil;
// problems are reported through debug logging only. Callers must hold t.mu.
func (t *TunManager) stopTun2Socks() error {
	cmd := t.tun2socksCmd
	if cmd == nil || cmd.Process == nil {
		return nil
	}
	// Whatever happens below, the manager stops tracking this process.
	defer func() { t.tun2socksCmd = nil }()

	t.logDebug("Stopping tun2socks (pid=%d)", cmd.Process.Pid)

	// Send SIGTERM.
	if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
		// Process may have already exited.
		t.logDebug("SIGTERM failed (process may have exited): %v", err)
		return nil
	}

	// Wait for exit with a timeout. The goroutine uses the local cmd so it
	// is unaffected by t.tun2socksCmd being reset. The monitor goroutine may
	// be waiting on the same command; in that case this Wait can return an
	// error, which is only logged.
	exited := make(chan error, 1)
	go func() {
		exited <- cmd.Wait()
	}()

	select {
	case err := <-exited:
		if err != nil {
			t.logDebug("tun2socks exited with: %v", err)
		}
	case <-time.After(tun2socksStopGracePeriod):
		t.logDebug("tun2socks did not exit after SIGTERM, sending SIGKILL")
		if err := cmd.Process.Kill(); err != nil {
			t.logDebug("SIGKILL failed: %v", err)
		}
		// Give the kill a bounded amount of time to take effect so that the
		// caller does not continue tearing things down while the process is
		// still alive, yet never blocks forever.
		select {
		case <-exited:
		case <-time.After(tun2socksStopGracePeriod):
			t.logDebug("tun2socks still has not exited after SIGKILL")
		}
	}

	return nil
}
// configureInterface brings the utun interface up via ifconfig, using tunIP
// as both the local and the peer address of the point-to-point link. The
// combined ifconfig output is included in the error on failure.
func (t *TunManager) configureInterface() error {
	t.logDebug("Configuring interface %s with IP %s", t.tunDevice, tunIP)

	//nolint:gosec // tunDevice and tunIP are controlled internal values
	out, err := exec.Command("ifconfig", t.tunDevice, tunIP, tunIP, "up").CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("ifconfig %s failed: %w (output: %s)", t.tunDevice, err, string(out))
}
// addLoopbackAlias registers dnsRelayIP as an alias address on lo0 so the
// DNS relay has a loopback address of its own. The combined ifconfig output
// is included in the error on failure.
func (t *TunManager) addLoopbackAlias() error {
	t.logDebug("Adding loopback alias %s on lo0", dnsRelayIP)

	out, err := exec.Command("ifconfig", "lo0", "alias", dnsRelayIP, "up").CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("ifconfig lo0 alias failed: %w (output: %s)", err, string(out))
}
// removeLoopbackAlias deletes the dnsRelayIP alias from lo0. The combined
// ifconfig output is included in the error on failure.
func (t *TunManager) removeLoopbackAlias() error {
	t.logDebug("Removing loopback alias %s from lo0", dnsRelayIP)

	out, err := exec.Command("ifconfig", "lo0", "-alias", dnsRelayIP).CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("ifconfig lo0 -alias failed: %w (output: %s)", err, string(out))
}
// ensureAnchorInPFConf checks whether the greywall anchor references
// (`rdr-anchor "<name>"` and `anchor "<name>"`) exist in /etc/pf.conf. Any
// missing reference is inserted and the main ruleset is reloaded with
// "pfctl -f".
//
// Placement: the rdr-anchor line goes right after the last existing
// "rdr-anchor" line and the anchor line right after the last existing
// standalone "anchor" line, which keeps translation references ahead of
// filter references. If no line of the respective kind exists, the new line
// is appended at the end of the file.
//
// The written file always ends with a newline. If pfctl rejects the reload,
// the previous file content is written back so a rejected configuration is
// not left on disk. Returns nil without touching the file when both
// references are already present.
func (t *TunManager) ensureAnchorInPFConf() error {
	const pfConfPath = "/etc/pf.conf"

	anchorLine := fmt.Sprintf(`anchor "%s"`, t.pfAnchor)
	rdrAnchorLine := fmt.Sprintf(`rdr-anchor "%s"`, t.pfAnchor)

	data, err := os.ReadFile(pfConfPath)
	if err != nil {
		return fmt.Errorf("failed to read %s: %w", pfConfPath, err)
	}

	lines := strings.Split(string(data), "\n")

	// Line-level presence check avoids substring false positives
	// (e.g. 'anchor "X"' matching inside 'rdr-anchor "X"').
	hasAnchor := false
	hasRdrAnchor := false
	lastRdrIdx := -1
	lastAnchorIdx := -1
	for i, line := range lines {
		trimmed := strings.TrimSpace(line)
		if trimmed == rdrAnchorLine {
			hasRdrAnchor = true
		}
		if trimmed == anchorLine {
			hasAnchor = true
		}
		if strings.HasPrefix(trimmed, "rdr-anchor ") {
			lastRdrIdx = i
		}
		// Standalone "anchor" lines — not rdr-anchor, nat-anchor, etc.
		if strings.HasPrefix(trimmed, "anchor ") {
			lastAnchorIdx = i
		}
	}

	if hasAnchor && hasRdrAnchor {
		t.logDebug("pf anchor already present in %s", pfConfPath)
		return nil
	}

	t.logDebug("Adding pf anchor to %s", pfConfPath)

	// Rebuild the file in a single forward pass, emitting each new line
	// directly after its insertion point. Because the output is a fresh
	// slice, the indices recorded above stay valid throughout.
	result := make([]string, 0, len(lines)+2)
	for i, line := range lines {
		result = append(result, line)
		if !hasRdrAnchor && i == lastRdrIdx {
			result = append(result, rdrAnchorLine)
		}
		if !hasAnchor && i == lastAnchorIdx {
			result = append(result, anchorLine)
		}
	}

	// Fallback: if no existing rdr-anchor/anchor found, append at end.
	if !hasRdrAnchor && lastRdrIdx == -1 {
		result = append(result, rdrAnchorLine)
	}
	if !hasAnchor && lastAnchorIdx == -1 {
		result = append(result, anchorLine)
	}

	newContent := strings.Join(result, "\n")
	// Rules in pf.conf are newline-terminated. Appending at the end (or an
	// input file that lacked a final newline) would otherwise leave the
	// last rule unterminated.
	if !strings.HasSuffix(newContent, "\n") {
		newContent += "\n"
	}

	//nolint:gosec // pf.conf must be writable by root; the daemon runs as root
	if err := os.WriteFile(pfConfPath, []byte(newContent), 0o644); err != nil {
		return fmt.Errorf("failed to write %s: %w", pfConfPath, err)
	}

	// Reload the main pf.conf so the anchor reference is recognized.
	//nolint:gosec // pfConfPath is a constant
	reloadCmd := exec.Command("pfctl", "-f", pfConfPath)
	if output, err := reloadCmd.CombinedOutput(); err != nil {
		// Put the previous content back rather than leaving a file that
		// pfctl just refused to load.
		//nolint:gosec // pf.conf must be writable by root; the daemon runs as root
		if restoreErr := os.WriteFile(pfConfPath, data, 0o644); restoreErr != nil {
			return fmt.Errorf("pfctl reload failed: %w (output: %s); restoring %s also failed: %v",
				err, string(output), pfConfPath, restoreErr)
		}
		return fmt.Errorf("pfctl reload failed: %w (output: %s)", err, string(output))
	}

	t.logDebug("pf anchor added and pf.conf reloaded")
	return nil
}
// enablePF turns the pf firewall on unless "pfctl -s info" already reports
// "Status: Enabled". If the status query fails, or does not report that pf
// is enabled, "pfctl -e" is run and its failure is returned as an error.
func (t *TunManager) enablePF() error {
	// Check current pf status.
	info, statusErr := exec.Command("pfctl", "-s", "info").CombinedOutput()
	alreadyOn := statusErr == nil && strings.Contains(string(info), "Status: Enabled")
	if alreadyOn {
		t.logDebug("pf is already enabled")
		return nil
	}

	t.logDebug("Enabling pf")
	out, err := exec.Command("pfctl", "-e").CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("pfctl -e failed: %w (output: %s)", err, string(out))
}
// unloadPFRulesLocked runs "pfctl -a <anchor> -F all" to flush everything
// held in the manager's pf anchor. The caller must hold t.mu.
func (t *TunManager) unloadPFRulesLocked() error {
	t.logDebug("Flushing pf anchor %s", t.pfAnchor)

	//nolint:gosec // pfAnchor is a controlled internal constant
	out, err := exec.Command("pfctl", "-a", t.pfAnchor, "-F", "all").CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("pfctl flush anchor failed: %w (output: %s)", err, string(out))
}
// removeAnchorFromPFConf strips the greywall `anchor` and `rdr-anchor`
// reference lines from /etc/pf.conf, for use during uninstall. Lines are
// compared after trimming surrounding whitespace. The file is rewritten only
// if at least one line was removed.
func removeAnchorFromPFConf(debug bool) error {
	const pfConfPath = "/etc/pf.conf"

	// The two exact lines that belong to greywall.
	unwanted := map[string]bool{
		fmt.Sprintf(`anchor "%s"`, pfAnchorName):     true,
		fmt.Sprintf(`rdr-anchor "%s"`, pfAnchorName): true,
	}

	data, err := os.ReadFile(pfConfPath)
	if err != nil {
		return fmt.Errorf("failed to read %s: %w", pfConfPath, err)
	}

	var kept []string
	removed := 0
	for _, line := range strings.Split(string(data), "\n") {
		if unwanted[strings.TrimSpace(line)] {
			removed++
		} else {
			kept = append(kept, line)
		}
	}

	if removed == 0 {
		logDebug(debug, "No pf anchor lines to remove from %s", pfConfPath)
		return nil
	}

	rewritten := []byte(strings.Join(kept, "\n"))
	//nolint:gosec // pf.conf must be writable by root; the daemon runs as root
	if err := os.WriteFile(pfConfPath, rewritten, 0o644); err != nil {
		return fmt.Errorf("failed to write %s: %w", pfConfPath, err)
	}

	logDebug(debug, "Removed %d pf anchor lines from %s", removed, pfConfPath)
	return nil
}
// logDebug prints a formatted message to stderr, prefixed with
// "[greywall:tun] " and terminated by a newline. It does nothing unless the
// manager was created with debug enabled.
func (t *TunManager) logDebug(format string, args ...interface{}) {
	if !t.debug {
		return
	}
	fmt.Fprintf(os.Stderr, "[greywall:tun] "+format+"\n", args...)
}