From 9d5d85286056408d0e7f302d71f4d4c0ff98eecf Mon Sep 17 00:00:00 2001 From: Mathieu Virbel Date: Thu, 26 Feb 2026 17:23:43 -0600 Subject: [PATCH] feat: switch macOS learning mode from fs_usage to eslogger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace fs_usage (reports Mach thread IDs, requiring process name matching with false positives) with eslogger (Endpoint Security framework, reports real Unix PIDs via audit_token.pid plus fork events for process tree tracking). Key changes: - Daemon starts eslogger instead of fs_usage, with early-exit detection and clear Full Disk Access error messaging - New two-pass eslogger JSON parser: pass 1 builds PID tree from fork events, pass 2 filters filesystem events by PID set - Remove runtime PID polling (StartPIDTracking, pollDescendantPIDs) — process tree is now built post-hoc from the eslogger log - Platform-specific generateLearnedTemplatePlatform() for darwin/linux/stub - Refactor TraceResult and GenerateLearnedTemplate to be platform-agnostic --- cmd/greywall/main.go | 8 +- internal/daemon/client.go | 37 ++ internal/daemon/server.go | 193 +++++++- internal/sandbox/learning.go | 33 +- internal/sandbox/learning_darwin.go | 459 +++++++++++++++++++ internal/sandbox/learning_darwin_test.go | 557 +++++++++++++++++++++++ internal/sandbox/learning_linux.go | 14 +- internal/sandbox/learning_linux_test.go | 4 +- internal/sandbox/learning_stub.go | 19 +- internal/sandbox/learning_test.go | 23 +- internal/sandbox/manager.go | 74 ++- internal/sandbox/manager_darwin.go | 42 ++ internal/sandbox/manager_linux.go | 31 ++ internal/sandbox/manager_stub.go | 10 + 14 files changed, 1434 insertions(+), 70 deletions(-) create mode 100644 internal/sandbox/learning_darwin.go create mode 100644 internal/sandbox/learning_darwin_test.go create mode 100644 internal/sandbox/manager_darwin.go create mode 100644 internal/sandbox/manager_linux.go create mode 100644 internal/sandbox/manager_stub.go diff 
--git a/cmd/greywall/main.go b/cmd/greywall/main.go index 5f33895..90eb31d 100644 --- a/cmd/greywall/main.go +++ b/cmd/greywall/main.go @@ -267,7 +267,7 @@ func runCommand(cmd *cobra.Command, args []string) error { // Learning mode setup if learning { - if err := sandbox.CheckStraceAvailable(); err != nil { + if err := sandbox.CheckLearningAvailable(); err != nil { return err } fmt.Fprintf(os.Stderr, "[greywall] Learning mode: tracing filesystem access for %q\n", cmdName) @@ -305,6 +305,7 @@ func runCommand(cmd *cobra.Command, args []string) error { if debug { fmt.Fprintf(os.Stderr, "[greywall] Sandboxed command: %s\n", sandboxedCommand) + fmt.Fprintf(os.Stderr, "[greywall] Executing: sh -c %q\n", sandboxedCommand) } hardenedEnv := sandbox.GetHardenedEnv() @@ -328,6 +329,11 @@ func runCommand(cmd *cobra.Command, args []string) error { return fmt.Errorf("failed to start command: %w", err) } + // Record root PID for macOS learning mode (eslogger uses this for process tree tracking) + if learning && platform.Detect() == platform.MacOS && execCmd.Process != nil { + manager.SetLearningRootPID(execCmd.Process.Pid) + } + // Start Linux monitors (eBPF tracing for filesystem violations) var linuxMonitors *sandbox.LinuxMonitors if monitor && execCmd.Process != nil { diff --git a/internal/daemon/client.go b/internal/daemon/client.go index fa12c78..df64edf 100644 --- a/internal/daemon/client.go +++ b/internal/daemon/client.go @@ -71,6 +71,43 @@ func (c *Client) DestroySession(sessionID string) error { return nil } +// StartLearning asks the daemon to start an eslogger trace for learning mode. 
+func (c *Client) StartLearning() (*Response, error) { + req := Request{ + Action: "start_learning", + } + + resp, err := c.sendRequest(req) + if err != nil { + return nil, fmt.Errorf("start learning request failed: %w", err) + } + + if !resp.OK { + return resp, fmt.Errorf("start learning failed: %s", resp.Error) + } + + return resp, nil +} + +// StopLearning asks the daemon to stop the eslogger trace for the given learning session. +func (c *Client) StopLearning(learningID string) error { + req := Request{ + Action: "stop_learning", + LearningID: learningID, + } + + resp, err := c.sendRequest(req) + if err != nil { + return fmt.Errorf("stop learning request failed: %w", err) + } + + if !resp.OK { + return fmt.Errorf("stop learning failed: %s", resp.Error) + } + + return nil +} + // Status queries the daemon for its current status. func (c *Client) Status() (*Response, error) { req := Request{ diff --git a/internal/daemon/server.go b/internal/daemon/server.go index b2bebf7..a7269ec 100644 --- a/internal/daemon/server.go +++ b/internal/daemon/server.go @@ -7,8 +7,11 @@ import ( "fmt" "net" "os" + "os/exec" "os/user" + "strings" "sync" + "syscall" "time" ) @@ -16,10 +19,11 @@ import ( // Request from CLI to daemon. type Request struct { - Action string `json:"action"` // "create_session", "destroy_session", "status" - ProxyURL string `json:"proxy_url,omitempty"` // for create_session - DNSAddr string `json:"dns_addr,omitempty"` // for create_session - SessionID string `json:"session_id,omitempty"` // for destroy_session + Action string `json:"action"` // "create_session", "destroy_session", "status", "start_learning", "stop_learning" + ProxyURL string `json:"proxy_url,omitempty"` // for create_session + DNSAddr string `json:"dns_addr,omitempty"` // for create_session + SessionID string `json:"session_id,omitempty"` // for destroy_session + LearningID string `json:"learning_id,omitempty"` // for stop_learning } // Response from daemon to CLI. 
@@ -33,6 +37,9 @@ type Response struct { // Status response fields. Running bool `json:"running,omitempty"` ActiveSessions int `json:"active_sessions,omitempty"` + // Learning response fields. + LearningID string `json:"learning_id,omitempty"` + LearningLog string `json:"learning_log,omitempty"` } // Session tracks an active sandbox session. @@ -57,6 +64,11 @@ type Server struct { debug bool tun2socksPath string sandboxGID string // cached numeric GID for the sandbox group + // Learning mode state + esloggerCmd *exec.Cmd // running eslogger process + esloggerLogPath string // temp file path for eslogger output + esloggerDone chan error // receives result of cmd.Wait() (set once, reused for stop) + learningID string // current learning session ID } // NewServer creates a new daemon server that will listen on the given Unix socket path. @@ -133,9 +145,26 @@ func (s *Server) Stop() error { // Wait for the accept loop and any in-flight handlers to finish. s.wg.Wait() - // Tear down all active sessions. + // Tear down all active sessions and learning. 
s.mu.Lock() var errs []string + + // Stop learning session if active + if s.esloggerCmd != nil && s.esloggerCmd.Process != nil { + s.logDebug("Stopping eslogger during shutdown") + _ = s.esloggerCmd.Process.Kill() + if s.esloggerDone != nil { + <-s.esloggerDone + } + s.esloggerCmd = nil + s.esloggerDone = nil + s.learningID = "" + } + if s.esloggerLogPath != "" { + _ = os.Remove(s.esloggerLogPath) + s.esloggerLogPath = "" + } + for id := range s.sessions { s.logDebug("Stopping session %s during shutdown", id) } @@ -227,6 +256,10 @@ func (s *Server) handleConnection(conn net.Conn) { resp = s.handleCreateSession(req) case "destroy_session": resp = s.handleDestroySession(req) + case "start_learning": + resp = s.handleStartLearning() + case "stop_learning": + resp = s.handleStopLearning(req) case "status": resp = s.handleStatus() default: @@ -387,6 +420,156 @@ func (s *Server) handleDestroySession(req Request) Response { return Response{OK: true} } +// handleStartLearning starts an eslogger trace for learning mode. +// eslogger uses the Endpoint Security framework and reports real Unix PIDs +// via audit_token.pid, plus fork events for process tree tracking. +func (s *Server) handleStartLearning() Response { + s.mu.Lock() + defer s.mu.Unlock() + + // Only one learning session at a time + if s.learningID != "" { + return Response{OK: false, Error: "a learning session is already active"} + } + + // Create temp file for eslogger output. + // The daemon runs as root but the CLI reads this file as a normal user, + // so we must make it world-readable. 
+ logFile, err := os.CreateTemp("", "greywall-eslogger-*.log") + if err != nil { + return Response{OK: false, Error: fmt.Sprintf("failed to create temp file: %v", err)} + } + + logPath := logFile.Name() + if err := os.Chmod(logPath, 0o644); err != nil { //nolint:gosec // intentionally world-readable so non-root CLI can parse the log + _ = logFile.Close() + _ = os.Remove(logPath) //nolint:gosec // logPath from os.CreateTemp, not user input + return Response{OK: false, Error: fmt.Sprintf("failed to set log file permissions: %v", err)} + } + + // Create a separate file for eslogger stderr so we can diagnose failures. + stderrFile, err := os.CreateTemp("", "greywall-eslogger-stderr-*.log") + if err != nil { + _ = logFile.Close() + _ = os.Remove(logPath) //nolint:gosec // logPath from os.CreateTemp, not user input + return Response{OK: false, Error: fmt.Sprintf("failed to create stderr file: %v", err)} + } + stderrPath := stderrFile.Name() + + // Start eslogger with filesystem events + fork for process tree tracking. + // eslogger outputs one JSON object per line to stdout. 
+ cmd := exec.Command("eslogger", "open", "create", "write", "unlink", "rename", "link", "truncate", "fork") //nolint:gosec // daemon-controlled command + cmd.Stdout = logFile + cmd.Stderr = stderrFile + + if err := cmd.Start(); err != nil { + _ = logFile.Close() + _ = stderrFile.Close() + _ = os.Remove(logPath) //nolint:gosec // logPath from os.CreateTemp, not user input + _ = os.Remove(stderrPath) //nolint:gosec // stderrPath from os.CreateTemp, not user input + return Response{OK: false, Error: fmt.Sprintf("failed to start eslogger: %v", err)} + } + + // Generate learning ID + learningID, err := generateSessionID() + if err != nil { + _ = cmd.Process.Kill() + _ = logFile.Close() + _ = stderrFile.Close() + _ = os.Remove(logPath) //nolint:gosec // logPath from os.CreateTemp, not user input + _ = os.Remove(stderrPath) //nolint:gosec // stderrPath from os.CreateTemp, not user input + return Response{OK: false, Error: fmt.Sprintf("failed to generate learning ID: %v", err)} + } + + // Wait briefly for eslogger to initialize, then check if it exited early + // (e.g., missing Full Disk Access permission). 
+ exitCh := make(chan error, 1) + go func() { + exitCh <- cmd.Wait() + }() + + select { + case waitErr := <-exitCh: + // eslogger exited during startup — read stderr for the error message + _ = stderrFile.Close() + stderrContent, _ := os.ReadFile(stderrPath) //nolint:gosec // stderrPath from os.CreateTemp + _ = os.Remove(stderrPath) //nolint:gosec + _ = logFile.Close() + _ = os.Remove(logPath) //nolint:gosec + errMsg := strings.TrimSpace(string(stderrContent)) + if errMsg == "" { + errMsg = fmt.Sprintf("eslogger exited: %v", waitErr) + } + if strings.Contains(errMsg, "Full Disk Access") { + errMsg += "\n\nGrant Full Disk Access to /usr/local/bin/greywall:\n" + + " System Settings → Privacy & Security → Full Disk Access → add /usr/local/bin/greywall\n" + + "Then reinstall the daemon: sudo greywall daemon uninstall -f && sudo greywall daemon install" + } + return Response{OK: false, Error: fmt.Sprintf("eslogger failed to start: %s", errMsg)} + + case <-time.After(500 * time.Millisecond): + // eslogger is still running after 500ms — good, it initialized successfully + } + + s.esloggerCmd = cmd + s.esloggerLogPath = logPath + s.esloggerDone = exitCh + s.learningID = learningID + + // Clean up stderr file now that eslogger is running + _ = stderrFile.Close() + _ = os.Remove(stderrPath) //nolint:gosec + + Logf("Learning session started: id=%s log=%s pid=%d", learningID, logPath, cmd.Process.Pid) + + return Response{ + OK: true, + LearningID: learningID, + LearningLog: logPath, + } +} + +// handleStopLearning stops the eslogger trace for a learning session. 
+func (s *Server) handleStopLearning(req Request) Response { + s.mu.Lock() + defer s.mu.Unlock() + + if req.LearningID == "" { + return Response{OK: false, Error: "learning_id is required"} + } + + if s.learningID == "" || s.learningID != req.LearningID { + return Response{OK: false, Error: fmt.Sprintf("learning session %q not found", req.LearningID)} + } + + if s.esloggerCmd != nil && s.esloggerCmd.Process != nil { + // Send SIGINT to eslogger for graceful shutdown (flushes buffers) + _ = s.esloggerCmd.Process.Signal(syscall.SIGINT) + + // Reuse the wait channel from startup (cmd.Wait already called there) + if s.esloggerDone != nil { + select { + case <-s.esloggerDone: + // Exited cleanly + case <-time.After(5 * time.Second): + // Force kill after timeout + _ = s.esloggerCmd.Process.Kill() + <-s.esloggerDone + } + } + } + + Logf("Learning session stopped: id=%s", s.learningID) + + s.esloggerCmd = nil + s.esloggerDone = nil + s.learningID = "" + // Don't remove the log file — the CLI needs to read it + s.esloggerLogPath = "" + + return Response{OK: true} +} + // handleStatus returns the current daemon status including whether it is running // and how many sessions are active. func (s *Server) handleStatus() Response { diff --git a/internal/sandbox/learning.go b/internal/sandbox/learning.go index aa79540..baef9f5 100644 --- a/internal/sandbox/learning.go +++ b/internal/sandbox/learning.go @@ -10,6 +10,13 @@ import ( "strings" ) +// TraceResult holds parsed read and write paths from a system trace log +// (strace on Linux, eslogger on macOS). +type TraceResult struct { + WritePaths []string + ReadPaths []string +} + // wellKnownParents are directories under $HOME where applications typically // create their own subdirectory (e.g., ~/.cache/opencode, ~/.config/opencode). 
var wellKnownParents = []string{ @@ -52,14 +59,9 @@ func SanitizeTemplateName(name string) string { return sanitized } -// GenerateLearnedTemplate parses an strace log, collapses paths, and saves a template. +// GenerateLearnedTemplate takes a parsed trace result, collapses paths, and saves a template. // Returns the path where the template was saved. -func GenerateLearnedTemplate(straceLogPath, cmdName string, debug bool) (string, error) { - result, err := ParseStraceLog(straceLogPath, debug) - if err != nil { - return "", fmt.Errorf("failed to parse strace log: %w", err) - } - +func GenerateLearnedTemplate(result *TraceResult, cmdName string, debug bool) (string, error) { home, _ := os.UserHomeDir() // Filter write paths: remove default writable and sensitive paths @@ -231,8 +233,9 @@ func CollapsePaths(paths []string) []string { } } - // Sort and deduplicate (remove sub-paths of other paths) + // Sort, remove exact duplicates, then remove sub-paths of other paths sort.Strings(result) + result = removeDuplicates(result) result = deduplicateSubPaths(result) return result @@ -364,6 +367,20 @@ func ListLearnedTemplates() ([]LearnedTemplateInfo, error) { return templates, nil } +// removeDuplicates removes exact duplicate strings from a sorted slice. +func removeDuplicates(paths []string) []string { + if len(paths) <= 1 { + return paths + } + result := []string{paths[0]} + for i := 1; i < len(paths); i++ { + if paths[i] != paths[i-1] { + result = append(result, paths[i]) + } + } + return result +} + // deduplicateSubPaths removes paths that are sub-paths of other paths in the list. // Assumes the input is sorted. 
func deduplicateSubPaths(paths []string) []string { diff --git a/internal/sandbox/learning_darwin.go b/internal/sandbox/learning_darwin.go new file mode 100644 index 0000000..f17b2f1 --- /dev/null +++ b/internal/sandbox/learning_darwin.go @@ -0,0 +1,459 @@ +//go:build darwin + +package sandbox + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "strings" + + "gitea.app.monadical.io/monadical/greywall/internal/daemon" +) + +// opClass classifies a filesystem operation. +type opClass int + +const ( + opSkip opClass = iota + opRead + opWrite +) + +// fwriteFlag is the macOS FWRITE flag value (O_WRONLY or O_RDWR includes this). +const fwriteFlag = 0x0002 + +// eslogger JSON types — mirrors the real Endpoint Security framework output. +// eslogger emits one JSON object per line to stdout. +// +// Key structural details from real eslogger output: +// - event_type is an integer (e.g., 10=open, 11=fork, 13=create, 32=unlink, 33=write, 41=truncate) +// - Event data is nested under event.{event_name} (e.g., event.open, event.fork) +// - write/unlink/truncate use "target" not "file" +// - create uses destination.existing_file +// - fork child has full process info including audit_token + +// esloggerEvent is the top-level event from eslogger. +type esloggerEvent struct { + EventType int `json:"event_type"` + Process esloggerProcess `json:"process"` + Event map[string]json.RawMessage `json:"event"` +} + +type esloggerProcess struct { + AuditToken esloggerAuditToken `json:"audit_token"` + Executable esloggerExec `json:"executable"` + PPID int `json:"ppid"` +} + +type esloggerAuditToken struct { + PID int `json:"pid"` +} + +type esloggerExec struct { + Path string `json:"path"` + PathTruncated bool `json:"path_truncated"` +} + +// Event-specific types. 
+ +type esloggerOpenEvent struct { + File esloggerFile `json:"file"` + Fflag int `json:"fflag"` +} + +type esloggerTargetEvent struct { + Target esloggerFile `json:"target"` +} + +type esloggerCreateEvent struct { + DestinationType int `json:"destination_type"` + Destination esloggerCreateDest `json:"destination"` +} + +type esloggerCreateDest struct { + ExistingFile *esloggerFile `json:"existing_file,omitempty"` + NewPath *esloggerNewPath `json:"new_path,omitempty"` +} + +type esloggerNewPath struct { + Dir esloggerFile `json:"dir"` + Filename string `json:"filename"` +} + +type esloggerRenameEvent struct { + Source esloggerFile `json:"source"` + Destination esloggerFile `json:"destination_new_path"` // TODO: verify actual field name +} + +type esloggerForkEvent struct { + Child esloggerForkChild `json:"child"` +} + +type esloggerForkChild struct { + AuditToken esloggerAuditToken `json:"audit_token"` + Executable esloggerExec `json:"executable"` + PPID int `json:"ppid"` +} + +type esloggerLinkEvent struct { + Source esloggerFile `json:"source"` + TargetDir esloggerFile `json:"target_dir"` +} + +type esloggerFile struct { + Path string `json:"path"` + PathTruncated bool `json:"path_truncated"` +} + +// CheckLearningAvailable verifies that eslogger exists and the daemon is running. +func CheckLearningAvailable() error { + if _, err := os.Stat("/usr/bin/eslogger"); err != nil { + return fmt.Errorf("eslogger not found at /usr/bin/eslogger (requires macOS 13+): %w", err) + } + + client := daemon.NewClient(daemon.DefaultSocketPath, false) + if !client.IsRunning() { + return fmt.Errorf("greywall daemon is not running (required for macOS learning mode)\n\n" + + " Install and start: sudo greywall daemon install\n" + + " Check status: greywall daemon status") + } + return nil +} + +// eventName extracts the event name string from the event map. +// eslogger nests event data under event.{name}, e.g., event.open, event.fork. 
+func eventName(ev *esloggerEvent) string { + for key := range ev.Event { + return key + } + return "" +} + +// ParseEsloggerLog reads an eslogger JSON log, builds the process tree from +// fork events starting at rootPID, then filters filesystem events by the PID set. +// Uses a two-pass approach: pass 1 scans fork events to build the PID tree, +// pass 2 filters filesystem events by the PID set. +func ParseEsloggerLog(logPath string, rootPID int, debug bool) (*TraceResult, error) { + home, _ := os.UserHomeDir() + seenWrite := make(map[string]bool) + seenRead := make(map[string]bool) + result := &TraceResult{} + + // Pass 1: Build the PID set from fork events. + pidSet := map[int]bool{rootPID: true} + forkEvents, err := scanForkEvents(logPath) + if err != nil { + return nil, err + } + + // BFS: expand PID set using fork parent→child relationships. + // We may need multiple rounds since a child can itself fork. + changed := true + for changed { + changed = false + for _, fe := range forkEvents { + if pidSet[fe.parentPID] && !pidSet[fe.childPID] { + pidSet[fe.childPID] = true + changed = true + } + } + } + + if debug { + fmt.Fprintf(os.Stderr, "[greywall] eslogger PID tree from root %d: %d PIDs\n", rootPID, len(pidSet)) + } + + // Pass 2: Scan filesystem events, filter by PID set. 
+ f, err := os.Open(logPath) //nolint:gosec // daemon-controlled temp file path + if err != nil { + return nil, fmt.Errorf("failed to open eslogger log: %w", err) + } + defer func() { _ = f.Close() }() + + scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 0, 256*1024), 4*1024*1024) + + lineCount := 0 + matchedLines := 0 + writeCount := 0 + readCount := 0 + + for scanner.Scan() { + line := scanner.Bytes() + lineCount++ + + var ev esloggerEvent + if err := json.Unmarshal(line, &ev); err != nil { + continue + } + + name := eventName(&ev) + + // Skip fork events (already processed in pass 1) + if name == "fork" { + continue + } + + // Filter by PID set + pid := ev.Process.AuditToken.PID + if !pidSet[pid] { + continue + } + matchedLines++ + + // Extract path and classify operation + paths, class := classifyEsloggerEvent(&ev, name) + if class == opSkip || len(paths) == 0 { + continue + } + + for _, path := range paths { + if shouldFilterPathMacOS(path, home) { + continue + } + + switch class { + case opWrite: + writeCount++ + if !seenWrite[path] { + seenWrite[path] = true + result.WritePaths = append(result.WritePaths, path) + } + case opRead: + readCount++ + if !seenRead[path] { + seenRead[path] = true + result.ReadPaths = append(result.ReadPaths, path) + } + } + } + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading eslogger log: %w", err) + } + + if debug { + fmt.Fprintf(os.Stderr, "[greywall] Parsed eslogger log: %d lines, %d matched PIDs, %d writes, %d reads, %d unique write paths, %d unique read paths\n", + lineCount, matchedLines, writeCount, readCount, len(result.WritePaths), len(result.ReadPaths)) + } + + return result, nil +} + +// forkRecord stores a parent→child PID relationship from a fork event. +type forkRecord struct { + parentPID int + childPID int +} + +// scanForkEvents reads the log and extracts all fork parent→child PID pairs. 
+func scanForkEvents(logPath string) ([]forkRecord, error) { + f, err := os.Open(logPath) //nolint:gosec // daemon-controlled temp file path + if err != nil { + return nil, fmt.Errorf("failed to open eslogger log: %w", err) + } + defer func() { _ = f.Close() }() + + scanner := bufio.NewScanner(f) + scanner.Buffer(make([]byte, 0, 256*1024), 4*1024*1024) + + var forks []forkRecord + for scanner.Scan() { + line := scanner.Bytes() + + // Quick pre-check to avoid parsing non-fork lines. + // Fork events have "fork" as a key in the event object. + if !strings.Contains(string(line), `"fork"`) { + continue + } + + var ev esloggerEvent + if err := json.Unmarshal(line, &ev); err != nil { + continue + } + + forkRaw, ok := ev.Event["fork"] + if !ok { + continue + } + + var fe esloggerForkEvent + if err := json.Unmarshal(forkRaw, &fe); err != nil { + continue + } + + forks = append(forks, forkRecord{ + parentPID: ev.Process.AuditToken.PID, + childPID: fe.Child.AuditToken.PID, + }) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading eslogger log for fork events: %w", err) + } + + return forks, nil +} + +// classifyEsloggerEvent extracts paths and classifies the operation from an eslogger event. +// The event name is the key inside the event map (e.g., "open", "fork", "write"). 
+func classifyEsloggerEvent(ev *esloggerEvent, name string) ([]string, opClass) { + eventRaw, ok := ev.Event[name] + if !ok { + return nil, opSkip + } + + switch name { + case "open": + var oe esloggerOpenEvent + if err := json.Unmarshal(eventRaw, &oe); err != nil { + return nil, opSkip + } + path := oe.File.Path + if path == "" || oe.File.PathTruncated { + return nil, opSkip + } + if oe.Fflag&fwriteFlag != 0 { + return []string{path}, opWrite + } + return []string{path}, opRead + + case "create": + var ce esloggerCreateEvent + if err := json.Unmarshal(eventRaw, &ce); err != nil { + return nil, opSkip + } + // create events use destination.existing_file or destination.new_path + if ce.Destination.ExistingFile != nil { + path := ce.Destination.ExistingFile.Path + if path != "" && !ce.Destination.ExistingFile.PathTruncated { + return []string{path}, opWrite + } + } + if ce.Destination.NewPath != nil { + dir := ce.Destination.NewPath.Dir.Path + filename := ce.Destination.NewPath.Filename + if dir != "" && filename != "" { + return []string{dir + "/" + filename}, opWrite + } + } + return nil, opSkip + + case "write", "unlink", "truncate": + // These events use "target" not "file" + var te esloggerTargetEvent + if err := json.Unmarshal(eventRaw, &te); err != nil { + return nil, opSkip + } + path := te.Target.Path + if path == "" || te.Target.PathTruncated { + return nil, opSkip + } + return []string{path}, opWrite + + case "rename": + var re esloggerRenameEvent + if err := json.Unmarshal(eventRaw, &re); err != nil { + return nil, opSkip + } + var paths []string + if re.Source.Path != "" && !re.Source.PathTruncated { + paths = append(paths, re.Source.Path) + } + if re.Destination.Path != "" && !re.Destination.PathTruncated { + paths = append(paths, re.Destination.Path) + } + if len(paths) == 0 { + return nil, opSkip + } + return paths, opWrite + + case "link": + var le esloggerLinkEvent + if err := json.Unmarshal(eventRaw, &le); err != nil { + return nil, opSkip + } + 
var paths []string + if le.Source.Path != "" && !le.Source.PathTruncated { + paths = append(paths, le.Source.Path) + } + if le.TargetDir.Path != "" && !le.TargetDir.PathTruncated { + paths = append(paths, le.TargetDir.Path) + } + if len(paths) == 0 { + return nil, opSkip + } + return paths, opWrite + + default: + return nil, opSkip + } +} + +// shouldFilterPathMacOS returns true if a path should be excluded from macOS learning results. +func shouldFilterPathMacOS(path, home string) bool { + if path == "" || !strings.HasPrefix(path, "/") { + return true + } + + // macOS system path prefixes to filter + systemPrefixes := []string{ + "/dev/", + "/private/var/run/", + "/private/var/db/", + "/private/var/folders/", + "/System/", + "/Library/", + "/usr/lib/", + "/usr/share/", + "/private/etc/", + "/tmp/", + "/private/tmp/", + } + for _, prefix := range systemPrefixes { + if strings.HasPrefix(path, prefix) { + return true + } + } + + // Filter .dylib files (macOS shared libraries) + if strings.HasSuffix(path, ".dylib") { + return true + } + + // Filter greywall infrastructure files + if strings.Contains(path, "greywall-") { + return true + } + + // Filter paths outside home directory + if home != "" && !strings.HasPrefix(path, home+"/") { + return true + } + + // Filter exact home directory match + if path == home { + return true + } + + // Filter shell infrastructure directories (PATH lookups, plugin dirs) + if home != "" { + shellInfraPrefixes := []string{ + home + "/.antigen/", + home + "/.oh-my-zsh/", + home + "/.pyenv/shims/", + home + "/.bun/bin/", + home + "/.local/bin/", + } + for _, prefix := range shellInfraPrefixes { + if strings.HasPrefix(path, prefix) { + return true + } + } + } + + return false +} diff --git a/internal/sandbox/learning_darwin_test.go b/internal/sandbox/learning_darwin_test.go new file mode 100644 index 0000000..f6d6b80 --- /dev/null +++ b/internal/sandbox/learning_darwin_test.go @@ -0,0 +1,557 @@ +//go:build darwin + +package sandbox + 
+import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +// makeEsloggerLine builds a single JSON line matching real eslogger output format. +// event_type is an int, and event data is nested under event.{eventName}. +func makeEsloggerLine(eventName string, eventTypeInt int, pid int, eventData interface{}) string { + eventJSON, _ := json.Marshal(eventData) + ev := map[string]interface{}{ + "event_type": eventTypeInt, + "process": map[string]interface{}{ + "audit_token": map[string]interface{}{ + "pid": pid, + }, + "executable": map[string]interface{}{ + "path": "/usr/bin/test", + "path_truncated": false, + }, + "ppid": 1, + }, + "event": map[string]json.RawMessage{ + eventName: json.RawMessage(eventJSON), + }, + } + data, _ := json.Marshal(ev) + return string(data) +} + +func TestClassifyEsloggerEvent(t *testing.T) { + tests := []struct { + name string + eventName string + eventData interface{} + expectPaths []string + expectClass opClass + }{ + { + name: "open read-only", + eventName: "open", + eventData: map[string]interface{}{ + "file": map[string]interface{}{"path": "/Users/test/file.txt", "path_truncated": false}, + "fflag": 0x0001, // FREAD only + }, + expectPaths: []string{"/Users/test/file.txt"}, + expectClass: opRead, + }, + { + name: "open with write flag", + eventName: "open", + eventData: map[string]interface{}{ + "file": map[string]interface{}{"path": "/Users/test/file.txt", "path_truncated": false}, + "fflag": 0x0003, // FREAD | FWRITE + }, + expectPaths: []string{"/Users/test/file.txt"}, + expectClass: opWrite, + }, + { + name: "create event with existing_file", + eventName: "create", + eventData: map[string]interface{}{ + "destination_type": 0, + "destination": map[string]interface{}{ + "existing_file": map[string]interface{}{"path": "/Users/test/new.txt", "path_truncated": false}, + }, + }, + expectPaths: []string{"/Users/test/new.txt"}, + expectClass: opWrite, + }, + { + name: "write event uses target", + eventName: 
"write", + eventData: map[string]interface{}{ + "target": map[string]interface{}{"path": "/Users/test/data.db", "path_truncated": false}, + }, + expectPaths: []string{"/Users/test/data.db"}, + expectClass: opWrite, + }, + { + name: "unlink event uses target", + eventName: "unlink", + eventData: map[string]interface{}{ + "target": map[string]interface{}{"path": "/Users/test/old.txt", "path_truncated": false}, + }, + expectPaths: []string{"/Users/test/old.txt"}, + expectClass: opWrite, + }, + { + name: "truncate event uses target", + eventName: "truncate", + eventData: map[string]interface{}{ + "target": map[string]interface{}{"path": "/Users/test/trunc.log", "path_truncated": false}, + }, + expectPaths: []string{"/Users/test/trunc.log"}, + expectClass: opWrite, + }, + { + name: "rename event with source and destination", + eventName: "rename", + eventData: map[string]interface{}{ + "source": map[string]interface{}{"path": "/Users/test/old.txt", "path_truncated": false}, + "destination_new_path": map[string]interface{}{"path": "/Users/test/new.txt", "path_truncated": false}, + }, + expectPaths: []string{"/Users/test/old.txt", "/Users/test/new.txt"}, + expectClass: opWrite, + }, + { + name: "truncated path is skipped", + eventName: "open", + eventData: map[string]interface{}{ + "file": map[string]interface{}{"path": "/Users/test/very/long/path", "path_truncated": true}, + "fflag": 0x0001, + }, + expectPaths: nil, + expectClass: opSkip, + }, + { + name: "empty path is skipped", + eventName: "write", + eventData: map[string]interface{}{ + "target": map[string]interface{}{"path": "", "path_truncated": false}, + }, + expectPaths: nil, + expectClass: opSkip, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + eventJSON, _ := json.Marshal(tt.eventData) + ev := &esloggerEvent{ + EventType: 0, + Event: map[string]json.RawMessage{ + tt.eventName: json.RawMessage(eventJSON), + }, + } + + paths, class := classifyEsloggerEvent(ev, tt.eventName) + if 
class != tt.expectClass { + t.Errorf("class = %d, want %d", class, tt.expectClass) + } + if tt.expectPaths == nil { + if len(paths) != 0 { + t.Errorf("paths = %v, want nil", paths) + } + } else { + if len(paths) != len(tt.expectPaths) { + t.Errorf("paths = %v, want %v", paths, tt.expectPaths) + } else { + for i, p := range paths { + if p != tt.expectPaths[i] { + t.Errorf("paths[%d] = %q, want %q", i, p, tt.expectPaths[i]) + } + } + } + } + }) + } +} + +func TestParseEsloggerLog(t *testing.T) { + home, _ := os.UserHomeDir() + + // Root PID is 100; it forks child PID 101, which forks grandchild 102. + // PID 200 is an unrelated process. + lines := []string{ + // Fork: root (100) -> child (101) + makeEsloggerLine("fork", 11, 100, map[string]interface{}{ + "child": map[string]interface{}{ + "audit_token": map[string]interface{}{"pid": 101}, + "executable": map[string]interface{}{"path": "/usr/bin/child", "path_truncated": false}, + "ppid": 100, + }, + }), + // Fork: child (101) -> grandchild (102) + makeEsloggerLine("fork", 11, 101, map[string]interface{}{ + "child": map[string]interface{}{ + "audit_token": map[string]interface{}{"pid": 102}, + "executable": map[string]interface{}{"path": "/usr/bin/grandchild", "path_truncated": false}, + "ppid": 101, + }, + }), + // Write by root process (should be included) — write uses "target" + makeEsloggerLine("write", 33, 100, map[string]interface{}{ + "target": map[string]interface{}{"path": filepath.Join(home, ".cache/testapp/db.sqlite"), "path_truncated": false}, + }), + // Create by child (should be included) — create uses destination.existing_file + makeEsloggerLine("create", 13, 101, map[string]interface{}{ + "destination_type": 0, + "destination": map[string]interface{}{ + "existing_file": map[string]interface{}{"path": filepath.Join(home, ".config/testapp/conf.json"), "path_truncated": false}, + }, + }), + // Open (read-only) by grandchild (should be included as read) + makeEsloggerLine("open", 10, 102, 
map[string]interface{}{ + "file": map[string]interface{}{"path": filepath.Join(home, ".config/testapp/extra.json"), "path_truncated": false}, + "fflag": 0x0001, + }), + // Open (write) by grandchild (should be included as write) + makeEsloggerLine("open", 10, 102, map[string]interface{}{ + "file": map[string]interface{}{"path": filepath.Join(home, ".cache/testapp/version"), "path_truncated": false}, + "fflag": 0x0003, + }), + // Write by unrelated PID 200 (should NOT be included) + makeEsloggerLine("write", 33, 200, map[string]interface{}{ + "target": map[string]interface{}{"path": filepath.Join(home, ".cache/otherapp/data"), "path_truncated": false}, + }), + // System path write by root PID (should be filtered) + makeEsloggerLine("write", 33, 100, map[string]interface{}{ + "target": map[string]interface{}{"path": "/dev/null", "path_truncated": false}, + }), + // Unlink by child (should be included) — unlink uses "target" + makeEsloggerLine("unlink", 32, 101, map[string]interface{}{ + "target": map[string]interface{}{"path": filepath.Join(home, ".cache/testapp/old.tmp"), "path_truncated": false}, + }), + } + + logContent := strings.Join(lines, "\n") + logFile := filepath.Join(t.TempDir(), "eslogger.log") + if err := os.WriteFile(logFile, []byte(logContent), 0o600); err != nil { + t.Fatal(err) + } + + result, err := ParseEsloggerLog(logFile, 100, false) + if err != nil { + t.Fatalf("ParseEsloggerLog() error: %v", err) + } + + // Check write paths + expectedWrites := map[string]bool{ + filepath.Join(home, ".cache/testapp/db.sqlite"): false, + filepath.Join(home, ".config/testapp/conf.json"): false, + filepath.Join(home, ".cache/testapp/version"): false, + filepath.Join(home, ".cache/testapp/old.tmp"): false, + } + for _, p := range result.WritePaths { + if _, ok := expectedWrites[p]; ok { + expectedWrites[p] = true + } + } + for p, found := range expectedWrites { + if !found { + t.Errorf("WritePaths missing expected: %q, got: %v", p, result.WritePaths) + } + } + + // 
Check that unrelated PID 200 paths were not included + for _, p := range result.WritePaths { + if strings.Contains(p, "otherapp") { + t.Errorf("WritePaths should not contain otherapp path: %q", p) + } + } + + // Check read paths + expectedReads := map[string]bool{ + filepath.Join(home, ".config/testapp/extra.json"): false, + } + for _, p := range result.ReadPaths { + if _, ok := expectedReads[p]; ok { + expectedReads[p] = true + } + } + for p, found := range expectedReads { + if !found { + t.Errorf("ReadPaths missing expected: %q, got: %v", p, result.ReadPaths) + } + } +} + +func TestParseEsloggerLogForkChaining(t *testing.T) { + home, _ := os.UserHomeDir() + + // Test deep fork chains: 100 -> 101 -> 102 -> 103 + lines := []string{ + makeEsloggerLine("fork", 11, 100, map[string]interface{}{ + "child": map[string]interface{}{ + "audit_token": map[string]interface{}{"pid": 101}, + "executable": map[string]interface{}{"path": "/bin/sh", "path_truncated": false}, + "ppid": 100, + }, + }), + makeEsloggerLine("fork", 11, 101, map[string]interface{}{ + "child": map[string]interface{}{ + "audit_token": map[string]interface{}{"pid": 102}, + "executable": map[string]interface{}{"path": "/usr/bin/node", "path_truncated": false}, + "ppid": 101, + }, + }), + makeEsloggerLine("fork", 11, 102, map[string]interface{}{ + "child": map[string]interface{}{ + "audit_token": map[string]interface{}{"pid": 103}, + "executable": map[string]interface{}{"path": "/usr/bin/ruby", "path_truncated": false}, + "ppid": 102, + }, + }), + // Write from the deepest child + makeEsloggerLine("write", 33, 103, map[string]interface{}{ + "target": map[string]interface{}{"path": filepath.Join(home, ".cache/app/deep.log"), "path_truncated": false}, + }), + } + + logContent := strings.Join(lines, "\n") + logFile := filepath.Join(t.TempDir(), "eslogger.log") + if err := os.WriteFile(logFile, []byte(logContent), 0o600); err != nil { + t.Fatal(err) + } + + result, err := ParseEsloggerLog(logFile, 100, false) + 
if err != nil { + t.Fatalf("ParseEsloggerLog() error: %v", err) + } + + // The deep child's write should be included + found := false + for _, p := range result.WritePaths { + if strings.Contains(p, "deep.log") { + found = true + break + } + } + if !found { + t.Errorf("WritePaths should include deep child write, got: %v", result.WritePaths) + } +} + +func TestShouldFilterPathMacOS(t *testing.T) { + home := "/Users/testuser" + tests := []struct { + path string + expected bool + }{ + {"/dev/null", true}, + {"/private/var/run/syslog", true}, + {"/private/var/db/something", true}, + {"/private/var/folders/xx/yy", true}, + {"/System/Library/Frameworks/foo", true}, + {"/Library/Preferences/com.apple.foo", true}, + {"/usr/lib/libSystem.B.dylib", true}, + {"/usr/share/zoneinfo/UTC", true}, + {"/private/etc/hosts", true}, + {"/tmp/somefile", true}, + {"/private/tmp/somefile", true}, + {"/usr/local/lib/libfoo.dylib", true}, // .dylib + {"/other/user/file", true}, // outside home + {"/Users/testuser", true}, // exact home match + {"", true}, // empty + {"relative/path", true}, // relative + {"/Users/testuser/.cache/app/db", false}, + {"/Users/testuser/project/main.go", false}, + {"/Users/testuser/.config/app/conf.json", false}, + {"/tmp/greywall-eslogger-abc.log", true}, // greywall infrastructure + {"/Users/testuser/.antigen/bundles/rupa/z/zig", true}, // shell infra + {"/Users/testuser/.oh-my-zsh/plugins/git/git.plugin.zsh", true}, // shell infra + {"/Users/testuser/.pyenv/shims/ruby", true}, // shell infra + {"/Users/testuser/.bun/bin/node", true}, // shell infra + {"/Users/testuser/.local/bin/rg", true}, // shell infra + } + + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + got := shouldFilterPathMacOS(tt.path, home) + if got != tt.expected { + t.Errorf("shouldFilterPathMacOS(%q, %q) = %v, want %v", tt.path, home, got, tt.expected) + } + }) + } +} + +func TestCheckLearningAvailable(t *testing.T) { + err := CheckLearningAvailable() + if err != nil { + 
t.Logf("learning not available (expected when daemon not running): %v", err) + } +} + +func TestParseEsloggerLogEmpty(t *testing.T) { + logFile := filepath.Join(t.TempDir(), "empty.log") + if err := os.WriteFile(logFile, []byte(""), 0o600); err != nil { + t.Fatal(err) + } + + result, err := ParseEsloggerLog(logFile, 100, false) + if err != nil { + t.Fatalf("ParseEsloggerLog() error: %v", err) + } + + if len(result.WritePaths) != 0 { + t.Errorf("expected 0 write paths, got %d", len(result.WritePaths)) + } + if len(result.ReadPaths) != 0 { + t.Errorf("expected 0 read paths, got %d", len(result.ReadPaths)) + } +} + +func TestParseEsloggerLogMalformedJSON(t *testing.T) { + lines := []string{ + "not valid json at all", + "{partial json", + makeEsloggerLine("write", 33, 100, map[string]interface{}{ + "target": map[string]interface{}{"path": "/Users/test/.cache/app/good.txt", "path_truncated": false}, + }), + } + + logContent := strings.Join(lines, "\n") + logFile := filepath.Join(t.TempDir(), "malformed.log") + if err := os.WriteFile(logFile, []byte(logContent), 0o600); err != nil { + t.Fatal(err) + } + + // Should not error — malformed lines are skipped + result, err := ParseEsloggerLog(logFile, 100, false) + if err != nil { + t.Fatalf("ParseEsloggerLog() error: %v", err) + } + _ = result +} + +func TestScanForkEvents(t *testing.T) { + lines := []string{ + makeEsloggerLine("fork", 11, 100, map[string]interface{}{ + "child": map[string]interface{}{ + "audit_token": map[string]interface{}{"pid": 101}, + "executable": map[string]interface{}{"path": "/bin/sh", "path_truncated": false}, + "ppid": 100, + }, + }), + makeEsloggerLine("write", 33, 100, map[string]interface{}{ + "target": map[string]interface{}{"path": "/Users/test/file.txt", "path_truncated": false}, + }), + makeEsloggerLine("fork", 11, 101, map[string]interface{}{ + "child": map[string]interface{}{ + "audit_token": map[string]interface{}{"pid": 102}, + "executable": map[string]interface{}{"path": 
"/usr/bin/node", "path_truncated": false}, + "ppid": 101, + }, + }), + } + + logContent := strings.Join(lines, "\n") + logFile := filepath.Join(t.TempDir(), "forks.log") + if err := os.WriteFile(logFile, []byte(logContent), 0o600); err != nil { + t.Fatal(err) + } + + forks, err := scanForkEvents(logFile) + if err != nil { + t.Fatalf("scanForkEvents() error: %v", err) + } + + if len(forks) != 2 { + t.Fatalf("expected 2 fork records, got %d", len(forks)) + } + + expected := []forkRecord{ + {parentPID: 100, childPID: 101}, + {parentPID: 101, childPID: 102}, + } + for i, f := range forks { + if f.parentPID != expected[i].parentPID || f.childPID != expected[i].childPID { + t.Errorf("fork[%d] = {parent:%d, child:%d}, want {parent:%d, child:%d}", + i, f.parentPID, f.childPID, expected[i].parentPID, expected[i].childPID) + } + } +} + +func TestFwriteFlag(t *testing.T) { + if fwriteFlag != 0x0002 { + t.Errorf("fwriteFlag = 0x%04x, want 0x0002", fwriteFlag) + } + + tests := []struct { + name string + fflag int + isWrite bool + }{ + {"FREAD only", 0x0001, false}, + {"FWRITE only", 0x0002, true}, + {"FREAD|FWRITE", 0x0003, true}, + {"FREAD|FWRITE|O_CREAT", 0x0203, true}, + {"zero", 0x0000, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.fflag&fwriteFlag != 0 + if got != tt.isWrite { + t.Errorf("fflag 0x%04x & FWRITE = %v, want %v", tt.fflag, got, tt.isWrite) + } + }) + } +} + +func TestParseEsloggerLogLink(t *testing.T) { + home, _ := os.UserHomeDir() + + lines := []string{ + makeEsloggerLine("link", 42, 100, map[string]interface{}{ + "source": map[string]interface{}{"path": filepath.Join(home, ".cache/app/source.txt"), "path_truncated": false}, + "target_dir": map[string]interface{}{"path": filepath.Join(home, ".cache/app/links"), "path_truncated": false}, + }), + } + + logContent := strings.Join(lines, "\n") + logFile := filepath.Join(t.TempDir(), "link.log") + if err := os.WriteFile(logFile, []byte(logContent), 0o600); err != nil 
{ + t.Fatal(err) + } + + result, err := ParseEsloggerLog(logFile, 100, false) + if err != nil { + t.Fatalf("ParseEsloggerLog() error: %v", err) + } + + expectedWrites := map[string]bool{ + filepath.Join(home, ".cache/app/source.txt"): false, + filepath.Join(home, ".cache/app/links"): false, + } + for _, p := range result.WritePaths { + if _, ok := expectedWrites[p]; ok { + expectedWrites[p] = true + } + } + for p, found := range expectedWrites { + if !found { + t.Errorf("WritePaths missing expected: %q, got: %v", p, result.WritePaths) + } + } +} + +func TestParseEsloggerLogDebugOutput(t *testing.T) { + home, _ := os.UserHomeDir() + + lines := []string{ + makeEsloggerLine("write", 33, 100, map[string]interface{}{ + "target": map[string]interface{}{"path": filepath.Join(home, ".cache/app/test.txt"), "path_truncated": false}, + }), + } + + logContent := strings.Join(lines, "\n") + logFile := filepath.Join(t.TempDir(), "debug.log") + if err := os.WriteFile(logFile, []byte(logContent), 0o600); err != nil { + t.Fatal(err) + } + + // Just verify debug=true doesn't panic + _, err := ParseEsloggerLog(logFile, 100, true) + if err != nil { + t.Fatalf("ParseEsloggerLog() with debug=true error: %v", err) + } +} diff --git a/internal/sandbox/learning_linux.go b/internal/sandbox/learning_linux.go index 4dcfa09..4ee7b42 100644 --- a/internal/sandbox/learning_linux.go +++ b/internal/sandbox/learning_linux.go @@ -20,14 +20,8 @@ var straceSyscallRegex = regexp.MustCompile( // openatWriteFlags matches O_WRONLY, O_RDWR, O_CREAT, O_TRUNC, O_APPEND flags in strace output. var openatWriteFlags = regexp.MustCompile(`O_(?:WRONLY|RDWR|CREAT|TRUNC|APPEND)`) -// StraceResult holds parsed read and write paths from an strace log. -type StraceResult struct { - WritePaths []string - ReadPaths []string -} - -// CheckStraceAvailable verifies that strace is installed and accessible. -func CheckStraceAvailable() error { +// CheckLearningAvailable verifies that strace is installed and accessible. 
+func CheckLearningAvailable() error { _, err := exec.LookPath("strace") if err != nil { return fmt.Errorf("strace is required for learning mode but not found: %w\n\nInstall it with: sudo apt install strace (Debian/Ubuntu) or sudo pacman -S strace (Arch)", err) @@ -36,7 +30,7 @@ func CheckStraceAvailable() error { } // ParseStraceLog reads an strace output file and extracts unique read and write paths. -func ParseStraceLog(logPath string, debug bool) (*StraceResult, error) { +func ParseStraceLog(logPath string, debug bool) (*TraceResult, error) { f, err := os.Open(logPath) //nolint:gosec // user-controlled path from temp file - intentional if err != nil { return nil, fmt.Errorf("failed to open strace log: %w", err) @@ -46,7 +40,7 @@ func ParseStraceLog(logPath string, debug bool) (*StraceResult, error) { home, _ := os.UserHomeDir() seenWrite := make(map[string]bool) seenRead := make(map[string]bool) - result := &StraceResult{} + result := &TraceResult{} scanner := bufio.NewScanner(f) // Increase buffer for long strace lines diff --git a/internal/sandbox/learning_linux_test.go b/internal/sandbox/learning_linux_test.go index d230fb8..8790149 100644 --- a/internal/sandbox/learning_linux_test.go +++ b/internal/sandbox/learning_linux_test.go @@ -233,10 +233,10 @@ func TestExtractReadPath(t *testing.T) { } } -func TestCheckStraceAvailable(t *testing.T) { +func TestCheckLearningAvailable(t *testing.T) { // This test just verifies the function doesn't panic. // The result depends on whether strace is installed on the test system. 
- err := CheckStraceAvailable() + err := CheckLearningAvailable() if err != nil { t.Logf("strace not available (expected in some CI environments): %v", err) } diff --git a/internal/sandbox/learning_stub.go b/internal/sandbox/learning_stub.go index c07acd0..80ca1f6 100644 --- a/internal/sandbox/learning_stub.go +++ b/internal/sandbox/learning_stub.go @@ -1,21 +1,10 @@ -//go:build !linux +//go:build !linux && !darwin package sandbox import "fmt" -// StraceResult holds parsed read and write paths from an strace log. -type StraceResult struct { - WritePaths []string - ReadPaths []string -} - -// CheckStraceAvailable returns an error on non-Linux platforms. -func CheckStraceAvailable() error { - return fmt.Errorf("learning mode is only available on Linux (requires strace and bubblewrap)") -} - -// ParseStraceLog returns an error on non-Linux platforms. -func ParseStraceLog(logPath string, debug bool) (*StraceResult, error) { - return nil, fmt.Errorf("strace log parsing is only available on Linux") +// CheckLearningAvailable returns an error on unsupported platforms. 
+func CheckLearningAvailable() error { + return fmt.Errorf("learning mode is only available on Linux (requires strace) and macOS (requires eslogger + daemon)") } diff --git a/internal/sandbox/learning_test.go b/internal/sandbox/learning_test.go index 6c254ac..c111396 100644 --- a/internal/sandbox/learning_test.go +++ b/internal/sandbox/learning_test.go @@ -421,22 +421,21 @@ func TestGenerateLearnedTemplate(t *testing.T) { tmpDir := t.TempDir() t.Setenv("XDG_CONFIG_HOME", tmpDir) - // Create a fake strace log home, _ := os.UserHomeDir() - logContent := strings.Join([]string{ - `12345 openat(AT_FDCWD, "` + filepath.Join(home, ".cache/testapp/db.sqlite") + `", O_WRONLY|O_CREAT, 0644) = 3`, - `12345 openat(AT_FDCWD, "` + filepath.Join(home, ".cache/testapp/version") + `", O_WRONLY|O_CREAT, 0644) = 3`, - `12345 mkdirat(AT_FDCWD, "` + filepath.Join(home, ".config/testapp") + `", 0755) = 0`, - `12345 openat(AT_FDCWD, "/tmp/somefile", O_WRONLY|O_CREAT, 0644) = 3`, - `12345 openat(AT_FDCWD, "/proc/self/maps", O_RDONLY) = 3`, - }, "\n") - logFile := filepath.Join(tmpDir, "strace.log") - if err := os.WriteFile(logFile, []byte(logContent), 0o600); err != nil { - t.Fatal(err) + // Build a TraceResult directly (platform-independent test) + result := &TraceResult{ + WritePaths: []string{ + filepath.Join(home, ".cache/testapp/db.sqlite"), + filepath.Join(home, ".cache/testapp/version"), + filepath.Join(home, ".config/testapp"), + }, + ReadPaths: []string{ + filepath.Join(home, ".config/testapp/conf.json"), + }, } - templatePath, err := GenerateLearnedTemplate(logFile, "testapp", false) + templatePath, err := GenerateLearnedTemplate(result, "testapp", false) if err != nil { t.Fatalf("GenerateLearnedTemplate() error: %v", err) } diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index 8000fec..5f28643 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -30,12 +30,16 @@ type Manager struct { debug bool monitor bool initialized bool - 
learning bool // learning mode: permissive sandbox with strace - straceLogPath string // host-side temp file for strace output + learning bool // learning mode: permissive sandbox with strace/eslogger + straceLogPath string // host-side temp file for strace output (Linux) commandName string // name of the command being learned // macOS daemon session fields daemonClient *daemon.Client daemonSession *DaemonSession + // macOS learning mode fields + learningID string // daemon learning session ID + learningLog string // eslogger log file path + learningRootPID int // root PID of the command being learned } // NewManager creates a new sandbox manager. @@ -77,6 +81,28 @@ func (m *Manager) Initialize() error { return fmt.Errorf("sandbox is not supported on platform: %s", platform.Detect()) } + // On macOS in learning mode, use the daemon for eslogger tracing only. + // No TUN/pf/DNS session needed — the command runs unsandboxed. + if platform.Detect() == platform.MacOS && m.learning { + client := daemon.NewClient(daemon.DefaultSocketPath, m.debug) + if !client.IsRunning() { + return fmt.Errorf("greywall daemon is not running (required for macOS learning mode)\n\n" + + " Install and start: sudo greywall daemon install\n" + + " Check status: greywall daemon status") + } + m.logDebug("Daemon is running, requesting learning session") + resp, err := client.StartLearning() + if err != nil { + return fmt.Errorf("failed to start learning session: %w", err) + } + m.daemonClient = client + m.learningID = resp.LearningID + m.learningLog = resp.LearningLog + m.logDebug("Learning session started: id=%s log=%s", m.learningID, m.learningLog) + m.initialized = true + return nil + } + // On macOS, the daemon is required for transparent proxying. // Without it, env-var proxying is unreliable (only works for tools that // honor HTTP_PROXY) and gives users a false sense of security. 
@@ -187,6 +213,10 @@ func (m *Manager) WrapCommand(command string) (string, error) { plat := platform.Detect() switch plat { case platform.MacOS: + if m.learning { + // In learning mode, run command directly (no sandbox-exec wrapping) + return command, nil + } return WrapCommandMacOS(m.config, command, m.exposedPorts, m.daemonSession, m.debug) case platform.Linux: if m.learning { @@ -220,26 +250,30 @@ func (m *Manager) wrapCommandLearning(command string) (string, error) { }) } -// GenerateLearnedTemplate generates a config template from the strace log collected during learning. +// GenerateLearnedTemplate generates a config template from the trace log collected during learning. +// Platform-specific implementation in manager_linux.go / manager_darwin.go. func (m *Manager) GenerateLearnedTemplate(cmdName string) (string, error) { - if m.straceLogPath == "" { - return "", fmt.Errorf("no strace log available (was learning mode enabled?)") - } + return m.generateLearnedTemplatePlatform(cmdName) +} - templatePath, err := GenerateLearnedTemplate(m.straceLogPath, cmdName, m.debug) - if err != nil { - return "", err - } - - // Clean up strace log since we've processed it - _ = os.Remove(m.straceLogPath) - m.straceLogPath = "" - - return templatePath, nil +// SetLearningRootPID records the root PID of the command being learned. +// The eslogger log parser uses this to build the process tree from fork events. +func (m *Manager) SetLearningRootPID(pid int) { + m.learningRootPID = pid + m.logDebug("Set learning root PID: %d", pid) } // Cleanup stops the proxies and cleans up resources. func (m *Manager) Cleanup() { + // Stop macOS learning session if active + if m.daemonClient != nil && m.learningID != "" { + m.logDebug("Stopping learning session %s", m.learningID) + if err := m.daemonClient.StopLearning(m.learningID); err != nil { + m.logDebug("Warning: failed to stop learning session: %v", err) + } + m.learningID = "" + } + // Destroy macOS daemon session if active. 
if m.daemonClient != nil && m.daemonSession != nil { m.logDebug("Destroying daemon session %s", m.daemonSession.SessionID) @@ -247,9 +281,11 @@ func (m *Manager) Cleanup() { m.logDebug("Warning: failed to destroy daemon session: %v", err) } m.daemonSession = nil - m.daemonClient = nil } + // Clear daemon client after all daemon interactions + m.daemonClient = nil + if m.reverseBridge != nil { m.reverseBridge.Cleanup() } @@ -266,6 +302,10 @@ func (m *Manager) Cleanup() { _ = os.Remove(m.straceLogPath) m.straceLogPath = "" } + if m.learningLog != "" { + _ = os.Remove(m.learningLog) + m.learningLog = "" + } m.logDebug("Sandbox manager cleaned up") } diff --git a/internal/sandbox/manager_darwin.go b/internal/sandbox/manager_darwin.go new file mode 100644 index 0000000..65aca1d --- /dev/null +++ b/internal/sandbox/manager_darwin.go @@ -0,0 +1,42 @@ +//go:build darwin + +package sandbox + +import ( + "fmt" + "os" +) + +// generateLearnedTemplatePlatform stops the daemon eslogger session, +// parses the eslogger log with PID-based process tree filtering, +// and generates a template (macOS). 
+func (m *Manager) generateLearnedTemplatePlatform(cmdName string) (string, error) {
+	if m.learningLog == "" {
+		return "", fmt.Errorf("no eslogger log available (was learning mode enabled?)")
+	}
+
+	// Stop the daemon learning session before parsing; a stop failure is only logged, never fatal.
+	if m.daemonClient != nil && m.learningID != "" {
+		if err := m.daemonClient.StopLearning(m.learningID); err != nil {
+			m.logDebug("Warning: failed to stop learning session: %v", err)
+		} else {
+			m.learningID = "" // stopped: keeps Cleanup() from stopping it again if an error return follows
+		}
+	}
+
+	// Parse eslogger log with root PID for process tree tracking
+	result, err := ParseEsloggerLog(m.learningLog, m.learningRootPID, m.debug)
+	if err != nil {
+		return "", fmt.Errorf("failed to parse eslogger log: %w", err)
+	}
+
+	templatePath, err := GenerateLearnedTemplate(result, cmdName, m.debug)
+	if err != nil {
+		return "", err
+	}
+
+	_ = os.Remove(m.learningLog) // best-effort removal of the consumed eslogger log
+	m.learningLog = ""
+	m.learningID = ""
+	return templatePath, nil
+}
diff --git a/internal/sandbox/manager_linux.go b/internal/sandbox/manager_linux.go
new file mode 100644
index 0000000..81414a2
--- /dev/null
+++ b/internal/sandbox/manager_linux.go
@@ -0,0 +1,31 @@
+//go:build linux
+
+package sandbox
+
+import (
+	"fmt"
+	"os"
+)
+
+// generateLearnedTemplatePlatform parses the strace log and generates a template (Linux).
+func (m *Manager) generateLearnedTemplatePlatform(cmdName string) (string, error) {
+	logPath := m.straceLogPath
+	if logPath == "" {
+		return "", fmt.Errorf("no strace log available (was learning mode enabled?)")
+	}
+
+	trace, parseErr := ParseStraceLog(logPath, m.debug)
+	if parseErr != nil {
+		return "", fmt.Errorf("failed to parse strace log: %w", parseErr)
+	}
+
+	generated, genErr := GenerateLearnedTemplate(trace, cmdName, m.debug)
+	if genErr != nil {
+		return "", genErr
+	}
+
+	// The log has been consumed: delete it (best effort, error ignored) and forget its path.
+	_ = os.Remove(logPath)
+	m.straceLogPath = ""
+	return generated, nil
+}
diff --git a/internal/sandbox/manager_stub.go b/internal/sandbox/manager_stub.go
new file mode 100644
index 0000000..0302946
--- /dev/null
+++ b/internal/sandbox/manager_stub.go
@@ -0,0 +1,10 @@
+//go:build !linux && !darwin
+
+package sandbox
+
+import "fmt"
+
+// generateLearnedTemplatePlatform always fails: this build targets neither Linux nor macOS.
+func (m *Manager) generateLearnedTemplatePlatform(cmdName string) (string, error) {
+	return "", fmt.Errorf("learning mode is not supported on this platform")
+}