fix: resolve ENXIO error and skip template on failed learning runs
Some checks failed
Build and test / Build (push) Successful in 12s
Build and test / Test (macOS) (push) Has been cancelled
Build and test / Lint (push) Failing after 1m23s
Build and test / Test (Linux) (push) Failing after 46s

Skip --new-session in learning mode: it calls setsid(), which detaches the
sandbox from its controlling terminal, so interactive programs that open
/dev/tty fail with ENXIO. Also run strace in the foreground to preserve
terminal stdin. Finally, skip template generation when the traced command
exits non-zero, since the strace trace would likely be incomplete.
This commit is contained in:
2026-02-11 18:38:26 -06:00
parent 7e85083c38
commit a470f86ee4
2 changed files with 33 additions and 42 deletions

View File

@@ -355,18 +355,24 @@ func runCommand(cmd *cobra.Command, args []string) error {
}()
// Wait for command to finish
commandFailed := false
if err := execCmd.Wait(); err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
// Set exit code but don't os.Exit() here - let deferred cleanup run
exitCode = exitErr.ExitCode()
// Continue to template generation even if command exited non-zero
commandFailed = true
} else {
return fmt.Errorf("command failed: %w", err)
}
}
// Generate learned template after command completes
// Generate learned template after command completes successfully.
// Skip template generation if the command failed — the strace trace
// is likely incomplete and would produce an unreliable template.
if learning && manager.IsLearning() {
if commandFailed {
fmt.Fprintf(os.Stderr, "[greywall] Skipping template generation: command exited with code %d\n", exitCode)
} else {
fmt.Fprintf(os.Stderr, "[greywall] Analyzing filesystem access patterns...\n")
templatePath, genErr := manager.GenerateLearnedTemplate(cmdName)
if genErr != nil {
@@ -376,6 +382,7 @@ func runCommand(cmd *cobra.Command, args []string) error {
fmt.Fprintf(os.Stderr, "[greywall] Next run will auto-load this template.\n")
}
}
}
return nil
}

View File

@@ -422,9 +422,15 @@ func WrapCommandLinuxWithOptions(cfg *config.Config, command string, proxyBridge
// Build bwrap args with filesystem restrictions
bwrapArgs := []string{
"bwrap",
"--new-session",
"--die-with-parent",
}
// --new-session calls setsid() which detaches from the controlling terminal.
// Skip it in learning mode so interactive programs (TUIs, prompts) can
// read from /dev/tty. Learning mode already relaxes security constraints
// (no seccomp, no landlock), so skipping new-session is acceptable.
if !opts.Learning {
bwrapArgs = append(bwrapArgs, "--new-session")
}
bwrapArgs = append(bwrapArgs, "--die-with-parent")
// Always use --unshare-net when available (network namespace isolation)
// Inside the namespace, tun2socks will provide transparent proxy access
@@ -886,43 +892,21 @@ sleep 0.3
`)
// In learning mode, wrap the command with strace to trace syscalls.
// strace -f follows forked children, which means it hangs if the app spawns
// long-lived child processes (LSP servers, file watchers, etc.).
// To handle this, we run strace in the background and spawn a monitor that
// detects when the main command (strace's direct child) exits by polling
// /proc/STRACE_PID/task/STRACE_PID/children, then kills strace.
// Run strace in the foreground so the traced command retains terminal
// access (stdin, /dev/tty) for interactive programs like TUIs.
// If the app spawns long-lived child processes, strace -f may hang
// after the main command exits; the user can Ctrl+C to stop it.
// Once strace returns, the script sends SIGTERM to every remaining
// process (kill -TERM -1) to clean up leftover background daemons
// (LSP servers, watchers) before exiting with strace's exit status.
if opts.Learning && opts.StraceLogPath != "" {
innerScript.WriteString(fmt.Sprintf(`# Learning mode: trace filesystem access
strace -f -qq -I2 -e trace=openat,open,creat,mkdir,mkdirat,unlinkat,renameat,renameat2,symlinkat,linkat -o %s -- %s &
GREYWALL_STRACE_PID=$!
# Monitor: detect when the main command exits, then kill strace.
# strace's direct child is the command. When it exits, the children file
# becomes empty (grandchildren are reparented to init in the PID namespace).
(
sleep 1
while kill -0 $GREYWALL_STRACE_PID 2>/dev/null; do
CHILDREN=$(cat /proc/$GREYWALL_STRACE_PID/task/$GREYWALL_STRACE_PID/children 2>/dev/null)
if [ -z "$CHILDREN" ]; then
sleep 0.5
kill $GREYWALL_STRACE_PID 2>/dev/null
break
fi
sleep 1
done
) &
GREYWALL_MONITOR_PID=$!
trap 'kill -INT $GREYWALL_STRACE_PID 2>/dev/null' INT
trap 'kill -TERM $GREYWALL_STRACE_PID 2>/dev/null' TERM
wait $GREYWALL_STRACE_PID 2>/dev/null
kill $GREYWALL_MONITOR_PID 2>/dev/null
wait $GREYWALL_MONITOR_PID 2>/dev/null
innerScript.WriteString(fmt.Sprintf(`# Learning mode: trace filesystem access (foreground for terminal access)
strace -f -qq -I2 -e trace=openat,open,creat,mkdir,mkdirat,unlinkat,renameat,renameat2,symlinkat,linkat -o %s -- %s
GREYWALL_STRACE_EXIT=$?
# Kill any orphaned child processes (LSP servers, file watchers, etc.)
# that were spawned by the traced command and reparented to PID 1.
# Without this, greywall hangs until they exit (they hold pipe FDs open).
kill -TERM -1 2>/dev/null
sleep 0.1
exit $GREYWALL_STRACE_EXIT
`,
ShellQuoteSingle(opts.StraceLogPath), command,
))