// zero-pi — filesystem-wide scan guard, pure-logic module.
//
// An SDD subagent (most often `zero-veredicto`) sometimes tries to *rediscover*
// where the code lives by running an unbounded `find` from the filesystem root:
//
//   find / -maxdepth 12 -type d -iname "*admin-data-keys*"
//
// On Windows this does not merely run slow — it hangs effectively forever.
// Git Bash's `/` traversal reaches the user tree under OneDrive, and `find`
// blocks indefinitely forcing the hydration of every cloud-only placeholder it
// touches. A real run was caught wedged on exactly this command for 6+ hours
// with no output, stalling the whole pipeline. The phase prompt already tells
// the agent not to do full-tree scans, but a prompt is guidance, not
// enforcement — a model under pressure ignored it.
//
// This module is the enforcement half: a pure, dependency-free classifier that
// decides whether a shell command contains a filesystem-wide scan rooted at a
// top-level location. The pi wiring (the `tool_call` handler that reads the
// command and returns `{ block, reason }`) lives in `scan-guard-extension.ts`.
// No pi imports, no filesystem, no side effects — testable with plain strings.
//
// Design intent — block the dangerous class, never legitimate scoped work:
//   • Only `find` / `grep -r` / `rg` *rooted at a filesystem root* are blocked
//     (`/`, a bare drive mount like `/c`, `C:\`, `~`, `$HOME`, …).
//   • A scan scoped to a real subtree (`find /e/zero/.sdd -name …`,
//     `rg foo src/`) is always allowed — the root token must be the *entire*
//     search path, not a prefix of it.
//   • `.` / `./` (the cwd, i.e. the code root) is always allowed.

/**
 * The guard's decision for one shell command.
 *
 * - `block: false` — allow the command (the common case).
 * - `block: true`  — refuse it; `reason` explains why and how to scope it, and
 *   the wiring surfaces it back to the model as the blocked-tool reason.
 */
export interface ScanGuardDecision {
  block: boolean;
  reason?: string;
}

/** Shell tool names whose `command` this guard inspects. */
export const GUARDED_TOOLS: ReadonlySet<string> = new Set(["bash", "shell", "sh"]);

// ---------------------------------------------------------------------------
// Root-path detection
// ---------------------------------------------------------------------------

/**
 * Whether `token` denotes a filesystem *root* — a location so broad that a
 * recursive scan of it traverses the whole machine (and, on Windows, hangs on
 * OneDrive placeholder hydration).
 *
 * One pair of surrounding quotes and any run of trailing `/` or `\` are
 * ignored before matching, so `"/"`, `~/`, `C:\` and `$HOME//` all count.
 *
 * Matches:
 *   • POSIX root            `/` (also `//`, `///`, `\`)
 *   • Git Bash drive mount  `/c`, `/d`, … (a single letter, nothing deeper)
 *   • Windows drive root    `C:`, `C:\`, `c:/`
 *   • Home                  `~`, `$HOME`, `${HOME}`
 *   • Windows home vars     `%USERPROFILE%`, `%HOMEPATH%`, `%HOMEDRIVE%`,
 *                           `%HOME%` (case-insensitive)
 *
 * Crucially it does NOT match a *scoped* path that merely starts at a root,
 * e.g. `/e/zero/.sdd` or `C:\Users\gonza\proj` — those are bounded and allowed.
 *
 * @param token One whitespace-delimited shell token, quotes still attached.
 * @returns `true` when the token as a whole names a filesystem root.
 */
export function isRootPath(token: string): boolean {
  // Drop the quotes a shell would remove (one at each end at most).
  const unquoted = token.trim().replace(/^['"]|['"]$/g, "");

  // A token made only of separators (`/`, `//`, `\`) is the root itself; it
  // must be caught before separators are stripped or it would become "".
  if (/^[/\\]+$/.test(unquoted)) return true;

  // Trailing separators never change which directory a path names.
  const path = unquoted.replace(/[/\\]+$/, "");

  if (path === "~") return true;
  // Git Bash single-letter drive mount: /c, /d, … but not /c/foo.
  if (/^\/[a-zA-Z]$/.test(path)) return true;
  // Windows drive root: C:, C:\, c:/ — but not C:\Users.
  if (/^[a-zA-Z]:$/.test(path)) return true;
  // Home-directory environment variables, with or without braces.
  if (path === "$HOME" || path === "${HOME}") return true;
  return /^%(USERPROFILE|HOMEPATH|HOMEDRIVE|HOME)%$/i.test(path);
}

// ---------------------------------------------------------------------------
// Per-segment classification
// ---------------------------------------------------------------------------

/** Split a shell line into segments on `;`, `&&`, `||`, `|`, and newlines. */
export function splitSegments(command: string): string[] {
  return command
    .split(/\n|;|&&|\|\||\|/)
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
}

/** Tokenize one segment on whitespace (quotes kept; good enough for path ops). */
function tokenize(segment: string): string[] {
  return segment.split(/\s+/).filter((t) => t.length > 0);
}

/**
 * Whether `token` invokes the program `name`, either bare (`find`) or through
 * a path whose last component is `name` (`/usr/bin/find`, `C:\tools\rg`).
 */
function isCommandToken(token: string, name: string): boolean {
  return token === name || token.endsWith(`/${name}`) || token.endsWith(`\\${name}`);
}

/**
 * GNU find options that precede the starting points and take no separate
 * argument: `-H`, `-L`, `-P`, `-Olevel`. (`-D debugopts` consumes the next
 * token and is handled separately.)
 */
const FIND_GLOBAL_OPTION = /^-(?:[HLP]|O\d*)$/;

/**
 * Whether a single command segment is a root-rooted `find`.
 *
 * `find`'s path operands are the tokens after `find` (and after its leading
 * global options `-H`/`-L`/`-P`/`-D debugopts`/`-Olevel`) and before the first
 * expression primary (a token starting with `-`, or `(`, `)`, `!`). If any
 * path operand is a root path, the scan is unbounded.
 */
function isRootedFind(tokens: string[]): boolean {
  // Match the argv0 anywhere in the segment so prefixes such as `sudo` or
  // `time`, and a full path like `/usr/bin/find`, are all covered.
  const idx = tokens.findIndex((t) => isCommandToken(t, "find"));
  if (idx === -1) return false;

  let inGlobalOptions = true;
  let skipNext = false;
  for (const tok of tokens.slice(idx + 1)) {
    if (skipNext) {
      // This token is the argument of a preceding `-D`.
      skipNext = false;
      continue;
    }
    if (inGlobalOptions) {
      // Without this, `find -L / -name x` would stop at `-L` and be allowed.
      if (FIND_GLOBAL_OPTION.test(tok)) continue;
      if (tok === "-D") {
        skipNext = true;
        continue;
      }
      inGlobalOptions = false;
    }
    if (tok.startsWith("-") || tok === "(" || tok === "!" || tok === ")") break; // expression begins
    if (isRootPath(tok)) return true;
  }
  return false;
}

/**
 * Whether `tokens` invoke `grep` recursively: `-r`/`-R` (including combined
 * short flags like `-rn`), `--recursive`, `--dereference-recursive`, or
 * `--directories=recurse`.
 */
function isRecursiveGrep(tokens: string[]): boolean {
  const idx = tokens.findIndex((t) => isCommandToken(t, "grep"));
  if (idx === -1) return false;
  return tokens
    .slice(idx + 1)
    .some(
      (t) =>
        t === "--recursive" ||
        t === "--dereference-recursive" ||
        t === "--directories=recurse" ||
        /^-[a-zA-Z]*[rR]/.test(t),
    );
}

/**
 * Whether a `grep`/`rg` argument means the pattern is (or may be) supplied by
 * a flag rather than positionally — or that there is no pattern at all
 * (`rg --files`). Deliberately over-matches: a hit only disables the pattern
 * exemption, which is the conservative direction for a safety guard.
 */
function mayCarryPattern(arg: string): boolean {
  if (arg.startsWith("--")) return /^--(?:regexp|file)/.test(arg);
  // Any short-flag cluster containing `e` or `f`: -e, -f, -re, -efoo, …
  return /^-.*[ef]/.test(arg);
}

/**
 * Whether a `grep -r` / `rg` segment targets a root path.
 *
 * ripgrep recurses by default, so any root target is dangerous; plain `grep`
 * is only dangerous with a recursive flag. The command is the first token that
 * names `rg` or `grep`, so a later operand that happens to be spelled `rg`
 * (as in `grep rg /`) does not turn a non-recursive grep into a scan.
 *
 * Every non-flag token that is a root path counts as a whole-machine target,
 * except the positional search PATTERN — the first non-flag operand after the
 * command — so searching *for* `/` or `~` inside a scoped directory
 * (`rg "~" src/`) stays allowed. The exemption is dropped whenever the pattern
 * may come from a flag (`-e`, `-f`, `--regexp`, `--file`) or no pattern exists
 * (`--files`), because then the first operand is a path.
 */
function isRootedRecursiveSearch(tokens: string[]): boolean {
  const command = tokens.find((t) => isCommandToken(t, "rg") || isCommandToken(t, "grep"));
  if (command === undefined) return false;
  if (isCommandToken(command, "grep") && !isRecursiveGrep(tokens)) return false;

  const commandIdx = tokens.indexOf(command);
  const args = tokens.slice(commandIdx + 1);

  let patternIdx = -1;
  if (!args.some(mayCarryPattern)) {
    const offset = args.findIndex((t) => !t.startsWith("-"));
    if (offset !== -1) patternIdx = commandIdx + 1 + offset;
  }

  // Any other bare (non-flag) operand that is a root path is a whole-machine scan.
  return tokens.some((t, i) => i !== patternIdx && !t.startsWith("-") && isRootPath(t));
}

/** Whether one already-split segment is a scan this guard blocks. */
function isBlockedSegment(segment: string): boolean {
  const tokens = tokenize(segment);
  return isRootedFind(tokens) || isRootedRecursiveSearch(tokens);
}

// ---------------------------------------------------------------------------
// Public classifier
// ---------------------------------------------------------------------------

/**
 * The reason returned to the model when a root-rooted scan is blocked.
 *
 * @param command The full shell command line that was refused.
 * @returns A message that quotes the first offending segment (when one is
 *   found) and tells the model to scope the search to the code root.
 */
export function blockReason(command: string): string {
  const offending = splitSegments(command).find(isBlockedSegment);
  return (
    `zero scan-guard: blocked a filesystem-wide scan` +
    (offending ? ` (\`${offending}\`)` : "") +
    `. On Windows a \`find\`/\`grep -r\`/\`rg\` rooted at \`/\`, a drive root, or \`~\` ` +
    `hangs indefinitely hydrating OneDrive placeholders — it does not just run slow. ` +
    `Do not rediscover code with a full-tree scan: read the code root from the plan ` +
    `(\`## Code roots\` in design.md, or the \`Code root:\` line in tasks.md / your task input) ` +
    `and scope the search to that absolute path (e.g. \`find <code-root> -name …\`).`
  );
}

/**
 * Classify a shell command into an allow/block decision.
 *
 * Pure and total — never throws. Blocks when any `;`/`&&`/`||`/`|`/newline-
 * separated segment is a `find`, recursive `grep`, or `rg` whose search path
 * is a filesystem root (see {@link isRootPath}). A non-string or empty command
 * is allowed (the guard never invents a reason to block).
 *
 * @param command The tool call's `command` value; anything but a non-blank
 *   string is allowed through untouched.
 * @returns `{ block: false }`, or `{ block: true, reason }` for a rooted scan.
 */
export function classifyShellCommand(command: unknown): ScanGuardDecision {
  if (typeof command !== "string" || command.trim() === "") {
    return { block: false };
  }
  if (splitSegments(command).some(isBlockedSegment)) {
    return { block: true, reason: blockReason(command) };
  }
  return { block: false };
}