#!/usr/bin/env bash # # Model id convention (see AGENTS.md Hard Rule #1): # - User-facing examples use the qualified form `pi-shell-acp/` # (e.g. `pi-shell-acp/claude-sonnet-4-6`); the prefix routes to this provider # so `--provider` is redundant and is dropped in docs. # - Smoke helpers that feed `ensureBridgeSession({modelId})` directly (cancel, # model-switch) pass BARE backend ids (`claude-sonnet-4-6`, `gpt-5.4`) # because the bridge library contract is bare. Smoke helpers that invoke pi # via the CLI still pin `--provider pi-shell-acp` and can accept either # bare or qualified model, but we keep bare here to match the bridge-level # dispatch tables. # set -euo pipefail REPO_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) PROJECT_DIR_DEFAULT=$(pwd) TARGET_PROJECT_DIR=${2:-$PROJECT_DIR_DEFAULT} # npm publish identity. Scoped 2026-05-18 — bare `pi-shell-acp` was not on npm # and we adopted the same `@junghanacs` scope as the OpenClaw plugin sibling # (`@junghanacs/openclaw-pi-shell-acp`) for source-of-origin parity. This # variable documents intent; check-pack-install hardcodes the tarball name # and install path against the same scope for traceability. PACKAGE_NAME="@junghanacs/pi-shell-acp" # Runtime provider id — DO NOT change. Embedded in model strings # (`pi-shell-acp/claude-sonnet-4-6`), settings keys (`piShellAcpProvider`), # log prefixes (`[pi-shell-acp:bootstrap]`), and the `--provider pi-shell-acp` # CLI surface. Renaming this would break every consumer transcript and every # saved session anchor. PROVIDER_ID="pi-shell-acp" usage() { cat <<'EOF' Usage: ./run.sh setup [project-dir] # pnpm install + sync auth + install + smoke-all + Axis 1 gates (bridge, native async, session-messaging, sentinel) ./run.sh release-gate [project-dir] [--allow-skip-gemini] # SINGLE release gate: full static (pnpm check) + every live per-invariant gate on the claude-only floor (0.11.0; gemini/codex live tests dropped). 
TWO-TIER summary: MUST (release-blocking, owns the exit code — "green" applies here) + BEHAVIOR (advisory, non-blocking: model-in-loop autonomous MCP tool-selection — sentinel + RGG positive; S7 Bash-bypass stays hard-fail but never blocks the cut). --allow-skip-gemini accepted-but-ignored (back-compat). final cut authorization is GLG's. ./run.sh xt-tool-surface # ACP backend exclude-tools policy: -xt fail-fast per backend (declared==actual), extension exclusion honored ./run.sh smoke [project-dir] # Claude runtime smoke (backward-compatible default) ./run.sh smoke-claude [project-dir] # explicit Claude runtime smoke ./run.sh smoke-codex [project-dir] # explicit Codex runtime smoke ./run.sh smoke-gemini [project-dir] # explicit Gemini runtime smoke (requires `gemini` on PATH) ./run.sh smoke-all [project-dir] # claude-only runtime smoke (0.11.0 floor; codex/gemini via smoke-codex / smoke-gemini) ./run.sh smoke-continuity [project-dir] # strict dual-backend persisted bootstrap gate (Claude=resume, Codex=load) ./run.sh smoke-cancel [project-dir] # strict cancel/abort cleanup observability gate (Claude + Codex) ./run.sh smoke-model-switch [project-dir] # strict model-switch lock gate — same-backend + cross-backend reuse mismatch must throw ModelSwitchLockedError (issue #14) ./run.sh smoke-entwurf-resume [project-dir] # bridge-level entwurf-style continuity gate (Claude=resume, Codex=load) ./run.sh check-bridge # pi-tools-bridge direct MCP smoke + protocol/negative-path test.sh (backend tool-callability lives in smoke-async-resume + sentinel) ./run.sh check-pi-tools-bridge-boot # deterministic gate (5d-5-pre, G1a/G1b, IN pnpm check): boot start.sh under strip-types + assert v2 fence graph loads + entwurf_v2 registered/schema; tools/list only, no auth/side-effect ./run.sh check-native-async # pi-native async entwurf spawn smoke (pi -e pi-extensions/entwurf.ts) ./run.sh sentinel [args...] 
# entwurf 6-cell diagonal matrix (sync+resume × parent×target) ./run.sh session-messaging [args...] # 4-case session-messaging smoke (native/ACP cross-matrix) ./run.sh smoke-compaction-policy [--step=NN] # 0.5.0 compaction-policy verification (LIVE=1 to include backend observation steps) ./run.sh check-mcp # local deterministic check of normalizeMcpServers() — no Claude/ACP subprocess ./run.sh check-model-lock # deterministic unit test for pi-extensions/model-lock.ts (4-quadrant + edge cases, no API) ./run.sh check-shell-quote # POSIX-safety gate for shellQuote (remote SSH arg quoting in entwurf paths) — source parity + behavior matrix, no SSH ./run.sh check-entwurf-session-identity # deterministic gate for locked garden session identity & name grammar (sessionId/buildSessionName/parse/collision), no API ./run.sh check-meta-session # deterministic gate (#30 step 2): meta-record mint/serialize/parse + scanByNativeId body-authority + idempotent decideUpsert, no API ./run.sh check-meta-record-v2 # deterministic golden gate (0.11 Stage 0 step 3A): synthetic v1 fixture → normalizeMetaIdentity v2 identity golden + dual-read version fences, no API ./run.sh check-mailbox-receipt-state # deterministic gate (0.11 Stage 0 step 3B): mailbox receipt state schema + store (stamp→persist→read-back) in a temp mailbox, strict keyset, no API ./run.sh check-entwurf-capabilities # deterministic gate (0.11 Stage 0 step 3C): backend capability registry (pi/entwurf-capabilities.json) — coverage==META_BACKENDS_V2 + agrees with live META_BACKEND_DESCRIPTORS + strict keyset, no API ./run.sh check-meta-dual-read # deterministic gate (0.11 Stage 0 step 3D-1): v2 write shape (serializeMetaIdentity) + dual-read dispatcher (parseMetaRecordAny/parseMetaIdentity) + write→read round-trip, pure, no API ./run.sh check-meta-mailbox-state-write # deterministic gate (0.11 Stage 0 step 3D-4 commit2): post-cut receipt is state-only — meta-record file byte-identical across enqueue/read, state carries 
lastEnqueuedAt/lastReadAt (field isolation), empty inbox no-op on record+state, drift surfaces; no API ./run.sh check-meta-receiver-marker # deterministic gate (SE-2 slice 2b): meta-receiver presence marker — write/read round-trip garden-id keyed + atomic 0600, dead-owner start-key guard reads null, armProvenance limited to arm-capable events (UserPromptSubmit can't mint presence), reader doesn't gate on record existence ./run.sh check-meta-migration # deterministic gate (0.11 Stage 0 step 3D-4 commit2): v1→v2 delivery-receipt migration (per-field state-wins, 3 timestamps, no-op when nothing to fill) + crash-order inside upsert (migrate before v2 rewrite; drift throws with record still v1), no API ./run.sh check-meta-dual-consumers # deterministic gate (0.11 Stage 0 step 3D-4): delivery-agnostic dual-read seam — readMetaIdentityByGardenId + scanIdentityByNativeId read v1 AND v2, cross-schema duplicate = ambiguity throw (G1); v1-only raw readers remain for v1-fixture gates, no API ./run.sh check-meta-capability-source # deterministic gate (0.11 Stage 0 step 3D-3): capability-source cut-over — mint/parse read wakeMode/deliveryLevel from the registry (metaCapabilityFor, registry-driven via injection), not META_BACKEND_DESCRIPTORS; behaviour-preserving (registry ≡ const), slot stays (3D-4), no API ./run.sh check-socket-probe # deterministic gate (0.11 Stage 0, F3): three-valued control-socket liveness (alive|dead|indeterminate) — GC reclaims dead only, indeterminate survives; pure classify + 2-socket integration, no API ./run.sh check-project-trust-handler # deterministic gate (0.11 Stage 0, Trust 2층): project_trust handler — decideProjectTrust matrix (escape=inherited-false+interactive+trust-here→{yes,remember:true}; non-interactive→undecided; never undefined) + adapter single-writer, fake prompt, no UI ./run.sh check-entwurf-v2-contract # deterministic gate (0.11 Stage 0 step 4-pre, 동결결정 10 + Fable R1-R5): FROZEN entwurf_v2 contract — R1 backend liveness domain (pi 
only; claude/codex/agy=unsupported, not folded), 6-cell intent×liveness table (single verdict, 2 allow/4 reject), N1 indeterminate-no-spawn, Q2 owned-live-no-autosend, R3 table↔receipt round-trip, R5 taxonomy, schema↔types drift; pure, no API ./run.sh check-entwurf-v2-lock # deterministic gate (0.11 Stage 0 step 5a, 버킷 B F2): per-gid dispatch LOCK primitive — openSync wx atomic acquire, second-acquire=target-locked conflict (holder JSON for human cleanup), nonce-owned release (successor survives late release), stale reclaim same-host+ESRCH-only (EPERM/remote/alive/unknown fail-closed), empty/corrupt=conflict not auto-deleted, F2-P1 malformed gid throws; real temp dir, deps injected ./run.sh check-entwurf-v2-decider # deterministic gate (0.11 Stage 0 step 5b): PURE dispatch decider decideDispatch — frozen 7-step order over injected fakes, lock acquire+release tracked so reject⇒no-plan-no-lock proven; pre-probe rejects observedLiveness=null, send/resume execute keep lock + mailbox no-lock (？7), resume plan no mode/provider/model, invalid gid throws (F2-P1); pure, no IO ./run.sh check-entwurf-v2-matrix # deterministic gate (0.11 Stage 0 step 5d-5 a): REACHABILITY + LOCK SSOT table — drives REAL decideDispatch over fakes, fixes every (target kind → transport → lock class) cell as one table (control-socket/meta-mailbox/spawn-bg + bad-target/conflict/locked/undeliverable/owned-live/dormant/indeterminate rejects), coverage pass fails on a dropped cell; thin coverage not a decider re-impl; pure, no IO ./run.sh check-entwurf-v2-release # deterministic gate (0.11 Stage 0 step 5c-1): PURE release-policy reducer (decideReleasePolicy + reduceRelease) — Fable-3 release-after-observation as a state machine; spawn-started is NOT a release event, release on first socket-alive ∨ child-exited (any code) or failed start, socket↔exit race idempotent (single release), lock-nullness invariant enforced; pure, no IO ./run.sh check-entwurf-v2-send # deterministic gate (0.11 Stage 0 step 
5c-2a): control-socket SEND hand (executeControlSocketSend) wiring transport IO onto the 5c-1 reducer — ack→sent, in-band reject→rejected (no fallback), dead→same-lock one-shot re-resolve (control retry / mailbox enqueue), indeterminate→failed+rethrow with NO fallback (no double-delivery); release exactly once, releaseLock throw never masks the send error; IO-via-dep ./run.sh check-entwurf-v2-send-fallback # deterministic gate (0.11 Stage 0 step 5c-2b): same-lock re-resolve RESOLVER (resolveDeadControlSendFallback) — fire-and-forget re-resolve: alive→control retry, dead→reject (NEVER spawn-bg), indeterminate→reject, unsupported+deliverable→mailbox plan, undeliverable/bad-target/conflict→reject; resolver never releases, mis-wire fails loud, inspect/probe throws propagate; no IO (fakes) ./run.sh check-entwurf-v2-runner # deterministic gate (0.11 Stage 0 step 5d-1): execute-router (executeDispatch) routing an already-decided DispatchDecision to its 5c transport hand → one outcome-rich EntwurfV2RunResult. 
reject→rejected (no hand) / control/spawn/mailbox→matching hand with decision.lock verbatim / spawn lock-retained rides executed (fail-closed) / N3 rejectReason carried / N1 SendDeliveredReleaseFailedError→execution-failed{finalizedOutcome,releaseFailed,retrySafe:false}; fake hands, no IO ./run.sh check-entwurf-v2-mailbox # deterministic gate (0.11 Stage 0 step 5c-4, LAST 5c transport slice): ENQUEUE-ONLY meta-mailbox SEND body (executeMetaMailboxSend) + production sendViaMailbox adapter — sender→formatMetaMailboxBody with plan.wantsReply threaded (divergence from legacy hard false), sender absent→raw plan.message, enqueue opts EXACTLY {gardenId,body,sessionsDir,mailboxDir}, enqueue throw PROPAGATES (no success:false fold — mailbox has no in-band refuse); adapter NEVER touches lock (release is the hand's job); source guard: no release/routing seam ./run.sh check-entwurf-v2-spawn # deterministic gate (0.11 Stage 0 step 5c-3a): spawn-bg RESUME watcher hand (executeSpawnBgResume) wiring spawn + socket-observe IO onto the 5c-1 reducer — Fable-3: TIMEOUT IS NOT A RELEASE (bare observeTimeout→killChild, release 0; bounded killGrace then real socket-alive ∨ child-exited releases ×1); spawnChild throw→spawn-start-failed; no observation obtainable (grace elapses / post-spawn watch dep throws)→lock-retained fail-closed (released:false, evidence surfaced), NO direct-release hatch; IO-via-dep, controlled promises ./run.sh check-entwurf-resume-args # deterministic gate (0.11 Stage 0 step 5c-3b): resume-argv SSOT (buildResumePiArgs) shared by the legacy async worker and the v2 spawn-bg resident citizen — A1: legacy=--no-extensions + no --entwurf-control (one-shot pi -p exits), v2-control=--entwurf-control + no --no-extensions (keep-alive is the goal, resumed session stays addressable); BOTH keep --mode json -p + prompt-as-turn (-p NOT dropped in v2); explicitExtensionArgs preserved once (#29); v2 includes plan.launchArgs (--approve); null provider→no --provider; no 
cross-contamination ./run.sh check-entwurf-v2-spawn-production # deterministic gate (0.11 Stage 0 step 5c-3c): production SpawnBgResumeDeps factory (makeProductionSpawnBgResumeDeps) wiring the 5c-3a watcher's 6 IO seams — no real pi/socket/timer (that=opt-in smoke-entwurf-v2-spawn-live, OUT of pnpm check). socketWatchVerdict: address-conflict→forged (reject, never wait)/alive→alive/dead·indeterminate→wait; spawnChild builds v2-control argv (--entwurf-control, no --no-extensions, -p+prompt, --approve, cwd authority); awaitSocketAlive connectable→resolve / symlink→reject without connect / dead→wait→alive / abort-clears; awaitChildExit code + listener cleanup; awaitTimeout schedule + abort-clear; killChild=SIGTERM; proc-less child fails loud ./run.sh check-entwurf-v2-only # deterministic gate (0.11.0 B): PI_SHELL_ACP_V2_ONLY=1 v2-only mode — pure helper (exact "1" only; off→allowed / on→blocked w/ flag+entwurf_v2+"unavailable"; assert throws on) + all 10 v1 entrypoint guards (9 surface groups; /entwurf tool+command) + MCP entwurf_resume & control-RPC spawn_async_resume BOTH guarded + v2 core flag-clean. v1 NOT deleted (호출 차단만; 0.12 lane). ./run.sh smoke-entwurf-v2-spawn-live # LIVE phase gate (0.11 Stage 0 step 5c-3c, D5) — OUT of pnpm check, needs LIVE=1. Exercises the production SpawnBgResumeDeps against REAL OS objects: S1 real unix socket → awaitSocketAlive resolves (real lstat+probe), symlink→forged, absent→abort settles; S2 real child → spawn-event resolve + SIGTERM kill + exit-code capture; S3 watcher integration → real timeout→kill→child-exited→release ×1. Does NOT spawn a real pi resume (that=5d matrix). Run before 5d: LIVE=1 ./run.sh smoke-entwurf-v2-spawn-live ./run.sh smoke-entwurf-v2-spawn-resume-live # 0.11.0 (A) ACCEPTANCE gate — OUT of pnpm check, needs LIVE=1. 
The FULL spawn-bg resident lifecycle: mint backend=pi identity → seed a REAL dormant pi session (one-shot into ~/.pi/agent/sessions) → runEntwurfV2(owned-outcome) routes dormant→spawn-bg resume → a REAL detached pi --entwurf-control child stands its socket up, resumes, DOES a model turn. Asserts executed/spawn-bg/socket-alive/released + lock released ×1 + no lock file + pid alive + socket connectable + resume USER & assistant OK nonces in the session JSONL. Model-in-loop IN. The gate v1 deprecation (0.12) is predicated on. Model: PI_SHELL_ACP_LIVE_TARGET=/ (default openai-codex/gpt-5.4). LIVE=1 ./run.sh smoke-entwurf-v2-spawn-resume-live ./run.sh smoke-entwurf-v2-matrix-live # LIVE sentinel (0.11 Stage 0 step 5d-5, D4-b) — OUT of pnpm check, needs LIVE=1. Drives REAL production runEntwurfV2 deps over REAL OS objects, 4 cells: C1 control-socket (real pi --entwurf-control resident → RPC send → lock acquire→release ×1), C1b socket-only (record-less live pi → control-socket sent / owned→bad-target, A1 narrow), C2 meta-mailbox deliverable (armed self-fetch citizen → real .msg enqueue, lock-free), C3 meta-mailbox guard (no armed receiver → reject, no garbage). Model-in-loop OUT (transport/lock/enqueue gate, GPT Q2); negative/timeout stay deterministic. Model: PI_SHELL_ACP_LIVE_TARGET=/ (default openai-codex/gpt-5.4). 
LIVE=1 ./run.sh smoke-entwurf-v2-matrix-live ./run.sh check-entwurf-facts # deterministic gate (0.11 Stage 0 step 4, fact-provider slice 1+2): PURE PeerFact core + resolveFactList union — R1 out-of-domain→unsupported, R3b pi 4-value, facts-only keyset; union: PeerFact+SocketOnlyFact by gardenId, dormant→dead, F3 indeterminate preserved, non-pi+socket fail-loud; pure, no IO ./run.sh check-socket-discovery # deterministic gate (0.11 Stage 0 step 4, fact-provider slice 3): SOCKET-axis scanSocketProbes — probes (dir sockets) ∪ (in-domain citizen canonical paths) 3-valued; dormant citizen no-file → dead (resumable, not unprobed), stall → indeterminate (F3), dir hygiene/dedup/missing-dir + e2e → resolveFactList; readdir/probe injected, no IO ./run.sh check-meta-listing # deterministic gate (0.11 Stage 0 step 4, fact-provider slice 4a): META-STORE axis listAllMetaIdentities — explicit-partial: parse failure / body-filename drift → explicit {filename,message} error (verbatim, no synthetic fields), valid records still listed (corrupt doesn't blind); mode strict throws / collect partial; entries/readRecord injected, no IO ./run.sh check-entwurf-fact-provider # deterministic gate (0.11 Stage 0 step 4, fact-provider slice 4b): ASSEMBLY listEntwurfFacts — listAllMetaIdentities→scanSocketProbes→pre-quarantine non-pi/socket conflicts→resolveFactList(clean)→{facts,diagnostics}; C-원칙: expected corruption (parse/collision)→diagnostics (listing survives), impossible invariant (dup/unprobed)→throw; collision quarantines BOTH PeerFact+socket; deps injected, no IO ./run.sh check-entwurf-peers-surface # deterministic gate (0.11 Stage 0 step 4, fact-provider slice 4c): MCP entwurf_peers RENDER renderEntwurfPeers — legacy `sessions` = projection of facts (alive only, no 2nd scan), socketPath via controlSocketPath (SSOT), count=projection length, three distinct arrays, NO verb-routing key (JSON deep scan) NOR word (text), diagnostics both surfaces, empty→(none), unsupported shown, 
enrich→(not enriched); WIRING guard: bridge calls provider+render, getLiveSessions gone; facts fabricated, no IO ./run.sh check-entwurf-send-mailbox-fallback # deterministic gate: pi-native entwurf_send → meta-bridge mailbox fallback (transport 2). ENOENT/ECONNREFUSED falls back, timeout does not; shared formatMetaMailboxBody; no-socket citizen → .msg enqueue; WIRING guard (pi-native calls fallback, bridge uses shared formatter) ./run.sh check-entwurf-self-address # deterministic gate (SE-1/SE-2 slice 1): self-addressability honesty predicate computeSelfAddressability — pi replyable ⟺ live socket; meta ⟺ recordBacked ∧ ownerAlive ∧ watchArmed (regression-proof record-present rows); SOURCE GUARD buildStrictPiSenderEnvelope drops hardcoded replyable:true + existsSync-probes socket, entwurf_self renders alive vs expected. meta watchArmed wired in slice 2 (same release block) ./run.sh check-entwurf-deliverability # deterministic gate (SE-1/SE-2 slice 2c): conversational-mailbox deliverability predicate — computeMetaReceiverActive (recordBacked ∧ ownerAlive ∧ watchArmed) + mailboxConversationalDeliverable (self-fetch AND active); direct-inject pi refused (SE-1), self-fetch dead/unarmed refused (SE-2); self-address shares the same atom ./run.sh check-entwurf-mailbox-guard # deterministic gate (SE-1/SE-2 slice 2d): guarded mailbox enqueue — PURE 0-call (undeliverable target leaves injected enqueue uncalled) + TMPDIR snapshot (refused send leaves mailbox byte-identical, accepted writes one .msg) + fact gathering from record/capability/receiver-marker ./run.sh check-plugin-empty-final-recovery # deterministic recovery-decision gate for plugins/openclaw/src/index.ts (issue #20 — no pi process) ./run.sh check-plugin-prompt-format # deterministic shape gate for buildConversationPrompt + stripChatCompletionTail (issue #20 follow-up leak) ./run.sh check-async-resume-gate # deterministic gate for MCP entwurf_resume mode resolution + replyable gate + cwd silent-ignore (0.7.6, 16 
assertions) ./run.sh check-package-source-routing # deterministic gate (#29): package-source -> install-root mapping + fail-fast routing (local/git/npm/missing/project/no-source × local+remote, self-root, resume), no backend ./run.sh smoke-installed-entwurf-acp # live counterpart (#29): git+npm package sources plus packed tarball resolve + a --no-extensions child registers provider pi-shell-acp (credential-free --list-models proof) ./run.sh smoke-session-id-name # live 3-turn substrate smoke (Phase 3a): Pi 0.78 --session-id/--name through the bridge — header id/cwd, session_info name, append-not-recreate, spawn-only name, wrong-cwd footgun evidence ./run.sh smoke-async-resume [backends...] # live async-resume smoke (Claude+Codex+Gemini + handler/external cases), required before entwurf releases ./run.sh new-session-id # print one fresh garden-native session id for operator launchers (--session-id) ./run.sh smoke-resident-garden-guard # live resident --entwurf-control garden guard (negative 0-token; SMOKE_RGG_POSITIVE=1 for positive) ./run.sh smoke-meta-async-drift # 1.0.0 meta-bridge step 1: drift sentinel — version pins + Claude binary undocumented-behavior markers (LIVE=1 adds plugin watch-arm probe) ./run.sh smoke-meta-honesty # 1.0.0 meta-bridge: honesty regression gate (#30 blockers) — doorbell counts ALL msgs honestly + hook logs failures as ERROR (best-effort, no scream). Offline/deterministic (deps: bash+node+python3) ./run.sh smoke-meta-install-state # 1.0.0 meta-bridge Phase 2: stateful install/uninstall + store-doctor regression gate. Offline/deterministic (deps: bash+node+python3) ./run.sh smoke-meta-prune # 1.0.0 meta-bridge Phase 4: listing-only store janitor regression gate — classify keep/orphan/stale/ambiguous, delete nothing. Offline/deterministic (deps: bash+node) ./run.sh smoke-meta-keyset-guard # 0.10.0 meta-bridge: keyset-owner guard regression — check-keyset-overlap + managed-keys SSOT (disjoint passes, collisions fail). 
Offline/hermetic (deps: bash+python3) ./run.sh smoke-meta-mailbox # 0.10.0 meta-bridge defense C: mailbox messaging E2E — entwurf_send fallback → enqueue → entwurf_inbox_read + receipt (replyable/external), zero Claude turns. Offline/hermetic (deps: bash+node+python3) ./run.sh smoke-meta-sender-identity # 0.10.0 meta-bridge: native SENDER identity E2E — parent-pid sender marker promotes anonymous MCP send to replyable meta-session (garden-id), REQUIRE_META_SENDER refuses anonymous. Offline/hermetic (deps: bash+node+python3) ./run.sh install-meta-bridge # 1.0.0 meta-bridge Phase 2: stateful GLOBAL install (plugin + USER MCP + settings keyset, honest uninstall state) ./run.sh uninstall-meta-bridge # 1.0.0 meta-bridge Phase 2: stateful GLOBAL uninstall (restore only keys/items captured in install-state) ./run.sh doctor-meta-bridge # 1.0.0 meta-bridge Phase 2: fail-loud doctor — toolchain + state + plugin/MCP + store scan + hook errors + SessionStart evidence + writer-version parity (source↔assembled↔installed: FAIL on a stale deployed meta-record writer) ./run.sh meta-bridge-prune # 1.0.0 meta-bridge Phase 4: LISTING-ONLY store hygiene — classify orphan/stale/ambiguous/keep, print manual rm commands, delete NOTHING ([dir] [--ttl-days N]) ./run.sh meta-bridge-managed-keys # 0.10.0 meta-bridge: print the SSOT of settings keys pi-shell-acp OWNS (consumers read this to stay disjoint — keyset-owner invariant) ./run.sh check-keyset-overlap # 0.10.0 meta-bridge: PREVENTIVE keyset guard — fail if a consumer fragment collides with any pi-owned key (cross-repo; not in pnpm check) ./run.sh check-backends # local deterministic check of backend launch resolution + backend-specific _meta shape ./run.sh check-registration # local deterministic check of per-runtime provider registration semantics ./run.sh check-dep-versions # local deterministic check that version pins (package.json/run.sh/README.md + pi devDeps/peer pins) agree ./run.sh check-auth-boundary # local deterministic 
guard (#26): no legacy-ENV apiKey literal (e.g. "ANTHROPIC_API_KEY") in root bridge code (index.ts/acp-bridge.ts) ./run.sh check-sdk-surface # static gate: every (connection as any) cast in acp-bridge.ts is annotated SDK_CAST_OK or SDK_CAST_DEBT ./run.sh check-pack # publish gate (dry-run): npm pack --dry-run + tarball invariants (runtime-critical present, dev residue absent) ./run.sh check-pack-install # heavy publish gate (prepublishOnly): actual npm pack + tar -tf + fresh-temp install smoke with 0.79.x peers ./run.sh check-models # local deterministic check of MODELS contextWindow defaults (sonnet 200K, opus 1M) + override ./run.sh check-claude-sessions [project-dir] # compare pi persisted sessions vs Claude SDK session visibility ./run.sh verify-resume [project-dir] # exact pi -> ACP -> Claude continuity check with visible acpSessionId diagnostics ./run.sh verify-transcript-poison # deterministic smoke for #12 (classifier + persisted-record invalidation, no API) ./run.sh sync-auth # copy ~/.pi/agent/auth.json anthropic OAuth credentials to pi-shell-acp alias ./run.sh install [project-dir] # install this local package into project .pi/settings.json ./run.sh setup:links [--force] # repair ~/.pi/agent/entwurf-targets.json link (use --force to replace a stale operator file or wrong symlink; a .bak is taken) ./run.sh remove [project-dir] # remove pi-shell-acp entries from project .pi/settings.json Notes: - project-dir defaults to current directory - Claude Code login should already exist (e.g. 
~/.claude.json) - smoke-all is the operator-facing runtime verification path — claude-only as of the 0.11.0 floor (codex/gemini via smoke-codex / smoke-gemini) — and is what setup runs - API key is optional; this bridge is intended to work with Claude Code auth EOF } require_cmd() { command -v "$1" >/dev/null 2>&1 || { echo "Missing command: $1" >&2 exit 1 } } log() { echo " $*"; } ok() { echo " ✅ $*"; } warn() { echo " ⚠ $*"; } fail() { echo " ❌ $*"; } section() { echo ""; echo "=== $* ==="; } normalize_project_dir() { python3 - "$1" <<'PY' import os, sys print(os.path.abspath(os.path.expanduser(sys.argv[1]))) PY } sync_auth() { local auth_path="$HOME/.pi/agent/auth.json" python3 - "$auth_path" "$PROVIDER_ID" <<'PY' import json, os, sys from pathlib import Path auth_path = Path(sys.argv[1]).expanduser() provider_id = sys.argv[2] auth_path.parent.mkdir(parents=True, exist_ok=True) if auth_path.exists(): data = json.loads(auth_path.read_text()) if not isinstance(data, dict): raise SystemExit("auth.json is not an object") else: data = {} anthropic = data.get("anthropic") if not isinstance(anthropic, dict): print("sync-auth: skipped (no anthropic OAuth credentials in ~/.pi/agent/auth.json)") raise SystemExit(0) before = json.dumps(data.get(provider_id), sort_keys=True) after = json.dumps(anthropic, sort_keys=True) if before == after: print(f"sync-auth: already synced ({provider_id})") raise SystemExit(0) data[provider_id] = anthropic backup = auth_path.with_suffix(auth_path.suffix + ".bak") if auth_path.exists(): backup.write_text(auth_path.read_text()) auth_path.write_text(json.dumps(data, indent=2) + "\n") print(f"sync-auth: wrote {provider_id} alias to {auth_path}") if backup.exists(): print(f"sync-auth: backup -> {backup}") PY } install_local_package() { local project_dir project_dir=$(normalize_project_dir "$1") mkdir -p "$project_dir/.pi" python3 - "$project_dir/.pi/settings.json" "$REPO_DIR" <<'PY' import json, sys from pathlib import Path settings_path = 
Path(sys.argv[1]) repo_dir = str(Path(sys.argv[2]).resolve()) settings_path.parent.mkdir(parents=True, exist_ok=True) if settings_path.exists(): data = json.loads(settings_path.read_text()) if not isinstance(data, dict): raise SystemExit("settings.json is not an object") else: data = {} # --- packages[] registration -------------------------------------------------- packages = data.get("packages") if not isinstance(packages, list): packages = [] filtered = [] for item in packages: source = item.get("source") if isinstance(item, dict) else item if isinstance(source, str) and ("pi-shell-acp" in source) and source != repo_dir: continue filtered.append(item) if repo_dir not in filtered: filtered.append(repo_dir) data["packages"] = filtered # --- piShellAcpProvider.mcpServers bundled entries --------------------------- # Ship the two in-repo MCP adapters pre-wired so `pi install` produces a # working setup without the consumer hand-editing settings.json. User-authored # overrides (different command/args) are preserved untouched. provider = data.setdefault("piShellAcpProvider", {}) if not isinstance(provider, dict): raise SystemExit("piShellAcpProvider is not an object") servers = provider.setdefault("mcpServers", {}) if not isinstance(servers, dict): raise SystemExit("piShellAcpProvider.mcpServers is not an object") BUNDLED = ("pi-tools-bridge",) # 0.4.14: session-bridge MCP was retracted (issue #7 — unified entwurf surface). # Install path now only wires pi-tools-bridge. Existing operator settings that # carry the old bundled session-bridge entry are pruned below during install; # `./run.sh remove` also keeps session-bridge in its cleanup tuple so legacy # entries are removed on uninstall too. 
for name in BUNDLED: desired_cmd = f"{repo_dir}/mcp/{name}/start.sh" desired = {"command": desired_cmd, "args": []} existing = servers.get(name) if existing is None: servers[name] = desired print(f"install: added piShellAcpProvider.mcpServers.{name}") elif isinstance(existing, dict) and existing.get("command") == desired_cmd: # Already managed by us at the current repo path. Add the default args # field when missing, but never overwrite user-customized args. if "args" not in existing: existing["args"] = [] print(f"install: normalized piShellAcpProvider.mcpServers.{name}.args -> []") elif existing.get("args") != []: print(f"install: preserved piShellAcpProvider.mcpServers.{name}.args (custom args)") else: cmd_repr = existing.get("command") if isinstance(existing, dict) else existing print(f"install: preserved piShellAcpProvider.mcpServers.{name} (user override: {cmd_repr})") # 0.4.14 migration: also prune any session-bridge entry that this install wrote # in a prior version. We only remove entries whose command matches the bundled # start.sh path (user-customized commands are left alone). LEGACY_BUNDLED = ("session-bridge",) for name in LEGACY_BUNDLED: existing = servers.get(name) if not isinstance(existing, dict): continue cmd = existing.get("command") if not isinstance(cmd, str): continue if cmd == f"{repo_dir}/mcp/{name}/start.sh" or cmd.endswith(f"/pi-shell-acp/mcp/{name}/start.sh"): del servers[name] print(f"install: pruned legacy piShellAcpProvider.mcpServers.{name} (retracted in 0.4.14, issue #7)") settings_path.write_text(json.dumps(data, indent=2) + "\n") print(f"install: updated {settings_path}") print(f"install: package source -> {repo_dir}") PY ensure_agent_dir_symlinks } # Ensure agent-level resources that pi-shell-acp code reads from # ~/.pi/agent/ are wired up at install time. Currently: # - entwurf-targets.json — pi-extensions/lib/entwurf-core.ts reads # ~/.pi/agent/entwurf-targets.json. 
# Wire the agent-level entwurf registry that pi-shell-acp reads from
# ~/.pi/agent/ at install time.
#
# pi-extensions/lib/entwurf-core.ts reads ~/.pi/agent/entwurf-targets.json and
# the package ships the canonical version at $REPO_DIR/pi/entwurf-targets.json.
# Without the link any entwurf tool call throws EntwurfRegistryError (lazy
# load: nothing surfaces during plain `pi --model ...` runs, but delegation is
# blocked the first time the operator calls entwurf).
#
# Fail-fast policy (added v0.5.0): drift between the canonical file and the
# operator's link/file is a bug to fix, not noise to preserve. The v0.4.x
# oracle install regression came from a stale operator-copied
# entwurf-targets.json that earlier installs silently kept "as operator file";
# the only signal was a sentinel failure several minutes later.
#
# Two explicit exits are honored:
#   1. `./run.sh setup:links --force` — back up + overwrite with canonical
#   2. `PI_ENTWURF_TARGETS_PATH=/path/to/custom.json` — tells entwurf-core to
#      read elsewhere, freeing this slot from any policy obligation
#
# Idempotent on the happy path. Because the registry is loaded lazily, a
# corrected file/symlink is picked up on the next entwurf call without
# restarting pi or the MCP bridge process.
#
# Globals:   HOME, REPO_DIR (read)
# Arguments: $1 - optional `--force`: replace a stale symlink, or back up and
#                 replace a drifting regular file, with a symlink to canonical
# Outputs:   "install: ..." progress lines on stdout; diagnostics on stderr
# Returns:   0 when the slot is (or has been made) correct; exits 1 on a
#            missing canonical file or on un-forced drift. Error headlines go
#            through `fail`, which is defined elsewhere in this file.
ensure_agent_dir_symlinks() {
  local force="${1:-}"
  local agent_dir="$HOME/.pi/agent"
  local target="$REPO_DIR/pi/entwurf-targets.json"
  local link="$agent_dir/entwurf-targets.json"

  mkdir -p "$agent_dir"

  if [[ ! -f "$target" ]]; then
    fail "canonical registry missing at $target — repo install is broken"
    exit 1
  fi

  if [[ -L "$link" ]]; then
    local current
    current=$(readlink "$link")
    # Correct when the link text is the canonical path, OR when the link
    # resolves to the very same file by another spelling (relative link,
    # checkout reached through a symlinked directory). Only a link that lands
    # somewhere else is drift.
    if [[ "$current" == "$target" || "$link" -ef "$target" ]]; then
      return 0 # already correct, silent
    fi
    if [[ "$force" == "--force" ]]; then
      rm -f "$link"
      ln -s "$target" "$link"
      echo "install: relinked $link -> $target (was -> $current)"
      return 0
    fi
    fail "stale entwurf-targets symlink at $link"
    echo " points to: $current" >&2
    echo " expected: $target" >&2
    echo " Fix with one of:" >&2
    echo " ./run.sh setup:links --force # relink to canonical" >&2
    echo " export PI_ENTWURF_TARGETS_PATH=$current # honor your override explicitly" >&2
    exit 1
  fi

  if [[ -e "$link" ]]; then
    if cmp -s "$link" "$target"; then
      return 0 # operator copy is byte-identical to canonical, silent
    fi
    if [[ "$force" == "--force" ]]; then
      # Declared separately from the assignment so `local` cannot mask a
      # failing command substitution.
      local backup
      backup="${link}.bak.$(date +%Y%m%d-%H%M%S)"
      mv "$link" "$backup"
      ln -s "$target" "$link"
      echo "install: replaced stale $link with symlink -> $target (backup: $backup)"
      return 0
    fi
    fail "stale entwurf-targets file at $link (drifts from canonical)"
    echo " canonical: $target" >&2
    echo " diff (link vs canonical):" >&2
    # diff exits 1 when the files differ; that is the expected case here.
    diff -u "$link" "$target" | sed 's/^/ /' >&2 || true
    echo " Fix with one of:" >&2
    echo " ./run.sh setup:links --force # back up + replace with symlink to canonical" >&2
    echo " export PI_ENTWURF_TARGETS_PATH=$link # honor your file as an explicit override" >&2
    exit 1
  fi

  ln -s "$target" "$link"
  echo "install: linked $link -> $target"
}
# Undo what `./run.sh install` wrote into <project>/.pi/settings.json.
#
# Arguments: $1 - project directory (resolved via normalize_project_dir)
# Globals:   REPO_DIR (read; resolved to an absolute path inside the script)
# Outputs:   "remove: ..." progress lines on stdout
#
# The embedded Python does two independent cleanups and then rewrites the
# file (2-space indent, trailing newline):
#   - packages[]: drops every entry whose source (the entry itself, or its
#     "source" key when the entry is an object) CONTAINS "pi-shell-acp".
#     This is a substring test, not an exact match.
#   - piShellAcpProvider.mcpServers: drops the bundled server names only when
#     their command points at a bundled start.sh; empty containers left
#     behind are removed as well.
# A missing settings file is a no-op (message + exit 0); a settings file
# whose top level is not a JSON object aborts with an error.
remove_local_package() {
  local project_dir
  project_dir=$(normalize_project_dir "$1")

  python3 - "$project_dir/.pi/settings.json" "$REPO_DIR" <<'PY'
import json, sys
from pathlib import Path

settings_path = Path(sys.argv[1])
repo_dir = str(Path(sys.argv[2]).resolve())

if not settings_path.exists():
    print(f"remove: nothing to do ({settings_path} missing)")
    raise SystemExit(0)

data = json.loads(settings_path.read_text())
if not isinstance(data, dict):
    raise SystemExit("settings.json is not an object")

# --- packages[] cleanup -------------------------------------------------------
packages = data.get("packages")
pkg_removed = 0
if isinstance(packages, list):
    filtered = []
    for item in packages:
        source = item.get("source") if isinstance(item, dict) else item
        if isinstance(source, str) and ("pi-shell-acp" in source):
            pkg_removed += 1
            continue
        filtered.append(item)
    data["packages"] = filtered

# --- piShellAcpProvider.mcpServers cleanup ------------------------------------
# Only remove entries that look like they came from ./run.sh install: either
# the command matches the current $REPO_DIR anchor exactly, or it ends with
# the bundled "/pi-shell-acp/mcp/<name>/start.sh" pattern (covers a rebuilt
# checkout under a different directory). Anything else is treated as a user
# override and left in place.
BUNDLED = ("pi-tools-bridge", "session-bridge")
provider = data.get("piShellAcpProvider")
mcp_removed = 0
if isinstance(provider, dict):
    servers = provider.get("mcpServers")
    if isinstance(servers, dict):
        for name in BUNDLED:
            existing = servers.get(name)
            if not isinstance(existing, dict):
                continue
            cmd = existing.get("command")
            if not isinstance(cmd, str):
                continue
            exact = cmd == f"{repo_dir}/mcp/{name}/start.sh"
            pattern = cmd.endswith(f"/pi-shell-acp/mcp/{name}/start.sh")
            if exact or pattern:
                del servers[name]
                mcp_removed += 1
                print(f"remove: removed piShellAcpProvider.mcpServers.{name}")
            else:
                print(f"remove: preserved piShellAcpProvider.mcpServers.{name} (user override: {cmd})")
        if not servers:
            provider.pop("mcpServers", None)
    if not provider:
        data.pop("piShellAcpProvider", None)

settings_path.write_text(json.dumps(data, indent=2) + "\n")
print(f"remove: removed {pkg_removed} packages[] entries, {mcp_removed} mcpServers entries from {settings_path}")
PY
}

# Runtime smoke for a single backend: confirm pi can list the provider's
# models, then push one prompt straight through the bridge library.
#
# Arguments: $1 - project directory (resolved via normalize_project_dir)
#            $2 - backend; defaults to $PI_SHELL_ACP_BACKEND, then "claude"
#            $3 - model; defaults to $PI_SHELL_ACP_MODEL, then a per-backend
#                 qualified default chosen in the case statement below
# Globals:   REPO_DIR, PROVIDER_ID (read); PI_SHELL_ACP_MODEL_ID (optional
#            override for the id handed to the bridge)
# Outputs:   "[smoke] ..." progress lines on stdout
# Returns:   exits 1 for an unknown backend when no model was supplied; any
#            failing step otherwise propagates its own non-zero status.
smoke_test() {
  local project_dir backend model model_id session_key
  project_dir=$(normalize_project_dir "$1")
  backend=${2:-${PI_SHELL_ACP_BACKEND:-claude}}
  model=${3:-${PI_SHELL_ACP_MODEL:-}}
  if [[ -z "$model" ]]; then
    case "$backend" in
      claude) model="pi-shell-acp/claude-sonnet-4-6" ;;
      codex) model="pi-shell-acp/gpt-5.4" ;;
      gemini) model="pi-shell-acp/gemini-3.1-pro-preview" ;;
      *)
        echo "[smoke] unknown backend: $backend" >&2
        exit 1
        ;;
    esac
  fi
  # The bridge call below takes the bare backend id, so a leading
  # "<provider>/" qualifier is stripped from whatever model id was chosen.
  model_id=${PI_SHELL_ACP_MODEL_ID:-$model}
  if [[ "$model_id" == "$PROVIDER_ID/"* ]]; then
    model_id=${model_id#${PROVIDER_ID}/}
  fi
  session_key="run-sh-smoke:${backend}:${model_id}"

  require_cmd pi

  echo "[smoke] project: $project_dir"
  echo "[smoke] repo: $REPO_DIR"
  echo "[smoke] backend: $backend"
  echo "[smoke] model: $model"
  echo "[smoke] model-id: $model_id"
  echo "[smoke] session-key: $session_key"

  # Step 1: the extension loads from the project directory and pi can list
  # the provider's models (output discarded; only the exit status matters).
  (cd "$project_dir" && pi -e "$REPO_DIR" --list-models pi-shell-acp >/dev/null)
  echo "[smoke] provider models: ok"

  # Step 2: one prompt through acp-bridge.ts, run from the repo so the
  # relative import resolves. The script collects agent_message_chunk text and
  # throws unless the turn ends with stopReason=end_turn and non-empty text.
  # The Korean literals are prompts: the system-prompt append asks for brief
  # answers and the user prompt asks for a bare "ok".
  (
    cd "$REPO_DIR"
    PI_SHELL_ACP_SMOKE_BACKEND="$backend" PI_SHELL_ACP_MODEL_ID="$model_id" PI_SHELL_ACP_SMOKE_SESSION_KEY="$session_key" node --input-type=module <<'EOF'
import { ensureBridgeSession, sendPrompt, setActivePromptHandler, closeBridgeSession, normalizeMcpServers } from './acp-bridge.ts';

const sessionKey = process.env.PI_SHELL_ACP_SMOKE_SESSION_KEY || 'run-sh-smoke';
const backend = process.env.PI_SHELL_ACP_SMOKE_BACKEND;
const modelId = process.env.PI_SHELL_ACP_MODEL_ID || 'claude-sonnet-4-6';
if (!backend) {
  throw new Error('PI_SHELL_ACP_SMOKE_BACKEND is required for direct ensureBridgeSession smoke.');
}
const emptyMcpHash = normalizeMcpServers(undefined).hash;
const session = await ensureBridgeSession({
  sessionKey,
  cwd: process.cwd(),
  backend,
  modelId,
  systemPromptAppend: '간단히 답하세요.',
  settingSources: ['user'],
  strictMcpConfig: false,
  mcpServers: [],
  tools: ['Read', 'Bash', 'Edit', 'Write'],
  skillPlugins: [],
  permissionAllow: ['Read(*)', 'Bash(*)', 'Edit(*)', 'Write(*)', 'mcp__*'],
  disallowedTools: [],
  codexDisabledFeatures: [],
  bridgeConfigSignature: JSON.stringify({ backend, appendSystemPrompt: false, settingSources: ['user'], strictMcpConfig: false, mcpServersHash: emptyMcpHash }),
  contextMessageSignatures: [`smoke:${backend}:user:ok만 답하세요.`],
});
let text = '';
setActivePromptHandler(session, (event) => {
  if (event.type !== 'session_notification') return;
  const update = event.notification.update;
  if (update?.sessionUpdate === 'agent_message_chunk' && update.content?.type === 'text') {
    text += update.content.text;
  }
});
const result = await sendPrompt(session, [{ type: 'text', text: 'ok만 답하세요.' }]);
setActivePromptHandler(session, undefined);
await closeBridgeSession(sessionKey);
if (result.stopReason !== 'end_turn') {
  throw new Error(`unexpected stopReason: ${result.stopReason}`);
}
if (!text.trim()) {
  throw new Error('empty bridge response');
}
console.log(`[smoke] bridge response (${backend}/${modelId}): ${text.trim()}`);
EOF
  )
  echo "[smoke] bridge prompt: ok"
}
# Automated runtime-smoke floor.
#
# Arguments: $1 - project directory (resolved via normalize_project_dir)
# Outputs:   two "[smoke-all] ..." lines around the smoke_test output
smoke_all() {
  local resolved_dir
  resolved_dir="$(normalize_project_dir "$1")"

  # The 0.11.0 release floor is claude-only (sonnet): the Gemini (CLI
  # deprecated) and Codex live runtime smokes were taken out of the gate.
  # Their smoke_test branches remain reachable for manual
  # `./run.sh smoke-codex|gemini` debugging; only Claude runs here.
  printf '%s\n' "[smoke-all] claude-only runtime verification starting (sonnet)"
  smoke_test "$resolved_dir" claude
  printf '%s\n' "[smoke-all] Claude runtime smoke: ok (codex/gemini live tests dropped — 0.11.0 claude-only floor)"
}
# Shared failure path for smoke_continuity_single: headline and captured pi
# output go to stderr, the temp session file is removed, the script exits 1.
#
# Arguments: $1 - headline, $2 - captured pi output, $3 - session file
_smoke_continuity_abort() {
  local headline=$1
  local captured=$2
  local scratch=$3

  echo "$headline" >&2
  echo "$captured" >&2
  rm -f "$scratch"
  exit 1
}

# Two-turn persisted-bootstrap check for one backend. Both turns run pi with
# PI_SHELL_ACP_STRICT_BOOTSTRAP=1 against the same temp session file; turn 1
# must log bootstrap path=new, turn 2 must log the expected path and must not
# log a bootstrap-invalidate line.
#
# Arguments: $1 - project directory (pi runs from here)
#            $2 - backend name expected in the bootstrap log lines
#            $3 - model passed to `pi --model`
#            $4 - bootstrap path expected on turn 2
# Globals:   REPO_DIR (read)
# Returns:   0 on success; exits 1 (after deleting the session file) on the
#            first failed expectation
smoke_continuity_single() {
  local project_dir=$1
  local backend=$2
  local model=$3
  local expected_path=$4
  local tag="[smoke-continuity/$backend]"

  local session_file
  session_file=$(mktemp /tmp/pi-shell-acp-continuity-XXXXXX.jsonl)

  # Everything the two invocations have in common; only the prompt differs.
  local -a pi_args=(
    -e "$REPO_DIR"
    --session "$session_file"
    --provider pi-shell-acp
    --model "$model"
  )

  echo "$tag model=$model expected-turn2=$expected_path session=$session_file"

  # --- turn 1: fresh session -------------------------------------------------
  local first_log
  first_log=$(cd "$project_dir" && PI_SHELL_ACP_STRICT_BOOTSTRAP=1 pi "${pi_args[@]}" -p 'READY 만 답해' 2>&1) \
    || _smoke_continuity_abort "$tag turn1 pi invocation failed:" "$first_log" "$session_file"

  grep -q "^\[pi-shell-acp:bootstrap\] path=new backend=$backend" <<< "$first_log" \
    || _smoke_continuity_abort "$tag turn1 expected path=new, got:" "$first_log" "$session_file"
  echo "$tag turn1 path=new: ok"

  # --- turn 2: same session file, must take the expected path ----------------
  local second_log
  second_log=$(cd "$project_dir" && PI_SHELL_ACP_STRICT_BOOTSTRAP=1 pi "${pi_args[@]}" -p 'OK 만 답해' 2>&1) \
    || _smoke_continuity_abort "$tag turn2 pi invocation failed (strict bootstrap throw?):" "$second_log" "$session_file"

  grep -q "^\[pi-shell-acp:bootstrap\] path=$expected_path backend=$backend" <<< "$second_log" \
    || _smoke_continuity_abort "$tag turn2 expected path=$expected_path, got:" "$second_log" "$session_file"

  if grep -q "^\[pi-shell-acp:bootstrap-invalidate\]" <<< "$second_log"; then
    _smoke_continuity_abort "$tag turn2 unexpected invalidation on happy continuity:" "$second_log" "$session_file"
  fi
  echo "$tag turn2 path=$expected_path: ok"

  rm -f "$session_file"
}

# Persisted bootstrap gate entry point; runs the Claude (resume) row only.
#
# Arguments: $1 - project directory (resolved via normalize_project_dir)
smoke_continuity() {
  local resolved_dir
  resolved_dir="$(normalize_project_dir "$1")"

  require_cmd pi

  printf '%s\n' \
    "[smoke-continuity] claude-only persisted bootstrap gate (sonnet)" \
    "[smoke-continuity] project: $resolved_dir" \
    "[smoke-continuity] repo: $REPO_DIR"

  smoke_continuity_single "$resolved_dir" claude claude-sonnet-4-6 resume

  printf '%s\n' "[smoke-continuity] Claude(resume) continuity: ok (codex dropped — claude-only floor)"
}
# Cancel/abort cleanup gate for one backend.
#
# Starts a long prompt through acp-bridge.ts, cancels it, reuses the session
# for a short prompt, closes it explicitly, and then asserts on the bridge's
# stderr log plus the number of backend processes before and after.
#
# Arguments: $1 - project directory (assigned to a local but not referenced
#                 again in this function; the node step runs from $REPO_DIR)
#            $2 - backend: "claude" or "codex" (anything else exits 1)
#            $3 - bare model id handed to the bridge
# Globals:   REPO_DIR (read)
# Returns:   0 on success; exits 1 (after dumping and deleting the log file)
#            on the first failed assertion
smoke_cancel_single() {
  local project_dir=$1
  local backend=$2
  local model=$3
  local backend_pattern
  # Process-name pattern used by the pgrep leak check at the end.
  case "$backend" in
    claude) backend_pattern="claude-agent-acp" ;;
    codex) backend_pattern="codex-acp" ;;
    *)
      echo "[smoke-cancel/$backend] unknown backend" >&2
      exit 1
      ;;
  esac

  echo "[smoke-cancel/$backend] model=$model pattern=$backend_pattern"

  # Baseline count of matching processes; pgrep exits non-zero when nothing
  # matches, which is mapped to 0 here.
  local before
  before=$(pgrep -cf "$backend_pattern" 2>/dev/null) || before=0
  echo "[smoke-cancel/$backend] baseline process count: $before"

  local log_file
  log_file=$(mktemp /tmp/pi-shell-acp-cancel-XXXXXX.log)
  # The node status is captured instead of aborting immediately so that the
  # log assertions below run first and can print a more specific diagnosis.
  local rc=0
  (
    cd "$REPO_DIR"
    PI_SHELL_ACP_SMOKE_BACKEND="$backend" PI_SHELL_ACP_MODEL_ID="$model" \
      node --input-type=module 2>"$log_file" <<'EOF'
import {
  ensureBridgeSession,
  sendPrompt,
  setActivePromptHandler,
  closeBridgeSession,
  cancelActivePrompt,
  normalizeMcpServers,
} from './acp-bridge.ts';

const backend = process.env.PI_SHELL_ACP_SMOKE_BACKEND;
const modelId = process.env.PI_SHELL_ACP_MODEL_ID;
if (!backend || !modelId) throw new Error('backend/model env required');

const sessionKey = `smoke-cancel:${backend}:${modelId}`;
const emptyMcpHash = normalizeMcpServers(undefined).hash;
const baseParams = {
  sessionKey,
  cwd: process.cwd(),
  backend,
  modelId,
  systemPromptAppend: '간단히 답하세요.',
  settingSources: ['user'],
  strictMcpConfig: false,
  mcpServers: [],
  tools: ['Read', 'Bash', 'Edit', 'Write'],
  skillPlugins: [],
  permissionAllow: ['Read(*)', 'Bash(*)', 'Edit(*)', 'Write(*)', 'mcp__*'],
  disallowedTools: [],
  codexDisabledFeatures: [],
  bridgeConfigSignature: JSON.stringify({
    backend,
    appendSystemPrompt: false,
    settingSources: ['user'],
    strictMcpConfig: false,
    mcpServersHash: emptyMcpHash,
  }),
  contextMessageSignatures: [`smoke-cancel:${backend}`],
};

const session = await ensureBridgeSession(baseParams);

let firstChunkSeen = false;
let cancelled = false;
setActivePromptHandler(session, async (event) => {
  if (event.type !== 'session_notification') return;
  const update = event.notification?.update;
  if (!update) return;
  if (update.sessionUpdate === 'agent_message_chunk' || update.sessionUpdate === 'agent_thought_chunk') {
    if (!firstChunkSeen) {
      firstChunkSeen = true;
      setTimeout(() => {
        if (!cancelled) {
          cancelled = true;
          void cancelActivePrompt(session);
        }
      }, 50);
    }
  }
});

// fallback cancel in case no chunk ever arrives
const failSafe = setTimeout(() => {
  if (!cancelled) {
    cancelled = true;
    void cancelActivePrompt(session);
  }
}, 4000);
failSafe.unref?.();

let longResult;
try {
  longResult = await sendPrompt(session, [{
    type: 'text',
    text: '1부터 300까지 숫자를 하나씩 새 줄에 적어주세요. 각 숫자 옆에 짧은 단어도 하나 붙여주세요.',
  }]);
} catch (error) {
  longResult = { stopReason: 'threw', error: error instanceof Error ? error.message : String(error) };
}
clearTimeout(failSafe);
console.error(`[smoke-cancel] long prompt stopReason=${longResult.stopReason}`);

// session reuse: short prompt must succeed after cancel
setActivePromptHandler(session, () => { /* drop chunks */ });
const reuse = await sendPrompt(session, [{ type: 'text', text: 'ok만 답하세요.' }]);
setActivePromptHandler(session, undefined);
if (reuse.stopReason !== 'end_turn') {
  throw new Error(`reuse failed: stopReason=${reuse.stopReason}`);
}
console.error('[smoke-cancel] session reuse: ok');

await closeBridgeSession(sessionKey, { closeRemote: true, invalidatePersisted: true });
console.error('[smoke-cancel] explicit close: ok');
EOF
  ) || rc=$?

  # drain any late exit
  sleep 1

  # Log assertions, most specific first: a cancel line must exist, must not
  # report outcome=failed, and must report dispatched or unsupported.
  if ! grep -q '^\[pi-shell-acp:cancel\]' "$log_file"; then
    echo "[smoke-cancel/$backend] missing [pi-shell-acp:cancel] log line" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi
  if grep -q '^\[pi-shell-acp:cancel\] .*outcome=failed' "$log_file"; then
    echo "[smoke-cancel/$backend] cancel outcome=failed" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi
  if ! grep -qE '^\[pi-shell-acp:cancel\] .*outcome=(dispatched|unsupported)' "$log_file"; then
    echo "[smoke-cancel/$backend] cancel outcome not in {dispatched, unsupported}" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi
  if ! grep -q '^\[pi-shell-acp:shutdown\]' "$log_file"; then
    echo "[smoke-cancel/$backend] missing [pi-shell-acp:shutdown] log line" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi
  if [[ "$rc" != "0" ]]; then
    echo "[smoke-cancel/$backend] node subprocess failed rc=$rc" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi

  # Leak check: only a positive delta fails. A negative delta is reported on
  # stderr and tolerated.
  local after
  after=$(pgrep -cf "$backend_pattern" 2>/dev/null) || after=0
  local delta=$((after - before))
  if [[ $delta -gt 0 ]]; then
    echo "[smoke-cancel/$backend] backend process leak delta=$delta (before=$before after=$after)" >&2
    pgrep -af "$backend_pattern" >&2 || true
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi
  if [[ $delta -lt 0 ]]; then
    echo "[smoke-cancel/$backend] backend process count dropped during smoke (delta=$delta, before=$before after=$after) — treating as external cleanup, not a leak" >&2
  fi

  echo "[smoke-cancel/$backend] ok (cancel logged, session reused, no positive process leak)"
  rm -f "$log_file"
}

# Cancel cleanup gate entry point; runs the Claude row only.
#
# Arguments: $1 - project directory (resolved via normalize_project_dir)
smoke_cancel() {
  local project_dir
  project_dir=$(normalize_project_dir "$1")

  require_cmd pi

  echo "[smoke-cancel] claude-only cancel cleanup gate (sonnet)"
  echo "[smoke-cancel] project: $project_dir"
  echo "[smoke-cancel] repo: $REPO_DIR"

  smoke_cancel_single "$project_dir" claude claude-sonnet-4-6

  echo "[smoke-cancel] Claude cancel cleanup: ok (codex dropped — claude-only floor)"
}
# Model-switch lock gate for one (A -> B) pair.
#
# Opens a bridge session on (backend_a, model_a), runs one turn, then asks
# for (backend_b, model_b) on the SAME session key and requires the bridge to
# refuse with ModelSwitchLockedError while the original session keeps
# serving. The bridge's stderr log is then checked for the locked outcome,
# for exactly one path=new bootstrap, and for the absence of respawn or
# cross-backend bootstrap lines.
#
# Arguments: $1 - project directory (assigned to a local but not referenced
#                 again in this function; the node step runs from $REPO_DIR)
#            $2 - backend of the starting session
#            $3 - model of the starting session
#            $4 - backend of the attempted switch; falls back to $2 when empty
#            $5 - model of the attempted switch
# Globals:   REPO_DIR (read)
# Returns:   0 on success; exits 1 (after dumping and deleting the log file)
#            on the first failed assertion
smoke_model_switch_single() {
  local project_dir=$1
  local backend_a=$2
  local model_a=$3
  local backend_b=${4:-$backend_a}
  local model_b=$5
  # Label is "A" for a same-backend pair and "A->B" for a cross-backend pair.
  local label="$backend_a"
  if [[ "$backend_a" != "$backend_b" ]]; then
    label="$backend_a->$backend_b"
  fi

  echo "[smoke-model-switch/$label] models: $model_a -> $model_b"

  local log_file
  log_file=$(mktemp /tmp/pi-shell-acp-model-switch-XXXXXX.log)
  local rc=0
  (
    cd "$REPO_DIR"
    PI_SHELL_ACP_SMOKE_BACKEND_A="$backend_a" \
    PI_SHELL_ACP_MODEL_A="$model_a" \
    PI_SHELL_ACP_SMOKE_BACKEND_B="$backend_b" \
    PI_SHELL_ACP_MODEL_B="$model_b" \
      node --input-type=module 2>"$log_file" <<'EOF'
import {
  ensureBridgeSession,
  sendPrompt,
  setActivePromptHandler,
  closeBridgeSession,
  normalizeMcpServers,
  ModelSwitchLockedError,
} from './acp-bridge.ts';

const backendA = process.env.PI_SHELL_ACP_SMOKE_BACKEND_A;
const backendB = process.env.PI_SHELL_ACP_SMOKE_BACKEND_B;
const modelA = process.env.PI_SHELL_ACP_MODEL_A;
const modelB = process.env.PI_SHELL_ACP_MODEL_B;
if (!backendA || !backendB || !modelA || !modelB) {
  throw new Error('backendA/backendB/modelA/modelB required');
}

const emptyMcpHash = normalizeMcpServers(undefined).hash;
const makeParams = (sessionKey, backend, modelId) => ({
  sessionKey,
  cwd: process.cwd(),
  backend,
  modelId,
  systemPromptAppend: '간단히 답하세요.',
  settingSources: ['user'],
  strictMcpConfig: false,
  mcpServers: [],
  tools: ['Read', 'Bash', 'Edit', 'Write'],
  skillPlugins: [],
  permissionAllow: ['Read(*)', 'Bash(*)', 'Edit(*)', 'Write(*)', 'mcp__*'],
  disallowedTools: [],
  codexDisabledFeatures: [],
  bridgeConfigSignature: JSON.stringify({
    backend,
    appendSystemPrompt: false,
    settingSources: ['user'],
    strictMcpConfig: false,
    mcpServersHash: emptyMcpHash,
  }),
  contextMessageSignatures: [`smoke-model-switch:${backend}`],
});

async function runOneTurn(session, label) {
  setActivePromptHandler(session, () => {});
  const result = await sendPrompt(session, [{ type: 'text', text: 'ok만 답하세요.' }]);
  setActivePromptHandler(session, undefined);
  if (result.stopReason !== 'end_turn') {
    throw new Error(`${label} turn stopReason=${result.stopReason}`);
  }
  console.error(`[smoke-model-switch] ${label} turn ok`);
}

// 0.5.x — pi-shell-acp session locks its model for lifetime (issue #14).
// Scenario:
//   1. ensureBridgeSession(backendA/modelA) -> session A bootstrap (path=new)
//   2. one turn on session A succeeds
//   3. ensureBridgeSession(backendB/modelB) on the SAME sessionKey -> bridge
//      MUST throw ModelSwitchLockedError, regardless of whether backendA ==
//      backendB. Pre-0.5.x behavior fell through isSessionCompatible on
//      cross-backend mismatch and silent-respawned the new backend — the
//      cross-backend case covers that hole.
//   4. session A is still alive and serves one more turn under modelA
const sessionKey = `smoke-ms-locked:${backendA}-${backendB}`;
const sessionA = await ensureBridgeSession(makeParams(sessionKey, backendA, modelA));
await runOneTurn(sessionA, 'locked/turn1');

let lockedError;
try {
  await ensureBridgeSession(makeParams(sessionKey, backendB, modelB));
} catch (err) {
  lockedError = err;
}
if (!(lockedError instanceof ModelSwitchLockedError)) {
  throw new Error(
    `locked scenario: expected ModelSwitchLockedError, got ${lockedError ? lockedError.constructor?.name : 'no throw'}`,
  );
}
if (lockedError.fromModel !== modelA || lockedError.toModel !== modelB) {
  throw new Error(
    `locked scenario: error payload mismatch from=${lockedError.fromModel} to=${lockedError.toModel}`,
  );
}
if (lockedError.fromBackend !== backendA || lockedError.toBackend !== backendB) {
  throw new Error(
    `locked scenario: backend payload mismatch from=${lockedError.fromBackend} to=${lockedError.toBackend}`,
  );
}
console.error('[smoke-model-switch] locked/throw observed as expected');

// Lock means the existing session is still serving — confirm with a second
// turn under the original (backendA, modelA).
await runOneTurn(sessionA, 'locked/turn2-post-refusal-same-model');

await closeBridgeSession(sessionKey, { closeRemote: true, invalidatePersisted: true });
console.error('[smoke-model-switch] locked scenario done');
EOF
  ) || rc=$?

  if [[ "$rc" != "0" ]]; then
    echo "[smoke-model-switch/$label] node subprocess failed rc=$rc" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi

  # 0.5.x locked-policy expected log line — the lock fires BEFORE the compat
  # check, so backend=$backend_a (the live session's backend) is what shows
  # up on the model-switch log line even for a cross-backend attempt.
  # NOTE: the model ids are interpolated into an extended regex unescaped, so
  # a "." inside an id matches any single character.
  local pattern="^\\[pi-shell-acp:model-switch\\] path=reuse outcome=locked .*backend=$backend_a .*fromModel=$model_a toModel=$model_b reason=pi_shell_acp_session_locked_to_starting_model"
  if ! grep -qE "$pattern" "$log_file"; then
    echo "[smoke-model-switch/$label] missing log matching: $pattern" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi

  # Lock policy: exactly one bootstrap (path=new) for backend_a — the
  # refused second ensureBridgeSession must NOT trigger a second bootstrap.
  # A regression to the old "respawn" semantics would show >=2.
  # (`|| true` because grep -c exits 1 when the count is 0.)
  local new_bootstraps_a
  new_bootstraps_a=$(grep -cE "^\\[pi-shell-acp:bootstrap\\] path=new backend=$backend_a" "$log_file" || true)
  if [[ "$new_bootstraps_a" -ne 1 ]]; then
    echo "[smoke-model-switch/$label] expected exactly 1 bootstrap path=new backend=$backend_a, got $new_bootstraps_a" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi

  # No legacy "respawn" outcome anywhere — guards against partial revert.
  if grep -qE "^\\[pi-shell-acp:model-switch\\] .*outcome=respawn" "$log_file"; then
    echo "[smoke-model-switch/$label] legacy outcome=respawn line found — locked policy violated" >&2
    cat "$log_file" >&2
    rm -f "$log_file"
    exit 1
  fi

  # Cross-backend regression guard: a backend-changing switch must NOT
  # bootstrap a fresh session for $backend_b. Pre-0.5.x behavior fell
  # through isSessionCompatible and silently bootstrapped the other backend
  # — this is the exact wire-level evidence captured during the issue #14
  # investigation (Claude sonnet → Codex gpt-5.4).
  if [[ "$backend_a" != "$backend_b" ]]; then
    if grep -qE "^\\[pi-shell-acp:bootstrap\\] path=new backend=$backend_b" "$log_file"; then
      echo "[smoke-model-switch/$label] cross-backend regression: backend=$backend_b session was bootstrapped despite lock" >&2
      cat "$log_file" >&2
      rm -f "$log_file"
      exit 1
    fi
  fi

  echo "[smoke-model-switch/$label] ok (reuse mismatch locked, ModelSwitchLockedError thrown, existing session continues)"
  rm -f "$log_file"
}
# Model-switch lock gate entry point; runs the within-Claude row only.
#
# Arguments: $1 - project directory (resolved via normalize_project_dir)
smoke_model_switch() {
  local resolved_dir
  resolved_dir="$(normalize_project_dir "$1")"

  require_cmd pi

  printf '%s\n' \
    "[smoke-model-switch] claude-only model switch observability gate (negative: swap refused)" \
    "[smoke-model-switch] project: $resolved_dir" \
    "[smoke-model-switch] repo: $REPO_DIR"

  # Same backend, different model: the NEGATIVE check that swapping the model
  # on a live session is refused with ModelSwitchLockedError rather than
  # silently respawned. It is cheap because the swap is rejected before any
  # model turn, so no real opus inference runs. Under the claude-only floor
  # the codex within-backend row and the claude→codex cross-backend row are
  # not exercised.
  smoke_model_switch_single "$resolved_dir" claude claude-sonnet-4-6 claude claude-opus-4-8

  printf '%s\n' "[smoke-model-switch] Claude model-switch lock observability: ok (codex/cross-backend dropped — claude-only floor)"
}
# Bridge-level two-turn continuity check for one backend, driven through the
# pi CLI in JSON print mode with extensions disabled except this repo.
#
# Turn 1 must bootstrap with path=new and expose an acpSessionId, and must
# leave an assistant record in the session file. Turn 2, on the same session
# file, must take the expected path with the SAME acpSessionId, must not log
# bootstrap-invalidate or bootstrap-fallback, and must leave at least two
# assistant records, the last of which has non-empty content.
#
# Arguments: $1 - project directory (pi runs from here)
#            $2 - backend name expected in the bootstrap log lines
#            $3 - model passed to `pi --model`
#            $4 - bootstrap path expected on turn 2
#            $5 - free-text label echoed in the first progress line
# Globals:   REPO_DIR (read)
# Returns:   0 on success; exits 1 (after deleting the session file) on the
#            first failed expectation
smoke_entwurf_resume_single() {
  local project_dir=$1
  local backend=$2
  local model=$3
  local expected_path=$4
  local label=$5
  local session_file
  session_file=$(mktemp /tmp/pi-shell-acp-entwurf-resume-XXXXXX.jsonl)

  echo "[smoke-entwurf-resume/$backend] ${label}"
  echo "[smoke-entwurf-resume/$backend] model=$model expected-turn2=$expected_path session=$session_file"

  # The status is captured rather than tested inline so the rc can be printed.
  local turn1_log turn1_rc=0
  turn1_log=$(cd "$project_dir" && PI_SHELL_ACP_STRICT_BOOTSTRAP=1 pi \
    --mode json -p --no-extensions \
    -e "$REPO_DIR" \
    --provider pi-shell-acp \
    --model "$model" \
    --session "$session_file" \
    'READY 만 답해' 2>&1) || turn1_rc=$?
  if [[ "$turn1_rc" != "0" ]]; then
    echo "[smoke-entwurf-resume/$backend] turn1 pi invocation failed rc=$turn1_rc:" >&2
    echo "$turn1_log" >&2
    rm -f "$session_file"
    exit 1
  fi
  if ! grep -q "^\[pi-shell-acp:bootstrap\] path=new backend=$backend" <<< "$turn1_log"; then
    echo "[smoke-entwurf-resume/$backend] turn1 expected path=new, got:" >&2
    echo "$turn1_log" >&2
    rm -f "$session_file"
    exit 1
  fi
  local turn1_acp
  # `set -euo pipefail`: when the inner `acpSessionId=` grep finds nothing it exits 1,
  # pipefail propagates, and the assignment aborts the script BEFORE the `[[ -z ]]` guard
  # below can emit the intended "acpSessionId not extractable" diagnostic. Swallow the
  # pipeline's nonzero (oracle's doctor :153/:89 idiom) so emptiness flows to that guard.
  turn1_acp=$(grep -oE "^\[pi-shell-acp:bootstrap\] path=new backend=$backend [^$]*" <<< "$turn1_log" \
    | head -1 | grep -oE 'acpSessionId=[^ ]+' | head -1 | cut -d= -f2 || true)
  if [[ -z "$turn1_acp" ]]; then
    echo "[smoke-entwurf-resume/$backend] turn1 acpSessionId not extractable:" >&2
    echo "$turn1_log" >&2
    rm -f "$session_file"
    exit 1
  fi
  if ! grep -qE '"role":"assistant"' "$session_file"; then
    echo "[smoke-entwurf-resume/$backend] turn1 session file has no assistant message" >&2
    cat "$session_file" >&2
    rm -f "$session_file"
    exit 1
  fi
  echo "[smoke-entwurf-resume/$backend] turn1 path=new acpSessionId=$turn1_acp: ok"

  local turn2_log turn2_rc=0
  turn2_log=$(cd "$project_dir" && PI_SHELL_ACP_STRICT_BOOTSTRAP=1 pi \
    --mode json -p --no-extensions \
    -e "$REPO_DIR" \
    --provider pi-shell-acp \
    --model "$model" \
    --session "$session_file" \
    'OK 만 답해' 2>&1) || turn2_rc=$?
  if [[ "$turn2_rc" != "0" ]]; then
    echo "[smoke-entwurf-resume/$backend] turn2 pi invocation failed rc=$turn2_rc:" >&2
    echo "$turn2_log" >&2
    rm -f "$session_file"
    exit 1
  fi
  if ! grep -q "^\[pi-shell-acp:bootstrap\] path=$expected_path backend=$backend" <<< "$turn2_log"; then
    echo "[smoke-entwurf-resume/$backend] turn2 expected path=$expected_path, got:" >&2
    echo "$turn2_log" >&2
    rm -f "$session_file"
    exit 1
  fi
  local turn2_acp
  # Same pipefail-abort class as turn1_acp above: swallow the pipeline's nonzero so an
  # unextractable acpSessionId flows to the `[[ ... != ... ]]` mismatch guard below
  # instead of aborting the smoke mid-assignment.
  turn2_acp=$(grep -oE "^\[pi-shell-acp:bootstrap\] path=$expected_path backend=$backend [^$]*" <<< "$turn2_log" \
    | head -1 | grep -oE 'acpSessionId=[^ ]+' | head -1 | cut -d= -f2 || true)
  if [[ "$turn2_acp" != "$turn1_acp" ]]; then
    echo "[smoke-entwurf-resume/$backend] acpSessionId mismatch turn1=$turn1_acp turn2=$turn2_acp" >&2
    echo "$turn2_log" >&2
    rm -f "$session_file"
    exit 1
  fi
  if grep -q "^\[pi-shell-acp:bootstrap-invalidate\]" <<< "$turn2_log"; then
    echo "[smoke-entwurf-resume/$backend] turn2 unexpected bootstrap-invalidate:" >&2
    echo "$turn2_log" >&2
    rm -f "$session_file"
    exit 1
  fi
  if grep -q "^\[pi-shell-acp:bootstrap-fallback\]" <<< "$turn2_log"; then
    echo "[smoke-entwurf-resume/$backend] turn2 unexpected bootstrap-fallback:" >&2
    echo "$turn2_log" >&2
    rm -f "$session_file"
    exit 1
  fi

  # assistant message from turn2 must land in the same session file
  # (`|| true` because grep -c exits 1 when the count is 0.)
  local assistant_count
  assistant_count=$(grep -cE '"role":"assistant"' "$session_file" || true)
  if [[ "${assistant_count:-0}" -lt 2 ]]; then
    echo "[smoke-entwurf-resume/$backend] expected >=2 assistant messages in session file, got ${assistant_count:-0}" >&2
    cat "$session_file" >&2
    rm -f "$session_file"
    exit 1
  fi

  # last assistant payload must be non-empty (guards against role-only or blank content records)
  # The inline script prints the trimmed text length of the last assistant
  # record; unparseable lines are skipped, and a failing node run is mapped
  # to length 0 so it lands on the guard below.
  local last_assistant_len
  last_assistant_len=$(SESSION_FILE="$session_file" node --input-type=module -e '
    import { readFileSync } from "node:fs";
    const lines = readFileSync(process.env.SESSION_FILE, "utf8").split("\n").filter(Boolean);
    let last = null;
    for (const raw of lines) {
      try {
        const rec = JSON.parse(raw);
        const msg = rec && rec.message ? rec.message : rec;
        if (msg && msg.role === "assistant") last = msg;
      } catch {}
    }
    if (!last) { console.log(0); process.exit(0); }
    const content = last.content;
    let len = 0;
    if (typeof content === "string") len = content.trim().length;
    else if (Array.isArray(content)) {
      for (const part of content) {
        if (typeof part === "string") len += part.trim().length;
        else if (part && typeof part.text === "string") len += part.text.trim().length;
      }
    } else if (content && typeof content.text === "string") len = content.text.trim().length;
    console.log(len);
  ' 2>/dev/null || echo 0)
  if [[ "${last_assistant_len:-0}" -lt 1 ]]; then
    echo "[smoke-entwurf-resume/$backend] last assistant payload is empty (len=${last_assistant_len:-0})" >&2
    cat "$session_file" >&2
    rm -f "$session_file"
    exit 1
  fi

  echo "[smoke-entwurf-resume/$backend] turn2 path=$expected_path acpSessionId=$turn2_acp (same as turn1, last-assistant-len=$last_assistant_len): ok"
  rm -f "$session_file"
}
# Bridge-level entwurf-style continuity gate entry point.
#
# Only the Claude (resume) row is executed, so the banner and scope lines say
# exactly that. They previously announced a dual-backend gate and a Codex
# load path that this function does not run.
#
# Arguments: $1 - project directory (resolved via normalize_project_dir)
# Globals:   REPO_DIR (read)
# Outputs:   "[smoke-entwurf-resume] ..." banner, scope note, and result line
smoke_entwurf_resume() {
  local project_dir
  project_dir=$(normalize_project_dir "$1")

  require_cmd pi

  echo "[smoke-entwurf-resume] claude-only bridge-level continuity gate (sonnet)"
  echo "[smoke-entwurf-resume] project: $project_dir"
  echo "[smoke-entwurf-resume] repo: $REPO_DIR"
  echo "[smoke-entwurf-resume] scope: bridge carries same-session turn1->turn2 via resume(Claude)"
  echo " — spawn authority / target selection / parent×target matrix live in"
  echo " this repo's entwurf surface (pi/entwurf-targets.json + mcp/pi-tools-bridge)."
  echo " This smoke validates BRIDGE carry only; orchestration is validated separately."

  smoke_entwurf_resume_single "$project_dir" claude claude-sonnet-4-6 resume "bridge continuity (Claude → resumeSession)"

  echo "[smoke-entwurf-resume] Claude(resume) bridge continuity: ok (codex dropped — claude-only floor)"
}
check_model_lock() {
  # Policy unit test for pi-extensions/model-lock.ts. Deterministic: no pi
  # process, no network, no API cost. ExtensionAPI/Context are mocked and the
  # model_select handler is driven through every quadrant plus the edge cases
  # (the full matrix is in the header of scripts/check-model-lock.ts).
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-model-lock.ts
  )
}

check_shell_quote() {
  # POSIX-safety gate for the shellQuote helper used by the remote SSH command
  # builders (entwurf.ts + entwurf-core.ts). Checks source-string parity
  # between the two duplication sites AND behavioral correctness on the
  # payload classes behind the 2026-05-18 remote entwurf incident
  # (backtick / $(...) / $VAR / korean tokens). No process spawn, no SSH.
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-shell-quote.ts
  )
}

check_entwurf_session_identity() {
  # Deterministic gate for the locked garden session identity & name grammar
  # (NEXT.md "Locked — session identity & name grammar"). Covers the sessionId
  # validator, the buildSessionName/parseSessionName round-trip including
  # `.`-bearing registry models, titleSlug canonicalization, registry
  # exact-tuple membership, the name= info-only invariants, and the
  # header-scan collision pre-check. Registry + sessions base are isolated to
  # a temp dir. No backend, no API, no spawn.
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-entwurf-session-identity.ts
  )
}

check_meta_session() {
  # Deterministic gate for the 1.0.0 meta-bridge record authority (#30 step 2).
  # Covers the mint/serialize/parse round-trip + crash-on-malformed, the
  # scanByNativeId lookup authority BY RECORD BODY (not filename; proven with
  # a decoy filename in a real temp dir), the idempotent existence-keyed
  # decideUpsert + identity-drift refusal, and the pre-drilled read-receipt
  # mutators. Pure functions; no backend, no hook, no API.
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-meta-session.ts
  )
}
check_meta_record_v2() {
  # Deterministic golden gate for 0.11 Stage 0 step 3A: the v1→v2 identity
  # normalize seam. A synthetic, sanitized v1 fixture must normalize to a
  # hand-written v2 identity literal (the golden); dual-read version fences
  # and v2 field-contract crashes are covered as well. Reader/normalizer
  # only — there is no v2 writer yet. Deliberately separate from
  # check-meta-session so that 3D's v1-gate rewrites leave this back-compat
  # golden untouched. Pure functions; no backend, no hook, no API.
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-meta-record-v2.ts
  )
}

check_mailbox_receipt_state() {
  # Deterministic gate for 0.11 Stage 0 step 3B: the mailbox receipt state
  # schema + store, i.e. the new home for the read-receipt before v2 drops
  # record.delivery (NEXT.md fixed order 4). First a pure schema round-trip
  # with a strict keyset, then the fs store (stamp → persist → read-back) in
  # a temp mailbox dir. Schema/store only — the live enqueue/read dual-write
  # belongs to 3D. No backend, no hook, no API.
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-mailbox-receipt-state.ts
  )
}

check_entwurf_capabilities() {
  # Deterministic gate for 0.11 Stage 0 step 3C: the backend capability source
  # (pi/entwurf-capabilities.json), the new home for wakeMode/deliveryLevel/
  # nativeIdLabel before v2 drops them from the record (frozen decision 1).
  # Asserts coverage == META_BACKENDS_V2 (pi included), agreement with the
  # live META_BACKEND_DESCRIPTORS for the three existing backends (drift
  # guard), and strict keyset/coverage/field crashes. Parser/gate only — no
  # live routing and no record/descriptor consumer change (that is 3D).
  # No backend, no API.
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-entwurf-capabilities.ts
  )
}
Pure functions only — no fs upsert wiring, no # readMetaInbox/enqueueMetaMessage change, no record.delivery removal (3D-2/3/4). # No backend, no hook, no API. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-meta-dual-read.ts) } check_meta_mailbox_state_write() { # Deterministic gate for 0.11 Stage 0 step 3D-4 commit2 (the cut). Renamed from # check-meta-mailbox-dualwrite: after the cut the receipt is no longer dual-written # (record.delivery is gone from the v2 record) — it lives SOLELY in the mailbox # state store. Asserts the meta-record FILE is byte-identical before/after enqueue # AND read (enqueue/read no longer touch the record — invariant ⑤), the state # carries lastEnqueuedAt/lastReadAt with field isolation, lastDeliveredAt is never # invented, an empty inbox is a no-op on BOTH record and state (⑥), and a state # drift surfaces fail-loud. v2 citizen seeded via upsertMetaSession. No API. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-meta-mailbox-state-write.ts) } check_meta_receiver_marker() { # Deterministic gate for the meta-receiver presence marker (SE-2 slice 2b). The # active-receiver signal a self-fetch backend (Claude Code) needs: a meta-record # proves a session once existed; this marker proves a live watch owner is still # there to be woken, so a terminated session's lingering record does not read as a # ghost active receiver (mailbox garbage). Asserts: write→read round-trip keyed by # GARDEN id, atomic 0600; dead-owner/pid-reuse start-key guard reads null (distinct # from "no marker"); armProvenance constrained to the arm-capable events so # UserPromptSubmit cannot mint presence; reader does NOT gate on record existence # (recordBacked is the deliverability predicate's fact). Real tmpdir, no API. 
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-meta-receiver-marker.ts)
}

check_meta_migration() {
  # Deterministic gate for 0.11 Stage 0 step 3D-4 commit2: the v1→v2 delivery-receipt
  # migration (migrateV1DeliveryReceipts) + its crash-order inside upsert. Per-field
  # STATE WINS, 3 timestamps only; v1-all-null / state-already-wins are no-ops (no
  # state.json). Crash-order: a v1 record's receipts migrate to state BEFORE the v2
  # rewrite (proven via upsert attach), and a drift'd state makes migrate throw with
  # the record STILL v1 (recoverable: next attach re-migrates). Temp dir, no API.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-meta-migration.ts)
}

check_meta_dual_consumers() {
  # Deterministic gate for 0.11 Stage 0 step 3D-4: the delivery-agnostic dual-read
  # seam. readMetaIdentityByGardenId + scanIdentityByNativeId read v1 AND v2 records and
  # return normalized identity, so the live consumers (enqueue/read, MCP marker, prune,
  # store-doctor, the v2 upsert's existence scan) survive the v2 cut. Proves cross-schema
  # match + THE G1 invariant (a nativeSessionId duplicated across a v1 AND v2 file is
  # authority ambiguity → throw, so the v2 upsert never duplicate-mints) + v1 normalize +
  # body/filename drift fail-fast. The v1-only raw readers remain for v1-fixture gates.
  # Temp dir, no API.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-meta-dual-consumers.ts)
}

check_meta_capability_source() {
  # Deterministic gate for 0.11 Stage 0 step 3D-3: the capability-source cut-over.
  # mint/parse now read backend honesty metadata (wakeMode/deliveryLevel) from the
  # capability registry (3C) via metaCapabilityFor, NOT META_BACKEND_DESCRIPTORS.
  # Proves the seam is registry-DRIVEN (a doctored registry injection is followed),
  # mint sources delivery metadata through it, the parse drift guard is now
  # registry-sourced, and the cut-over preserves behaviour (registry ≡ const for the
  # 3 backends).
  # The record.delivery.wakeMode SLOT stays (removal is 3D-4); only the
  # SOURCE moves. check-entwurf-capabilities still owns the registry ≡ const drift
  # guard. Pure — no fs writes, no backend, no hook, no API.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-meta-capability-source.ts)
}

check_socket_probe() {
  # Deterministic gate for 0.11 Stage 0 (F3 fix): three-valued control-socket
  # liveness. classifyConnectError is a pure boundary (ECONNREFUSED/ENOENT →
  # dead; timeout/EACCES/unknown → indeterminate). GC reclaims dead only;
  # indeterminate (a load-stalled live socket) survives the sweep — the F3
  # invariant. Listing lists alive only. Pure classify + GC/listing policy +
  # a two-socket integration (live listener → alive survives; nonexistent →
  # dead GC-eligible). No wire timeout fixture, no backend, no API.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-socket-probe.ts)
}

check_project_trust_handler() {
  # Deterministic gate for 0.11 Stage 0 (Trust layer-2 active-prompt escape — "Trust 2층"): the
  # project_trust handler. Pure decideProjectTrust over real preflight outcomes —
  # approve/trusted-no-arg→{yes,remember:false}; direct distrust→{no}; inherited
  # distrust + interactive + "trust-here"→{yes,remember:true} (THE escape, beats
  # the ancestor false); non-interactive (pi -p / rpc)→{undecided}, never prompts;
  # never undefined. Plus the thin adapter: fake ctx.ui.select, single-writer
  # (handler never calls store.set — pi persists on remember:true), F5a evidence
  # in the prompt title. No real UI, no backend, no API.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-project-trust-handler.ts)
}

check_entwurf_v2_contract() {
  # Deterministic gate for 0.11 Stage 0 step 4-pre: the FROZEN entwurf_v2
  # contract (frozen decision 10 [동결결정 10] + bucket B [버킷 B] F1/F4/F6 + Fable R1-R5).
  # The intent×liveness
  # decision table is a constant; the "table cell ↔ dispatch receipt" round-trip
  # is asserted exhaustively — THE executable proof F6 demands ("no prose" — "산문 금지").
# R1 backend liveness domain (pi only; claude-code/codex/antigravity = # unsupported, never folded into dead/indeterminate). 6-cell table, single # verdict per cell (Q2), 2 allow / 4 reject. N1 indeterminate never spawns; # Q2 owned-outcome+live never auto-sends. R5 taxonomy covers table reasons + # pre-claims bad-target/untrusted-fail-fast/target-locked (bucket B F2). Plus # a schema↔types drift guard on the TypeBox input/receipt. Pure, no API. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-contract.ts) } check_entwurf_v2_lock() { # Deterministic gate for 0.11 Stage 0 step 5a (버킷 B F2): the per-gid dispatch # LOCK primitive — the only guard against a double-spawn of the same dormant # target (pi self-guards CREATE but not RESUME, 검증원장 F2). acquire = # openSync(lockPath,"wx") atomic; a second acquire without release = # target-locked conflict carrying the holder JSON (F2-P2 human cleanup). release # = unlink ONLY when the on-disk nonce is still ours (a successor's re-acquire # survives a late release). Stale reclaim ONLY for same host + ESRCH; EPERM # (other user's live pid) / different host / alive pid / unknown error all # fail-closed to conflict. Empty/corrupt lockfile = conflict, never # auto-deleted. F2-P1: a malformed gid throws before any path is built. Real # temp dir (wx atomicity under test); clock/nonce/pid/host/kill injected. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-lock.ts) } check_entwurf_v2_decider() { # Deterministic gate for 0.11 Stage 0 step 5b: the PURE dispatch decider # decideDispatch. Drives the frozen 7-step order over INJECTED fakes (target # lookup / lock / socket inspect+probe / preflight / capability), tracking lock # acquire+release so "reject ⇒ no plan AND no lock retained" is PROVEN. 
Covers: # bad-target/target-locked/target-address-conflict carry observedLiveness=null # (pre-probe), every other reject + untrusted-fail-fast carry a measured value; # control-socket send + spawn-bg resume execute KEEP the lock, meta-mailbox send # takes NO lock (？7); resume plan has no mode/wantsReply/provider/model but has # expectedSocketPath/observeTimeoutMs/releaseWhen; an invalid gid throws before # any lookup (F2-P1). Pure, no IO, no API. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-decider.ts) } check_entwurf_v2_matrix() { # Deterministic gate for 0.11 Stage 0 step 5d-5 (a): the REACHABILITY + LOCK SSOT # TABLE. Drives the REAL decideDispatch over minimal injected fakes and fixes, as # one readable table, every (target kind → transport → lock class) cell the 5d-5 # claim covers: bad-target/address-conflict/target-locked rejects, unsupported # meta-mailbox (deliverable) vs mailbox-undeliverable (inactive) vs owned reject, # in-domain control-socket (live) / spawn-bg (dormant owned) / released rejects # (owned-live, ff-dormant, indeterminate, under-lock conflict). A coverage pass # FAILS if any transport / lock class / pre-probe reject is missing — a dropped # decider cell cannot pass silently. Thin coverage, NOT a decider re-impl; surface # parity stays in check-entwurf-v2-surface. Pure, no IO, no API. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-matrix.ts) } check_entwurf_v2_release() { # Deterministic gate for 0.11 Stage 0 step 5c-1: the PURE release-policy reducer # (decideReleasePolicy + reduceRelease) for the 5c transport hand. 
Proves the # Fable-3 "release-after-observation" timing as a pure state machine BEFORE any # spawn/send IO: meta-mailbox=never release (no lock), control-socket=release once # on send-final, spawn-bg=spawn-started is NOT a release event (load-bearing) → # release on the FIRST observed transition (socket-alive ∨ child-exited any code) # or a failed start; socket↔exit race idempotent (single release either order); # decideReleasePolicy enforces the lock-nullness invariant (？7). Pure, no IO. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-release.ts) } check_entwurf_v2_send() { # Deterministic gate for 0.11 Stage 0 step 5c-2a: the control-socket SEND hand # (executeControlSocketSend) that WIRES real transport IO onto the 5c-1 release # reducer. Proves the send->outcome->release ordering over injected fakes (no socket): # ack->sent / in-band reject->rejected (no fallback) / dead->same-lock one-shot # re-resolve (control retry or mailbox enqueue)->fallback-sent|rejected|failed / # indeterminate->failed+rethrow with deadFallback+mailbox NEVER called (no # double-delivery on an alive-but-stalled socket). Release fires exactly once per # send-final; a releaseLock throw never masks the send failure (5b). IO-via-dep. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-send.ts) } check_entwurf_v2_send_fallback() { # Deterministic gate for 0.11 Stage 0 step 5c-2b: the same-lock re-resolve RESOLVER # (resolveDeadControlSendFallback) the 5c-2a hand calls on a dead connect. Proves the # fire-and-forget re-resolve routing over injected fakes (no filesystem): alive-> # control-socket retry (inspected socketPath) / dead(absent)->reject (dormant-fire- # forget-unsupported, NEVER spawn-bg) / indeterminate->reject / unsupported+deliverable # ->meta-mailbox plan (mini-table, no inspect/probe) / unsupported+undeliverable->reject # / bad-target + address-conflict->reject pre-probe. 
  # Mis-wire (plan/lock gid) fails loud
  # before IO; inspect/probe throws PROPAGATE (the hand owns failed+release); the resolver
  # has NO release seam; every execute plan keeps the held gid and is never spawn-bg.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-send-fallback.ts)
}

check_entwurf_v2_mailbox() {
  # Deterministic gate for 0.11 Stage 0 step 5c-4 (the LAST 5c transport slice): the
  # ENQUEUE-ONLY meta-mailbox SEND body (executeMetaMailboxSend) + its production
  # sendViaMailbox adapter (makeProductionSendViaMailbox). Proves the wiring over an
  # injected fake enqueue (no filesystem): sender present -> formatMetaMailboxBody with
  # plan.wantsReply threaded (yes/no in body, the deliberate divergence from legacy's
  # hard-coded false) / sender absent -> raw plan.message / enqueue opts EXACTLY
  # {gardenId: plan.targetGardenId, body, sessionsDir, mailboxDir} (no re-derivation) /
  # enqueue throw PROPAGATES (never folded into success:false — a mailbox has no in-band
  # refuse) / success -> {success:true}. Production adapter resolves {success:true},
  # consults senderProvider once, and NEVER touches the lock (a poison LockClaim whose
  # every access throws still resolves). Source guard: the lib code has NO release seam
  # and NO routing seam (no releaseLock / inspect / probe / resolve) — a lock leak or
  # re-route is structurally impossible.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-mailbox.ts)
}

check_entwurf_v2_runner() {
  # Deterministic gate for 0.11 Stage 0 step 5d-1: the execute-router (executeDispatch) that
  # routes an already-decided DispatchDecision to its 5c transport hand and maps the outcome
  # to one outcome-rich EntwurfV2RunResult.
  # Proves over injected fake hands (no socket/spawn/
  # timer): reject -> rejected (receipt+diagnostic carried, NO hand called) / control-socket ->
  # sendControl(plan, lock) / spawn-bg -> resumeSpawnBg (socket-alive AND lock-retained both
  # ride `executed`, fail-closed is not a failure) / meta-mailbox -> sendMailbox(plan, NULL
  # lock, ？7). Carry-overs: N3 control `rejected` carries rejectReason verbatim; N1
  # SendDeliveredReleaseFailedError -> execution-failed{finalizedOutcome, releaseFailed,
  # retrySafe:false}; a plain hand throw -> execution-failed{retrySafe:false} with no
  # finalizedOutcome. Exactly one hand runs per execute.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-runner.ts)
}

check_entwurf_v2_surface() {
  # Deterministic gate for 0.11 Stage 0 step 5d-3a: the ctx-free surface adapter
  # (entwurf-v2-surface.ts) + the entwurf-control.ts wiring contract. Proves the pure parts:
  # toDispatchInput (wants_reply→wantsReply, absent mode/wants_reply undefined) / renderEntwurfV2Result
  # per result kind ({text,isError} surfacing reject diagnostic, control N3 rejectReason, spawn
  # lock-retained, N1 delivered-but-dirty) / surface ctx-free source guard / entwurf-control
  # registers entwurf_v2 + reaches the fence via a NON-LITERAL dynamic import (no static fence
  # import → TS5097 stays closed) + decorates sender origin:pi-session/replyable:true.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-surface.ts)
}

check_pi_tools_bridge_boot() {
  # Deterministic gate for 0.11 step 5d-5-pre (G1a/G1b): boots the pi-tools-bridge MCP server
  # as it ships (start.sh → node --experimental-strip-types, no build) and asserts what the
  # source-shape gate check-entwurf-v2-surface cannot — that the whole v2 fence graph LOADS at
  # boot under strip-types (G1a: a parseable tools/list proves it) and that entwurf_v2 is
  # registered on the runtime surface with its schema (G1b).
tools/list only → no tools/call, # no lock/fs side effect, no auth → safe in pnpm check. Broad protocol/negative suite stays # in check-bridge/test.sh (D1=A안). (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-pi-tools-bridge-boot.ts) } check_entwurf_v2_production() { # Deterministic gate for 0.11 Stage 0 step 5d-2b: makeProductionEntwurfV2Deps — the ctx-free # PRODUCTION assembly of runEntwurfV2's deps. Proves the wiring over fake leaf-IO spies (no # real socket/lock/spawn/meta-record): decide wraps decideDispatch and acquires under the # wired lockDir / control sendOverSocket builds the RpcSendCommand + maps + releases under # lockDir / QB3 the spawn watcher releases via the SHARED lockDir release (not the spawn # factory default) / the mailbox hand enqueues onto the wired dirs / a dead control send # re-resolves to the SAME sendViaMailbox instance on the SAME dirs (Q3+Q5 no drift). (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-production.ts) } check_entwurf_control_rpc() { # Gate for 0.11 Stage 0 step 5d-2 (RPC-helper extraction micro-slice): the --entwurf-control # socket protocol (wire types + the newline-JSON client sendRpcCommand) moved to the ctx-free # SSOT lib/entwurf-control-rpc.ts behaviour-preservingly. Proves: lib is ctx-free (no # ExtensionContext/ExtensionAPI/@earendil-works/pi-ai) / entwurf-control.ts imports # sendRpcCommand from the lib and no longer defines its own / real short unix-socket round-trip # (write command -> matched {type:response,command,success:true} -> resolve) / close-before- # response rejects 'connection closed before response'. net.Server only, no model/pi process. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-control-rpc.ts) } check_entwurf_v2_spawn() { # Deterministic gate for 0.11 Stage 0 step 5c-3a: the spawn-bg RESUME watcher hand # (executeSpawnBgResume) wiring spawn + socket-observe IO onto the 5c-1 reducer. 
Proves # Fable-3 over injected deferred promises (no real child/socket/timer): TIMEOUT IS NOT A # RELEASE — a bare observeTimeout escalates to killChild (release 0), then a BOUNDED # killGrace waits for a real socket-alive / child-exited to release. socket-alive ∨ # child-exited(any code incl. null) -> release exactly once; the loser settling later is a # no-op. spawnChild throw -> spawn-start-failed (release, nothing to watch). No observation # obtainable (grace elapses, or a post-spawn watch dep throws and the exit can't be # observed) -> lock-retained fail-closed (released:false, pid/socket/lockPath surfaced) — # there is NO direct-release hatch; deps.releaseLock is reached ONLY via reduceRelease. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-spawn.ts) } check_entwurf_resume_args() { # Deterministic gate for 0.11 Stage 0 step 5c-3b: the resume-argv SSOT # (buildResumePiArgs) that the legacy async worker AND the v2 spawn-bg resident citizen # share so their launch shapes never drift. Pins the load-bearing A1 difference: legacy = # `--no-extensions` + NO `--entwurf-control` (one-shot `pi -p` can exit); v2-control = # `--entwurf-control` + NO `--no-extensions` (the keep-alive legacy avoided is the goal — # resumed session stands its socket up and stays addressable). BOTH keep `--mode json -p` # and the prompt-as-turn positional (`-p` NOT dropped in v2); explicitExtensionArgs # preserved exactly once (provider-resolution footgun #29); v2 includes plan.launchArgs # (--approve) before the prompt; null provider emits no --provider; no cross-contamination. 
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-resume-args.ts)
}

check_entwurf_v2_spawn_production() {
  # Deterministic gate for 0.11 Stage 0 step 5c-3c: the production SpawnBgResumeDeps factory
  # (makeProductionSpawnBgResumeDeps) wiring the 5c-3a watcher's six IO seams onto the real
  # world — proven WITHOUT a real pi spawn/socket/timer (that is the opt-in
  # smoke-entwurf-v2-spawn-live, kept OUT of pnpm check). socketWatchVerdict (R2 policy):
  # address-conflict→forged (reject, never wait), alive→alive, dead/indeterminate→wait.
  # spawnChild builds the v2-control argv (--entwurf-control, no --no-extensions, -p+prompt,
  # --approve, ext/provider/model, header cwd). awaitSocketAlive: connectable resolves, forged
  # (symlink) rejects without connecting, dead→wait→alive, abort clears the sleep. awaitChildExit
  # resolves the code + removes the listener on abort. awaitTimeout schedules + abort-clears.
  # killChild=SIGTERM; releaseLock delegates; a proc-less child fails loud (mis-wire).
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-spawn-production.ts)
}

check_entwurf_v2_only() {
  # Deterministic gate for 0.11.0 acceptance blocker (B): PI_SHELL_ACP_V2_ONLY=1 v2-only mode.
  # Two layers, no spawn/socket/timer. A) pure helper (entwurf-v2-only.ts): isV2OnlyMode is true
  # ONLY for exact "1" ("true"/"0"/""/missing → false); checkV1EntwurfAllowed off→allowed,
  # on→blocked with a message naming the flag + entwurf_v2 + "unavailable"; assert throws on, silent
  # off. B) source/static: all 10 v1 entrypoints (9 surface groups; /entwurf tool+command = two)
  # carry their guard at the handler head, the MCP entwurf_resume + control-RPC spawn_async_resume
  # are BOTH guarded (no single-point bypass), and the v2 core (runner+surface) stays flag-CLEAN —
  # the flag is a legacy-surface gate, never a v2-decision gate. v1 code is NOT deleted (0.12 lane).
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-v2-only.ts)
}

smoke_entwurf_v2_spawn_live() {
  # LIVE phase gate for 0.11 Stage 0 step 5c-3c (D5) — kept OUT of `pnpm check`. Exercises the
  # production SpawnBgResumeDeps against REAL OS objects (a real unix socket, real child
  # processes, real timers, real abort teardown) to catch what the deterministic gate's fakes
  # cannot: actual spawn/exit/error event semantics, real lstat+connect liveness, and the
  # 5c-3a watcher's timeout→kill→child-exited→release integration on a live process. It does
  # NOT spawn a real `pi --entwurf-control` resume (that is the 5d surface matrix). Run once
  # before 5d and record the result: LIVE=1 ./run.sh smoke-entwurf-v2-spawn-live
  #
  # Opt-in guard: unless LIVE is exactly "1", print the skip notice and return 0
  # without running the script.
  if [ "${LIVE:-}" != "1" ]; then
    echo "[smoke-entwurf-v2-spawn-live] skipped — set LIVE=1 to run (spawns real children + opens a real unix socket)."
    return 0
  fi
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/smoke-entwurf-v2-spawn-live.ts)
}

smoke_entwurf_v2_matrix_live() {
  # LIVE sentinel for 0.11 Stage 0 step 5d-5 (D4-b) — kept OUT of `pnpm check`. The deterministic
  # sibling (check-entwurf-v2-matrix) fixes every (target kind → transport → lock) cell over fakes
  # with ZERO IO; this drives the REAL production runEntwurfV2 deps against REAL OS objects on the
  # substrate happy path across 3 cells: C1 control-socket (a real `pi --entwurf-control` resident
  # → control-socket RPC send → lock acquire→release ×1), C2 meta-mailbox deliverable (armed
  # self-fetch citizen → real .msg enqueue, lock-free), C3 meta-mailbox guard (no armed receiver →
  # reject, no garbage). Model-in-loop is OUT (GPT Q2): "does the sender model call entwurf_send"
  # is a separate behavior test — this is a transport/lock/enqueue gate. Negative/timeout/contention
  # stay deterministic. Honest skip when LIVE!=1 so the release-gate is runnable unattended.
  # Model: PI_SHELL_ACP_LIVE_TARGET=PROVIDER/MODEL (default openai-codex/gpt-5.4).
# LIVE=1 ./run.sh smoke-entwurf-v2-matrix-live if [ "${LIVE:-}" != "1" ]; then echo "[smoke-entwurf-v2-matrix-live] skipped — set LIVE=1 to run (spawns a real pi --entwurf-control + opens a real socket)." return 0 fi (cd "$REPO_DIR" && node --experimental-strip-types scripts/smoke-entwurf-v2-matrix-live.ts) } smoke_entwurf_v2_spawn_resume_live() { # The 0.11.0 (A) acceptance gate — kept OUT of `pnpm check`. Unlike matrix-live (a # transport/lock sentinel, model-in-loop OUT) and spawn-live (OS-substrate watcher, no real # pi), this drives the FULL production loop: mint a backend=pi meta identity → seed a REAL # dormant pi session (one-shot `pi --mode json -p --no-extensions` into the REAL # ~/.pi/agent/sessions) → runEntwurfV2(intent=owned-outcome) routes the dormant in-domain pi # citizen to spawn-bg resume → a REAL detached `pi --entwurf-control` child stands its socket # up, resumes, and DOES a model turn. Asserts: executed/spawn-bg/socket-alive/released, lock # released exactly once + no lock file, resident pid alive + socket connectable, and the # resume USER + assistant OK nonces appended to the session JSONL (real work, not just # "process up"). This is the evidence v1 deprecation (0.12) is predicated on. Model-in-loop is # IN. Honest skip when LIVE!=1 (skip = CI safety, NOT an acceptance PASS). # Model: PI_SHELL_ACP_LIVE_TARGET=/ (default openai-codex/gpt-5.4); # PI_SHELL_ACP_SPAWN_RESUME_ASSISTANT_TIMEOUT_MS (default 180000). # LIVE=1 ./run.sh smoke-entwurf-v2-spawn-resume-live if [ "${LIVE:-}" != "1" ]; then echo "[smoke-entwurf-v2-spawn-resume-live] skipped — set LIVE=1 to run (spawns a real pi resume child + opens a real socket)." return 0 fi (cd "$REPO_DIR" && node --experimental-strip-types scripts/smoke-entwurf-v2-spawn-resume-live.ts) } check_entwurf_facts() { # Deterministic gate for 0.11 Stage 0 step 4 (fact-provider slice 1): the PURE # fact core. Locks the PeerFact shape + R1/R3b liveness invariant before any IO # wiring (gate-first). 
  # R1: out-of-domain backend (claude-code/codex/antigravity)
  # → unsupported for EVERY socket input (never coerced to the socket value or
  # dead). R3b: in-domain pi → alive/dead/indeterminate, null → indeterminate
  # (no proof ≠ dead). facts-only keyset: identity facts + liveness, NO
  # verb-routing (resumable/sendable/transport/dispatch/action) and NO
  # transcriptPath (frozen decision 10 — 동결결정 10). Pure, no IO, no API.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-facts.ts)
}

check_socket_discovery() {
  # Deterministic gate for 0.11 Stage 0 step 4 (fact-provider slice 3): the
  # SOCKET-axis wiring scanSocketProbes. Probes the union of (dir sockets) ∪
  # (every in-domain pi citizen's canonical path) so a dormant citizen with no
  # socket file reads dead (ENOENT) → resumable, never an unprobed gap (slice 2
  # throws on that). Three-valued throughout — a stalled socket stays
  # indeterminate (F3), never folded to dead by an alive-only listing. Dir
  # hygiene (non-.sock / malformed names ignored), dedup, missing-dir, sort, and
  # an end-to-end scanSocketProbes→resolveFactList. readdir/probe injected, no IO.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-socket-discovery.ts)
}

check_meta_listing() {
  # Deterministic gate for 0.11 Stage 0 step 4 (fact-provider slice 4a): the
  # meta-store axis listAllMetaIdentities. Explicit-partial: a parse failure or
  # body/filename drift does NOT blind the listing (valid records still surface)
  # and does NOT throw (0.10 "corrupt blocks registration forever" lesson) — it
  # becomes an explicit error carrying ONLY {filename, message}, verbatim (a
  # salvaged gid string as a fact = synthetic backdoor). mode strict throws on
  # any error, collect returns partial. entries/readRecord injected, no IO.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-meta-listing.ts)
}

check_entwurf_fact_provider() {
  # Deterministic gate for 0.11 Stage 0 step 4 (fact-provider slice 4b): the
  # ASSEMBLY layer listEntwurfFacts.
listAllMetaIdentities → scanSocketProbes → # pre-quarantine non-pi/socket conflicts → resolveFactList(clean) → # {facts, diagnostics}. Throw-vs-diagnostics policy (GPT힣 C-원칙): expected # corruption (parse failure / gardenId↔socket collision) → diagnostics, listing # survives; impossible wiring invariant (resolveFactList duplicate/unprobed) → # throw, never swallowed. A collision quarantines BOTH the PeerFact and the # socket (gid is the universal address). meta + socket deps injected, no IO. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-fact-provider.ts) } check_entwurf_peers_surface() { # Deterministic gate for 0.11 Stage 0 step 4 (fact-provider slice 4c): the MCP # entwurf_peers RENDER/PAYLOAD layer renderEntwurfPeers. Legacy `sessions` is a # PROJECTION of facts (alive pi citizens + alive socket-only), NOT a second scan # (a re-run getLiveSessions would bypass the provider quarantine); socketPath via # controlSocketPath (SSOT, no correlation-authority drift); count = projection # length not peers.length; three distinct arrays (peers/socketOnly/diagnostics); # NO verb-routing field in JSON (deep key scan) NOR word in text (title leak); # diagnostics in both surfaces; empty → "(none)"; unsupported shown; enrich null # → "(not enriched)". WIRING guard: bridge calls listEntwurfFacts+renderEntwurfPeers, # getLiveSessions gone. Facts fabricated, no IO (only static source read). (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-peers-surface.ts) } check_entwurf_send_mailbox_fallback() { # Deterministic gate for the pi-native entwurf_send → meta-bridge mailbox # fallback (transport 2). Guards the bug where pi-native send was # control-socket-ONLY, so a pi session could not reply to a Claude/Codex/agy # garden citizen (no socket) — pi → Claude was ENOENT with no fallback, # breaking the garden-id universal-address promise. 
Asserts: ENOENT/ECONNREFUSED # ("dead") falls back, timeout/indeterminate does NOT (no double-deliver to a # stalled-but-live socket); the SHARED formatMetaMailboxBody renders replyable # correctly; enqueue to a no-socket citizen writes a .msg + signal; and a WIRING # GUARD that pi-native actually calls the fallback and the bridge uses the shared # formatter (no local copy) so neither sender can silently drop the transport. (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-send-mailbox-fallback.ts) } check_entwurf_self_address() { # Deterministic gate for the self-addressability honesty predicate (SE-1/SE-2 # slice 1). Guards the bug where the MCP bridge / pi-native claim replyable:true # from env presence alone: a socketless pi session, or a meta citizen whose owner # exited / whose idle-watch was never armed, all advertised replyable while # delivery silently failed (SE-1). Asserts: PURE truth table (pi replyable ⟺ # socketAlive; meta ⟺ recordBacked ∧ ownerAlive ∧ watchArmed; external never), # incl. the two regression-proof rows (record-present + owner-dead / watch-unarmed) # that stay meaningful after slice 3 mints records; SOURCE GUARD that # buildStrictPiSenderEnvelope drops the hardcoded `replyable: true` and existsSync- # probes the socket, and entwurf_self renders alive vs expected (no path lie). # Slice boundary: meta watchArmed is wired from the slice-2 presence marker; do NOT # claim slice 1 green standalone (1+2 close in the same release block). (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-self-address.ts) } check_entwurf_deliverability() { # Deterministic gate for the conversational-mailbox deliverability predicate # (SE-1/SE-2 slice 2c). The predicate the enqueue sites must consult (slice 2d) # before writing a .msg. 
  # Asserts: computeMetaReceiverActive (active iff recordBacked
  # AND ownerAlive AND watchArmed, fail-closed, per-cause reasons); mailboxConversational-
  # Deliverable (deliverable iff wakeMode self-fetch AND active) — KEY rows: direct-inject
  # (pi) refused even when active (SE-1, no mailbox drain), self-fetch + dead-owner/unarmed
  # refused (SE-2, would rot); WIRING that the self-addressability predicate shares the
  # SAME active-receiver atom (one source of truth). Pure, no IO.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-deliverability.ts)
}

check_entwurf_mailbox_guard() {
  # Deterministic gate for the guarded mailbox enqueue (SE-1/SE-2 slice 2d) — the IO
  # orchestration conversational-reply sites use instead of enqueueMetaMessage directly.
  # Asserts (GPT Q5, both axes): PURE 0-call — an undeliverable target (dead receiver /
  # direct-inject pi / absent record) leaves the injected enqueue UNCALLED, a deliverable
  # one calls it exactly once; TMPDIR SNAPSHOT with the real enqueueMetaMessage — a refused
  # send leaves the mailbox tree byte-identical (file list + content hash, not just mtime),
  # an accepted send writes exactly one .msg; plus fact gathering from record/capability/marker.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-entwurf-mailbox-guard.ts)
}

check_plugin_empty_final_recovery() {
  # Deterministic recovery-decision gate for the OpenClaw plugin's
  # `resolveRecoveredFinalMessage` helper (plugins/openclaw/src/index.ts).
  # Issue #20 — post-#17 regression where a clean exit with
  # message_end{role:"assistant", content:[]} bypassed both recovery
  # branches and let OpenClaw surface raw prompt fragments to the user.
  # The fix unifies the recovery decision; this gate exercises every
  # branch on synthetic AssistantMessage inputs (no pi process, no
  # network, no API cost). See scripts/check-plugin-empty-final-recovery.ts
  # header for the full case matrix.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-plugin-empty-final-recovery.ts)
}

check_plugin_prompt_format() {
  # Deterministic shape gate for the OpenClaw plugin's
  # `buildConversationPrompt` serializer + `stripChatCompletionTail`
  # output sanitizer (plugins/openclaw/src/index.ts). Issue #20 follow-up
  # incident — after the empty-final fix landed, oracle bbot verification
  # observed the model echoing a fabricated `User: ...` next-turn line
  # plus a Cline-style `` close tag into the visible
  # body, primed by the earlier `User:` / `Assistant:` transcript form.
  # NOTE(review): the tag name inside the backticks above is empty here — restore it
  # from the scripts/check-plugin-prompt-format.ts header.
  # The serializer now produces a JSON-array context with an explicit
  # non-continuation instruction, and the sanitizer strips the two
  # narrow tail shapes observed in the wild as defense-in-depth.
  # See scripts/check-plugin-prompt-format.ts header for the full matrix.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-plugin-prompt-format.ts)
}

smoke_async_resume() {
  # Live smoke for the async-resume regression repair (Phase A + Phase B).
  # It verifies the replyable MCP → control-RPC → native async launcher path:
  # backend calls MCP `entwurf` (sync-only spawn) then `entwurf_resume(mode=async)`,
  # async ack returns immediately, and successful completion later arrives as a
  # followUp (`🏁 resume … completed`). The omitted-mode conditional default is
  # pinned by `check_async_resume_gate`; this live smoke uses explicit async so
  # model prompt-following cannot hide the async branch.
  #
  # Capability dignity across siblings (Hard Rule #7); the live floor is
  # claude-only as of 0.11.0. Claude, Codex, and Gemini all exercise the same
  # MCP → control-RPC → native-launcher chain, but the default here collapsed
  # to claude-only (gemini CLI deprecated, codex live dropped from the release
  # floor). Pass explicit backends to exercise codex/gemini on demand; they
  # remain supported surfaces, just not part of the live floor.
# # Cases: # A — MCP replyable async (per backend): tmux pi session with --entwurf- # control, prompt backend procedurally (no identity claims) to call # entwurf + entwurf_resume(mode=async), assert async ack AND successful # completion followUp in the pane (`❌ resume` is fail-closed). # B — external non-replyable + explicit mode='async' → reject. Stdio # call to the MCP bridge without PI_SESSION_ID/PI_AGENT_ID env. # C — external + auto-sync shape reachability. Stdio call with mode # omitted; auto-resolution picks sync; assertion is that the # sync path is REACHED (not gated by replyable check). # # Cost: ~$0.10–$0.30 per full Claude+Codex+Gemini run. Required before # release when the entwurf surface changes per AGENTS.md Hard Rule #7. # cwd-preserving: do NOT cd into the repo. The smoke spawns tmux pi sessions # whose cwd determines the pi session dir; forcing repo cwd would litter the # repo's session dir even when release-gate runs from a scratch project cwd. # The script is found by absolute path; its tmux spawns pin -c "$PWD". "$REPO_DIR/scripts/smoke-async-resume.sh" "$@" } check_async_resume_gate() { # Deterministic gate for the MCP `entwurf_resume` mode resolution (Phase B # Step 3 of the async-resume regression repair). The handler at mcp/pi- # tools-bridge/src/index.ts uses `resolveEntwurfResumeMode` (extracted into # mcp/pi-tools-bridge/src/resume-mode.ts) to apply the async-followUp # discriminator: async only for a pi-session caller (owns a control socket), # sync for external hosts AND meta-sessions, reject on explicit async from a # non-pi-session caller. This gate pins the resolution against the # 6 input cases plus 3 invariants so a future edit cannot silently turn the # discriminator back into a static default (which would invert the external # MCP host UX). No process spawn, no socket, no API cost. 
(cd "$REPO_DIR" && node --experimental-strip-types scripts/check-async-resume-gate.ts)
}

check_package_source_routing() {
  # Deterministic gate for #29 (package-installed Entwurf ACP routing). Pins
  # resolveExplicitExtensionSpec()'s package-source -> install-root mapping and
  # the fail-fast routing contract through the two public routing surfaces
  # (getRegistryRouting spawn path, getEntwurfExplicitExtensions resume path).
  # Covers the install matrix: local path / git user / npm user (+version) /
  # install-missing / project-scope-unseen / no-source, across local + remote,
  # plus self-root fallback and the resume unresolvedAcpIntent signal. Isolated
  # via a temp PI_CODING_AGENT_DIR — the real ~/.pi/agent is never touched. No
  # backend, no spawn, no API cost. The live counterpart that actually spawns a
  # package-installed Entwurf ACP child is smoke-installed-entwurf-acp.
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/check-package-source-routing.ts)
}

smoke_installed_entwurf_acp() {
  # Live counterpart to check-package-source-routing (#29). Reproduces Pi
  # settings package-install topologies (git + npm user sources) in isolated
  # agent dirs, drives entwurf-core's REAL resolver to compute the bridge -e,
  # then spawns an actual pi child with that -e to prove the exact failing
  # boundary is closed: a `--no-extensions --provider pi-shell-acp` child must
  # recognize the provider rather than dying with `Unknown provider "pi-shell-acp"`.
  #
  # Credential-free: the runtime proof uses `pi --list-models pi-shell-acp`, which
  # registers the provider via the injected bridge but does NOT spawn a backend —
  # no auth, no token cost. The deterministic gate covers the resolver math across
  # the full matrix; this gate covers the one thing it cannot: a real pi process
  # loading package-installed bridge roots under --no-extensions. The packed
  # tarball case closes the publish-shape topology, while the symlink cases keep
  # git/npm source-root routing cheap and explicit.
  #
  # Arguments: none.
  # Returns:   0 when the git, npm and npm-packed topologies all pass; 1 on the
  #            first failure (after reporting it through `fail`).
  # Cleanup:   the temp root is removed by a RETURN trap on every exit path.
  section "smoke: installed Entwurf ACP routing (#29 git+npm package sources + packed tarball)"

  if ! command -v pi >/dev/null 2>&1; then
    fail "[smoke-installed-entwurf-acp] pi binary not on PATH — cannot run runtime proof"
    return 1
  fi

  # Check mktemp explicitly: when this function runs in a context where `set -e`
  # is suppressed (`f || ...`, `if f`), a failed mktemp would otherwise leave
  # tmp_root empty and every path below would resolve under `/`.
  local tmp_root
  tmp_root=$(mktemp -d -t psa-installed-entwurf.XXXXXX) || {
    fail "[smoke-installed-entwurf-acp] mktemp -d failed — cannot create isolated agent dirs"
    return 1
  }
  # The trap removes itself after firing. A RETURN trap set inside a function
  # stays installed once the function returns, so without `trap - RETURN` it
  # could fire again when a calling function returns — at which point the local
  # tmp_root is out of scope and `set -u` turns the expansion into a fatal
  # unbound-variable error.
  trap 'rm -rf -- "$tmp_root"; trap - RETURN' RETURN

  # Runs one topology: writes settings.json, resolves the bridge, then proves a
  # --no-extensions pi child registers the provider.
  #   $1 label         human-readable topology name used in messages
  #   $2 source        package source string written into settings.json
  #   $3 install_root  path the resolver is expected to return
  #   $4 agent_dir     isolated PI_CODING_AGENT_DIR for this topology
  smoke_installed_entwurf_acp_one() {
    local label="$1"
    local source="$2"
    local install_root="$3"
    local agent_dir="$4"

    mkdir -p "$agent_dir"
    printf '{ "packages": ["%s"] }\n' "$source" > "$agent_dir/settings.json"

    # 1. Resolver step — entwurf-core's own routing under this package topology.
    #    Throws (non-zero) if the bridge cannot be resolved, which is itself a
    #    failure here.
    local bridge
    bridge=$(cd "$REPO_DIR" && PI_CODING_AGENT_DIR="$agent_dir" \
      node --experimental-strip-types scripts/resolve-acp-bridge.ts) || {
      fail "[smoke-installed-entwurf-acp] resolver threw under $label topology (bridge unresolved)"
      return 1
    }
    if [ -z "$bridge" ]; then
      fail "[smoke-installed-entwurf-acp] resolver injected no -e bridge under $label topology"
      return 1
    fi
    if [ "$bridge" != "$install_root" ]; then
      fail "[smoke-installed-entwurf-acp] resolver -e ($bridge) != $label install root ($install_root)"
      return 1
    fi
    echo "[smoke-installed-entwurf-acp] resolver injected $label bridge: -e $bridge"

    # 2. Runtime proof — a --no-extensions child with the resolver's -e must
    #    register provider pi-shell-acp. This is the precise #29 boundary.
    local out
    out=$(PI_CODING_AGENT_DIR="$agent_dir" pi --no-extensions -e "$bridge" --list-models pi-shell-acp 2>&1) || {
      fail "[smoke-installed-entwurf-acp] pi --no-extensions -e <$label bridge> --list-models exited non-zero:"
      echo "$out" | tail -10 | sed 's/^/ /' >&2
      return 1
    }
    # Unresolved provider surfaces two ways: `Unknown provider` on the spawn path,
    # or `No models matching "pi-shell-acp"` on --list-models. Reject both. The
    # real teeth is the curated-model anchor: claude-sonnet-4-6 only appears when
    # the injected package bridge actually registers the pi-shell-acp provider.
    if grep -qi "Unknown provider" <<<"$out"; then
      fail "[smoke-installed-entwurf-acp] $label child reports Unknown provider (regression — #29 reopened):"
      echo "$out" | tail -10 | sed 's/^/ /' >&2
      return 1
    fi
    if grep -qi "No models matching" <<<"$out"; then
      fail "[smoke-installed-entwurf-acp] provider pi-shell-acp not registered from $label bridge (No models matching):"
      echo "$out" | tail -10 | sed 's/^/ /' >&2
      return 1
    fi
    if ! grep -q "claude-sonnet-4-6" <<<"$out"; then
      fail "[smoke-installed-entwurf-acp] pi-shell-acp model surface missing claude-sonnet-4-6 anchor under $label:"
      echo "$out" | tail -10 | sed 's/^/ /' >&2
      return 1
    fi
    ok "[smoke-installed-entwurf-acp] $label source resolves + child recognizes provider pi-shell-acp"
  }

  # Symlink topologies: the installed packages are this repo's code, reached via
  # the canonical user-scope install paths under isolated agent dirs.
  local git_agent="$tmp_root/git-agent"
  local git_root="$git_agent/git/github.com/junghan0611/pi-shell-acp"
  mkdir -p "$(dirname "$git_root")"
  ln -s "$REPO_DIR" "$git_root"
  smoke_installed_entwurf_acp_one \
    "git" \
    "git:github.com/junghan0611/pi-shell-acp" \
    "$git_root" \
    "$git_agent" || return 1

  local npm_agent="$tmp_root/npm-agent"
  local npm_root="$npm_agent/npm/node_modules/@junghanacs/pi-shell-acp"
  mkdir -p "$(dirname "$npm_root")"
  ln -s "$REPO_DIR" "$npm_root"
  smoke_installed_entwurf_acp_one \
    "npm" \
    "npm:@junghanacs/pi-shell-acp" \
    "$npm_root" \
    "$npm_agent" || return 1

  # Packed tarball topology: install the actual npm pack artifact into the same
  # managed-root shape that `npm:@junghanacs/pi-shell-acp` maps to. This is the
  # credential-free pre-publish proof that the package contents, peer layout, and
  # resolver topology agree before the registry smoke exists.
  local pack_agent="$tmp_root/pack-agent"
  local pack_project="$pack_agent/npm"
  local pack_root="$pack_project/node_modules/@junghanacs/pi-shell-acp"
  local version pi_peer tgz_name tgz_path pack_log
  version=$(node -p "require('${REPO_DIR}/package.json').version")
  pi_peer=$(node -p "require('${REPO_DIR}/package.json').devDependencies['@earendil-works/pi-ai']")
  tgz_name="junghanacs-pi-shell-acp-${version}.tgz"
  tgz_path="$tmp_root/$tgz_name"

  echo "[smoke-installed-entwurf-acp] npm pack -> $tgz_name (packed topology)"
  (cd "$REPO_DIR" && npm pack --silent --dry-run=false --pack-destination "$tmp_root" >/dev/null) || {
    fail "[smoke-installed-entwurf-acp] npm pack failed for packed topology"
    return 1
  }
  if [ ! -f "$tgz_path" ]; then
    fail "[smoke-installed-entwurf-acp] packed tarball not produced: $tgz_path"
    return 1
  fi

  mkdir -p "$pack_project"
  printf '%s\n' '{ "name": "psa-packed-agent", "version": "0.0.0", "private": true }' > "$pack_project/package.json"
  pack_log=$(cd "$pack_project" && pnpm add \
    "$tgz_path" \
    "@earendil-works/pi-ai@${pi_peer}" \
    "@earendil-works/pi-coding-agent@${pi_peer}" \
    "@earendil-works/pi-tui@${pi_peer}" \
    "typebox@latest" \
    --ignore-workspace --ignore-scripts 2>&1) || {
    fail "[smoke-installed-entwurf-acp] pnpm add packed tarball failed:"
    echo "$pack_log" | tail -10 | sed 's/^/ /' >&2
    return 1
  }
  if [ ! -d "$pack_root" ]; then
    fail "[smoke-installed-entwurf-acp] packed install root missing: $pack_root"
    return 1
  fi

  smoke_installed_entwurf_acp_one \
    "npm-packed" \
    "npm:@junghanacs/pi-shell-acp" \
    "$pack_root" \
    "$pack_agent" || return 1

  ok "[smoke-installed-entwurf-acp] git+npm symlink + npm-packed package sources pass"
  return 0
}

smoke_session_id_name() {
  # LIVE 3-turn substrate smoke (Phase 3a) for Pi 0.78 --session-id/--name,
  # exercised through the bridge but NOT through the Entwurf tool surface, so it
  # lands independently of the taskId->sessionId migration.
# Spawns real cheap
  # sonnet turns (auth + tokens) and asserts: header id/cwd, session_info name as
  # info layer, append-not-recreate, spawn-only name, and the wrong-cwd footgun
  # as documented evidence. Isolated via a temp PI_CODING_AGENT_DIR.
  section "smoke: --session-id / --name substrate (direct pi, no Entwurf API)"
  if ! command -v pi >/dev/null 2>&1; then
    fail "[smoke-session-id-name] pi binary not on PATH — cannot run live substrate proof"
    return 1
  fi
  (cd "$REPO_DIR" && node --experimental-strip-types scripts/smoke-session-id-name.ts) || {
    fail "[smoke-session-id-name] live substrate smoke failed"
    return 1
  }
  ok "[smoke-session-id-name] --session-id/--name substrate proven (append + spawn-only name + wrong-cwd footgun)"
  return 0
}

check_mcp() {
  # Inline gate for `normalizeMcpServers` (imported from ./acp-bridge.ts). Runs
  # an ES-module program fed to node on stdin, from the repo root so the
  # relative import resolves. The program walks 15 numbered cases:
  #   1-2   undefined / {} normalize to an empty server list whose hash equals
  #         sha256("[]") (case 1 also pins signatureKey to "[]").
  #   3-4   servers come out sorted by name; env key order does not change the hash.
  #   5-7   stdio (no "type"), http and sse shapes; env and headers are emitted
  #         as [{ name, value }] arrays.
  #   8-15  invalid configs (unsupported type, empty command/url, non-string
  #         env/args values, array root, bad headers) throw McpServerConfigError
  #         with a non-empty issues[] whose entries each name the server;
  #         case 14 checks that two invalid servers are reported together.
  # On success it prints "[check-mcp] 15 assertions ok"; any failed assertion
  # throws inside node, so the subshell (and this function) returns non-zero.
  #
  # NOTE(review): unlike the sibling gates, this invocation does not pass
  # --experimental-strip-types although it imports a .ts file — presumably it
  # relies on a Node version that strips types by default; confirm the minimum
  # supported Node version.
  (cd "$REPO_DIR" && node --input-type=module <<'EOF'
import { normalizeMcpServers, McpServerConfigError } from './acp-bridge.ts';
import { strict as assert } from 'node:assert';
import { createHash } from 'node:crypto';

const emptyHash = createHash('sha256').update('[]').digest('hex');

function expectThrow(label, fn) {
  try {
    fn();
  } catch (err) {
    if (!(err instanceof McpServerConfigError)) {
      throw new Error(`${label}: expected McpServerConfigError, got ${err?.name || err}`);
    }
    if (!Array.isArray(err.issues) || err.issues.length === 0) {
      throw new Error(`${label}: expected non-empty issues[]`);
    }
    for (const issue of err.issues) {
      if (typeof issue.server !== 'string' || issue.server.length === 0) {
        throw new Error(`${label}: issue missing server name`);
      }
    }
    return err;
  }
  throw new Error(`${label}: expected throw, got none`);
}

// 1. undefined -> empty, deterministic empty hash
{
  const r = normalizeMcpServers(undefined);
  assert.deepEqual(r.servers, [], '1.undefined: servers empty');
  assert.equal(r.hash, emptyHash, '1.undefined: hash matches sha256("[]")');
  assert.equal(r.signatureKey, '[]', '1.undefined: signatureKey is "[]"');
}

// 2. {} -> same empty shape
{
  const r = normalizeMcpServers({});
  assert.deepEqual(r.servers, []);
  assert.equal(r.hash, emptyHash);
}

// 3. canonical ordering: z before a in input -> a first in output
{
  const r = normalizeMcpServers({
    z: { command: 'bin-z' },
    a: { command: 'bin-a' },
  });
  assert.deepEqual(r.servers.map(s => s.name), ['a', 'z'], '3: sorted by name');
}

// 4. deterministic hash — same semantic input -> same hash
{
  const a = normalizeMcpServers({ x: { command: 'c', args: ['1', '2'], env: { B: '2', A: '1' } } });
  const b = normalizeMcpServers({ x: { command: 'c', args: ['1', '2'], env: { A: '1', B: '2' } } });
  assert.equal(a.hash, b.hash, '4: env key order must not change hash');
}

// 5. stdio default shape (no "type")
{
  const r = normalizeMcpServers({ s: { command: 'bin', args: ['--x'], env: { K: 'v' } } });
  assert.equal(r.servers.length, 1);
  const s = r.servers[0];
  assert.equal(s.name, 's');
  assert.equal(s.command, 'bin');
  assert.deepEqual(s.args, ['--x']);
  assert.deepEqual(s.env, [{ name: 'K', value: 'v' }]);
}

// 6. http shape
{
  const r = normalizeMcpServers({
    h: { type: 'http', url: 'https://e/mcp', headers: { Authorization: 'Bearer x' } },
  });
  const s = r.servers[0];
  assert.equal(s.type, 'http');
  assert.equal(s.url, 'https://e/mcp');
  assert.deepEqual(s.headers, [{ name: 'Authorization', value: 'Bearer x' }]);
}

// 7. sse shape
{
  const r = normalizeMcpServers({ e: { type: 'sse', url: 'https://e/sse' } });
  const s = r.servers[0];
  assert.equal(s.type, 'sse');
  assert.equal(s.url, 'https://e/sse');
}

// 8. unsupported type throws with server name
{
  const err = expectThrow('8.bad-type', () => normalizeMcpServers({ bad: { type: 'ws', url: 'x' } }));
  assert.equal(err.issues[0].server, 'bad');
  assert.match(err.message, /bad:/);
}

// 9. empty command throws
expectThrow('9.empty-command', () => normalizeMcpServers({ noop: { command: '' } }));

// 10. empty url (http) throws
expectThrow('10.empty-url-http', () => normalizeMcpServers({ u: { type: 'http', url: '' } }));

// 11. invalid env value throws
expectThrow('11.env-non-string', () => normalizeMcpServers({ e: { command: 'c', env: { K: 42 } } }));

// 12. args with non-string throws
expectThrow('12.args-non-string', () => normalizeMcpServers({ a: { command: 'c', args: ['ok', 5] } }));

// 13. root non-object (array) throws
expectThrow('13.root-array', () => normalizeMcpServers([]));

// 14. multiple invalid servers aggregated
{
  const err = expectThrow('14.aggregate', () => normalizeMcpServers({
    a: { command: '' },
    b: { type: 'ws' },
  }));
  assert.equal(err.issues.length, 2, '14: both issues surfaced');
  assert.deepEqual(err.issues.map(i => i.server).sort(), ['a', 'b']);
}

// 15. headers bad shape throws
expectThrow('15.headers-invalid', () => normalizeMcpServers({
  h: { type: 'http', url: 'https://x', headers: 'no' },
}));

console.log('[check-mcp] 15 assertions ok');
EOF
  )
}

check_backends() {
  (cd "$REPO_DIR" && node --input-type=module <<'EOF'
import { strict as assert } from 'node:assert';
import {
  buildSessionMetaForBackend,
  CLAUDE_CONFIG_OVERLAY_DIR,
  CODEX_CONFIG_OVERLAY_DIR,
  ensureClaudeConfigOverlay,
  ensureCodexConfigOverlay,
  ensureGeminiConfigOverlay,
  GEMINI_CONFIG_OVERLAY_DIR,
  GEMINI_CONFIG_OVERLAY_HOME,
  GEMINI_OVERLAY_ADMIN_POLICY_PATH,
  GEMINI_OVERLAY_SYSTEM_MD_PATH,
  resolveAcpBackendLaunch,
  resolveBridgeEnvDefaults
} from './acp-bridge.ts';
import { existsSync, lstatSync, mkdirSync, readFileSync, readdirSync, readlinkSync, rmSync, symlinkSync, writeFileSync } from 'node:fs';
import { homedir, tmpdir } from 'node:os';
import { join } from 'node:path';

const claudeOverride = 'node /tmp/fake-claude-acp.js';
const codexOverride = 'node /tmp/fake-codex-acp.js';
const prevClaude = process.env.CLAUDE_AGENT_ACP_COMMAND;
const prevCodex = process.env.CODEX_ACP_COMMAND;
process.env.CLAUDE_AGENT_ACP_COMMAND = claudeOverride;
process.env.CODEX_ACP_COMMAND =
codexOverride; try { const claudeLaunch = resolveAcpBackendLaunch('claude'); assert.equal(claudeLaunch.command, 'bash'); assert.deepEqual(claudeLaunch.args, ['-lc', claudeOverride]); assert.equal(claudeLaunch.source, 'env:CLAUDE_AGENT_ACP_COMMAND'); // Codex override path appends mode flags (-c approval_policy=… // -c sandbox_mode=…) chosen by resolveCodexMode(). Default = "full-access" — // pi-YOLO parity with the Claude side and the only preset that lets // workspace-external files (e.g. ~/.gnupg/ for gogcli) reach the agent. // Order matters: operators who set CODEX_ACP_COMMAND can re-pass earlier // `-c` flags but ours come after, so pi-shell-acp's mode policy always // wins. // // 0.5.0: the bridge does not pin any codex-side compaction knob. // Codex's native context management runs on its own default. The argv // deepEqual below is the single source of truth — anything not in // that expected list is not pinned. (The earlier explicit negative // assertion on a specific knob name was removed in the 0.5.0 // maintainer cleanup; see CHANGELOG.) const codexLaunch = resolveAcpBackendLaunch('codex'); assert.equal(codexLaunch.command, 'bash'); assert.deepEqual(codexLaunch.args, [ '-lc', `${codexOverride} '-c' 'approval_policy=never' '-c' 'sandbox_mode=danger-full-access' '-c' 'web_search="disabled"' '-c' 'tools.view_image=false' '-c' 'memories.generate_memories=false' '-c' 'memories.use_memories=false' '-c' 'history.persistence="none"' '-c' 'features.image_generation=false' '-c' 'features.tool_suggest=false' '-c' 'features.tool_search=false' '-c' 'features.multi_agent=false' '-c' 'features.apps=false' '-c' 'features.memories=false'`, ]); assert.equal(codexLaunch.source, 'env:CODEX_ACP_COMMAND'); // Defense-in-depth: pin web_search=disabled, tools.view_image=false, and // the `features.*=false` flags even though the CODEX_HOME overlay // already strips operator config. 
codex-rs lets later -c values for the // same key win, so these flags hold even if the operator inlines // counter-values (e.g. `-c features.multi_agent=true`) via // CODEX_ACP_COMMAND — ours come last. // // Each features flag aligns one tool family with pi's advertised // baseline; the assertions name the disabled tool so a regression in // codex-rs's gating path (key rename, default flip) shows up here, not // only in live verification. assert.ok( codexLaunch.args.some((arg) => arg.includes('web_search="disabled"')), 'codex launch must pin web_search=disabled', ); assert.ok( codexLaunch.args.some((arg) => arg.includes('tools.view_image=false')), 'codex launch must attempt to disable tools.view_image (best-effort; codex-rs 0.124.0 has no consumer for this field, kept for forward-compat)', ); assert.ok( codexLaunch.args.some((arg) => arg.includes('features.image_generation=false')), 'codex launch must disable image_generation feature (suppresses image_gen tool)', ); assert.ok( codexLaunch.args.some((arg) => arg.includes('features.tool_suggest=false')), 'codex launch must disable tool_suggest feature (suppresses tool_suggest tool)', ); assert.ok( codexLaunch.args.some((arg) => arg.includes('features.tool_search=false')), 'codex launch must disable tool_search feature (suppresses deferred-MCP tool_search tool)', ); assert.ok( codexLaunch.args.some((arg) => arg.includes('features.multi_agent=false')), 'codex launch must disable multi_agent feature (suppresses spawn_agent / send_input / wait_agent / close_agent / resume_agent collab tools)', ); assert.ok( codexLaunch.args.some((arg) => arg.includes('features.apps=false')), 'codex launch must disable apps feature (suppresses auto-injected mcp__codex_apps__* MCP server bundle, e.g. codex_apps__github)', ); // PI_SHELL_ACP_CODEX_MODE=auto opts into codex-rs's standard mode // (workspace-write sandbox, on-request approvals). 
The bridge does not // touch compaction; mode args swap approval_policy + sandbox_mode while // the defense-in-depth -c args above stay constant. const prevMode = process.env.PI_SHELL_ACP_CODEX_MODE; process.env.PI_SHELL_ACP_CODEX_MODE = 'auto'; try { const codexLaunchAutoMode = resolveAcpBackendLaunch('codex'); assert.deepEqual(codexLaunchAutoMode.args, [ '-lc', `${codexOverride} '-c' 'approval_policy=on-request' '-c' 'sandbox_mode=workspace-write' '-c' 'web_search="disabled"' '-c' 'tools.view_image=false' '-c' 'memories.generate_memories=false' '-c' 'memories.use_memories=false' '-c' 'history.persistence="none"' '-c' 'features.image_generation=false' '-c' 'features.tool_suggest=false' '-c' 'features.tool_search=false' '-c' 'features.multi_agent=false' '-c' 'features.apps=false' '-c' 'features.memories=false'`, ]); } finally { if (prevMode === undefined) delete process.env.PI_SHELL_ACP_CODEX_MODE; else process.env.PI_SHELL_ACP_CODEX_MODE = prevMode; } // Invalid PI_SHELL_ACP_CODEX_MODE values throw at the launch surface. // Silent fallback would land typos like "readonly" (no dash) on the // full-access default — exactly the wrong direction for a sandbox knob. // "Never warn. Throw." (AGENTS.md). process.env.PI_SHELL_ACP_CODEX_MODE = 'super-yolo'; try { assert.throws( () => resolveAcpBackendLaunch('codex'), /Invalid PI_SHELL_ACP_CODEX_MODE=super-yolo/, ); } finally { if (prevMode === undefined) delete process.env.PI_SHELL_ACP_CODEX_MODE; else process.env.PI_SHELL_ACP_CODEX_MODE = prevMode; } // 0.5.0 compaction policy: legacy single knob throws; pi-side block by // default; backend-native compaction always allowed (no bridge knob). // // PI_SHELL_ACP_ALLOW_COMPACTION=1 must throw a next-action error from // resolveAcpBackendLaunch. The message must surface that backend-native // compaction is always allowed and that the pi-side opt-in lives behind // PI_SHELL_ACP_ALLOW_PI_COMPACTION. 
const prevAllow = process.env.PI_SHELL_ACP_ALLOW_COMPACTION; process.env.PI_SHELL_ACP_ALLOW_COMPACTION = '1'; try { assert.throws( () => resolveAcpBackendLaunch('codex'), /no longer accepted.*PI_SHELL_ACP_ALLOW_PI_COMPACTION/s, 'legacy PI_SHELL_ACP_ALLOW_COMPACTION=1 must be rejected at spawn intent with a next-action message pointing at PI_SHELL_ACP_ALLOW_PI_COMPACTION', ); assert.throws( () => resolveAcpBackendLaunch('claude'), /no longer accepted/, 'legacy PI_SHELL_ACP_ALLOW_COMPACTION=1 must also throw for the claude launch path', ); } finally { if (prevAllow === undefined) delete process.env.PI_SHELL_ACP_ALLOW_COMPACTION; else process.env.PI_SHELL_ACP_ALLOW_COMPACTION = prevAllow; } // codexDisabledFeatures launch param — explicit empty list opts fully out // of the bridge's feature-gate policy (operators who set // `codexDisabledFeatures: []` in pi-shell-acp settings.json land here). // Static surface flags (web_search, tools.view_image) stay in place; only // the dynamic `features.*=false` portion drops out. const codexLaunchEmptyFeatures = resolveAcpBackendLaunch('codex', { codexDisabledFeatures: [] }); assert.deepEqual(codexLaunchEmptyFeatures.args, [ '-lc', `${codexOverride} '-c' 'approval_policy=never' '-c' 'sandbox_mode=danger-full-access' '-c' 'web_search="disabled"' '-c' 'tools.view_image=false' '-c' 'memories.generate_memories=false' '-c' 'memories.use_memories=false' '-c' 'history.persistence="none"'`, ]); assert.ok( !codexLaunchEmptyFeatures.args.some((arg) => arg.includes('features.')), 'codex launch with codexDisabledFeatures=[] must emit zero features.* gate flags', ); // codexDisabledFeatures launch param — partial override. Operators can // narrow the policy to a single feature (e.g. just block apps, leave // image_gen registered) or extend it past the defaults. 
The launch path // emits exactly the keys passed, in order, with no de-duplication or // canonicalization (codex-rs validates keys at config load via // is_known_feature_key, so a typo surfaces as a startup warning). const codexLaunchPartialFeatures = resolveAcpBackendLaunch('codex', { codexDisabledFeatures: ['apps', 'multi_agent'] }); assert.deepEqual(codexLaunchPartialFeatures.args, [ '-lc', `${codexOverride} '-c' 'approval_policy=never' '-c' 'sandbox_mode=danger-full-access' '-c' 'web_search="disabled"' '-c' 'tools.view_image=false' '-c' 'memories.generate_memories=false' '-c' 'memories.use_memories=false' '-c' 'history.persistence="none"' '-c' 'features.apps=false' '-c' 'features.multi_agent=false'`, ]); assert.ok( !codexLaunchPartialFeatures.args.some((arg) => arg.includes('features.image_generation')), 'codex launch with codexDisabledFeatures=["apps","multi_agent"] must NOT emit features.image_generation', ); // codexDeveloperInstructions launch param — pi-shell-acp's identity carrier // on the codex backend. Codex ACP does not honor `_meta.systemPrompt`, so // the codex `developer` role config slot is the highest stable identity // layer available to us. The rendered engraving must reach the spawned // codex-acp child as `-c developer_instructions=""`, // appended after the static surface + feature-gate args. tomlBasicString // (= JSON.stringify) is the production escape path; the test re-runs it // to derive the expected fragment so the contract stays in lockstep. 
const codexDevInstrSample = 'line1\nline2 with "quote" and \\backslash'; const codexLaunchWithDevInstr = resolveAcpBackendLaunch('codex', { codexDisabledFeatures: ['apps'], codexDeveloperInstructions: codexDevInstrSample, }); const expectedDevInstrPair = `'-c' 'developer_instructions=${JSON.stringify(codexDevInstrSample)}'`; assert.ok( codexLaunchWithDevInstr.args[1].endsWith(expectedDevInstrPair), `codex launch must end with TOML-escaped developer_instructions pair; expected suffix=${expectedDevInstrPair} got=${codexLaunchWithDevInstr.args[1]}`, ); assert.ok( JSON.stringify(codexDevInstrSample).includes('\\n'), 'tomlBasicString contract: literal newline must be escaped as \\n', ); assert.ok( JSON.stringify(codexDevInstrSample).includes('\\"'), 'tomlBasicString contract: embedded double-quote must be escaped as \\"', ); // Empty / undefined / whitespace-only codexDeveloperInstructions emits no // `-c developer_instructions=` flag — codex defaults apply (no // pi-authored developer instruction present). const codexLaunchNoDevInstr = resolveAcpBackendLaunch('codex', { codexDisabledFeatures: ['apps'], codexDeveloperInstructions: ' ', }); assert.ok( !codexLaunchNoDevInstr.args[1].includes('developer_instructions'), 'whitespace-only codexDeveloperInstructions must not emit a -c flag', ); // Skill listing in the system prompt is gated by the SDK on // `tools.some(name === "Skill")` (claude-agent-sdk SN1 emitter, identical // in 0.2.114 and 0.2.119). 
loadProviderSettings() augments tools with // "Skill" and permissionAllow with "Skill(*)" whenever skillPlugins is // non-empty, so the params reaching buildSessionMetaForBackend in the // skill-plugins case look like this: const claudeMeta = buildSessionMetaForBackend('claude', { modelId: 'claude-sonnet-4-6', settingSources: [], strictMcpConfig: true, tools: ['Read', 'Bash', 'Edit', 'Write', 'Skill'], skillPlugins: ['/abs/path/to/skill-plugin'], permissionAllow: ['Bash(*)', 'Read(*)', 'Edit(*)', 'Write(*)', 'mcp__*', 'Skill(*)'], disallowedTools: [ 'AskUserQuestion', 'CronCreate', 'CronDelete', 'CronList', 'EnterPlanMode', 'EnterWorktree', 'ExitPlanMode', 'ExitWorktree', 'Monitor', 'NotebookEdit', 'PushNotification', 'RemoteTrigger', 'TaskCreate', 'TaskGet', 'TaskList', 'TaskOutput', 'TaskStop', 'TaskUpdate', 'WebFetch', 'WebSearch', ], }, 'system prompt'); assert.equal(claudeMeta?.claudeCode?.options?.model, 'claude-sonnet-4-6'); // settingSources empty by default — pi-shell-acp does not inherit filesystem // Claude Code settings; skills are delivered via plugins, MCP via mcpServers. assert.deepEqual(claudeMeta?.claudeCode?.options?.settingSources, []); // Tool surface is the explicit pi baseline plus Skill (added by // loadProviderSettings when skillPlugins is non-empty), so the SDK's // SN1 emitter actually produces a skill listing in the system prompt. assert.deepEqual(claudeMeta?.claudeCode?.options?.tools, ['Read', 'Bash', 'Edit', 'Write', 'Skill']); // Skills are injected as local plugins. assert.deepEqual(claudeMeta?.claudeCode?.options?.plugins, [ { type: 'local', path: '/abs/path/to/skill-plugin' }, ]); // Permission allowlist threads through `Options.settings.permissions.allow`. // Combined with claude-agent-acp's own permissionMode resolution, this // delivers de facto YOLO for the listed tools without flipping the user's // native ~/.claude/settings.json defaultMode. 
Skill(*) is included so the // listing surface is not silently denied at the permission layer. assert.deepEqual(claudeMeta?.claudeCode?.options?.settings?.permissions?.allow, [ 'Bash(*)', 'Read(*)', 'Edit(*)', 'Write(*)', 'mcp__*', 'Skill(*)', ]); // Disallowed tools — full deferred set passes through verbatim. The SDK's // skill-listing emitter is gated on `tools.includes("Skill")`, but the // separate deferred-tool advertisement surface (system-reminder block // listing tools available via ToolSearch) bypasses Options.tools and // requires Options.disallowedTools to suppress. assert.deepEqual(claudeMeta?.claudeCode?.options?.disallowedTools, [ 'AskUserQuestion', 'CronCreate', 'CronDelete', 'CronList', 'EnterPlanMode', 'EnterWorktree', 'ExitPlanMode', 'ExitWorktree', 'Monitor', 'NotebookEdit', 'PushNotification', 'RemoteTrigger', 'TaskCreate', 'TaskGet', 'TaskList', 'TaskOutput', 'TaskStop', 'TaskUpdate', 'WebFetch', 'WebSearch', ]); // _meta.systemPrompt is delivered as a *string*, not as { append: ... }. // claude-agent-acp interprets a string-form systemPrompt as a full preset // replacement (acp-agent.ts:1685), so the claude_code preset's per-cwd // dynamic sections (working directory, auto-memory path, git status, // todo handling) drop out of the system prompt. The engraving alone // carries identity context above Anthropic's SDK-level minimum prefix. assert.deepEqual(claudeMeta?.systemPrompt, 'system prompt'); assert.equal(claudeMeta?.claudeCode?.options?.extraArgs?.['strict-mcp-config'], null); // Empty skillPlugins => no plugins field emitted, no Skill auto-added. // Empty disallowedTools => no disallowedTools field emitted (escape hatch // for operators who explicitly opt back into the SDK's deferred-tool // advertisement). (loadProviderSettings would not augment // tools/permissionAllow either.) 
const claudeMetaNoPlugins = buildSessionMetaForBackend('claude', { modelId: 'claude-sonnet-4-6', settingSources: [], strictMcpConfig: true, tools: ['Read', 'Bash', 'Edit', 'Write'], skillPlugins: [], permissionAllow: ['Bash(*)', 'mcp__*'], disallowedTools: [], }, undefined); assert.equal(claudeMetaNoPlugins?.claudeCode?.options?.plugins, undefined); assert.deepEqual(claudeMetaNoPlugins?.claudeCode?.options?.tools, ['Read', 'Bash', 'Edit', 'Write']); assert.equal(claudeMetaNoPlugins?.claudeCode?.options?.disallowedTools, undefined); const codexMeta = buildSessionMetaForBackend('codex', { modelId: 'codex-mini-latest', settingSources: ['user'], strictMcpConfig: true, tools: ['Read', 'Bash', 'Edit', 'Write'], skillPlugins: [], permissionAllow: [], disallowedTools: [], }, 'system prompt'); assert.equal(codexMeta, undefined); assert.throws(() => resolveAcpBackendLaunch(undefined), /ACP backend is required\./); assert.throws(() => resolveAcpBackendLaunch('bogus'), /Unknown ACP backend: bogus\./); // Claude config overlay — isolate claude-agent-acp's SettingsManager from // ~/.claude/settings.json's permissionMode pickup. We rebuild the overlay // on every claude session bootstrap (idempotent symlink farm + minimal // settings.json) and point CLAUDE_CONFIG_DIR at it via bridgeEnvDefaults. assert.equal(typeof CLAUDE_CONFIG_OVERLAY_DIR, 'string'); assert.ok(CLAUDE_CONFIG_OVERLAY_DIR.endsWith('claude-config-overlay'), 'overlay dir constant must point at the pi-owned claude-config-overlay path'); // Exercise ensureClaudeConfigOverlay against a synthetic real/overlay pair // so the production filesystem stays untouched. 
// Claude overlay fixture: a throwaway "real" config dir and an overlay target
// under the OS temp dir. The whole tree is removed in `finally`.
const overlayTestRoot = join(tmpdir(), `pi-shell-acp-overlay-${Date.now()}`);
const realDir = join(overlayTestRoot, 'real');
const overlayDir = join(overlayTestRoot, 'overlay');
try {
  // Path builders scoped to this fixture.
  const inReal = (...segments) => join(realDir, ...segments);
  const inOverlay = (...segments) => join(overlayDir, ...segments);

  mkdirSync(realDir, { recursive: true });

  // Seed the synthetic real dir with shapes the production overlay should
  // mirror: a settings.json (overridden, NOT symlinked), a credentials
  // file (symlinked passthrough), a projects/ tree carrying an
  // operator-side MEMORY.md (must NOT pass through — overlay creates its
  // own empty projects/), and a non-whitelisted entry like hooks/ (must
  // be wiped by the stale-cleanup loop, not symlinked through).
  writeFileSync(inReal('settings.json'), JSON.stringify({ permissions: { defaultMode: 'auto' } }), 'utf8');
  const realCredPath = inReal('.credentials.json');
  writeFileSync(realCredPath, '{"token":"test"}', 'utf8');
  const operatorMemoryDir = inReal('projects', 'operator-cwd', 'memory');
  mkdirSync(operatorMemoryDir, { recursive: true });
  writeFileSync(join(operatorMemoryDir, 'MEMORY.md'), 'OPERATOR-LEAK-CANARY', 'utf8');
  const operatorHooksDir = inReal('hooks');
  mkdirSync(operatorHooksDir, { recursive: true });
  writeFileSync(join(operatorHooksDir, 'preToolUse.sh'), '#!/bin/sh\nexit 0\n', 'utf8');

  ensureClaudeConfigOverlay(realDir, overlayDir);

  // settings.json must be authored, not symlinked, and contain our override.
  const overlaySettingsPath = inOverlay('settings.json');
  assert.equal(
    lstatSync(overlaySettingsPath).isSymbolicLink(),
    false,
    'overlay settings.json must be a regular file authored by pi-shell-acp, not a symlink',
  );
  const authoredSettings = JSON.parse(readFileSync(overlaySettingsPath, 'utf8'));
  assert.equal(
    authoredSettings.permissions?.defaultMode,
    'default',
    'overlay settings.json must pin permissions.defaultMode to "default"',
  );
  assert.equal(
    authoredSettings.autoMemoryEnabled,
    false,
    'overlay settings.json must opt out of the SDK auto-memory subsystem',
  );
  assert.deepEqual(
    authoredSettings.hooks,
    {},
    'overlay settings.json must carry configured-empty hooks without inheriting operator hook definitions',
  );

  // Whitelisted entries pass through as symlinks to the operator's real dir.
  const overlayCredPath = inOverlay('.credentials.json');
  assert.equal(lstatSync(overlayCredPath).isSymbolicLink(), true);
  assert.equal(readlinkSync(overlayCredPath), realCredPath);

  // projects/ is overlay-private — an empty directory, NOT a symlink to
  // the operator's tree. Closes the per-cwd MEMORY.md auto-injection
  // channel: the binary's sanitized-cwd lookup finds nothing here.
  const overlayProjectsPath = inOverlay('projects');
  const projectsInfo = lstatSync(overlayProjectsPath);
  assert.equal(
    projectsInfo.isSymbolicLink(),
    false,
    'overlay projects/ must be an overlay-private directory, not a symlink to operator data',
  );
  assert.equal(projectsInfo.isDirectory(), true);
  assert.deepEqual(
    readdirSync(overlayProjectsPath),
    [],
    'overlay projects/ must start empty — operator MEMORY.md must not leak through',
  );
  assert.equal(
    existsSync(join(overlayProjectsPath, 'operator-cwd', 'memory', 'MEMORY.md')),
    false,
    'operator-side MEMORY.md must not be reachable through the overlay',
  );

  // Non-whitelisted entries (operator hooks, agents, sessions data,
  // settings.local.json with personal env, ...) must NOT appear in the
  // overlay. The stale-cleanup loop guarantees this each bootstrap.
  const overlayHooksPath = inOverlay('hooks');
  assert.equal(
    existsSync(overlayHooksPath),
    false,
    'overlay must not expose operator hooks/ — execution surface leak',
  );

  // Idempotence: a second call must succeed without throwing and preserve
  // the same shape (still no operator data, still empty projects/).
  ensureClaudeConfigOverlay(realDir, overlayDir);
  assert.equal(readlinkSync(overlayCredPath), realCredPath);
  assert.equal(
    existsSync(overlayHooksPath),
    false,
    'second-call idempotence must not let operator hooks/ leak back in',
  );
} finally {
  rmSync(overlayTestRoot, { recursive: true, force: true });
}

// Codex config overlay — mirror of the Claude overlay above. Shields
// codex-acp's Config loader from ~/.codex/config.toml fields the operator
// sets for their personal codex use (model, model_reasoning_effort,
// personality, projects.trust_level, [notice.*]). pi-shell-acp pins every
// operating-surface knob it cares about via -c CLI flags; the overlay's
// config.toml is intentionally minimal so unpinned fields fall through to
// codex-rs defaults rather than to the operator's preferences.
// Codex overlay fixture. Checks the overlay path constant, then runs
// ensureCodexConfigOverlay against a synthetic real/overlay pair under the OS
// temp dir (removed in `finally`) and asserts the isolation shape on first
// run, on a second run, and after a simulated pre-migration symlink state.
assert.equal(typeof CODEX_CONFIG_OVERLAY_DIR, 'string');
assert.ok(CODEX_CONFIG_OVERLAY_DIR.endsWith('codex-config-overlay'), 'overlay dir constant must point at the pi-owned codex-config-overlay path');
const codexOverlayTestRoot = join(tmpdir(), `pi-shell-acp-codex-overlay-${Date.now()}`);
const codexRealDir = join(codexOverlayTestRoot, 'real');
const codexOverlayDir = join(codexOverlayTestRoot, 'overlay');
try {
  mkdirSync(codexRealDir, { recursive: true });
  // Seed the synthetic real dir with shapes the production overlay must
  // mirror or reject:
  //   - config.toml that, if inherited, would leak `model` and `personality`
  //     through to pi-shell-acp sessions (overlay authors its own minimal
  //     replacement instead)
  //   - auth.json + skills/ — whitelist passthrough (codex can't run
  //     without auth; skills is the deliberately-shared registry holding
  //     both binary built-ins and operator agent-config symlinks)
  //   - memories/, sessions/ carrying operator-side payload — must NOT
  //     pass through; overlay creates its own empty trees so codex's
  //     per-cwd memory/session lookups find nothing leakable.
  //   - history.jsonl + rules/ — non-whitelisted entries (operator command
  //     history + operator policy/execution rules); must be wiped by the
  //     stale-cleanup loop, not symlinked through.
  writeFileSync(join(codexRealDir, 'config.toml'), 'model = "leak-me"\npersonality = "leak"\n', 'utf8');
  writeFileSync(join(codexRealDir, 'auth.json'), '{"token":"test"}', 'utf8');
  mkdirSync(join(codexRealDir, 'skills'), { recursive: true });
  mkdirSync(join(codexRealDir, 'memories'), { recursive: true });
  writeFileSync(join(codexRealDir, 'memories', 'operator-notes.md'), 'OPERATOR-MEMORY-CANARY', 'utf8');
  mkdirSync(join(codexRealDir, 'sessions'), { recursive: true });
  writeFileSync(join(codexRealDir, 'sessions', 'old-session.json'), '{"sessionId":"OPERATOR-SESSION-CANARY"}', 'utf8');
  writeFileSync(join(codexRealDir, 'history.jsonl'), '{"cmd":"OPERATOR-HISTORY-CANARY"}\n', 'utf8');
  mkdirSync(join(codexRealDir, 'rules'), { recursive: true });
  writeFileSync(join(codexRealDir, 'rules', 'policy.md'), 'OPERATOR-RULE-CANARY', 'utf8');
  // codex auto-loads ~/.codex/AGENTS.md as global user instructions
  // (codex-rs/agents_md.rs); the overlay must NOT expose the operator's
  // file. The cleanup loop wipes anything not on the allowlist, so this
  // canary verifies AGENTS.md is treated as an operator-personal entry.
  writeFileSync(join(codexRealDir, 'AGENTS.md'), '# OPERATOR-AGENTS-MD-CANARY\n', 'utf8');
  // state_5.sqlite is codex's thread/memory state DB
  // (codex-rs/state/runtime.rs). Symlinking it through would leak the
  // operator's persistent thread + memory store into pi-shell-acp
  // sessions — the deepest leak channel on the codex backend. logs_2.sqlite
  // is a similar telemetry DB with operator activity. Both must NOT
  // pass through; they belong to OVERLAY_BINARY_OWNED_CODEX so codex
  // initializes fresh copies inside the overlay.
  writeFileSync(join(codexRealDir, 'state_5.sqlite'), 'OPERATOR-STATE-CANARY', 'utf8');
  writeFileSync(join(codexRealDir, 'state_5.sqlite-shm'), 'OPERATOR-STATE-SHM-CANARY', 'utf8');
  writeFileSync(join(codexRealDir, 'state_5.sqlite-wal'), 'OPERATOR-STATE-WAL-CANARY', 'utf8');
  writeFileSync(join(codexRealDir, 'logs_2.sqlite'), 'OPERATOR-LOGS-CANARY', 'utf8');
  // log/ and shell_snapshots/ carry per-cwd operator activity. Belong
  // to OVERLAY_EMPTY_DIRS_CODEX (overlay-private empty trees).
  mkdirSync(join(codexRealDir, 'log'), { recursive: true });
  writeFileSync(join(codexRealDir, 'log', 'session.log'), 'OPERATOR-LOG-CANARY', 'utf8');
  mkdirSync(join(codexRealDir, 'shell_snapshots'), { recursive: true });
  writeFileSync(join(codexRealDir, 'shell_snapshots', 'snap.json'), 'OPERATOR-SHELL-CANARY', 'utf8');

  ensureCodexConfigOverlay(codexRealDir, codexOverlayDir);

  // config.toml: regular file, NOT symlink, NOT inheriting operator content.
  const configStat = lstatSync(join(codexOverlayDir, 'config.toml'));
  assert.equal(configStat.isSymbolicLink(), false, 'overlay config.toml must be a regular file authored by pi-shell-acp, not a symlink');
  const overlayContent = readFileSync(join(codexOverlayDir, 'config.toml'), 'utf8');
  assert.ok(!overlayContent.includes('leak-me'), 'overlay config.toml must not inherit operator model setting');
  assert.ok(!overlayContent.includes('personality'), 'overlay config.toml must not inherit operator personality setting');

  // Whitelisted entries pass through as symlinks to the operator's real dir.
  const codexAuthStat = lstatSync(join(codexOverlayDir, 'auth.json'));
  assert.equal(codexAuthStat.isSymbolicLink(), true);
  assert.equal(readlinkSync(join(codexOverlayDir, 'auth.json')), join(codexRealDir, 'auth.json'));
  const codexSkillsStat = lstatSync(join(codexOverlayDir, 'skills'));
  assert.equal(codexSkillsStat.isSymbolicLink(), true);
  assert.equal(readlinkSync(join(codexOverlayDir, 'skills')), join(codexRealDir, 'skills'));

  // memories/ and sessions/ are overlay-private — empty directories, NOT
  // symlinks to the operator's tree. Closes the per-cwd memory and session
  // leak channels: codex's lookups through CODEX_HOME find empty trees.
  const memStat = lstatSync(join(codexOverlayDir, 'memories'));
  assert.equal(memStat.isSymbolicLink(), false, 'overlay memories/ must be an overlay-private directory, not a symlink to operator data');
  assert.equal(memStat.isDirectory(), true);
  assert.deepEqual(readdirSync(join(codexOverlayDir, 'memories')), [], 'overlay memories/ must start empty — operator memory must not leak through');
  assert.equal(existsSync(join(codexOverlayDir, 'memories', 'operator-notes.md')), false, 'operator-side memory file must not be reachable through the overlay');
  const sessStat = lstatSync(join(codexOverlayDir, 'sessions'));
  assert.equal(sessStat.isSymbolicLink(), false, 'overlay sessions/ must be an overlay-private directory, not a symlink to operator data');
  assert.equal(sessStat.isDirectory(), true);
  assert.deepEqual(readdirSync(join(codexOverlayDir, 'sessions')), [], 'overlay sessions/ must start empty — operator session data must not leak through');
  assert.equal(existsSync(join(codexOverlayDir, 'sessions', 'old-session.json')), false, 'operator-side session file must not be reachable through the overlay');

  // Non-whitelisted entries must NOT appear in the overlay. The stale-
  // cleanup loop guarantees this each bootstrap. rules/ is especially
  // important: it would leak operator policy / execution rules, not just
  // narrative memory. AGENTS.md would auto-load as user instructions
  // via codex-rs/agents_md.rs.
  assert.equal(existsSync(join(codexOverlayDir, 'history.jsonl')), false, 'overlay must not expose operator history.jsonl — command-history leak');
  assert.equal(existsSync(join(codexOverlayDir, 'rules')), false, 'overlay must not expose operator rules/ — execution-policy leak');
  assert.equal(existsSync(join(codexOverlayDir, 'AGENTS.md')), false, 'overlay must not expose operator AGENTS.md — auto-loaded user-instruction leak');

  // log/ and shell_snapshots/: overlay-private empty directories.
  // operator-side payloads must not be reachable through the overlay.
  const logStat = lstatSync(join(codexOverlayDir, 'log'));
  assert.equal(logStat.isSymbolicLink(), false, 'overlay log/ must be an overlay-private directory, not a symlink to operator data');
  assert.equal(logStat.isDirectory(), true);
  assert.equal(existsSync(join(codexOverlayDir, 'log', 'session.log')), false, 'operator-side log payload must not be reachable through the overlay');
  const shellStat = lstatSync(join(codexOverlayDir, 'shell_snapshots'));
  assert.equal(shellStat.isSymbolicLink(), false, 'overlay shell_snapshots/ must be an overlay-private directory, not a symlink to operator data');
  assert.equal(shellStat.isDirectory(), true);
  assert.equal(existsSync(join(codexOverlayDir, 'shell_snapshots', 'snap.json')), false, 'operator-side shell snapshot must not be reachable through the overlay');

  // state_5.sqlite* and logs_2.sqlite — codex thread/memory state DB +
  // telemetry DB. NOT in passthrough; codex initializes fresh copies
  // inside the overlay. Verify no symlink reaches the operator's real
  // file (the canary content must not be readable through the overlay
  // path). After ensureCodexConfigOverlay there should be either no
  // entry at all (codex will create on first launch) or an
  // overlay-owned regular file the cleanup loop preserves — in either
  // case the operator canary content must NOT be present.
  //
  // Presence is probed with lstatSync({ throwIfNoEntry: false }) rather than
  // existsSync: existsSync follows symlinks, so a symlink whose target is
  // missing would look "absent" and skip the symlink assertion.
  for (const dbName of ['state_5.sqlite', 'state_5.sqlite-shm', 'state_5.sqlite-wal', 'logs_2.sqlite']) {
    const overlayDbPath = join(codexOverlayDir, dbName);
    const overlayDbStat = lstatSync(overlayDbPath, { throwIfNoEntry: false });
    if (overlayDbStat !== undefined) {
      assert.equal(overlayDbStat.isSymbolicLink(), false, `overlay ${dbName} must not be a symlink to operator data`);
      const content = readFileSync(overlayDbPath, 'utf8');
      assert.ok(!content.includes('OPERATOR-'), `overlay ${dbName} must not carry operator canary content`);
    }
  }

  // Idempotence: a second call must succeed without throwing and preserve
  // the same isolation shape (still no operator data, still empty
  // memories/sessions trees).
  ensureCodexConfigOverlay(codexRealDir, codexOverlayDir);
  assert.equal(readlinkSync(join(codexOverlayDir, 'auth.json')), join(codexRealDir, 'auth.json'));
  assert.deepEqual(readdirSync(join(codexOverlayDir, 'memories')), [], 'second-call idempotence must preserve empty memories/');
  assert.equal(existsSync(join(codexOverlayDir, 'rules')), false, 'second-call idempotence must not let operator rules/ leak back in');

  // Migration regression test: an overlay built by the previous
  // blacklist-style code would have *symlinked* the operator's
  // state_5.sqlite* and logs_2.sqlite* through. The new cleanup must
  // strip those stale symlinks on first run with the new code, or
  // the operator's persistent thread/memory state DB would still be
  // reachable through the overlay even after this commit ships. We
  // simulate that pre-migration state and verify cleanup tears the
  // symlinks down.
  const migrationDbNames = ['state_5.sqlite', 'state_5.sqlite-shm', 'state_5.sqlite-wal', 'logs_2.sqlite', 'logs_2.sqlite-shm', 'logs_2.sqlite-wal'];
  for (const dbName of migrationDbNames) {
    const overlayDbPath = join(codexOverlayDir, dbName);
    const realDbPath = join(codexRealDir, dbName);
    // Make sure the operator-side file exists so the symlink resolves.
    if (!existsSync(realDbPath)) {
      writeFileSync(realDbPath, `OPERATOR-${dbName}-CANARY`, 'utf8');
    }
    // Replace whatever the previous call left with a stale symlink to
    // the operator's real file (the pre-migration overlay shape).
    // Removal is best-effort: the precondition assert below still verifies
    // that the seed ended up as a symlink.
    try { rmSync(overlayDbPath, { force: true }); } catch {}
    symlinkSync(realDbPath, overlayDbPath);
    assert.equal(lstatSync(overlayDbPath).isSymbolicLink(), true, `seed precondition: ${dbName} must be a symlink before migration`);
  }
  ensureCodexConfigOverlay(codexRealDir, codexOverlayDir);
  for (const dbName of migrationDbNames) {
    const overlayDbPath = join(codexOverlayDir, dbName);
    // lstat (not existsSync) so a leftover symlink is caught even when its
    // target cannot be resolved.
    const migratedStat = lstatSync(overlayDbPath, { throwIfNoEntry: false });
    if (migratedStat !== undefined) {
      assert.equal(migratedStat.isSymbolicLink(), false, `migration must strip stale operator symlink at ${dbName}`);
    }
    // A non-symlink (regular file authored by the binary) or absent
    // entry are both acceptable post-migration shapes.
  }
} finally {
  rmSync(codexOverlayTestRoot, { recursive: true, force: true });
}

// Gemini config overlay — mirror of the Claude/Codex overlays.
//
// Path-resolution invariant: gemini-cli swaps `homedir()` to
// `process.env.GEMINI_CLI_HOME` (paths.ts:22) and then computes the real
// config dir as `${homedir()}/.gemini` (storage.ts:54). The overlay
// therefore needs TWO directories — `fakeHome` that GEMINI_CLI_HOME
// points at, and `configDir = fakeHome/.gemini` where settings/system.md/
// policies/oauth-symlinks/empty-dirs all actually live.
//
// Closed leak channels (mirror Claude/Codex isolation):
//   - operator settings.json (model/UI prefs) → overlay-authored replacement
//   - history/ + projects.json (cwd → shortId map) → overlay-private empty dirs
//   - tmp/<slug>/memory/MEMORY.md per-cwd memory → overlay-private empty dir
//   - trustedFolders.json workspace trust state → folderTrust.enabled:false
//   - native "Instruction and Memory Files" body → GEMINI_SYSTEM_MD replace
//   - native tool registry advertise + invoke → tools.core allowlist + admin-policy
//   - GEMINI.md hierarchical discovery → context.fileName sentinel + boundaries[]
//   - unsanctioned MCP servers → mcp.allowed whitelist (canLoadServer rejects anything not on the list)
//   - subagents (codebase_investigator, tracker_*, browser, ...) → agents.overrides
//   - hooks/extensions auto-load → settings flags
//
// Open advertise channels (mirror Claude/Codex capability parity):
//   - operator agent skills under ~/.gemini/skills/ → passthrough symlink
//     (matches Claude `OVERLAY_PASSTHROUGH` and Codex `OVERLAY_PASSTHROUGH_CODEX`)
//   - activate_skill native tool → tools.core allowlist
//   - bridge stdio MCP server (pi-tools-bridge) → ACP `newSession.mcpServers`
//     merged into settings.merged.mcpServers by acpSessionManager.newSessionConfig
//     and registered into the ToolRegistry by discoverMcpTools
//     (mcp-client.ts:1235 — bypasses the tools.core gate)

// Overlay path constants: the fake home and the `.gemini` config dir inside it.
assert.equal(typeof GEMINI_CONFIG_OVERLAY_HOME, 'string');
assert.ok(GEMINI_CONFIG_OVERLAY_HOME.endsWith('gemini-config-overlay'), 'overlay home constant must point at the pi-owned gemini-config-overlay path');
assert.equal(typeof GEMINI_CONFIG_OVERLAY_DIR, 'string');
assert.ok(GEMINI_CONFIG_OVERLAY_DIR.endsWith(join('gemini-config-overlay', '.gemini')), 'overlay configDir constant must be fakeHome/.gemini (Storage.getGlobalGeminiDir contract)');

// Synthetic real/fake-home/configDir triple under the OS temp dir. The whole
// tree is removed in `finally`, so the production filesystem is untouched.
const geminiOverlayTestRoot = join(tmpdir(), `pi-shell-acp-gemini-overlay-${Date.now()}`);
const geminiRealDir = join(geminiOverlayTestRoot, 'real');
const geminiFakeHome = join(geminiOverlayTestRoot, 'fakehome');
const geminiConfigDir = join(geminiFakeHome, '.gemini');
try {
  mkdirSync(geminiRealDir, { recursive: true });
  // Seed the synthetic real ~/.gemini/ dir with shapes the production
  // overlay must mirror or reject:
  writeFileSync(join(geminiRealDir, 'oauth_creds.json'), '{"refresh_token":"test"}', 'utf8');
  writeFileSync(join(geminiRealDir, 'google_accounts.json'), '{"active":"test@example.com"}', 'utf8');
  writeFileSync(join(geminiRealDir, 'installation_id'), 'test-install-id', 'utf8');
  writeFileSync(join(geminiRealDir, 'mcp-oauth-tokens-v2.json'), '{"server":"token"}', 'utf8');
  writeFileSync(join(geminiRealDir, 'settings.json'), JSON.stringify({ model: { name: 'operator-pref' } }), 'utf8');
  // operator's curated skills directory — the bridge passes this through
  // verbatim, same surface policy as Claude (`~/.claude/skills/`) and Codex
  // (`~/.codex/skills/`). The fleet's typical resolution is a symlink to
  // `~/repos/gh/agent-config/skills`, but the overlay does not care: it
  // mirrors whatever shape the operator chose.
  mkdirSync(join(geminiRealDir, 'skills', 'operator-skill-x'), { recursive: true });
  writeFileSync(join(geminiRealDir, 'skills', 'operator-skill-x', 'SKILL.md'), '---\nname: operator-skill-x\n---\nbody', 'utf8');
  mkdirSync(join(geminiRealDir, 'tmp', 'operator-cwd-slug', 'memory'), { recursive: true });
  writeFileSync(join(geminiRealDir, 'tmp', 'operator-cwd-slug', 'memory', 'MEMORY.md'), 'OPERATOR-MEMORY-LEAK-CANARY', 'utf8');
  mkdirSync(join(geminiRealDir, 'history'), { recursive: true });
  writeFileSync(join(geminiRealDir, 'history', 'session.log'), 'OPERATOR-HISTORY-LEAK', 'utf8');
  writeFileSync(join(geminiRealDir, 'projects.json'), '{"/operator/cwd":"id"}', 'utf8');
  writeFileSync(join(geminiRealDir, 'trustedFolders.json'), '{"/operator/cwd":"trusted"}', 'utf8');
  // Pre-seed the fakeHome ROOT with leftover entries that earlier overlay
  // shapes wrote there by mistake (settings.json/oauth-symlinks/tmp/...).
  // These must be cleaned by ensureGeminiConfigOverlay's fake-home sweep —
  // the binary ignores them anyway (production paths read from
  // fakeHome/.gemini/), but they pollute the overlay filesystem.
  mkdirSync(geminiFakeHome, { recursive: true });
  writeFileSync(join(geminiFakeHome, 'settings.json'), '{"stale":true}', 'utf8');
  writeFileSync(join(geminiFakeHome, 'system.md'), 'STALE SYSTEM MD AT ROOT', 'utf8');
  mkdirSync(join(geminiFakeHome, 'tmp'), { recursive: true });
  writeFileSync(join(geminiFakeHome, 'tmp', 'stale.txt'), 'STALE', 'utf8');

  // The engraving text carries a canary substring that is looked up in
  // system.md further down.
  const engravingForOverlay = 'You are pi-shell-acp gemini test session. Engraving canary 0xCAFE.';
  ensureGeminiConfigOverlay(engravingForOverlay, geminiRealDir, geminiFakeHome, geminiConfigDir);

  // Fake-home root must contain ONLY the .gemini directory after the
  // sweep — no leftover settings.json / system.md / tmp / etc. Earlier
  // (buggy) overlay versions wrote to root; cleanup must scrub those.
  const fakeHomeEntries = readdirSync(geminiFakeHome).sort();
  assert.deepEqual(fakeHomeEntries, ['.gemini'], 'fake-home root must contain only the .gemini configDir after cleanup');

  // settings.json — overlay-authored at configDir (NOT fakeHome root).
  const settingsStat = lstatSync(join(geminiConfigDir, 'settings.json'));
  assert.equal(settingsStat.isSymbolicLink(), false, 'overlay settings.json must be a regular file authored by pi-shell-acp, not a symlink');
  const overlaySettings = JSON.parse(readFileSync(join(geminiConfigDir, 'settings.json'), 'utf8'));
  assert.equal(overlaySettings.advanced?.autoConfigureMemory, false, 'overlay settings.json must opt out of gemini relaunch-for-memory codepath');
  assert.equal(overlaySettings.security?.auth?.selectedType, 'oauth-personal', 'overlay settings.json must hint oauth-personal so symlinked oauth_creds.json is consumed');
  assert.equal(overlaySettings.security?.folderTrust?.enabled, false, 'overlay settings.json must disable folder trust prompt');
  assert.equal(overlaySettings.context?.fileName, '__pi_shell_acp_disabled_context__', 'overlay settings.json must set context.fileName to the sentinel (no GEMINI.md discovery)');
  assert.equal(overlaySettings.context?.includeDirectoryTree, false, 'overlay settings.json must disable cwd directory tree auto-attach');
  assert.deepEqual(overlaySettings.context?.memoryBoundaryMarkers, [], 'overlay settings.json must set memoryBoundaryMarkers to [] (kill parent traversal)');
  assert.deepEqual(overlaySettings.context?.includeDirectories, []);
  assert.equal(overlaySettings.context?.loadMemoryFromIncludeDirectories, false);
  assert.deepEqual(overlaySettings.tools?.core, ['read_file', 'list_directory', 'glob', 'grep_search', 'write_file', 'replace', 'run_shell_command', 'activate_skill'], 'overlay settings.json tools.core must include the pi baseline native classes (Read-class split into 4: read_file/list_directory/glob/grep_search; Write/Edit/Bash 1:1) plus activate_skill (Claude `Skill` analog)');
  assert.equal(overlaySettings.useWriteTodos, false, 'overlay settings.json must disable write_todos tool');
  assert.equal(overlaySettings.skills?.enabled, true, 'overlay settings.json must enable agent skills (matches Claude `tools` containing `Skill` and Codex `OVERLAY_PASSTHROUGH_CODEX` containing `skills`)');
  assert.equal(overlaySettings.hooksConfig?.enabled, false, 'overlay settings.json must disable the hooks system');
  assert.deepEqual(overlaySettings.mcp?.allowed, ['pi-tools-bridge'], 'overlay settings.json mcp.allowed must whitelist the bridge MCP servers (single entry since 0.4.14, issue #7)');
  assert.equal('excluded' in (overlaySettings.mcp ?? {}), false, 'overlay settings.json must NOT carry mcp.excluded — `isInSettingsList` (mcpServerEnablement.ts:65) does only exact-match, no wildcards, so `["*"]` was decorative and misleading');
  assert.equal('model' in overlaySettings, false, 'overlay settings.json must NOT inherit operator model preference');

  // L5 — Memory containment. pi owns memory persistence (semantic-memory +
  // Denote llmlog); the gemini backend must not run a parallel memory layer.
  // memoryV2 (default true upstream) flips the system prompt into "edit
  // GEMINI.md / MEMORY.md directly via edit/write_file" — overridden by
  // GEMINI_SYSTEM_MD anyway, but pinned false here as defense-in-depth so
  // the prompt steering is also off if the override path ever breaks.
  // autoMemory (default false upstream) is the background extraction agent
  // that writes .patch files into a project memory inbox; pinned false to
  // make absence explicit and survive any future default flip.
  assert.equal(overlaySettings.experimental?.memoryV2, false, 'overlay settings.json must pin experimental.memoryV2:false — memory persistence belongs to pi, not gemini');
  assert.equal(overlaySettings.experimental?.autoMemory, false, 'overlay settings.json must pin experimental.autoMemory:false — no background memory extraction inbox in the overlay');

  // system.md must be authored with the engraving body at configDir.
  const systemMdStat = lstatSync(join(geminiConfigDir, 'system.md'));
  assert.equal(systemMdStat.isSymbolicLink(), false, 'overlay system.md must be a regular file authored by pi-shell-acp, not a symlink');
  const systemMdContent = readFileSync(join(geminiConfigDir, 'system.md'), 'utf8');
  assert.ok(systemMdContent.includes('Engraving canary 0xCAFE'), 'overlay system.md must contain the operator engraving body (carrier delivery)');
  assert.ok(systemMdContent.includes('GEMINI_SYSTEM_MD_CANARY_PISHELLACP_V1'), 'overlay system.md must always include the carrier-isolation canary line so baseline can verify GEMINI_SYSTEM_MD reaches the model surface');

  // policies/admin.toml — at configDir/policies/admin.toml.
  const adminPolicyStat = lstatSync(join(geminiConfigDir, 'policies', 'admin.toml'));
  assert.equal(adminPolicyStat.isSymbolicLink(), false, 'overlay policies/admin.toml must be a regular file authored by pi-shell-acp');
  const adminPolicyContent = readFileSync(join(geminiConfigDir, 'policies', 'admin.toml'), 'utf8');
  // These are substring / regex checks on the TOML text, not a parsed-TOML comparison.
  assert.ok(adminPolicyContent.includes('toolName = "*"') && adminPolicyContent.includes('decision = "deny"'), 'admin.toml must include a deny-all catch-all rule');
  for (const allowedToolName of ['read_file', 'list_directory', 'glob', 'grep_search', 'write_file', 'replace', 'run_shell_command']) {
    assert.ok(adminPolicyContent.includes(`"${allowedToolName}"`), `admin.toml must allow ${allowedToolName} — read-class split (read_file/list_directory/glob/grep_search) + write/edit/exec`);
  }
  assert.ok(/mcpName\s*=\s*"\*"[\s\S]*?decision\s*=\s*"allow"/.test(adminPolicyContent), 'admin.toml must include a `mcpName = "*"` allow rule — the per-server MCP whitelist is enforced one layer earlier by `settings.mcp.allowed` / canLoadServer, so this layer admits whatever survives. A per-server `mcpName = ""` allow rule was rejected for invocation-time mismatch (advertise PASS but invocation DENY) when serverName resolved to a different shape between paths.');

  // Whitelisted entries pass through as symlinks at configDir level.
  for (const entry of ['oauth_creds.json', 'google_accounts.json', 'installation_id', 'mcp-oauth-tokens-v2.json', 'skills']) {
    const entryStat = lstatSync(join(geminiConfigDir, entry));
    assert.equal(entryStat.isSymbolicLink(), true, `overlay ${entry} must be a symlink to the operator's real ~/.gemini/`);
    assert.equal(readlinkSync(join(geminiConfigDir, entry)), join(geminiRealDir, entry));
  }
  // Skills passthrough must also resolve so SkillManager.discoverSkills
  // (gemini-cli `packages/core/src/skills/skillManager.ts:54`) reads operator
  // SKILL.md entries through the symlinked `Storage.getUserSkillsDir()`.
  assert.equal(existsSync(join(geminiConfigDir, 'skills', 'operator-skill-x', 'SKILL.md')), true, 'operator skill files under the passthrough symlink must be reachable through the overlay');

  // Empty dirs at configDir level — overlay-private.
  for (const entry of ['tmp', 'history', 'projects']) {
    const entryStat = lstatSync(join(geminiConfigDir, entry));
    assert.equal(entryStat.isSymbolicLink(), false, `overlay ${entry}/ must be an overlay-private directory, not a symlink to operator data`);
    assert.equal(entryStat.isDirectory(), true);
    assert.deepEqual(readdirSync(join(geminiConfigDir, entry)), [], `overlay ${entry}/ must start empty — operator data must not leak through`);
  }
  assert.equal(existsSync(join(geminiConfigDir, 'tmp', 'operator-cwd-slug', 'memory', 'MEMORY.md')), false, 'operator-side MEMORY.md must not be reachable through the overlay');

  // Non-whitelisted entries (e.g. trustedFolders.json) must NOT appear.
  assert.equal(existsSync(join(geminiConfigDir, 'trustedFolders.json')), false, 'overlay must not expose operator trustedFolders.json — workspace trust state leak');

  // Idempotence: a second call must succeed and preserve shape.
  ensureGeminiConfigOverlay(engravingForOverlay, geminiRealDir, geminiFakeHome, geminiConfigDir);
  assert.equal(readlinkSync(join(geminiConfigDir, 'oauth_creds.json')), join(geminiRealDir, 'oauth_creds.json'));
  assert.equal(existsSync(join(geminiConfigDir, 'trustedFolders.json')), false, 'second-call idempotence must not let operator trustedFolders.json leak back in');
  assert.deepEqual(readdirSync(join(geminiConfigDir, 'tmp')), [], 'second-call idempotence must keep tmp/ empty');

  // L5 sweep — gemini-side memory written in a previous session must not
  // survive into the next spawn. Four vectors covered:
  //   1. tmp/<slug>/memory/MEMORY.md (memoryV2 per-project memory)
  //   2. tmp/<slug>/.inbox/memory/*.patch (autoMemory background extraction)
  //   3. configDir/GEMINI.md (memoryV2 global memory written via write_file)
  //   4. configDir/MEMORY.md (rare; some prompts steer here)
  // Pre-seed each vector then call ensureGeminiConfigOverlay and confirm
  // the file is gone. The L4 closure (context.fileName sentinel + empty
  // memoryBoundaryMarkers) already keeps gemini from READING these files;
  // the L5 sweep is filesystem hygiene + defense-in-depth in case the L4
  // closure ever breaks.
  mkdirSync(join(geminiConfigDir, 'tmp', 'session-N-slug', 'memory'), { recursive: true });
  writeFileSync(
    join(geminiConfigDir, 'tmp', 'session-N-slug', 'memory', 'MEMORY.md'),
    'GEMINI-WROTE-PROJECT-MEMORY-CANARY',
    'utf8',
  );
  mkdirSync(join(geminiConfigDir, 'tmp', 'session-N-slug', '.inbox', 'memory'), { recursive: true });
  writeFileSync(
    join(geminiConfigDir, 'tmp', 'session-N-slug', '.inbox', 'memory', 'patch-1.patch'),
    'AUTO-MEMORY-INBOX-CANARY',
    'utf8',
  );
  writeFileSync(join(geminiConfigDir, 'GEMINI.md'), 'GEMINI-WROTE-GLOBAL-MEMORY-CANARY', 'utf8');
  writeFileSync(join(geminiConfigDir, 'MEMORY.md'), 'GEMINI-WROTE-ROOT-MEMORY-CANARY', 'utf8');
  ensureGeminiConfigOverlay(engravingForOverlay, geminiRealDir, geminiFakeHome, geminiConfigDir);
  assert.equal(
    existsSync(join(geminiConfigDir, 'tmp', 'session-N-slug', 'memory', 'MEMORY.md')),
    false,
    'L5 sweep: previous-session MEMORY.md under tmp//memory/ must not survive',
  );
  assert.equal(
    existsSync(join(geminiConfigDir, 'tmp', 'session-N-slug', '.inbox', 'memory', 'patch-1.patch')),
    false,
    'L5 sweep: previous-session autoMemory inbox patches must not survive',
  );
  assert.deepEqual(
    readdirSync(join(geminiConfigDir, 'tmp')),
    [],
    'L5 sweep: tmp/ must be wiped clean of all per-session subtrees',
  );
  assert.equal(
    existsSync(join(geminiConfigDir, 'GEMINI.md')),
    false,
    'L5 sweep: configDir/GEMINI.md (memoryV2 global memory) must not survive across spawns',
  );
  assert.equal(
    existsSync(join(geminiConfigDir, 'MEMORY.md')),
    false,
    'L5 sweep: configDir/MEMORY.md must not survive across spawns',
  );

  // Engraving substitution defuse — gemini-cli applies `${...}` regex
  // substitution to the override file (applySubstitutions in
  // packages/core/src/prompts/utils.ts). Operator engravings with literal
  // `${...}` text would silently mutate on the gemini backend only,
  // breaking the cross-backend invariant that Gemini must not semantically
  // interpolate engraving literals differently from Claude/Codex.
  // defuseGeminiSubstitutions inserts a zero-width space (U+200B) between
  // `$` and `{` so the regex misses while the model reads the same visual
  // string; byte-level identity is intentionally not claimed for affected
  // literals on the Gemini carrier.
  const engravingWithSubst = 'Use ${AvailableTools} for the task. Cost: ${PriceList}.';
  ensureGeminiConfigOverlay(engravingWithSubst, geminiRealDir, geminiFakeHome, geminiConfigDir);
  const systemMdDefused = readFileSync(join(geminiConfigDir, 'system.md'), 'utf8');
  assert.equal(
    systemMdDefused.includes('${AvailableTools}'),
    false,
    'L5 substitution defuse: literal ${AvailableTools} must be defused (no contiguous "${" remaining for gemini regex)',
  );
  assert.equal(
    systemMdDefused.includes('${PriceList}'),
    false,
    'L5 substitution defuse: arbitrary ${...} literals in engraving must also be defused',
  );
  assert.ok(
    systemMdDefused.includes('AvailableTools') && systemMdDefused.includes('PriceList'),
    'L5 substitution defuse: engraving content beyond `${` must remain visible to the model',
  );

  // Empty / undefined engraving still authors a non-empty system.md.
  ensureGeminiConfigOverlay(undefined, geminiRealDir, geminiFakeHome, geminiConfigDir);
  const systemMdNoEngraving = readFileSync(join(geminiConfigDir, 'system.md'), 'utf8');
  assert.ok(systemMdNoEngraving.trim().length > 0, 'overlay system.md must be non-empty even without engraving — gemini falls back to native body otherwise');
  assert.equal(systemMdNoEngraving.includes('Engraving canary 0xCAFE'), false, 'overlay system.md must reflect the latest engraving argument (no stale content)');
  assert.ok(systemMdNoEngraving.includes('GEMINI_SYSTEM_MD_CANARY_PISHELLACP_V1'), 'overlay system.md must include the carrier-isolation canary even in the empty-engraving placeholder branch');
} finally {
  rmSync(geminiOverlayTestRoot, { recursive: true, force: true });
}

// resolveBridgeEnvDefaults — 0.5.0 identity-isolation only.
//
// The bridge does not implement compaction. The adapter's
// bridgeEnvDefaults contains operator-config-isolation pins only
// (CLAUDE_CONFIG_DIR, CODEX_HOME, CODEX_SQLITE_HOME, GEMINI_CLI_HOME,
// GEMINI_SYSTEM_MD). Conflating compaction with isolation would
// silently leak the operator's ~/.codex or ~/.claude into the bridge
// child process — these assertions guard the boundary without naming
// backend-specific compaction knobs.
const claudeEnvFull = resolveBridgeEnvDefaults('claude');
assert.equal(claudeEnvFull?.CLAUDE_CONFIG_DIR, CLAUDE_CONFIG_OVERLAY_DIR, 'claude bridge env must pin CLAUDE_CONFIG_DIR to the overlay');
// Key-set equality (sorted) so any extra default key fails the test.
assert.deepEqual(Object.keys(claudeEnvFull ?? {}).sort(), ['CLAUDE_CONFIG_DIR'], 'claude bridge env defaults must contain identity-isolation keys only');
const codexEnvFull = resolveBridgeEnvDefaults('codex');
assert.equal(codexEnvFull?.CODEX_HOME, CODEX_CONFIG_OVERLAY_DIR, 'codex bridge env must pin CODEX_HOME to the overlay');
assert.equal(codexEnvFull?.CODEX_SQLITE_HOME, CODEX_CONFIG_OVERLAY_DIR, 'codex bridge env must pin CODEX_SQLITE_HOME to the overlay (state_5.sqlite isolation)');
assert.deepEqual(Object.keys(codexEnvFull ?? {}).sort(), ['CODEX_HOME', 'CODEX_SQLITE_HOME'], 'codex bridge env defaults must contain identity-isolation keys only');

// Gemini launch — PATH path (no override). Must resolve to
// `gemini --acp --admin-policy /.gemini/policies/admin.toml`. The admin-
// policy flag loads our deny-all-then-explicit-allow TOML at priority
// tier 5.x (Admin > User > Workspace > Extension > Default), narrowing
// the gemini native tool surface to the pi baseline (read_file/write_file/
// replace/run_shell_command) plus a whitelist of injected MCP servers.
// Override: GEMINI_ACP_COMMAND with bridge args appended (Codex pattern).
// The current GEMINI_ACP_COMMAND value is saved before it is cleared; the code
// that uses the saved value lies beyond this excerpt.
const prevGemini = process.env.GEMINI_ACP_COMMAND;
delete process.env.GEMINI_ACP_COMMAND;
try {
  const geminiLaunch = resolveAcpBackendLaunch('gemini');
  assert.equal(geminiLaunch.command, 'gemini', 'gemini launch (PATH path) must invoke `gemini` directly');
  assert.deepEqual(geminiLaunch.args, ['--acp', '--admin-policy', GEMINI_OVERLAY_ADMIN_POLICY_PATH], 'gemini launch must pin `--acp --admin-policy /.gemini/policies/admin.toml` for tool-surface narrowing');
  assert.equal(geminiLaunch.source, 'PATH:gemini --acp --admin-policy ');
  // Override path mirrors Codex: bash -lc with `${override} ${ourFlags}`
  // so the bridge admin-policy always wins against an operator command.
  // Used for `gemini --acp --debug` and wrapper scripts.
process.env.GEMINI_ACP_COMMAND = 'node /tmp/fake-gemini-acp.js'; const geminiLaunchOverride = resolveAcpBackendLaunch('gemini'); assert.equal(geminiLaunchOverride.command, 'bash'); assert.equal(geminiLaunchOverride.args.length, 2); assert.equal(geminiLaunchOverride.args[0], '-lc'); assert.ok(geminiLaunchOverride.args[1].startsWith('node /tmp/fake-gemini-acp.js '), 'gemini override must keep the operator command at the head of the bash -lc string'); assert.ok(geminiLaunchOverride.args[1].includes('--admin-policy'), 'gemini override must append `--admin-policy` so bridge tool surface narrowing is not bypassed (Codex pattern mirror)'); assert.ok(geminiLaunchOverride.args[1].includes(GEMINI_OVERLAY_ADMIN_POLICY_PATH), 'gemini override admin-policy path must be the overlay constant'); assert.equal(geminiLaunchOverride.source, 'env:GEMINI_ACP_COMMAND'); } finally { if (prevGemini === undefined) delete process.env.GEMINI_ACP_COMMAND; else process.env.GEMINI_ACP_COMMAND = prevGemini; } // Gemini bridge env — operator-config isolation overlay. // GEMINI_CLI_HOME → overlay's fakeHome (NOT the .gemini configDir). // Gemini-cli swaps `homedir()` to this value and reads its real // config from `${homedir()}/.gemini/...` (paths.ts:22 + storage.ts:54). // Pinning the fakeHome closes operator config / history / // projects.json / tmp memory / trust state leaks. // GEMINI_SYSTEM_MD → overlay's `${configDir}/system.md`, fully // replacing gemini's bundled "Instruction and Memory Files" body // with the operator engraving. Equivalent in role to Claude's // `_meta.systemPrompt` and Codex's `-c developer_instructions`. // These pins are operator-config-isolation invariants — the bridge // does not implement compaction, so there is no toggle to survive. 
const geminiEnvFull = resolveBridgeEnvDefaults('gemini'); assert.equal(geminiEnvFull?.GEMINI_CLI_HOME, GEMINI_CONFIG_OVERLAY_HOME, 'gemini bridge env must pin GEMINI_CLI_HOME to the overlay fakeHome (operator-state isolation)'); assert.equal(geminiEnvFull?.GEMINI_SYSTEM_MD, GEMINI_OVERLAY_SYSTEM_MD_PATH, 'gemini bridge env must pin GEMINI_SYSTEM_MD to the overlay system.md (native body replacement)'); // Constants relationship: configDir is fakeHome + "/.gemini", and the // overlay-authored system.md / admin-policy paths live inside configDir. assert.equal(GEMINI_CONFIG_OVERLAY_DIR, join(GEMINI_CONFIG_OVERLAY_HOME, '.gemini'), 'GEMINI_CONFIG_OVERLAY_DIR must be GEMINI_CONFIG_OVERLAY_HOME + ".gemini" (Storage.getGlobalGeminiDir contract)'); assert.equal(GEMINI_OVERLAY_SYSTEM_MD_PATH, join(GEMINI_CONFIG_OVERLAY_DIR, 'system.md')); assert.equal(GEMINI_OVERLAY_ADMIN_POLICY_PATH, join(GEMINI_CONFIG_OVERLAY_DIR, 'policies', 'admin.toml')); // Gemini session meta — adapter returns undefined. Gemini ACP newSession // does not honor `_meta.systemPrompt` (no carrier slot exists), so the // bridge must not synthesize one. The engraving travels via // GEMINI_SYSTEM_MD (system.md file) instead — verified by the env pins // above. 
check_models() {
  # Model-surface gate.
  #
  # Compiles the project with tsc into $REPO_DIR/.tmp-verify-models, then runs
  # an inline Node script that imports the compiled index.js three times
  # (defaults, explicit PI_SHELL_ACP_CLAUDE_CONTEXT override, bogus override)
  # and asserts the curated model ids and context windows handed to
  # registerProvider.
  #
  # Globals:  REPO_DIR (read)
  # Returns:  0 when every pass succeeds; otherwise the exit status of the
  #           failing step (tsc or node).
  #
  # The scratch directory is removed on BOTH the success and the failure path,
  # so a red gate never leaves compiled output behind in the repo.
  local verify_dir status=0
  verify_dir="$REPO_DIR/.tmp-verify-models"
  rm -rf "$verify_dir"
  mkdir -p "$verify_dir"

  # `cd … &&` is explicit because a subshell on the left of `||` runs with
  # errexit ignored; tsc must never run from the wrong directory.
  (
    cd "$REPO_DIR" &&
      ./node_modules/.bin/tsc \
        --project tsconfig.json \
        --outDir "$verify_dir" \
        --rootDir "$REPO_DIR"
  ) || status=$?

  # Run three times: defaults, explicit override, bogus override.
  # Each run imports a FRESH module URL so the top-level CLAUDE_CONTEXT_CAP
  # constant is recomputed from the env seen at import time.
  if [ "$status" -eq 0 ]; then
    (cd "$REPO_DIR" && VERIFY_DIR="$verify_dir" node --input-type=module <<'EOF'
import { strict as assert } from 'node:assert';
import { pathToFileURL } from 'node:url';

const verifyDir = process.env.VERIFY_DIR;
if (!verifyDir) throw new Error('VERIFY_DIR is required');

// The model list is embedded in index.js by the registerProvider call.
// We capture it by mocking the runtime and letting the extension hand us
// `provider.models`. Override env before import to force a fresh read.
async function collectModels(envOverride) {
  if (envOverride === undefined) {
    delete process.env.PI_SHELL_ACP_CLAUDE_CONTEXT;
  } else {
    process.env.PI_SHELL_ACP_CLAUDE_CONTEXT = envOverride;
  }
  // Cache-bust: query string forces a fresh module evaluation so the
  // top-level `CLAUDE_CONTEXT_CAP` constant picks up the new env.
  const moduleUrl = pathToFileURL(`${verifyDir}/index.js`).href + `?cap=${envOverride ?? 'default'}`;
  const mod = await import(moduleUrl);
  let captured;
  const runtime = {
    registerProvider(_id, provider) {
      captured = provider;
    },
    on() {},
  };
  mod.default(runtime);
  if (!captured || !Array.isArray(captured.models)) {
    throw new Error('registerProvider did not receive a models array');
  }
  return new Map(captured.models.map((m) => [m.id, m]));
}

// --- Pass 1: curated surface + default Claude defaults ---
{
  const models = await collectModels(undefined);

  // Curated allowlist — exact match required. Adding or removing a model id
  // must flip this assertion, not silently drift.
  const EXPECTED_IDS = [
    'claude-sonnet-4-6',
    'claude-opus-4-8',
    'gpt-5.4',
    'gpt-5.4-mini',
    'gpt-5.5',
    'gemini-3.1-pro-preview',
  ].sort();
  const actualIds = [...models.keys()].sort();
  assert.deepEqual(
    actualIds,
    EXPECTED_IDS,
    `curated pi-shell-acp model surface mismatch.\n expected: ${EXPECTED_IDS.join(', ')}\n actual: ${actualIds.join(', ')}`,
  );

  // Non-curated models MUST NOT leak through. Specifically: no legacy Claude
  // (3.x, 4.0-4.5, haiku), no generic openai chat models (gpt-4, gpt-4.1,
  // o1/o3/o4), no codex variants outside the allowlist (5.1, 5.3, codex-max).
  const FORBIDDEN = [
    'claude-3-5-sonnet-20241022',
    'claude-3-5-haiku-latest',
    'claude-haiku-4-5',
    'claude-sonnet-4-5',
    'claude-opus-4-5',
    'claude-opus-4-6',
    'claude-opus-4-7',
    'gpt-4',
    'gpt-4-turbo',
    'gpt-4.1',
    'gpt-4.1-mini',
    'gpt-4o',
    'o1',
    'o3',
    'o4-mini',
    'gpt-5',
    'gpt-5-chat-latest',
    'gpt-5.1',
    'gpt-5.1-codex-max',
    'gpt-5.3-codex',
    'codex-mini-latest',
    'gemini-3-flash-preview',
    'gemini-3.1-flash-lite-preview',
    'gemini-3.1-pro-preview-customtools',
  ];
  for (const id of FORBIDDEN) {
    assert.ok(!models.has(id), `non-curated model ${id} must not be exposed by pi-shell-acp`);
  }

  // Claude defaults: sonnet stays at 200K, opus surfaces at 1M.
  {
    const sonnet = models.get('claude-sonnet-4-6');
    assert.ok(sonnet, 'curated Claude model missing: claude-sonnet-4-6');
    assert.equal(
      sonnet.contextWindow,
      200000,
      `default: claude-sonnet-4-6 contextWindow should be 200000, got ${sonnet.contextWindow}`,
    );
  }
  // claude-opus-4-8 is real in the pi 0.77+ registry — assert it is PRESENT at
  // 1M (hard), not a soft "if present" check. A metadata gap (missing entry or
  // wrong context) must fail the gate rather than be papered with a placeholder.
  {
    const opus = models.get('claude-opus-4-8');
    assert.ok(opus, 'curated Claude model missing: claude-opus-4-8 (must be real in pi-ai registry, not placeholder-injected)');
    assert.equal(
      opus.contextWindow,
      1000000,
      `default: claude-opus-4-8 contextWindow should be 1000000, got ${opus.contextWindow}`,
    );
  }

  // Codex context metadata — source is openai-codex (NOT openai). The regression
  // that motivated this gate: reading from openai source made pi-shell-acp
  // advertise gpt-5.5 ctx=1,050,000 while the openai-codex source reflects what
  // codex-acp actually delivers. As of pi-ai 0.70.2 the entire openai-codex
  // gpt-5.x line declares 272,000.
  // Values below must match @earendil-works/pi-ai getModels("openai-codex") —
  // if upstream updates a context, update this gate with it.
  const CODEX_EXPECTED_CTX = {
    'gpt-5.4': 272000,
    'gpt-5.4-mini': 272000,
    'gpt-5.5': 272000,
  };
  for (const [id, expected] of Object.entries(CODEX_EXPECTED_CTX)) {
    const m = models.get(id);
    assert.ok(m, `curated Codex model missing: ${id}`);
    assert.equal(
      m.contextWindow,
      expected,
      `${id} contextWindow must come from openai-codex source: expected ${expected}, got ${m.contextWindow}`,
    );
  }
  // Explicit anti-bug: gpt-5.5 must not be 1,050,000. The openai source claims
  // that for Chat Completions, but codex-acp can't serve it. The hint in the
  // message names the same value CODEX_EXPECTED_CTX pins above.
  assert.notEqual(
    models.get('gpt-5.5')?.contextWindow,
    1050000,
    'gpt-5.5 at 1,050,000 context = openai source bug (should be openai-codex 272,000)',
  );

  // Gemini context metadata — source is `google` (no separate gemini-cli
  // tier in pi-ai). pi-ai 0.73.0 reports 1,048,576 for the curated Gemini
  // ACP Pro preview. Update this gate when the upstream registry shifts.
  const GEMINI_EXPECTED_CTX = {
    'gemini-3.1-pro-preview': 1048576,
  };
  for (const [id, expected] of Object.entries(GEMINI_EXPECTED_CTX)) {
    const m = models.get(id);
    assert.ok(m, `curated Gemini model missing: ${id}`);
    assert.equal(
      m.contextWindow,
      expected,
      `${id} contextWindow must come from google source: expected ${expected}, got ${m.contextWindow}`,
    );
  }

  console.log('[check-models] pass 1 (curated surface + Claude defaults + codex source + gemini source): ok');
}

// --- Pass 2: explicit override respected ---
{
  const models = await collectModels('1000000');
  for (const id of ['claude-sonnet-4-6', 'claude-opus-4-8']) {
    const m = models.get(id);
    // Hard presence check: a curated model that vanishes under the override
    // must fail the gate instead of being skipped.
    assert.ok(m, `override: curated Claude model missing: ${id}`);
    assert.equal(
      m.contextWindow,
      1_000_000,
      `override: ${id} contextWindow should be 1000000 with PI_SHELL_ACP_CLAUDE_CONTEXT=1000000, got ${m.contextWindow}`,
    );
  }
  console.log('[check-models] pass 2 (PI_SHELL_ACP_CLAUDE_CONTEXT=1000000 override): ok');
}

// --- Pass 3: bogus override falls back to default ---
{
  const models = await collectModels('not-a-number');
  const id = 'claude-sonnet-4-6';
  const m = models.get(id);
  // Hard presence check, same reasoning as pass 2: never report the overall
  // run as ok when this pass did not actually assert anything.
  assert.ok(m, `bogus override: curated Claude model missing: ${id}`);
  assert.equal(
    m.contextWindow,
    200000,
    `bogus override: ${id} should fall back to 200000, got ${m.contextWindow}`,
  );
  console.log('[check-models] pass 3 (bogus override falls back): ok');
}

delete process.env.PI_SHELL_ACP_CLAUDE_CONTEXT;
console.log('[check-models] all passes ok');
EOF
    ) || status=$?
  fi

  rm -rf "$verify_dir"
  return "$status"
}
# Concretely the kind of skew that produced commit 21de0f9's "0.11.1 # leftover" review comment: package.json bumped to 0.12.0 while README # and run.sh's setup gate still claimed 0.11.1. Static check, no # subprocess — fast enough to run inside `pnpm check` and pre-commit. (cd "$REPO_DIR" && node --input-type=module <<'EOF' import { strict as assert } from 'node:assert'; import { readFileSync } from 'node:fs'; const pkg = JSON.parse(readFileSync('package.json', 'utf8')); const claudePinned = pkg.dependencies['@agentclientprotocol/claude-agent-acp']; const codexPinned = pkg.dependencies['@zed-industries/codex-acp']; const geminiBundled = pkg.dependencies['@google/gemini-cli'] ?? pkg.optionalDependencies?.['@google/gemini-cli']; assert.ok(claudePinned, 'package.json must pin @agentclientprotocol/claude-agent-acp'); assert.ok(codexPinned, 'package.json must pin @zed-industries/codex-acp'); assert.equal(geminiBundled, undefined, 'package.json must not bundle @google/gemini-cli — gemini is an external PATH runtime'); // `^` + `m` flag anchors to the start-of-line shell assignment so we don't // accidentally pick up the regex literal inside this very check function's // heredoc (which is indented, so won't match `^...`). 
const runSh = readFileSync('run.sh', 'utf8'); const claudeRequired = runSh.match(/^CLAUDE_ACP_REQUIRED_VERSION="([^"]+)"/m)?.[1]; const codexRequired = runSh.match(/^CODEX_ACP_REQUIRED_VERSION="([^"]+)"/m)?.[1]; assert.equal(claudeRequired, claudePinned, `run.sh CLAUDE_ACP_REQUIRED_VERSION (${claudeRequired}) must match package.json @agentclientprotocol/claude-agent-acp (${claudePinned})`); assert.equal(codexRequired, codexPinned, `run.sh CODEX_ACP_REQUIRED_VERSION (${codexRequired}) must match package.json @zed-industries/codex-acp (${codexPinned})`); const readme = readFileSync('README.md', 'utf8'); const readmeCodex = readme.match(/@zed-industries\/codex-acp@([0-9.]+)/)?.[1]; assert.equal(readmeCodex, codexPinned, `README.md @zed-industries/codex-acp install pin (${readmeCodex}) must match package.json (${codexPinned})`); // pi peer/dev alignment (#26 / 0.8.0 dep-alignment gate). The three // @earendil-works/pi-* devDeps must pin one identical version, and that // version must match the check-pack-install peer-install pins below // (`pnpm add @earendil-works/pi-ai@X ...`). Without this, a pi bump could // drift package.json devDeps away from the fresh-temp install smoke and // the "dependency alignment gate" would not actually verify pi. const piAi = pkg.devDependencies?.['@earendil-works/pi-ai']; const piCoding = pkg.devDependencies?.['@earendil-works/pi-coding-agent']; const piTui = pkg.devDependencies?.['@earendil-works/pi-tui']; assert.ok(piAi, 'package.json devDependencies must pin @earendil-works/pi-ai'); assert.equal(piCoding, piAi, `@earendil-works/pi-coding-agent (${piCoding}) must match @earendil-works/pi-ai (${piAi})`); assert.equal(piTui, piAi, `@earendil-works/pi-tui (${piTui}) must match @earendil-works/pi-ai (${piAi})`); // check-pack-install peer-install pins (quoted `@earendil-works/pi-*@` // args; the `\d`-anchored version avoids matching this regex literal itself). 
check_pi_import_surface() {
  # 0.11 Stage 0 (frozen decision 9): the bridge may reference
  # @earendil-works/pi-* ONLY by the package root. ANY subpath (`/dist`,
  # `/core`, `/src`, `/foo`, …) reaches pi's private surface and silently
  # breaks on pi internal reshuffles.
  #
  # The check is intentionally SPECIFIER-shaped, not import-keyword-shaped: it
  # matches a quoted/backtick module specifier `@earendil-works/pi-*/…`, so one
  # pattern catches static `from`, dynamic `import()`, `require()`,
  # `export … from`, side-effect `import "…"`, and whitespace variants alike.
  # Root import `@earendil-works/pi-coding-agent` (no trailing slash) is allowed.
  #
  # Scans EVERY tracked .ts/.js/.mjs/.cjs source (git ls-files) outside
  # node_modules/ and dist/, not a hardcoded file list.
  #
  # The gate fails CLOSED: if the tracked files cannot be enumerated, or grep
  # itself errors, it exits 1 instead of reporting a scan that never happened.
  # Paths are handled one per line (never word-split), so a file whose path
  # contains spaces is scanned like any other.
  #
  # Globals:  REPO_DIR (read)
  # Outputs:  failure details on stdout; success line via ok()
  # Exits:    1 on any violation or scan failure (matches the original contract)
  local pattern="[\"'\`]@earendil-works/pi-(ai|coding-agent|tui)/"
  local listing path hits="" rc=0 count=0
  local -a files=()

  # core.quotePath=false keeps non-ASCII paths literal so they can be opened.
  if ! listing=$(cd "$REPO_DIR" && git -c core.quotePath=false ls-files -- '*.ts' '*.js' '*.mjs' '*.cjs'); then
    echo "[check-pi-import-surface] FAIL: cannot enumerate tracked sources (git ls-files failed in $REPO_DIR) — refusing to pass without scanning"
    exit 1
  fi

  while IFS= read -r path; do
    [ -n "$path" ] || continue
    case "$path" in
      node_modules/* | dist/*) continue ;;
    esac
    # Tracked but deleted in the working tree: nothing on disk to scan.
    [ -f "$REPO_DIR/$path" ] || continue
    files+=("$path")
    count=$((count + 1))
  done <<<"$listing"

  if [ "$count" -gt 0 ]; then
    # grep exit status: 0 = match, 1 = no match, >1 = real error.
    hits=$(cd "$REPO_DIR" && grep -HnE -e "$pattern" -- "${files[@]}") || rc=$?
    if [ "$rc" -gt 1 ]; then
      echo "[check-pi-import-surface] FAIL: grep could not scan the tracked sources (exit $rc)"
      if [ -n "$hits" ]; then
        echo "$hits"
      fi
      exit 1
    fi
  fi

  if [ -n "$hits" ]; then
    echo "[check-pi-import-surface] FAIL: pi private subpath reference(s) — import @earendil-works/pi-* by the package ROOT only:"
    echo "$hits"
    exit 1
  fi
  ok "[check-pi-import-surface] pi references are root-only (no private subpath; all tracked ts/js scanned)"
}
check_pi_preflight() {
  # 0.11 Stage 0 (2): controlled-launch trust decision gate.
  #
  # Delegates to scripts/check-pi-preflight.ts, run from the repo root under
  # Node's --experimental-strip-types. Per the script's stated contract it
  # exercises frozen decision 8 precedence (saved false > saved true > prefix >
  # no-inputs > fail-fast) and decision 7's separator-boundary prefix against
  # pi's own ProjectTrustStore in a temp agentDir — deterministic, with no
  # network or backend involved.
  #
  # Globals:  REPO_DIR (read)
  # Returns:  the exit status of the node run (or of the failed cd).
  (
    cd "$REPO_DIR" || exit
    node --experimental-strip-types scripts/check-pi-preflight.ts
  )
}
check_sdk_surface() {
  # Static gate against silently-broken ACP SDK calls.
  #
  # Background: in 0.4.5 we discovered that `(session.connection as any)
  # .unstable_resumeSession({...})` had been a dead call ever since SDK
  # 0.20.0 promoted resumeSession/closeSession out of the `unstable_*`
  # namespace; sdk@0.22.1 still keeps those stable names. Capability check
  # passed, method call threw TypeError, bootstrap fallback caught it, every
  # session silently took the load path instead of resume — Hard Rule #2
  # ("resume > load > new") quietly violated for months. The `as any` cast hid
  # the rename from tsc.
  #
  # The right floor: SDK methods are typed on ClientSideConnection. Either
  # call them directly, or annotate the cast with a marker so the bypass is
  # *visible* to a code review:
  #
  #   // SDK_CAST_OK:   — permanent (genuine SDK gap)
  #   // SDK_CAST_DEBT: — tracked for removal (e.g. version bump)
  #
  # The gate scans `acp-bridge.ts` (the only file that touches the ACP
  # connection object) for `connection as any)` and requires a marker on the
  # same line or the immediately preceding line. Anything else fails.
  #
  # See AGENTS.md § Hard Rules #10 for the principle.
  #
  # Globals:  REPO_DIR (read)
  # Outputs:  diagnostics and a summary line on stdout
  # Returns:  0 when every cast is annotated and no dead unstable_* name is
  #           present; 1 otherwise (including a missing acp-bridge.ts).
  local file
  file="$REPO_DIR/acp-bridge.ts"
  if [ ! -f "$file" ]; then
    fail "[check-sdk-surface] $file missing"
    return 1
  fi

  # The awk scan always exits 0 and reports violations on stdout only. A
  # non-zero awk status inside `report=$(…)` would trip `set -e` and abort the
  # script BEFORE the FAIL report below could be printed; a non-empty report is
  # the sole failure signal.
  local report
  report=$(awk '
    # -1 means "no marker seen yet"; with the implicit 0 an unannotated cast
    # on line 1 would satisfy NR == last_marker + 1 and slip through.
    BEGIN { last_marker = -1 }
    /SDK_CAST_OK|SDK_CAST_DEBT/ { last_marker = NR }
    /connection as any\)/ {
      # Marker must be on the same line (trailing comment) or the
      # immediately preceding line. Multi-line casts should keep the
      # marker adjacent to the cast site.
      if (NR != last_marker && NR != last_marker + 1) {
        printf "%s:%d: %s\n", FILENAME, NR, $0
      }
    }
  ' "$file") || {
    fail "[check-sdk-surface] awk scan of $file failed"
    return 1
  }
  if [ -n "$report" ]; then
    echo "[check-sdk-surface] FAIL: undocumented (connection as any) casts:"
    echo "$report"
    echo ""
    echo " SDK methods are typed on ClientSideConnection (see"
    echo " node_modules/@agentclientprotocol/sdk/dist/acp.d.ts)."
    echo " Either drop the cast, or annotate with one of:"
    echo " // SDK_CAST_OK: "
    echo " // SDK_CAST_DEBT: "
    return 1
  fi

  # grep exits 1 on "no match"; `|| true` keeps that from tripping errexit.
  local dead_names
  dead_names=$(grep -nE "unstable_(resumeSession|closeSession)" "$file" 2>/dev/null || true)
  if [ -n "$dead_names" ]; then
    echo "[check-sdk-surface] FAIL: dead ACP SDK unstable method names:"
    echo "$dead_names"
    echo ""
    echo " sdk@0.20+ promoted these methods to stable names."
    echo " Use session.connection.resumeSession / closeSession directly."
    return 1
  fi

  # Informational only: print the typed method declarations when the SDK's
  # .d.ts is installed; a missing file is silently skipped.
  local sdk_surface
  sdk_surface=$(grep -nE "^[[:space:]]+(resumeSession|closeSession|unstable_setSessionModel|prompt|cancel)\(" "$REPO_DIR/node_modules/@agentclientprotocol/sdk/dist/acp.d.ts" 2>/dev/null || true)
  if [ -n "$sdk_surface" ]; then
    echo "[check-sdk-surface] ACP SDK typed surface:"
    echo "$sdk_surface"
  fi

  # grep -c always prints the count and exits 0 when matches > 0, 1 when 0.
  # `|| true` keeps the substitution clean; the count is already in stdout.
  local ok_count debt_count total
  ok_count=$(grep -c "SDK_CAST_OK" "$file" 2>/dev/null || true)
  debt_count=$(grep -c "SDK_CAST_DEBT" "$file" 2>/dev/null || true)
  total=$(grep -c "connection as any)" "$file" 2>/dev/null || true)
  echo "[check-sdk-surface] $total cast(s) present, $ok_count OK + $debt_count DEBT — all annotated"
}
local sh_mode_violations sh_mode_violations=$(node -e ' const data = JSON.parse(require("fs").readFileSync(0, "utf8")); const bad = data[0].files .filter(f => f.path.endsWith(".sh")) .filter(f => (f.mode & 0o111) === 0); for (const f of bad) console.log(f.path + " mode=0" + (f.mode || 0).toString(8)); ' <<<"$json") || { fail "[check-pack] failed to inspect tarball modes" return 1 } if [ -n "$sh_mode_violations" ]; then fail "[check-pack] .sh files missing executable bit in tarball:" echo "$sh_mode_violations" | sed 's/^/ /' >&2 return 1 fi local required=( "package.json" "README.md" "LICENSE" "CHANGELOG.md" "index.ts" "acp-bridge.ts" "event-mapper.ts" "engraving.ts" "pi-context-augment.ts" "protocol.js" "run.sh" "pi-extensions/entwurf.ts" "pi-extensions/entwurf-control.ts" "pi-extensions/model-lock.ts" "pi-extensions/lib/entwurf-core.ts" "mcp/pi-tools-bridge/src/index.ts" "scripts/postinstall-chmod.cjs" "pi/entwurf-capabilities.json" ) # Patterns that must NOT appear in the tarball. Anchored where the # match should be exact (e.g. ^bench\.sh$); loose where the residue # may appear under any path (e.g. \.log$). local forbidden_patterns=( 'pi-session-.*\.html$' '\.log$' '\.cast$' '^bench\.sh$' '^biome\.json$' '^tsconfig\.json$' '^pnpm-(lock\.yaml|workspace\.yaml)$' '^NEXT\.md$' '^plugins/' '^node_modules/' '\.tmp-verify/' '\.agent-(reports|shell)/' ) local pass=1 f pat hit for f in "${required[@]}"; do if ! 
check_pack_install() {
  # Heavy publish gate. Runs the remaining three checks in #13's
  # publish checklist that check_pack (dry-run only) does not cover:
  #
  #   2. actual `npm pack` — produces the real tarball
  #   3. `tar -tf` — cross-checks contents against dry-run invariants
  #   4. fresh-temp project local install smoke — pnpm add the tarball
  #      with required peers, then import('@junghanacs/pi-shell-acp/package.json')
  #      to confirm the installed shape resolves end-to-end.
  #
  # Excluded from the default `pnpm check` because the install smoke
  # spends 5-15s on dependency resolution. Wired into prepublishOnly
  # so `npm publish` cannot succeed if the actual install path is
  # broken even when dry-run invariants look fine.
  #
  # Force --dry-run=false because `npm publish --dry-run` exports
  # npm_config_dry_run=true into lifecycle scripts. Without the explicit
  # override, this nested actual-pack smoke prints the tarball name but
  # does not write the .tgz file, causing prepublishOnly to fail before
  # a real publish can be exercised.
  #
  # Takes no arguments. Returns 0 when every stage passes, 1 at the first
  # failing stage. The tarball and the temp project are removed on return.
  section "publish install smoke (actual pack + tar + fresh install)"

  local version tgz_name tgz_path
  version=$(node -p "require('${REPO_DIR}/package.json').version")
  # Scoped npm packages produce a tarball named "<scope>-<name>-<version>.tgz"
  # where the `@` is stripped and `/` becomes `-`. For `@junghanacs/pi-shell-acp`
  # that lands as `junghanacs-pi-shell-acp-<version>.tgz`. Hardcoded against
  # the scope above so a name change cannot silently slide past this gate.
  tgz_name="junghanacs-pi-shell-acp-${version}.tgz"
  tgz_path="${REPO_DIR}/${tgz_name}"
  rm -f "$tgz_path"

  echo "[check-pack-install] npm pack -> ${tgz_name}"
  (cd "$REPO_DIR" && npm pack --dry-run=false 2>&1 | tail -1) || {
    fail "[check-pack-install] npm pack failed"
    return 1
  }
  if [ ! -f "$tgz_path" ]; then
    fail "[check-pack-install] tarball not produced: $tgz_path"
    return 1
  fi

  # tar -tf invariants — cross-check against dry-run shape. Same
  # required/forbidden axes as check_pack; if they disagree, the
  # dry-run resolver and the actual tarball diverged (npm bug or
  # files allowlist drift) and publish must not proceed.
  local tar_files pass=1 f pat
  tar_files=$(tar -tf "$tgz_path" | sed 's|^package/||' | grep -v '/$' || true)

  local tar_required=(
    "package.json"
    "README.md"
    "LICENSE"
    "CHANGELOG.md"
    "index.ts"
    "acp-bridge.ts"
    "event-mapper.ts"
    "engraving.ts"
    "pi-context-augment.ts"
    "protocol.js"
    "run.sh"
    "pi-extensions/entwurf.ts"
    "pi-extensions/entwurf-control.ts"
    "pi-extensions/model-lock.ts"
    "pi-extensions/lib/entwurf-core.ts"
    "mcp/pi-tools-bridge/src/index.ts"
    "scripts/postinstall-chmod.cjs"
    "pi/entwurf-capabilities.json"
  )
  for f in "${tar_required[@]}"; do
    if ! grep -qxF "$f" <<<"$tar_files"; then
      fail "[check-pack-install] tar missing required: $f"
      pass=0
    fi
  done

  local tar_forbidden=(
    'pi-session-.*\.html$'
    '\.log$'
    '\.cast$'
    '^bench\.sh$'
    '^biome\.json$'
    '^tsconfig\.json$'
    '^pnpm-(lock\.yaml|workspace\.yaml)$'
    '^NEXT\.md$'
    '^plugins/'
    '^node_modules/'
    '\.tmp-verify/'
    '\.agent-(reports|shell)/'
  )
  for pat in "${tar_forbidden[@]}"; do
    local hit
    hit=$(grep -E "$pat" <<<"$tar_files" || true)
    if [ -n "$hit" ]; then
      fail "[check-pack-install] tar contains forbidden pattern $pat:"
      echo "$hit" | sed 's/^/  /' >&2
      pass=0
    fi
  done

  if [ "$pass" != "1" ]; then
    rm -f "$tgz_path"
    fail "[check-pack-install] tar -tf invariants violated"
    return 1
  fi
  echo "[check-pack-install] tar -tf invariants pass ($(printf '%s\n' "$tar_files" | wc -l | tr -d ' ') files)"

  # Fresh-temp install smoke. Uses pnpm because that is what this
  # repo packages with; --ignore-workspace stops it from re-attaching
  # to our pnpm-workspace.yaml; --ignore-scripts blocks the husky
  # prepare hook (and any future install scripts) from running inside
  # the consumer project. Peer deps are pinned to the 0.79.x release
  # baseline so the smoke matches the same shape an external pi user
  # would have after `pi install`.
  local tmp
  tmp=$(mktemp -d -t pi-shell-acp-install-smoke.XXXXXX)
  # A RETURN trap stays installed after the function that set it has
  # returned, so the handler removes itself. Otherwise it could fire again
  # on a later function return, where the local $tmp is out of scope and
  # the expansion is fatal under `set -u`.
  trap 'rm -rf -- "$tmp" "$tgz_path"; trap - RETURN' RETURN

  printf '%s\n' '{ "name": "pi-shell-acp-install-smoke", "version": "0.0.0", "private": true }' > "$tmp/package.json"

  echo "[check-pack-install] pnpm add into $tmp (with 0.79.x peers + typebox)"
  local install_log
  install_log=$(cd "$tmp" && pnpm add \
    "$tgz_path" \
    "@earendil-works/pi-ai@0.79.4" \
    "@earendil-works/pi-coding-agent@0.79.4" \
    "@earendil-works/pi-tui@0.79.4" \
    "typebox@latest" \
    --ignore-workspace --ignore-scripts 2>&1) || {
    fail "[check-pack-install] pnpm add failed:"
    echo "$install_log" | tail -10 | sed 's/^/  /' >&2
    return 1
  }

  # Resolve the installed package.json and confirm pi.extensions
  # arrived intact. If pi.extensions is empty or missing, the
  # consumer pi runtime would fail to register any extension.
  local probe
  probe=$(cd "$tmp" && node --input-type=module -e "
    const m = await import('@junghanacs/pi-shell-acp/package.json', { with: { type: 'json' } });
    const pkg = m.default;
    const exts = Array.isArray(pkg.pi?.extensions) ? pkg.pi.extensions.length : 0;
    if (exts === 0) { console.error('pi.extensions missing or empty'); process.exit(1); }
    console.log(pkg.version + ' (' + exts + ' extensions)');
  " 2>&1) || {
    fail "[check-pack-install] installed package probe failed:"
    echo "$probe" | sed 's/^/  /' >&2
    return 1
  }
  echo "[check-pack-install] installed: $probe"

  # Pi package loader smoke — actual `pi` reads the manifest and
  # registers the provider. rc=0 + the curated model list in the
  # output means pi accepted the package as a real extension, not
  # just a well-shaped npm tarball. `--list-models` does not spawn
  # the Claude/Codex/Gemini backends, so this stays credential-free
  # and safe to run in CI. Output goes to stderr; capture both
  # streams with 2>&1.
  if ! command -v pi >/dev/null 2>&1; then
    fail "[check-pack-install] pi binary not on PATH — cannot run loader smoke"
    return 1
  fi
  local loader_out
  loader_out=$(cd "$tmp" && pi -e "$tmp/node_modules/@junghanacs/pi-shell-acp" --list-models pi-shell-acp 2>&1) || {
    fail "[check-pack-install] pi loader smoke failed (exit non-zero):"
    echo "$loader_out" | tail -10 | sed 's/^/  /' >&2
    return 1
  }
  if ! grep -q "pi-shell-acp" <<<"$loader_out"; then
    fail "[check-pack-install] pi loader output missing pi-shell-acp model surface:"
    echo "$loader_out" | tail -10 | sed 's/^/  /' >&2
    return 1
  fi
  if ! grep -q "claude-sonnet-4-6" <<<"$loader_out"; then
    fail "[check-pack-install] pi loader output missing claude-sonnet-4-6 anchor:"
    echo "$loader_out" | tail -10 | sed 's/^/  /' >&2
    return 1
  fi
  echo "[check-pack-install] pi loader smoke pass (pi-shell-acp registered, claude-sonnet-4-6 anchor)"

  ok "[check-pack-install] publish install smoke pass"
  return 0
}
# check_claude_sessions <project-dir>
#
# Diagnostic listing. Reads the persisted pi-shell-acp session records under
# ~/.pi/agent/cache/pi-shell-acp/sessions whose `cwd` equals the project dir,
# asks the Claude agent SDK (`listSessions`) which sessions it can see for
# that dir, and prints up to 12 records, newest first, each marked VISIBLE or
# MISSING. Malformed cache files are skipped.
check_claude_sessions() {
  local project_dir
  project_dir=$(normalize_project_dir "$1")

  (cd "$REPO_DIR" && PROJECT_DIR="$project_dir" node --input-type=module <<'EOF'
import { existsSync, readdirSync, readFileSync } from 'node:fs';
import { createRequire } from 'node:module';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
import { pathToFileURL } from 'node:url';

const require = createRequire(import.meta.url);
const claudeAcpPkg = require.resolve('@agentclientprotocol/claude-agent-acp/package.json');
const claudeAcpRoot = dirname(claudeAcpPkg);
const claudeSdkUrl = pathToFileURL(join(claudeAcpRoot, '..', '..', '@anthropic-ai', 'claude-agent-sdk', 'sdk.mjs')).href;
const { listSessions } = await import(claudeSdkUrl);

const projectDir = process.env.PROJECT_DIR;
if (!projectDir) {
  throw new Error('PROJECT_DIR is required');
}

const cacheDir = join(homedir(), '.pi', 'agent', 'cache', 'pi-shell-acp', 'sessions');
const visibleSessions = await listSessions({ dir: projectDir, limit: 100 });
const visibleById = new Map(visibleSessions.map((session) => [session.sessionId, session]));

const persisted = [];
if (existsSync(cacheDir)) {
  for (const entry of readdirSync(cacheDir)) {
    if (!entry.endsWith('.json')) continue;
    const filePath = join(cacheDir, entry);
    try {
      const record = JSON.parse(readFileSync(filePath, 'utf8'));
      if (record?.provider !== 'pi-shell-acp') continue;
      if (record?.cwd !== projectDir) continue;
      if (typeof record?.acpSessionId !== 'string' || typeof record?.sessionKey !== 'string') continue;
      persisted.push(record);
    } catch {
      // ignore malformed cache entries
    }
  }
}

// Sort key for "newest first". A missing or unparsable updatedAt maps to 0
// (oldest) so the comparator never returns NaN, which would leave the
// resulting order unspecified.
const updatedAtMs = (record) => {
  const value = record?.updatedAt;
  if (typeof value === 'number' && Number.isFinite(value)) return value;
  const parsed = Date.parse(value);
  return Number.isNaN(parsed) ? 0 : parsed;
};
persisted.sort((a, b) => updatedAtMs(b) - updatedAtMs(a));

console.log(`[check-claude-sessions] project=${projectDir}`);
console.log(`[check-claude-sessions] cacheDir=${cacheDir}`);
console.log(`[check-claude-sessions] claudeVisible=${visibleSessions.length} persistedMatches=${persisted.length}`);

for (const record of persisted.slice(0, 12)) {
  const visible = visibleById.get(record.acpSessionId);
  const marker = visible ? 'VISIBLE' : 'MISSING';
  const summary = visible?.summary ? ` summary=${JSON.stringify(visible.summary)}` : '';
  console.log(`${marker} acp=${record.acpSessionId} sessionKey=${record.sessionKey} updated=${record.updatedAt}${summary}`);
}
EOF
  )
}
$HOME is the usual override and reliably differs from a # project_dir; if the operator runs verify-resume *against* $HOME itself # (project_dir == $HOME) we fall back to a fresh mktemp dir so the gate # still exercises a true cross-cwd path. PI_SHELL_ACP_VERIFY_OTHER_DIR # explicit override wins over both. other_dir=${PI_SHELL_ACP_VERIFY_OTHER_DIR:-$HOME} if [[ "$(cd "$other_dir" && pwd -P)" == "$(cd "$project_dir" && pwd -P)" ]]; then other_dir=$(mktemp -d /tmp/pi-shell-acp-verify-other-XXXXXX) echo "[verify-resume] project_dir == \$HOME; using fresh other-dir: $other_dir" fi require_cmd pi require_cmd node echo "[verify-resume] project: $project_dir" echo "[verify-resume] other-dir: $other_dir (cross-cwd resumer cwd)" echo "[verify-resume] repo: $REPO_DIR" echo "[verify-resume] model: $model" echo "[verify-resume] session-file: $session_file" echo "[verify-resume] turn1: pi should log bootstrap=new and an acpSessionId" ( cd "$project_dir" PI_SHELL_ACP_DEBUG=1 pi -e "$REPO_DIR" --session "$session_file" --provider pi-shell-acp --model "$model" -p "$prompt_a" ) echo "[verify-resume] turn2: pi should log bootstrap=resume or bootstrap=load with the same acpSessionId" ( cd "$project_dir" PI_SHELL_ACP_DEBUG=1 pi -e "$REPO_DIR" --session "$session_file" --provider pi-shell-acp --model "$model" -p "$prompt_b" ) check_claude_sessions "$project_dir" # ---------------------------------------------------------------------- # Phase 2 — cross-cwd fact-recall through entwurf_resume (issue #9 gate). # # Phase 1 above checks acpSessionId continuity but runs both turns from # the same cwd, so the cwd-mismatch branch of # `isPersistedSessionCompatible` is never exercised. Phase 2 reproduces # the regression shape: spawn at $project_dir, resume from $other_dir # via runEntwurfResumeSync with options.cwd=undefined (the MCP shape). # The fix in pi-extensions/lib/entwurf-core.ts must read the session # header cwd to keep child spawn cwd aligned with the original. 
CLAUDE_ACP_REQUIRED_VERSION="0.39.0"
CODEX_ACP_REQUIRED_VERSION="0.15.0"

# _pnpm_global_pkg_version <package-name>
#
# Prints the version of <package-name> found in `pnpm list -g --depth=0`, or
# nothing when it is not listed. Accepts both `name@version` and
# `name version` layouts. The full version token is captured, so a prerelease
# such as `0.39.0-beta.1` is never mistaken for `0.39.0`. Only the first match
# is reported. Always returns 0; a failing or silent pnpm reads as "not found".
# NOTE(review): pnpm's human-readable list output is believed to separate name
# and version with a space — confirm against the pnpm version in use.
_pnpm_global_pkg_version() {
  local pkg=$1 listing line
  listing=$(pnpm list -g --depth=0 2>/dev/null) || true
  while IFS= read -r line; do
    # The quoted "$pkg" is matched literally; the rest is an ERE.
    if [[ "$line" =~ "$pkg"[@[:space:]]+([0-9][0-9A-Za-z.+-]*) ]]; then
      printf '%s\n' "${BASH_REMATCH[1]}"
      return 0
    fi
  done <<<"$listing"
  return 0
}

# _report_global_acp <label> <package-name> <required-version>
#
# Shared body of the check_global_*_acp helpers. Prints an "(ok)" line on
# stdout when the installed global version equals the required one; otherwise
# prints a warning plus the exact `pnpm add -g` remedy on stderr. Advisory
# only: always returns 0 so setup continues.
_report_global_acp() {
  local label=$1 pkg=$2 required=$3 installed
  installed=$(_pnpm_global_pkg_version "$pkg")
  if [[ "$installed" == "$required" ]]; then
    echo "[setup] $label global: $installed (ok)"
    return 0
  fi
  if [[ -n "$installed" ]]; then
    echo "[setup] warning: $label global is $installed, expected $required" >&2
  else
    echo "[setup] warning: $label not found in pnpm global" >&2
  fi
  echo "[setup] run: pnpm add -g $pkg@$required" >&2
}

# Warn (never fail) when the global claude-agent-acp is missing or is not the
# version named by CLAUDE_ACP_REQUIRED_VERSION.
check_global_claude_acp() {
  _report_global_acp "claude-agent-acp" \
    "@agentclientprotocol/claude-agent-acp" "$CLAUDE_ACP_REQUIRED_VERSION"
}

# Warn (never fail) when the global codex-acp is missing or is not the
# version named by CODEX_ACP_REQUIRED_VERSION.
check_global_codex_acp() {
  _report_global_acp "codex-acp" \
    "@zed-industries/codex-acp" "$CODEX_ACP_REQUIRED_VERSION"
}
raw=$(cd "$bridge_dir" && node --input-type=module <<'JS' import { spawn } from 'node:child_process'; const child = spawn('./start.sh'); let stdout = ''; let stderr = ''; let done = false; function finishOk(trimmed) { if (done) return; done = true; clearTimeout(timer); if (stderr.trim()) console.error(stderr.trim()); const msg = JSON.parse(trimmed); const tools = msg?.result?.tools; if (!Array.isArray(tools)) { console.error('tools/list response missing result.tools'); process.exit(1); } const names = tools.map((t) => t?.name).sort(); const expected = ['entwurf', 'entwurf_resume', 'entwurf_peers', 'entwurf_send', 'entwurf_self', 'entwurf_inbox_read', 'entwurf_v2']; for (const name of expected) { if (!names.includes(name)) { console.error(`missing MCP tool: ${name}`); process.exit(1); } } console.log(names.join(',')); child.kill('SIGTERM'); process.exit(0); } child.stdout.on('data', (d) => { stdout += d.toString(); const trimmed = stdout.trim(); if (trimmed) finishOk(trimmed); }); child.stderr.on('data', (d) => { stderr += d.toString(); }); child.stdin.write(JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'tools/list' }) + '\n'); const timer = setTimeout(() => { child.kill('SIGKILL'); console.error('pi-tools-bridge direct smoke timeout'); process.exit(1); }, 3000); child.on('error', (err) => { if (done) return; clearTimeout(timer); console.error(String(err)); process.exit(1); }); child.on('close', () => { if (done) return; clearTimeout(timer); const trimmed = stdout.trim(); if (!trimmed) { if (stderr.trim()) console.error(stderr.trim()); console.error('empty tools/list response'); process.exit(1); } finishOk(trimmed); }); JS ); then fail "pi-tools-bridge: direct MCP smoke failed" return 1 fi ok "pi-tools-bridge direct MCP smoke ($raw)" if ! (cd "$bridge_dir" && ./test.sh >/dev/null); then fail "pi-tools-bridge: protocol/negative-path tests failed" return 1 fi ok "pi-tools-bridge test.sh" # check-bridge deliberately stops at the objective MCP boundary. 
# pi-native async entwurf spawn smoke. Loads the native entwurf.ts directly
# and asks a cheap model to invoke `entwurf` in async mode locally. (Remote/SSH
# entwurf is out of scope in the garden-native session identity — 0.9.0, #11 —
# and now fails fast *before* runEntwurfAsync runs, so a remote host can no
# longer be used to exercise the async spawn path.) The gate reads pi's
# --mode json event stream so it inspects the tool's *actual* sync return
# (which contains "Async entwurf spawned" + Session ID), not the model's
# natural-language interpretation. It also greps explicitly for the regression
# class PM flagged: a stale `explicitExtensions` reference in runEntwurfAsync
# would surface as a ReferenceError in the tool result.
#
# Returns 0 when a spawn marker is present, 1 otherwise (pi exit non-zero,
# the ReferenceError regression, or no marker at all).
validate_pi_native_async_entwurf() {
  local raw sessionid
  local -a pi_args=(
    -p
    --mode json
    --no-extensions
    -e "$REPO_DIR/pi-extensions/entwurf.ts"
    --provider openai-codex
    --model gpt-5.4-mini
    'entwurf 도구를 task="noop", mode="async" 인수로 정확히 1회 호출하라. 도구의 첫 sync 응답을 그대로 echo하라. 그 다음에 도착하는 follow-up 메시지는 무시하고 더 출력하지 마라.'
  )

  log "pi-native: async entwurf spawn smoke (model: gpt-5.4-mini)..."
  raw=$(pi "${pi_args[@]}" 2>&1) || {
    echo "$raw" >&2
    fail "pi-native async entwurf smoke: pi -p exited non-zero"
    return 1
  }

  # Regression class PM flagged: stale variable name in runEntwurfAsync.
  #
  # NOTE on here-strings: $raw can exceed 800KB (pi --mode json is chatty),
  # and `set -o pipefail` is active. The pattern `echo "$raw" | grep -q ...`
  # races — grep -q exits 0 on first match, echo then gets SIGPIPE (rc=141),
  # and pipefail elevates the whole pipe's rc to 141 → `if` sees "fail" even
  # though the match happened. Observed in setup runs where raw > ~500KB.
  # Here-strings (`<<< "$raw"`) feed stdin without a pipeline, sidestepping
  # pipefail entirely. Keep this form for any grep check on large $raw.
  if grep -qE 'ReferenceError.*explicitExtensions|explicitExtensions is not defined' <<< "$raw"; then
    echo "$raw" >&2
    fail "pi-native async: ReferenceError on 'explicitExtensions' (stale variable resurfaced)"
    return 1
  fi

  # The sync tool return contains these strings verbatim — independent of how
  # the model paraphrases. Either marker proves the spawn completed cleanly;
  # with neither present the run is a failure.
  if ! grep -qE 'Async entwurf spawned|Session ID:' <<< "$raw"; then
    echo "$raw" >&2
    fail "pi-native async entwurf produced neither Session ID nor a recognized error"
    return 1
  fi

  # `head -1` still closes its stdin early, which can send SIGPIPE back
  # to grep under pipefail. Swallow the rc so the assignment survives —
  # the captured string is the only thing we need.
  sessionid=$(grep -oE 'Session ID: [0-9]{8}T[0-9]{6}-[0-9a-f]{6}' <<< "$raw" | head -1 || true)
  ok "pi-native async entwurf spawn (${sessionid:-Session ID present})"
  return 0
}

# Subcommand wrapper: section banner + the direct MCP bridge validator.
check_bridge() {
  section "pi-tools-bridge (direct MCP protocol)"
  validate_pi_tools_bridge
}

# Subcommand wrapper: section banner + the native async spawn validator.
check_native_async() {
  section "pi-native async entwurf spawn"
  validate_pi_native_async_entwurf
}

# Run scripts/sentinel-runner.sh, forwarding every argument. Returns 1
# without running anything when the runner is missing or not executable.
sentinel_run() {
  local runner="$REPO_DIR/scripts/sentinel-runner.sh"
  [[ -x "$runner" ]] || {
    fail "sentinel: $runner not found or not executable"
    return 1
  }
  "$runner" "$@"
}

# Run scripts/session-messaging-smoke.sh, forwarding every argument. Returns 1
# without running anything when the script is missing or not executable.
session_messaging_run() {
  local runner="$REPO_DIR/scripts/session-messaging-smoke.sh"
  [[ -x "$runner" ]] || {
    fail "session-messaging: $runner not found or not executable"
    return 1
  }
  "$runner" "$@"
}
# smoke-compaction-policy — 0.5.0 compaction-policy verification.
#
# Five steps: 02/05 deterministic gate, 03/04/06 live observation
# (require LIVE=1). See demo/compaction-policy-smoke/README.md for the
# full surface declaration. Exits non-zero iff any deterministic step
# fails; observed-only rows do not gate.
#
# Arguments are forwarded unchanged to scripts/compaction-policy-smoke.ts,
# which runs from the repo root under `node --experimental-strip-types`.
# Returns 1 without running anything when that script is absent.
smoke_compaction_policy() {
  local smoke_ts
  smoke_ts="$REPO_DIR/scripts/compaction-policy-smoke.ts"

  [[ -f "$smoke_ts" ]] || {
    fail "smoke-compaction-policy: $smoke_ts not found"
    return 1
  }

  (
    cd "$REPO_DIR" &&
      node --experimental-strip-types "$smoke_ts" "$@"
  )
}
>&2 exit 1 fi echo "[setup] repo: $REPO_DIR" echo "[setup] project: $project_dir" echo "[setup] scope: full bridge + entwurf orchestration install" echo "[setup] verification: smoke-all + Axis 1 interview gates (pi-tools-bridge, pi-native async, sentinel, session-messaging)" (cd "$REPO_DIR" && pnpm install --frozen-lockfile) check_global_claude_acp check_global_codex_acp sync_auth install_local_package "$project_dir" if [[ -f "$HOME/.claude.json" ]]; then echo "[setup] ~/.claude.json detected" else echo "[setup] warning: ~/.claude.json not found. Run 'npx @anthropic-ai/claude-code' first if needed." >&2 fi # Deterministic preflight — cheap local gates run before any ACP/Claude # subprocess. Catches static-contract regressions (MCP shape, backend launch, # provider registration, model contextWindow cap) without waiting for smoke. echo "[setup] preflight: local deterministic checks" check_mcp check_backends check_registration check_models smoke_all "$project_dir" # Axis 1 interview-prerequisite gates. AGENTS.md §Ingestion Gates names # these as required before the Axis 2 agent interview can be re-run. # Ported from agent-config pre-Phase-4 (validator bodies) + 7545af8 # (session-messaging matrix) + pre-Phase-4 sentinel invocation. Default ON # here; agent-config (consumer) does not run these anymore. section "Axis 1 gate: pi-tools-bridge (direct MCP protocol)" validate_pi_tools_bridge section "Axis 1 gate: pi-native async entwurf spawn" validate_pi_native_async_entwurf section "Axis 1 gate: session-messaging 4-case matrix" session_messaging_run section "Axis 1 gate: sentinel (entwurf 6-cell matrix)" if sentinel_run; then ok "sentinel 6/6 PASS" else fail "sentinel: one or more cells failed — see table above and artifact" echo "" echo "DONE: pi-shell-acp setup complete (with sentinel failures)" return 1 fi echo "" echo "DONE: pi-shell-acp setup + Axis 1 gates green. Axis 2 interview may proceed." 
# ---------------------------------------------------------------------------
# xt-tool-surface — ACP backend exclude-tools policy gate (NEXT Step 1e).
#
# pi 0.77 --exclude-tools/-xt removes a tool from pi's active set + "Available
# tools:" system prompt, but the ACP backend CLI keeps its own tool surface that
# pi-shell-acp does NOT gate per-tool (Claude gets providerSettings.tools;
# Codex/Gemini expose native shell+file tools regardless). The 0.8.0 policy is
# truthfulness-first FAIL-FAST: excluding a backend-backed built-in
# (read/bash/edit/write) is rejected up front (index.ts assertExcludeToolsHonored)
# rather than silently letting the backend keep the tool (declared != actual).
#
# This gate asserts, per backend, that `pi --provider pi-shell-acp -xt <built-in>`
# is rejected before backend launch with the policy error and runs NO tool — and
# (positive control) that excluding an EXTENSION tool (entwurf) is NOT rejected,
# because extension tools are pi-side and never reach the backend.
#
# All output checks below use here-strings rather than `echo "$out" | grep -q`.
# pi's --mode json output can be large, and under `set -o pipefail` an early
# `grep -q` exit makes echo die of SIGPIPE, so the pipeline reports failure
# despite the match: a false FAIL in `if !` checks, a false PASS in `if` checks.
# ---------------------------------------------------------------------------

# xt_tool_surface_single <backend> <model> <excluded-tool>
#
# Negative path: `-xt <excluded-tool>` must produce the policy error for
# <backend> and must not start any tool execution. Returns 0 on pass, 1 on fail.
xt_tool_surface_single() {
  local backend=$1 model=$2 excluded=$3
  local out policy_msg
  # pi's exit status is deliberately ignored; only its output is judged.
  out=$(timeout 120 pi --mode json -p -e "$REPO_DIR" \
    --provider pi-shell-acp --model "$model" -xt "$excluded" \
    "say hi" 2>&1) || true

  # Fixed-string match: the message contains parentheses and an arbitrary
  # tool name, neither of which should be read as a pattern.
  policy_msg="cannot honor --exclude-tools ($excluded) on the $backend backend"
  if ! grep -qF -- "$policy_msg" <<<"$out"; then
    fail "xt-tool-surface[$backend]: expected fail-fast on -xt $excluded, got none"
    tail -5 <<<"$out"
    return 1
  fi
  # Fail-fast fires before backend launch, so no tool may have executed.
  if grep -qF -- '"type":"tool_execution_start"' <<<"$out"; then
    fail "xt-tool-surface[$backend]: policy error present BUT a tool still executed — not fail-fast"
    return 1
  fi
  ok "xt-tool-surface[$backend]: -xt $excluded rejected up front (declared==actual upheld)"
  return 0
}

# xt_tool_surface_extension_honored <backend> <model>
#
# Positive control: excluding an EXTENSION tool is honored (pi-side), so the
# policy guard must NOT trip — the backend launches normally. Returns 0 when
# the guard stayed quiet, 1 when the policy error appeared.
xt_tool_surface_extension_honored() {
  local backend=$1 model=$2
  local out
  out=$(timeout 120 pi --mode json -p -e "$REPO_DIR" \
    --provider pi-shell-acp --model "$model" -xt entwurf \
    "reply with the single word ok" 2>&1) || true

  if grep -qF -- "cannot honor --exclude-tools" <<<"$out"; then
    fail "xt-tool-surface[$backend]: -xt entwurf wrongly tripped the guard (extension tools must be exempt)"
    tail -5 <<<"$out"
    return 1
  fi
  ok "xt-tool-surface[$backend]: -xt entwurf honored (extension exclusion not blocked)"
  return 0
}

# Run both xt-tool-surface checks for the claude backend and report the
# number of failed checks. Returns 0 only when every check passed.
xt_tool_surface() {
  local failc=0

  section "xt-tool-surface: claude backend (-xt bash)"
  xt_tool_surface_single claude claude-sonnet-4-6 bash || failc=$((failc + 1))
  # Claude-only floor (0.11.0): the codex and gemini -xt rows are dropped.

  section "xt-tool-surface: extension-tool exemption (positive control)"
  xt_tool_surface_extension_honored claude claude-sonnet-4-6 || failc=$((failc + 1))

  if [ "$failc" -gt 0 ]; then
    fail "xt-tool-surface: $failc check(s) failed"
    return 1
  fi
  ok "xt-tool-surface: claude fails fast on built-in -xt; extension exclusion honored"
  return 0
}
# # Design invariants (NEXT Step 1e + GPT-5.5 reviews): # - Claude-only floor (0.11.0): gemini (CLI deprecated) and codex live # tests were dropped from the gate; --allow-skip-gemini is accepted but # ignored (back-compat). Codex/Gemini stay supported surfaces verified by # the deterministic gates + on-demand smoke-codex / smoke-gemini. # - smoke-continuity is intentionally NOT here: smoke-entwurf-resume is its # superset (asserts acpSessionId identity + turn count + blank/invalidate # guards). smoke-continuity stays a dev quick-smoke. # - smoke-compaction-policy runs LIVE=1 (else backend-observation steps skip). # - xt-tool-surface asserts the ACP-backend exclude-tools policy: `-xt ` # is fail-fast rejected per backend (declared==actual), extension exclusion honored. # - Final release authorization is GLG's, not this script's: a green # run is necessary, and the operator closes the decision. release_gate() { local -a positional=() local a for a in "$@"; do case "$a" in --allow-skip-gemini) ;; # accepted-but-ignored: gemini removed from the claude-only floor (back-compat for existing scripts) *) positional+=("$a") ;; esac done local project_dir project_dir=$(normalize_project_dir "${positional[0]:-$PROJECT_DIR_DEFAULT}") # Absolute path to this script — survives the `cd "$project_dir"` below. The # live gates derive their pi session dir from $PWD (tmux `-c "$PWD"`, # PROJECT_DIR_DEFAULT, and the bare `pi -p` invocations that don't `cd` # themselves). Six of them (smoke-async-resume, check-bridge, # check-native-async, sentinel, session-messaging, smoke-compaction-policy) # plus xt-tool-surface take no project arg, so if release-gate runs from the # repo their sessions land in the repo's own session dir — polluting the very # tree we ship and breaking the "scratch full gate" evidence claim. Running # EVERY live gate with PWD=project_dir makes a single # `./run.sh release-gate ` invocation route all sessions to scratch # regardless of the operator's cwd. 
`-e "$REPO_DIR/..."` (extension load) and # every other path the gates touch are absolute, so the cd is safe. # # The two garden-native identity gates (smoke-session-id-name, # smoke-resident-garden-guard) also take no project arg but are exempt from # the repo-pollution concern by construction: the substrate smoke runs every # pi turn under its own os.tmpdir() agent dir + cwds (mkdtemp, cleaned up), # and the guard is wired here as the NEGATIVE path only — a 0-token fail-fast # that writes no session file at all. local self="$REPO_DIR/run.sh" gate() { ( cd "$project_dir" && "$@" ); } local pass=0 failc=0 skip=0 local -a results=() run_step() { local name="$1"; shift section "release-gate step: $name" if "$@"; then ok "$name: PASS" results+=("PASS $name"); pass=$((pass + 1)) else fail "$name: FAIL" results+=("FAIL $name"); failc=$((failc + 1)) fi } # BEHAVIOR lane (0.11.0, GLG+GPT+Opus): a SEPARATE advisory counter for # model-in-loop gates that probe whether the *model* autonomously selects the # MCP entwurf surface (vs. bypassing via Bash/Terminal/pi-CLI). These gates are # flaky by the model's nature (Claude Sonnet's MCP-vs-Bash choice is # non-deterministic on 0.79.4), so a single flake must NOT block the cut. They # are NEVER folded into `failc`/`pass` — exit authority below is `failc` only. # Honesty rails: (1) "non-blocking" is NOT "pass" — a BEHAVIOR-FAIL is surfaced # loudly in the summary with its artifact path, never buried; (2) S7 (Bash # bypass, sentinel-runner.sh) stays a hard FAIL *inside* this lane — a bypass is # never relabelled a pass; (3) the v1 tools themselves are proven by the # deterministic/programmatic must-pass gates above (smoke-async-resume, # smoke-entwurf-resume, check-native-async, check-bridge) — this lane is v1 # *behavior*, not v1 *function*. Residual bypass → 0.11.x usability lane. 
local behavior_pass=0 behavior_failc=0 local -a behavior_results=() run_behavior_step() { local name="$1"; shift section "release-gate BEHAVIOR step (advisory, non-blocking): $name" if "$@"; then ok "$name: BEHAVIOR-PASS" behavior_results+=("BEHAVIOR-PASS $name"); behavior_pass=$((behavior_pass + 1)) else warn "$name: BEHAVIOR-FAIL (advisory — model-in-loop signal; S7=Bash-bypass stays hard-fail here; NOT a cut blocker)" behavior_results+=("BEHAVIOR-FAIL $name"); behavior_failc=$((behavior_failc + 1)) fi } # 1. Static floor (deterministic; includes the two folded gates). section "release-gate step: static (pnpm check)" if (cd "$REPO_DIR" && pnpm check); then ok "static (pnpm check): PASS" results+=("PASS static (pnpm check)"); pass=$((pass + 1)) else fail "static (pnpm check): FAIL" results+=("FAIL static (pnpm check)"); failc=$((failc + 1)) fi # 2. (gemini-availability step removed — claude-only floor; gemini CLI is # deprecated, so the gate no longer asserts a three-backend claim.) # 3. Live per-invariant gates (each is a run.sh subcommand). Every one runs # with PWD=project_dir (via gate()) so cwd-derived pi session dirs land in # the scratch project, never the repo — see the note above. # # install-topology first: the deterministic resolver matrix # (check-package-source-routing) already ran inside `pnpm check` above; this # is its live counterpart, proving real pi children load git+npm package # bridge roots under --no-extensions (#29). It must pass before the Entwurf live # gates, which assume package-source routing resolves. # # Foundational garden-native identity gates run first (0.9.0, #28): the # substrate proof (Pi --session-id/--name through the bridge) and the # resident --entwurf-control guard (non-garden id → 0-token fail-fast, # PLUS the positive path: a garden id resident actually boots, gets a # control-tagged name, and is never entwurf-resumable). 
If the identity # foundation is broken, every Entwurf live gate below is meaningless, so # fail fast here. Negative path is 0-token; positive path is ~1 cheap turn; # the substrate smoke is a few cheap turns. run_step "smoke-session-id-name (3a substrate)" gate bash "$self" smoke-session-id-name # RGG split (0.11.0): the deterministic half (negative/id-safety + /gnew # zero-token live path) is release-blocking and stays here as a must-pass with # SMOKE_RGG_POSITIVE=0. The model-in-loop half (post-/gnew backend entwurf_self # identity turn [T3] + positive garden --session-id model turn) is gated behind # SMOKE_RGG_POSITIVE=1 and runs in the BEHAVIOR lane below — advisory, because # it depends on the backend child autonomously calling entwurf_self. run_step "smoke-resident-garden-guard (3c guard: negative/id-safety + /gnew 0-token, deterministic)" gate env SMOKE_RGG_POSITIVE=0 bash "$self" smoke-resident-garden-guard run_step "smoke-installed-entwurf-acp (#29)" gate bash "$self" smoke-installed-entwurf-acp run_step "smoke-all (claude-only floor)" gate bash "$self" smoke-all "$project_dir" run_step "smoke-async-resume" gate bash "$self" smoke-async-resume run_step "smoke-cancel" gate bash "$self" smoke-cancel "$project_dir" run_step "smoke-model-switch" gate bash "$self" smoke-model-switch "$project_dir" run_step "smoke-entwurf-resume" gate bash "$self" smoke-entwurf-resume "$project_dir" run_step "check-bridge" gate bash "$self" check-bridge # D4-c: the v2 dispatch substrate sentinel (5d-5). A SINGLE run (NOT backend-looped — it proves # production runEntwurfV2 deps + real pi control-socket RPC + real mailbox enqueue + v2 lock, not # per-backend model behavior). Placed right after check-bridge: the MCP/protocol substrate must be # green first so a matrix-live failure reads as "v2 transport/lock/enqueue", not bridge basics. 
# Opt-in LIVE: it spawns a real `pi --entwurf-control` (needs auth/model), so LIVE!=1 is an HONEST # SKIP (not a PASS) — an unattended release-gate stays runnable without faking coverage. Independent # of --allow-skip-gemini (now a no-op back-compat flag on the claude-only floor; this gates substrate auth). section "release-gate step: smoke-entwurf-v2-matrix-live" if [ "${LIVE:-}" = "1" ]; then if gate env LIVE=1 bash "$self" smoke-entwurf-v2-matrix-live; then ok "smoke-entwurf-v2-matrix-live: PASS" results+=("PASS smoke-entwurf-v2-matrix-live"); pass=$((pass + 1)) else fail "smoke-entwurf-v2-matrix-live: FAIL" results+=("FAIL smoke-entwurf-v2-matrix-live"); failc=$((failc + 1)) fi else warn "smoke-entwurf-v2-matrix-live: LIVE!=1 — skipped (v2 substrate sentinel, opt-in: needs auth/model)" results+=("SKIP smoke-entwurf-v2-matrix-live (LIVE!=1)"); skip=$((skip + 1)) fi # 0.11.0 (A) acceptance: the FULL spawn-bg resident lifecycle — a real `pi` resume child stands # its control socket up, does a model turn, lock released ×1. Placed right after matrix-live (the # transport sentinel): a spawn-resume failure then reads as "resume/resident lifecycle", not # transport basics. Same opt-in LIVE rule — LIVE!=1 is an HONEST SKIP (model-in-loop, needs # auth/model). NOTE: a 0.11.0 tag REQUIRES `LIVE=1 ./run.sh release-gate` (or this step direct) # to PASS — the SKIP is CI safety, never acceptance. 
section "release-gate step: smoke-entwurf-v2-spawn-resume-live" if [ "${LIVE:-}" = "1" ]; then if gate env LIVE=1 bash "$self" smoke-entwurf-v2-spawn-resume-live; then ok "smoke-entwurf-v2-spawn-resume-live: PASS" results+=("PASS smoke-entwurf-v2-spawn-resume-live"); pass=$((pass + 1)) else fail "smoke-entwurf-v2-spawn-resume-live: FAIL" results+=("FAIL smoke-entwurf-v2-spawn-resume-live"); failc=$((failc + 1)) fi else warn "smoke-entwurf-v2-spawn-resume-live: LIVE!=1 — skipped (0.11.0 A acceptance, opt-in: needs auth/model)" results+=("SKIP smoke-entwurf-v2-spawn-resume-live (LIVE!=1)"); skip=$((skip + 1)) fi run_step "check-native-async" gate bash "$self" check-native-async run_step "session-messaging" gate bash "$self" session-messaging run_step "smoke-compaction-policy (LIVE)" gate env LIVE=1 bash "$self" smoke-compaction-policy run_step "verify-resume" gate bash "$self" verify-resume "$project_dir" # 4. -xt tool-surface truthfulness — now a real fail-fast policy gate. run_step "xt-tool-surface (exclude-tools policy)" gate xt_tool_surface # 4b. BEHAVIOR lane (advisory, non-blocking). Model-in-loop gates that probe # whether the model AUTONOMOUSLY drives the MCP entwurf surface. These never # touch `failc`; the cut is decided by the MUST tier above. Run last so the # summary reads MUST-then-BEHAVIOR. S7 (Bash-bypass) inside sentinel stays a # hard FAIL here — surfaced, never relabelled pass. run_behavior_step "sentinel (6-cell diagonal: autonomous MCP entwurf tool-selection)" gate bash "$self" sentinel # SMOKE_RGG_POSITIVE=1 re-runs the FULL guard with its positives enabled (not a # positive-only mode) — the deterministic paths run again here too, but only the # two model-in-loop turns (post-/gnew entwurf_self identity [T3] + positive # garden model turn) are the reason this run is advisory; the deterministic half # is already release-blocking via the POSITIVE=0 must-pass step above. 
run_behavior_step "smoke-resident-garden-guard (positives enabled: post-/gnew entwurf_self identity turn [T3] + positive garden model turn)" gate env SMOKE_RGG_POSITIVE=1 bash "$self" smoke-resident-garden-guard # 5. Summary — two tiers. MUST is release-blocking and owns the exit code; the # word "green" is reserved for the MUST tier. BEHAVIOR is advisory and is # surfaced (with per-step artifact paths above) but never blocks the cut. section "release-gate summary" echo " MUST (release-blocking):" printf ' %s\n' "${results[@]}" echo " MUST: PASS=$pass FAIL=$failc SKIP=$skip" echo "" echo " BEHAVIOR (advisory, non-blocking — model-in-loop autonomous MCP tool-selection):" if [ "${#behavior_results[@]}" -gt 0 ]; then printf ' %s\n' "${behavior_results[@]}" fi echo " BEHAVIOR: PASS=$behavior_pass FAIL=$behavior_failc" echo " (per-step artifact paths are printed in each step's output above)" if [ "$behavior_failc" -gt 0 ]; then echo "" warn "BEHAVIOR FAIL present ($behavior_failc) — advisory model-in-loop signal (e.g. S7 Bash-bypass / entwurf_self not autonomously called). Tracked, NOT a cut blocker; see the 0.11.x usability lane." fi # Exit authority = MUST `failc` ONLY. A BEHAVIOR fail never blocks the cut. if [ "$failc" -gt 0 ]; then echo "" fail "release-gate MUST NOT green — $failc release-blocking step(s) failed. Current release is NOT releasable." echo " A green MUST gate is necessary but not sufficient; GLG closes the call." return 1 fi if [ "$behavior_failc" -gt 0 ]; then ok "release-gate MUST PASS (all release-blocking steps green); BEHAVIOR FAIL present (advisory). Necessary condition met — GLG authorizes the cut." else ok "release-gate MUST PASS + BEHAVIOR PASS — all green. Necessary condition met — GLG authorizes the cut." 
fi
  return 0
}

# ---------------------------------------------------------------------------
# Command dispatch.
#
# $1 selects the subcommand; an empty/absent $1 (or -h/--help/help) prints
# usage, and an unrecognised one prints usage after an error and exits 1.
# Arms that take a project directory use TARGET_PROJECT_DIR; arms that forward
# caller arguments first `shift` the subcommand name off so that "$@" holds
# only the arguments that followed it.
# ---------------------------------------------------------------------------
cmd=${1:-}
case "$cmd" in
  setup)
    setup_all "$TARGET_PROJECT_DIR"
    ;;
  release-gate)
    shift || true
    release_gate "$@"
    ;;
  xt-tool-surface)
    xt_tool_surface
    ;;
  smoke)
    smoke_test "$TARGET_PROJECT_DIR" claude
    ;;
  smoke-claude)
    smoke_test "$TARGET_PROJECT_DIR" claude
    ;;
  smoke-codex)
    smoke_test "$TARGET_PROJECT_DIR" codex
    ;;
  smoke-gemini)
    smoke_test "$TARGET_PROJECT_DIR" gemini
    ;;
  smoke-all)
    smoke_all "$TARGET_PROJECT_DIR"
    ;;
  smoke-continuity)
    smoke_continuity "$TARGET_PROJECT_DIR"
    ;;
  smoke-cancel)
    smoke_cancel "$TARGET_PROJECT_DIR"
    ;;
  smoke-model-switch)
    smoke_model_switch "$TARGET_PROJECT_DIR"
    ;;
  smoke-entwurf-resume)
    smoke_entwurf_resume "$TARGET_PROJECT_DIR"
    ;;
  check-bridge)
    check_bridge
    ;;
  check-native-async)
    check_native_async
    ;;
  sentinel)
    shift || true
    sentinel_run "$@"
    ;;
  session-messaging)
    shift || true
    session_messaging_run "$@"
    ;;
  smoke-compaction-policy)
    shift || true
    smoke_compaction_policy "$@"
    ;;
  check-mcp)
    check_mcp
    ;;
  check-model-lock)
    check_model_lock
    ;;
  check-shell-quote)
    check_shell_quote
    ;;
  check-entwurf-session-identity)
    check_entwurf_session_identity
    ;;
  check-meta-session)
    check_meta_session
    ;;
  check-meta-record-v2)
    check_meta_record_v2
    ;;
  check-mailbox-receipt-state)
    check_mailbox_receipt_state
    ;;
  check-entwurf-capabilities)
    check_entwurf_capabilities
    ;;
  check-meta-dual-read)
    check_meta_dual_read
    ;;
  check-meta-mailbox-state-write)
    check_meta_mailbox_state_write
    ;;
  check-meta-receiver-marker)
    check_meta_receiver_marker
    ;;
  check-meta-migration)
    check_meta_migration
    ;;
  check-meta-dual-consumers)
    check_meta_dual_consumers
    ;;
  check-meta-capability-source)
    check_meta_capability_source
    ;;
  check-socket-probe)
    check_socket_probe
    ;;
  check-project-trust-handler)
    check_project_trust_handler
    ;;
  check-entwurf-v2-contract)
    check_entwurf_v2_contract
    ;;
  check-entwurf-v2-lock)
    check_entwurf_v2_lock
    ;;
  check-entwurf-v2-decider)
    check_entwurf_v2_decider
    ;;
  check-entwurf-v2-matrix)
    check_entwurf_v2_matrix
    ;;
  check-entwurf-v2-release)
    check_entwurf_v2_release
    ;;
  check-entwurf-v2-send)
    check_entwurf_v2_send
    ;;
  check-entwurf-v2-send-fallback)
    check_entwurf_v2_send_fallback
    ;;
  check-entwurf-v2-mailbox)
    check_entwurf_v2_mailbox
    ;;
  check-entwurf-v2-runner)
    check_entwurf_v2_runner
    ;;
  check-entwurf-control-rpc)
    check_entwurf_control_rpc
    ;;
  check-entwurf-v2-production)
    check_entwurf_v2_production
    ;;
  check-entwurf-v2-surface)
    check_entwurf_v2_surface
    ;;
  check-pi-tools-bridge-boot)
    check_pi_tools_bridge_boot
    ;;
  check-entwurf-v2-spawn)
    check_entwurf_v2_spawn
    ;;
  check-entwurf-resume-args)
    check_entwurf_resume_args
    ;;
  check-entwurf-v2-spawn-production)
    check_entwurf_v2_spawn_production
    ;;
  check-entwurf-v2-only)
    check_entwurf_v2_only
    ;;
  smoke-entwurf-v2-spawn-live)
    smoke_entwurf_v2_spawn_live
    ;;
  smoke-entwurf-v2-spawn-resume-live)
    smoke_entwurf_v2_spawn_resume_live
    ;;
  smoke-entwurf-v2-matrix-live)
    smoke_entwurf_v2_matrix_live
    ;;
  check-entwurf-facts)
    check_entwurf_facts
    ;;
  check-socket-discovery)
    check_socket_discovery
    ;;
  check-meta-listing)
    check_meta_listing
    ;;
  check-entwurf-fact-provider)
    check_entwurf_fact_provider
    ;;
  check-entwurf-peers-surface)
    check_entwurf_peers_surface
    ;;
  check-entwurf-send-mailbox-fallback)
    check_entwurf_send_mailbox_fallback
    ;;
  check-entwurf-self-address)
    check_entwurf_self_address
    ;;
  check-entwurf-deliverability)
    check_entwurf_deliverability
    ;;
  check-entwurf-mailbox-guard)
    check_entwurf_mailbox_guard
    ;;
  new-session-id)
    # Garden launcher helper: print one fresh garden sessionId (SSOT:
    # generateSessionId). Used by the operator alias to make every
    # --entwurf-control session a garden citizen. Stdout = the id only.
    (cd "$REPO_DIR" && node --experimental-strip-types scripts/new-session-id.ts)
    ;;
  smoke-resident-garden-guard)
    # LIVE negative (0 tokens) + opt-in positive gate for the resident
    # --entwurf-control garden-native enforcement. NEGATIVE: raw uuid session
    # must blow up before any turn. POSITIVE (SMOKE_RGG_POSITIVE=1): garden id
    # passes + control-tagged name.
    (cd "$REPO_DIR" && bash scripts/smoke-resident-garden-guard.sh)
    ;;
  smoke-meta-async-drift)
    # 1.0.0 meta-bridge step 1 (#30): drift sentinel + capability gate. DEFAULT is
    # deterministic/offline — version pins (Claude/codex/agy) + Claude-binary
    # undocumented-behavior marker cross-validation; SCREAMS on drift. LIVE=1 adds
    # the plugin SessionStart watch-arm probe (spawns one metered claude -p).
    (cd "$REPO_DIR" && bash scripts/smoke-meta-async-drift.sh)
    ;;
  smoke-meta-honesty)
    # 1.0.0 meta-bridge HONESTY regression gate (#30 bbot release blockers): the
    # doorbell must count EVERY queued message honestly (blocker #1), and the
    # runtime hook must log a silent registration miss as ` ERROR ` for the doctor
    # to catch while staying best-effort (blocker #2). Offline + deterministic (no
    # claude binary; deps bash+node+python3), so unlike the drift sentinel it is
    # CI/pnpm-check safe.
    (cd "$REPO_DIR" && bash scripts/smoke-meta-honesty.sh)
    ;;
  smoke-meta-prune)
    # 1.0.0 meta-bridge Phase 4 regression gate: synthetic store covering every
    # class (keep/orphan/stale/duplicate/corrupt/drift) proves meta-bridge-prune
    # classifies correctly, exits 0, and deletes NOTHING (listing-only invariant).
    # Offline/deterministic (deps: bash+node).
    (cd "$REPO_DIR" && bash scripts/smoke-meta-prune.sh)
    ;;
  smoke-meta-sender-identity)
    # 0.10.0 meta-bridge blocker: deterministic E2E for native SENDER identity.
    # A SessionStart-written sender marker (parent-pid keyed; PI_META_SENDER_MARKER
    # overrides for the test) promotes an anonymous user-scope MCP send into a
    # REPLYABLE meta-session addressed by garden-id, and REQUIRE_META_SENDER refuses
    # anonymous sends. A↔B round-trip + reject, zero Claude turns. Offline/hermetic
    # (deps: bash+node+python3).
    (cd "$REPO_DIR" && bash scripts/smoke-meta-sender-identity.sh)
    ;;
  smoke-meta-mailbox)
    # 0.10.0 meta-bridge defense C: deterministic E2E for the mailbox messaging
    # axis. entwurf_send fallback (empty PI_ENTWURF_DIR forces no-socket) → meta
    # mailbox enqueue → entwurf_inbox_read drain + lastReadAt receipt, for both a
    # replyable and an external sender, with ZERO Claude turns. Offline/hermetic
    # (deps: bash+node+python3).
    (cd "$REPO_DIR" && bash scripts/smoke-meta-mailbox.sh)
    ;;
  smoke-meta-keyset-guard)
    # 0.10.0 meta-bridge regression gate: the PREVENTIVE keyset guard
    # (check-keyset-overlap) + managed-keys SSOT. Synthetic fragments prove a
    # disjoint consumer passes and exact/array/parent-child collisions fail loud.
    # Offline/hermetic (deps: bash+python3).
    (cd "$REPO_DIR" && bash scripts/smoke-meta-keyset-guard.sh)
    ;;
  smoke-meta-install-state)
    # 1.0.0 meta-bridge Phase 2 regression gate: state file captures pre-install
    # values, install/uninstall touches only the managed keyset, uninstall refuses
    # to guess without state, and the doctor store scan fails on corrupt/
    # duplicate/drift records. Offline + deterministic (deps bash+node+python3).
    (cd "$REPO_DIR" && bash scripts/smoke-meta-install-state.sh)
    ;;
  install-meta-bridge)
    # 1.0.0 meta-bridge step 5: operator-grade GLOBAL install of the garden-native
    # receive plugin. Assembles a self-contained, node-path-baked copy under
    # pi/meta-bridge/.assembled and runs marketplace add + install --scope user, so
    # every native Claude Code session auto-loads it (no manual --plugin-dir).
    # Idempotent; Linux/macOS only (Windows fail-fast).
    #
    # Drop the subcommand name first so the script receives only the caller's
    # extra arguments (same convention as meta-bridge-prune / check-keyset-overlap
    # in this dispatch); without the shift "$@" would start with
    # "install-meta-bridge".
    # NOTE(review): confirm meta-bridge-install.sh does not index args from $2.
    shift || true
    (cd "$REPO_DIR" && bash scripts/meta-bridge-install.sh "$@")
    ;;
  uninstall-meta-bridge)
    # 1.0.0 meta-bridge Phase 2: honest inverse of install-meta-bridge. Uses the
    # install-state file to restore original scalar/map values and remove only the
    # permission-array entries pi-shell-acp added; without state it refuses to guess.
    #
    # Shift off the subcommand name so only caller-supplied arguments are forwarded.
    # NOTE(review): confirm meta-bridge-uninstall.sh does not index args from $2.
    shift || true
    (cd "$REPO_DIR" && bash scripts/meta-bridge-uninstall.sh "$@")
    ;;
  doctor-meta-bridge)
    # 1.0.0 meta-bridge Phase 2: the FAIL-LOUD surface. Proves toolchain (incl.
    # python3), stateful managed config, baked node path (NixOS store-churn guard),
    # global plugin install, USER MCP reach, meta-record store integrity, hook log
    # no-ERROR, and actual SessionStart creation evidence. A plugin present with
    # zero claude-code meta-records is a SILENT MISS -> non-zero exit.
    #
    # Shift off the subcommand name so only caller-supplied arguments are forwarded.
    # NOTE(review): confirm meta-bridge-doctor.sh does not index args from $2.
    shift || true
    (cd "$REPO_DIR" && bash scripts/meta-bridge-doctor.sh "$@")
    ;;
  meta-bridge-prune)
    # 1.0.0 meta-bridge Phase 4: LISTING-ONLY janitor for the meta-session store.
    # doctor reds on corrupt/duplicate/drift but intentionally does NOT fail on
    # transcript-gone records, so a green store can silently bloat with abandoned
    # records. This surface CLASSIFIES (orphan/stale/ambiguous/keep) and prints
    # the exact manual rm commands. It deletes NOTHING — no --apply in 1.0.0;
    # ambiguous (corrupt/duplicate/drift) stays manual-only (operator picks the
    # surviving authority). Default store = defaultMetaSessionsDir(); pass [dir]
    # + [--ttl-days N] to override.
    shift || true
    (cd "$REPO_DIR" && node --experimental-strip-types scripts/meta-bridge-prune.ts "$@")
    ;;
  meta-bridge-managed-keys)
    # 0.10.0 meta-bridge: emit the SSOT of settings.json/~/.claude.json keys that
    # pi-shell-acp's install OWNS. Consumers (agent-config fragment, future
    # harnesses) read this to set only their OWN keys — the keyset-owner invariant.
    (cd "$REPO_DIR" && python3 scripts/meta-bridge-state.py managed-keys)
    ;;
  check-keyset-overlap)
    # 0.10.0 meta-bridge: PREVENTIVE half of the keyset guard. Fails loud if a
    # consumer fragment sets a key pi-shell-acp owns (exact or ancestor/descendant).
    # Cross-repo + non-hermetic (fragment path is an arg) → NOT in pnpm check;
    # its own logic is regression-tested hermetically by smoke-meta-keyset-guard.
    shift || true
    (cd "$REPO_DIR" && python3 scripts/check-keyset-overlap.py "$@")
    ;;
  check-plugin-empty-final-recovery)
    check_plugin_empty_final_recovery
    ;;
  check-plugin-prompt-format)
    check_plugin_prompt_format
    ;;
  check-async-resume-gate)
    check_async_resume_gate
    ;;
  check-package-source-routing)
    check_package_source_routing
    ;;
  smoke-installed-entwurf-acp)
    smoke_installed_entwurf_acp
    ;;
  smoke-session-id-name)
    smoke_session_id_name
    ;;
  smoke-async-resume)
    # `|| true` only mirrors the other forwarding arms; $# >= 1 here, so the
    # shift itself cannot fail.
    shift || true
    smoke_async_resume "$@"
    ;;
  check-backends)
    check_backends
    ;;
  check-registration)
    check_registration
    ;;
  check-models)
    check_models
    ;;
  check-dep-versions)
    check_dep_versions
    ;;
  check-pi-import-surface)
    check_pi_import_surface
    ;;
  check-pi-runtime-version)
    check_pi_runtime_version
    ;;
  check-pi-preflight)
    check_pi_preflight
    ;;
  check-auth-boundary)
    check_auth_boundary
    ;;
  check-sdk-surface)
    check_sdk_surface
    ;;
  check-pack)
    check_pack
    ;;
  check-pack-install)
    check_pack_install
    ;;
  check-claude-sessions)
    check_claude_sessions "$TARGET_PROJECT_DIR"
    ;;
  verify-resume)
    verify_resume "$TARGET_PROJECT_DIR"
    ;;
  verify-transcript-poison)
    # Deterministic smoke for issue #12: no API call, no bridge spawn.
    # Exercises isTranscriptPoisonError classifier + closeBridgeSession
    # persisted-record invalidation side-effect against the canonical
    # cache path. Live runtime repro (poisoned JSONL + claude-agent-acp)
    # is documented as a manual recipe in VERIFY.md §12.6.
    node --experimental-strip-types "$REPO_DIR/scripts/transcript-poison-smoke.ts"
    ;;
  sync-auth)
    sync_auth
    ;;
  install)
    install_local_package "$TARGET_PROJECT_DIR"
    ;;
  setup:links)
    # Repair / refresh ~/.pi/agent/entwurf-targets.json without re-running
    # the full setup flow. Pass --force to overwrite a stale operator file
    # or a wrong symlink (a backup is taken for regular files).
    ensure_agent_dir_symlinks "${2:-}"
    ;;
  remove)
    remove_local_package "$TARGET_PROJECT_DIR"
    ;;
  -h|--help|help|"")
    usage
    ;;
  *)
    # Unrecognised subcommand: report it on stderr, show usage, fail.
    echo "Unknown command: $cmd" >&2
    usage
    exit 1
    ;;
esac