/** * backtrack.ts — Backward causal chain analysis on the commit log. * * Implements **backward program slicing** (Weiser 1984, thin-slice variant): * given a starting execution step, walk backwards through read→write * dependencies to build the causal DAG that produced the data at that step. * * ## Algorithm * * BFS on the implicit dependency graph where edges run from reader → writer. * * 1. Locate startId in commitLog → root node * 2. Get keysRead for root via `getKeysRead` callback * 3. For each key read, find who last wrote it before this step → parent commit * 4. Create parent CausalNode, link to root.parents * 5. Enqueue parent. Repeat until queue empty or limits hit. * * Output is a **DAG** (not a linked list): a stage reading `creditScore` AND `dti` * from different writers has two parents. * * ## Staged Optimization * * Two writer-lookup strategies, chosen automatically by commit log size: * * | Strategy | When | Complexity per lookup | * |----------|------|----------------------| * | Linear scan | N ≤ 256 | O(N) — simple backward scan | * | Reverse index | N > 256 | O(K log N) — prebuilt key→[indices], binary search | * * The threshold (256) is chosen so the O(N) build cost of the reverse index * is amortized over the BFS traversal. Below 256, linear scan wins because * there's no index build overhead. The consumer never sees this — `causalChain()` * picks the right strategy internally (like a query optimizer choosing between * sequential scan vs index scan based on table size). * * ## Complexity * * - **Small logs (N ≤ 256):** O(V × K × N) total. V=visited, K=avg keys/node. * - **Large logs (N > 256):** O(N × U) index build + O(V × K × log N) lookups. * U = unique keys. Amortized over all BFS hops. * * ## References * * - Weiser, M. (1984). "Program Slicing." IEEE TSE. * - Sridharan, M. et al. (2007). "Thin Slicing." PLDI. * * @example * ```typescript * import { causalChain, flattenCausalDAG, formatCausalChain } from 'footprintjs/trace'; * * const dag = causalChain(commitLog, 'decide#2', (id) => recorder.getKeysRead(id)); * const flat = flattenCausalDAG(dag); // BFS-ordered flat list * console.log(formatCausalChain(dag)); // human-readable * ``` */ import type { CommitBundle, UntrackedSource } from './types.js'; /** A single node in the causal DAG. */ export interface CausalNode { /** Unique execution step identifier. */ runtimeStageId: string; /** Stable stage identifier. */ stageId: string; /** Human-readable stage name. */ stageName: string; /** Keys this stage wrote (from its CommitBundle.trace). */ keysWritten: string[]; /** The key whose read→write dependency linked this node to its child. Empty for the root. */ linkedBy: string; /** BFS depth from the starting node (0 = start). */ depth: number; /** * Parent nodes — stages this node depends on. DAG: multiple parents * possible. KEPT for compatibility; with the `controlDeps` option * (RFC-003 D3) governing deciders appear here too. For typed/keyed/ * weighted detail use {@link parentEdges}. */ parents: CausalNode[]; /** * RFC-003 D3 — one edge per dependency LINK (not per parent): a node * reading two keys from the same writer has ONE entry in {@link parents} * but TWO `'data'` edges here. Control dependencies (via the * `controlDeps` option) add a `'control'` edge to the governing decider. */ parentEdges: CausalEdge[]; /** * RFC-003 D2 honesty marker — stamped from the stage's * `CommitBundle.untrackedSources`. Present when this stage ALSO consumed * untracked read paths (`args` / `env` / unshadowed `silent` reads): the * backward slice through this node may be incomplete, because those reads * produce no read→write edge to follow. `formatCausalChain` renders this * as a `⚠ … slice may be incomplete here` line. Absent when the stage's * reads were fully tracked. */ incompleteSources?: ReadonlyArray; /** * RFC-003 D4 truncation visibility — set on the ROOT node only, and only * when a limit actually cut the slice: `byDepth` when a node at the * `maxDepth` horizon still had edges to expand, `byNodes` when the * `maxNodes` budget blocked creating a discovered parent. Absent when the * slice is complete. Dev mode (`enableDevMode()`) also warns on * truncation, and `formatCausalChain` appends a `⚠ slice truncated …` * line — a consumer must never mistake a truncated slice for a full one. */ truncated?: { byDepth: boolean; byNodes: boolean; }; } /** * RFC-003 D3 — a typed dependency edge from a child node to one parent. * * - `kind: 'data'` — read→write dependency; `key` is the state key. * - `kind: 'control'` — the parent is the decider/selector whose decision * allowed the child to run; `key` is the decide() rule label when present. * * `weight` defaults to 1.0; the `weigh` hook (RFC-003 D4) can override it. * The engine itself NEVER computes weights — semantics belong to the * consumer-injected weigher. */ export interface CausalEdge { parent: CausalNode; kind: 'data' | 'control'; key?: string; weight: number; } /** * RFC-003 D3 — a control dependency resolved for one execution step: * which decider/selector execution allowed this stage to run. */ export interface ControlDependency { /** runtimeStageId of the governing decider/selector execution step. */ deciderId: string; /** The decide() rule label for the chosen branch, when present. */ label?: string; } /** * RFC-003 D3 — callback resolving the governing decider for an execution * step. Return `undefined` when the step is not control-dependent on any * recorded decision. Build one with `controlDepRecorder()` from * `footprintjs/trace`, or supply your own. */ export type ControlDepLookup = (runtimeStageId: string) => ControlDependency | undefined; /** * RFC-003 D4 — consumer-injected edge weigher. Called once per created * edge; return a weight, or `undefined` to keep the default 1.0. The * ENGINE never computes weights (zero new dependencies — semantics like * embedding similarity or FDL influence belong to downstream libraries, * the same plug-in pattern as `NarrativeFormatter`). Weights render in * `formatCausalChain` as `← via systemPrompt (0.18)`. */ export type EdgeWeigher = (child: CausalNode, parent: CausalNode, key: string | undefined, kind: 'data' | 'control') => number | undefined; /** Options for causalChain(). */ export interface CausalChainOptions { /** Maximum BFS depth (default: 20). Prevents runaway traversal. */ maxDepth?: number; /** Maximum total nodes to visit (default: 100). Hard cap for safety. */ maxNodes?: number; /** * RFC-003 D3 — control-dependence lookup. When provided, expanding a node * ALSO links a `'control'` edge to its governing decider (labeled by the * decide() rule label when present); the decider node then expands * normally through its own data reads, so chains like * `status ← [control] ClassifyRisk ← [data: creditScore] PullBureau` * resolve end-to-end. Without this option behavior is unchanged. */ controlDeps?: ControlDepLookup; /** * RFC-003 D4 — edge weigher. Stamps `CausalEdge.weight` at edge creation; * `undefined` (or no weigher) → 1.0. See {@link EdgeWeigher}. */ weigh?: EdgeWeigher; } /** * Callback that returns the keys a stage read during execution. * The backtracker calls this for each visited node to determine * which read→write edges to follow. * * Implementors: QualityRecorder tracks keysRead per step, * or build a Map from ScopeRecorder.onRead events. */ export type KeysReadLookup = (runtimeStageId: string) => string[]; /** * Build the causal DAG rooted at `startId` by walking backwards * through read→write dependencies in the commit log. * * Automatically selects the optimal writer lookup strategy: * - Linear scan for small logs (≤ 256 commits) * - Reverse index with binary search for large logs (> 256 commits) * * Produces a DAG (not a tree): if two children both read from the same * parent, the parent node is shared (deduped by runtimeStageId). * * @param commitLog Ordered commit bundles from executor.getSnapshot().commitLog * @param startId runtimeStageId to start backtracking from * @param getKeysRead Callback returning keys read by a given execution step * @param options Depth and node limits * @returns Root CausalNode with .parents forming the DAG, or undefined if startId not found */ export declare function causalChain(commitLog: CommitBundle[], startId: string, getKeysRead: KeysReadLookup, options?: CausalChainOptions): CausalNode | undefined; /** * Flatten the causal DAG into a BFS-ordered list of nodes. * Each node appears exactly once (first occurrence by BFS order). * Useful for linear display or iteration. */ export declare function flattenCausalDAG(root: CausalNode): CausalNode[]; /** * Format a causal DAG as human-readable indented text. * Shows the dependency chain with depth indentation and linked-by keys. * * RFC-003 D2: nodes that consumed untracked read paths render an extra * `⚠ also consumed … — slice may be incomplete here` line, so a consumer * (human or LLM) debugging from the slice is TOLD when it is incomplete. * * RFC-003 D3: control edges render as `← [control: ]` * (label omitted when the decision carried none). Data rendering is * byte-identical to the pre-D3 output — `← via ` from the node's * discovery-time `linkedBy`. * * RFC-003 D4: edge weights from the `weigh` hook render as a suffix — * `← via systemPrompt (0.18)` — only when ≠ 1.0, so unweighted output is * unchanged. A truncated slice (root.truncated) appends a final * `⚠ slice truncated …` line. */ export declare function formatCausalChain(root: CausalNode): string; /** @internal Exposed for testing the strategy selection. */ export declare const _REVERSE_INDEX_THRESHOLD = 256;