import { type Assignment, type Expression, type ParameterDeclaration, type RecordExpr, type Value } from './program';
import { type AbortSignal, type AnalysisBudget } from './program-stats-support';
import { type SymDist, type SymValue } from './sym-dist';

/**
 * Seven numeric summary points keyed `p5` … `p95`.
 * NOTE(review): presumably the 5th–95th percentile values of a numeric
 * distribution — confirm against the code that fills this in.
 */
export interface Percentiles {
  p5: number;
  p10: number;
  p25: number;
  p50: number;
  p75: number;
  p90: number;
  p95: number;
}

/**
 * Summary statistics for a numeric aggregate. Within this file it is used as
 * the optional `aggregate` of the `type: 'array'` variant of
 * {@link FieldStats}. Compared with the `'number'` variant it adds `count`
 * and has no variance / mode / skewness / kurtosis fields.
 */
export interface NumberAggregateStats {
  mean: number;
  stddev: number;
  min: number;
  max: number;
  distribution: Map;
  cdf: Map;
  percentiles: Percentiles;
  count: number;
}

/**
 * One descriptor per joint dimension, in canonical order. For records the
 * `name` is the field name; for arrays it is the stringified position
 * index (`"0"`, `"1"`, …).
 */
export interface JointDimension {
  name: string;
}

/**
 * A single joint outcome: one decoded value per dimension (aligned with
 * `JointDistribution.dimensions` by index) plus its probability mass `p`.
 * Numbers stay numbers and nested structure stays structured — values are
 * never stringified.
 */
export interface JointOutcome {
  values: Value[];
  p: number;
}

/**
 * Structured form of a record / array joint distribution. Carries decoded
 * tuples keyed by the underlying values, so consumers never have to
 * `JSON.parse` a map key to recover dimension values, and numeric ordering /
 * binning survive. Populated wherever the legacy `joint: Map`
 * is populated; `truncated` mirrors `jointTruncated` (when set, `outcomes` is
 * empty but `dimensions` is still complete).
 */
export interface JointDistribution {
  dimensions: ReadonlyArray;
  outcomes: ReadonlyArray;
  truncated?: boolean;
}

/**
 * Statistics for one analyzed value, as a union discriminated by `type`:
 *
 * - `'number'`: moments, mode, range, `distribution` / `cdf` maps and
 *   {@link Percentiles}.
 * - `'boolean'`: `pTrue`.
 * - `'string'`: `frequencies`, with optional `standardErrors`.
 * - `'array'`: per-element stats, with an optional numeric `aggregate` and
 *   optional joint data (`joint`, `jointTruncated`, `jointStructured`).
 * - `'record'`: per-field stats plus the same optional joint data.
 * - `'discriminated'`: a list of {@link DiscriminatedVariant}s, discriminated
 *   on `'kind'` or `'shape'`.
 * - `'partial-number'`: the same fields as `'number'` plus `undefinedMass`.
 *   NOTE(review): presumably the probability mass on undefined outcomes —
 *   confirm.
 * - `'undefined'` / `'mixed'`: carry no payload.
 *
 * `standardError` / `standardErrors` are optional wherever they appear.
 * NOTE(review): presumably set only for sampled (Monte Carlo) results —
 * confirm.
 */
export type FieldStats = {
  type: 'number';
  mean: number;
  stddev: number;
  variance: number;
  mode: number[];
  min: number;
  max: number;
  distribution: Map;
  cdf: Map;
  percentiles: Percentiles;
  skewness: number;
  kurtosis: number;
  standardError?: number;
} | {
  type: 'boolean';
  pTrue: number;
  standardError?: number;
} | {
  type: 'string';
  frequencies: Map;
  standardErrors?: Map;
} | {
  type: 'array';
  elements: FieldStats[];
  aggregate?: NumberAggregateStats;
  joint?: Map;
  jointTruncated?: boolean;
  jointStructured?: JointDistribution;
} | {
  type: 'record';
  fields: Record;
  joint?: Map;
  jointTruncated?: boolean;
  jointStructured?: JointDistribution;
} | {
  type: 'discriminated';
  discriminator: 'kind' | 'shape';
  variants: DiscriminatedVariant[];
} | {
  type: 'partial-number';
  undefinedMass: number;
  mean: number;
  stddev: number;
  variance: number;
  mode: number[];
  min: number;
  max: number;
  distribution: Map;
  cdf: Map;
  percentiles: Percentiles;
  skewness: number;
  kurtosis: number;
  standardError?: number;
} | {
  type: 'undefined';
} | {
  type: 'mixed';
};

/**
 * One variant of a `type: 'discriminated'` {@link FieldStats}: its `tag`,
 * its `probability` (with optional `standardError`), its `keys`, and its
 * per-field stats in `fields`.
 */
export interface DiscriminatedVariant {
  tag: string;
  probability: number;
  standardError?: number;
  keys: string[];
  fields: Record;
}

/** The three analysis tiers a program can be classified into. */
export type Tier = 'constant' | 'exact' | 'monte-carlo';

/**
 * The tier a completed analysis ran under, or a terminal outcome the caller
 * asked for via a budget. `'budget-exceeded'` is produced only when a
 * caller-supplied `maxAnalysisMs` / `maxAnalysisSteps` bound was hit (see
 * {@link AnalyzeOptions}); `classify` and `explainTier` only ever return the
 * three {@link Tier} values. `'aborted'` is reserved — an aborted `signal`
 * currently throws `AbortError` rather than producing a result.
 */
export type ResultTier = Tier | 'budget-exceeded' | 'aborted';

/**
 * How a result was produced: the {@link ResultTier} plus optional `trials`
 * and `converged`.
 * NOTE(review): the optional fields are presumably only set for sampled
 * runs — confirm.
 */
export interface AnalysisStrategy {
  tier: ResultTier;
  trials?: number;
  converged?: boolean;
}

/**
 * Why the exact-distribution path was abandoned for a program the classifier
 * had marked exact. Recorded only for *unexpected* failures — a thrown error
 * the exact evaluator did not anticipate (as opposed to a deliberate, typed
 * decline). Surfaced through {@link AnalyzeDiagnostics.fallbackReason} so an
 * accuracy regression in the exact engine can never ship as a silent,
 * reason-less downgrade to Monte Carlo.
 */
export interface ExactFailure {
  /** Short human-readable description (typically the thrown error's message). */
  reason: string;
  /** The original thrown value, for callers that want the stack/type. */
  cause?: unknown;
}

/**
 * Timing and fallback diagnostics; a required member of every
 * {@link AnalyzeResult}.
 */
export interface AnalyzeDiagnostics {
  classifyTimeMs: number;
  analyzeTimeMs: number;
  jointSizeMax?: number;
  fellBackToMC: boolean;
  /**
   * Set when `fellBackToMC` is true *because the exact path threw an
   * unexpected error* (not merely because the program is out of exact scope).
   * Absent for ordinary, expected fallbacks. See {@link ExactFailure}.
   */
  fallbackReason?: string;
  /**
   * Which budget limit fired, present only on a `tier: 'budget-exceeded'`
   * result. `'time'` for `maxAnalysisMs`, `'steps'` for `maxAnalysisSteps`.
   */
  budgetExceeded?: 'time' | 'steps';
}

/** The source kind of one top-level binding together with its marginal stats. */
export interface BindingStats {
  /**
   * Source kind of the binding. `'assignment'` for `$name = expr`
   * statements; `'parameter'` for `$name is { … }` declarations.
   */
  kind: 'assignment' | 'parameter';
  /**
   * Marginal distribution of the binding's resolved value. Computed
   * against the same analysis tier as the program's final stats.
   */
  stats: FieldStats;
}

/**
 * Result of an analysis: the final `stats`, the `strategy` that produced
 * them, `diagnostics`, and (on request) per-binding marginals.
 */
export interface AnalyzeResult {
  stats: FieldStats;
  strategy: AnalysisStrategy;
  diagnostics: AnalyzeDiagnostics;
  /**
   * Per-binding marginals for every top-level `$assignment` and
   * `$parameter`. Present only when `perBinding: true` was passed.
   * Keyed by binding name (no `$` prefix).
   *
   * Tier asymmetry: in the exact tier, a binding whose intermediate
   * SymDist couldn't be represented (e.g. its joint over random
   * sources exceeded the joint-size cap) is *omitted* from this map —
   * the program-level final stats can still succeed because the
   * binding may participate only through an aggregation. The Monte
   * Carlo tier captures all bindings unconditionally via per-trial
   * hooks, so the same program analyzed under MC will include those
   * bindings. There's no flag to force MC just to populate them in
   * v1; if you need uniform coverage, defeat the exact path
   * externally (e.g. via a random count).
   */
  perBinding?: ReadonlyMap;
}

/** One AST node's contribution to a tier decision (see {@link TierExplanation}). */
export interface TierExplanationContributor {
  /** Human-readable location, e.g. 'statement 3'. */
  location?: string;
  /** AST node type string, e.g. 'repeat-expr'. */
  nodeType: string;
  /** Why this node prevented exact analysis. */
  cause: string;
}

/**
 * Explanation of why a program landed in a given tier. `tier` is spelled
 * out inline with the same three literals as {@link Tier}.
 */
export interface TierExplanation {
  tier: 'constant' | 'exact' | 'monte-carlo';
  /** One-sentence summary of why this tier was chosen. */
  reason: string;
  /**
   * Per-node contributions explaining what blocked exact analysis.
   * Empty or omitted for exact/constant programs.
   */
  contributors?: TierExplanationContributor[];
}

/**
 * Pairwise comparison metrics between two numeric outputs, `a` and `b`
 * (see {@link CompareResult.numeric}).
 */
export interface NumericComparison {
  probabilityAGreaterThanB: number;
  probabilityAEqualsB: number;
  probabilityALessThanB: number;
  totalVariationDistance: number;
  /** D(a || b); may be Infinity when supports don't overlap. */
  klDivergenceAFromB: number;
  /** D(b || a); may be Infinity when supports don't overlap. */
  klDivergenceBFromA: number;
  /** mean(a) - mean(b). */
  meanDiff: number;
  /** stddev(a) - stddev(b). */
  stddevDiff: number;
}

/** The two analysis results being compared, plus optional numeric metrics. */
export interface CompareResult {
  a: AnalyzeResult;
  b: AnalyzeResult;
  /**
   * Populated when both programs produce a numeric output. Absent otherwise.
   */
  numeric?: NumericComparison;
}

/**
 * Caller-supplied analysis options; every field is optional.
 *
 * The leading fields (`trials` … `parameters`) carry no documentation in
 * this file.
 * NOTE(review): `trials` / `maxTrials` / `minTrials` / `batchSize` /
 * `targetRelativeError` / `targetBinStderr` presumably tune Monte Carlo
 * sampling and convergence — confirm against the sampler.
 */
export interface AnalyzeOptions {
  trials?: number;
  maxTrials?: number;
  minTrials?: number;
  batchSize?: number;
  targetRelativeError?: number;
  targetBinStderr?: number;
  signal?: AbortSignal;
  parameters?: Record;
  /**
   * When true, `AnalyzeResult.perBinding` is populated with marginals
   * for every top-level `$assignment` and `$parameter`. Defaults to
   * false. Loop binders (`for $x in …`) and nested-scope assignments
   * are not surfaced.
   *
   * Cost:
   * - Exact tier: free — the per-binding SymDists are already in the
   *   analyzer's environment; we just convert them to FieldStats.
   * - Monte Carlo tier: light per-trial bookkeeping (one `onAssignment`
   *   / `onParameter` callback per top-level binder execution) plus
   *   O(trials × bindings) memory — every sampled value is retained
   *   until aggregation. For programs with array/record-valued
   *   bindings and large `maxTrials`, plan for the retained-value
   *   footprint.
   * - Constant tier: a single evaluation captures every binder.
   */
  perBinding?: boolean;
  /**
   * When true, an *unexpected* error thrown by the exact-distribution path
   * propagates out of `analyze`/`analyzeAsync` instead of being swallowed
   * into a silent Monte Carlo fallback. Use it in tests and CI to catch
   * regressions in the exact engine. Deliberate, typed declines
   * ({@link ExactFailure} via `UnsupportedExactAnalysis`) and semantic
   * signals (undefined outcome, divergent explosion) still fall back even
   * under strict mode, because Monte Carlo represents those correctly.
   * Defaults to false.
   */
  strictExact?: boolean;
  /**
   * Evaluation-wide loop budget for the Monte Carlo / constant evaluator: the
   * maximum number of loop iterations a single trial may perform, summed
   * across `repeat`, comprehension, `fold`, and the per-die explode / reroll /
   * compound chains. Bounds the synchronous work (and memory) any one trial
   * can do, so a pathological program (`1d6 explode 50000000 times on 1 or
   * more`, deeply nested loops) raises a clean error instead of blocking the
   * thread. Lower it when running on the UI thread or behind a watchdog.
   * Defaults to 10,000,000 (see `EvaluatorOptions.maxTotalIterations`).
   */
  maxTotalIterations?: number;
  /**
   * Per-loop cap on a single `repeat` count for the evaluator. Defaults to
   * 10,000 (see `EvaluatorOptions.maxRepeatIterations`).
   */
  maxRepeatIterations?: number;
  /**
   * Wall-clock budget, in milliseconds, for the whole analysis. The exact
   * engine polls a deadline at a bounded cadence (every AST node + every
   * construction-loop iteration), so unlike a `Promise.race` timeout it
   * actually bounds *synchronous* CPU — the primitive for analyzing untrusted
   * input where the host can't kill a thread (e.g. a Cloudflare Worker). When
   * the deadline is hit, `analyze` returns a `tier: 'budget-exceeded'` result
   * (`stats.type: 'undefined'`, `diagnostics.budgetExceeded: 'time'`) instead
   * of throwing or hanging. Unset = no time bound. See {@link budgetScope}.
   */
  maxAnalysisMs?: number;
  /**
   * Deterministic step budget for the whole analysis: the maximum number of
   * exact-engine steps (AST nodes + construction-loop iterations) before the
   * analysis stops with a `tier: 'budget-exceeded'` result
   * (`diagnostics.budgetExceeded: 'steps'`). Unlike {@link maxAnalysisMs} it
   * is independent of machine speed, so it's the right bound for reproducible
   * tests and for a hard cap that doesn't vary with load. Unset = no step
   * bound.
   */
  maxAnalysisSteps?: number;
  /**
   * What {@link maxAnalysisMs} / {@link maxAnalysisSteps} bound.
   * - `'analysis'` (default): the budget bounds the *entire* call — the exact
   *   engine and any Monte Carlo fallback. Hitting it stops immediately with a
   *   `tier: 'budget-exceeded'` result and never (continues to) sample. This
   *   is the guaranteed fast "no" for crawler-facing / untrusted surfaces.
   * - `'exact'`: the budget bounds only the exact engine. Hitting it declines
   *   exact and falls back to Monte Carlo, which then runs under its own
   *   `maxTrials` / `signal` / `convergenceTimeout` (the time budget does not
   *   apply to it). Use when an approximate sampled curve is preferable to no
   *   curve.
   */
  budgetScope?: 'analysis' | 'exact';
}

/**
 * Options for the async entry point: everything in {@link AnalyzeOptions}
 * plus `yieldEvery`, `yieldEveryMs` and `convergenceTimeout`.
 * NOTE(review): units are not stated here — `yieldEveryMs` is presumably
 * milliseconds and `yieldEvery` presumably a trial count; confirm.
 */
export interface AnalyzeAsyncOptions extends AnalyzeOptions {
  yieldEvery?: number;
  yieldEveryMs?: number;
  convergenceTimeout?: number;
}

/**
 * Convergence status for one field, identified by `path`. `relativeError`
 * is `null` when no value is available.
 */
export interface FieldConvergence {
  path: string;
  converged: boolean;
  samples: number;
  relativeError: number | null;
}

/** A progress snapshot yielded while an async analysis is running. */
export interface AsyncProgress {
  stats: FieldStats;
  trials: number;
  converged: boolean;
  fieldConvergence?: FieldConvergence[];
  /**
   * Present only on the terminal `tier: 'budget-exceeded'` progress yielded
   * when a `maxAnalysisMs` / `maxAnalysisSteps` bound is hit. Carries the
   * result tier and the budget diagnostics so async callers can degrade the
   * same way the sync `analyze` result lets them. Absent on normal progress.
   *
   * NOTE(review): `AnalysisStrategy` as declared in this file has only
   * `tier`, `trials` and `converged` — no budget-diagnostics field. Confirm
   * where async callers are expected to read which budget fired.
   */
  strategy?: AnalysisStrategy;
  /**
   * Per-binding marginals, present only when `perBinding: true` was
   * passed in `analyzeAsync`. On the Monte Carlo path the contents are
   * rebuilt each yield from the samples gathered so far — for large
   * `yieldEvery` × `trials` the rebuild cost adds up; raise
   * `yieldEvery` (or `yieldEveryMs`) to amortise it.
   *
   * NOTE(review): since the rebuild happens once per yield, the total cost
   * presumably grows with the number of yields (many trials, small
   * `yieldEvery`) — confirm the wording above.
   */
  perBinding?: ReadonlyMap;
}

/**
 * Description of one aggregated field: a `key`, an `aggregator`
 * (`'sum'` / `'count'` / `'product'`), optional `body` and `filter`
 * expressions, and the `binder` name.
 * NOTE(review): "Iid" presumably stands for independent, identically
 * distributed iterations — confirm against the analyzer that builds these.
 */
export type IidFieldSpec = {
  key: string;
  aggregator: 'sum' | 'count' | 'product';
  body: Expression | undefined;
  filter: Expression | undefined;
  binder: string;
};

/**
 * Analysis summary for one expression. `exactDist` and `symDist` are each
 * either `null` or a thunk, and the thunk itself may return `null`.
 */
export interface ExprAnalysis {
  random: boolean;
  randomVarsUsed: Set;
  exactDist: (() => FieldStats | null) | null;
  symDist: (() => SymDist | null) | null;
}

/**
 * Environment carried through program analysis. `nextSourceId` and
 * `exactFailure` are boxed `{ value }` holders rather than bare values.
 * NOTE(review): presumably boxed so that nested analysis steps can update
 * shared state — confirm.
 */
export interface AnalysisEnv {
  bindings: Map;
  symBindings: Map | null>;
  boundExpressions: Map;
  useCounts: Map;
  nextSourceId: { value: number; };
  diceRegistry: Map;
  strictExact: boolean;
  exactFailure: { value: ExactFailure | null; };
  budget: AnalysisBudget;
}

/**
 * Inventory of top-level binders exposed to `perBinding` consumers. Only
 * the program's top-level statement list is walked — assignments nested
 * inside `repeat`, comprehension, fold, `if`/`match` branches, or any
 * other expression body are intentionally absent (their lifetimes don't
 * collapse to a single distribution).
 */
export interface TopLevelBindings {
  /** Top-level `$name = expr` statements, keyed by `name`. */
  assignments: Map;
  /** Top-level `$name is { … }` declarations, keyed by `name`. */
  parameters: Map;
}

/**
 * Shape summary of a record: `keys` is a single string and `kind` is a
 * string or `null`.
 * NOTE(review): `keys` is presumably a canonical joined key list — confirm.
 */
export interface RecordShape {
  keys: string;
  kind: string | null;
}

/** Output of analyzing a whole program. */
export interface ProgramAnalysisResult {
  analysis: ExprAnalysis;
  /**
   * Per-name SymDists for top-level bindings, in source order. Used by
   * the exact-tier `perBinding` extractor; `null` entries are bindings
   * whose distribution couldn't be exactly represented.
   */
  symBindings: Map | null>;
  /**
   * Boxed sink populated lazily when an `exactDist()` thunk swallows an
   * unexpected exact-path error in non-strict mode. Read after invoking the
   * thunk to learn why the exact path produced `null`.
   */
  exactFailure: { value: ExactFailure | null; };
}

/**
 * A record expression together with the conditions, each paired with a
 * `truth` value, recorded along the path to it.
 * NOTE(review): presumably `truth` is the outcome each condition must have
 * for this branch to be taken — confirm.
 */
export interface VariantBranch {
  recordExpr: RecordExpr;
  condsAlongPath: Array<{ cond: ExprAnalysis; truth: boolean; }>;
}

/**
 * A filter reduced to a concrete form: `'drop'` or `'keep'`, a direction
 * (`'low'` / `'high'` / `'middle'`), and a numeric `value`.
 */
export type ResolvedFilter = {
  type: 'drop' | 'keep';
  dir: 'low' | 'high' | 'middle';
  value: number;
};

/**
 * Per-binding sample buffers accumulated during a single `analyze` /
 * `analyzeAsync` run. Only top-level binders are populated; entries that
 * are never reached (which currently only happens in pathological MC
 * runs with zero trials) are skipped at `buildPerBindingFromCapture`
 * time.
 */
export interface BindingCapture {
  assignments: Map;
  parameters: Map;
}

/**
 * Exported only for equivalence tests of the weighted aggregator (internal).
 *
 * NOTE(review): the sentence above reads as if it was written for a
 * different export — confirm it belongs on this declaration.
 *
 * The three fields share their names with the optional
 * `targetRelativeError` / `targetBinStderr` / `minTrials` options on
 * {@link AnalyzeOptions}, but are required here.
 */
export interface ConvergenceConfig {
  targetRelativeError: number;
  targetBinStderr: number;
  minTrials: number;
}

/**
 * Settings for a Monte Carlo run with every field required. Values that may
 * be unset are typed as `| null` or `| undefined` rather than optional.
 * NOTE(review): presumably derived from {@link AnalyzeOptions} /
 * {@link AnalyzeAsyncOptions} with defaults applied — confirm.
 */
export interface MonteCarloPlan {
  fixedTrials: number | null;
  maxTrials: number;
  minTrials: number;
  batchSize: number;
  yieldEvery: number;
  yieldEveryMs: number | null;
  convergenceTimeout: number | null;
  config: ConvergenceConfig;
  signal: AbortSignal | undefined;
  parameters: Record | undefined;
}