import type { AssertionHandle, AssertionResult, AssertionSeverity, EveEvalTaskResult } from "#evals/types.js";

/**
 * Outcome of evaluating one assertion: a 0–1 score (boolean assertions use
 * exactly 0 or 1) with optional human-readable detail and metadata.
 */
export interface AssertionOutcome {
  /** Score between 0 and 1; boolean assertions use exactly 0 or 1. */
  readonly score: number;
  /** Optional human-readable detail about the outcome. */
  readonly message?: string;
  /**
   * Optional metadata attached to the outcome.
   *
   * NOTE(review): the generic argument was missing in the reviewed text
   * (`Readonly>`); restored as a string-keyed record of unknown values —
   * confirm against the implementation.
   */
  readonly metadata?: Readonly<Record<string, unknown>>;
}

/**
 * A run-level assertion (e.g. `t.completed()`), evaluated lazily against the
 * final task result after `test(t)` returns. The evaluation is deferred so
 * the assertion always sees the complete run regardless of call order.
 */
export interface RunAssertion {
  /** Name identifying this assertion. */
  readonly name: string;
  /**
   * Evaluates the assertion against the final task result.
   *
   * @param result - The final task result of the run.
   * @returns The outcome, either synchronously or as a promise.
   */
  evaluate(result: EveEvalTaskResult): AssertionOutcome | Promise<AssertionOutcome>;
}

/**
 * Collects the assertions recorded by an eval's `test(t)`. Run-level
 * assertions register a deferred spec; value/judge assertions evaluate their
 * captured value immediately (the value is ephemeral) and register the pending
 * promise. {@link finalize} resolves everything against the final result and
 * produces the ordered {@link AssertionResult} list the verdict reads.
 */
export declare class AssertionCollector {
  #private;

  /**
   * Register a run-level assertion evaluated against the final result.
   *
   * @param spec - The deferred run-level assertion.
   * @param severity - Optional severity for the assertion.
   * @returns A handle for the registered assertion.
   */
  recordRun(spec: RunAssertion, severity?: AssertionSeverity): AssertionHandle;

  /**
   * Register a value/judge assertion, evaluating the captured value now.
   *
   * @param input - Name, severity, optional threshold and the scoring callback.
   * @returns A handle for the registered assertion.
   */
  recordValue(input: {
    readonly name: string;
    readonly severity: AssertionSeverity;
    readonly threshold?: number;
    // NOTE(review): the promise's type argument was missing in the reviewed
    // text; restored as AssertionOutcome to match RunAssertion.evaluate —
    // confirm against the implementation.
    readonly score: () => Promise<AssertionOutcome>;
  }): AssertionHandle;

  /**
   * Awaits every pending value/judge assertion, evaluates the deferred
   * run-level assertions against `result`, and returns the recorded results.
   *
   * NOTE(review): the promise's type argument was missing in the reviewed
   * text; restored as a list of AssertionResult per the class description —
   * confirm whether the original was a readonly array.
   *
   * @param result - The final task result of the run.
   * @returns The recorded assertion results.
   */
  finalize(result: EveEvalTaskResult): Promise<AssertionResult[]>;
}