/**
 * src/measurement/schema.ts
 *
 * Schema for ATR benchmark measurement files.
 *
 * Every public ATR recall / precision / FP-rate claim must reference a
 * measurement file conforming to this schema. The contract is documented in
 * `data/measurements/README.md`.
 *
 * Design constraints:
 *   - No external dependencies (no zod, no ajv). Schemas evolve slowly; the
 *     dependency surface should not evolve at all.
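 *   - Strict at the boundary. `parseMeasurement()` throws on any deviation in
 *     a field the schema defines; it does not silently coerce values or
 *     accept missing required fields. The one leniency is unknown extra
 *     top-level keys, which are ignored and not carried into the result.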
 *   - Forward-compatible. `schema_version` is mandatory. Future readers can
 *     decide how to handle older versions.
 */
/**
 * Version tag of the measurement schema described by this file.
 *
 * Declared with the string-literal type `"1"`, so any field typed as
 * `typeof CURRENT_SCHEMA_VERSION` (for example `Measurement.schema_version`)
 * admits only that exact string at compile time.
 *
 * Bump this when the schema breaks backward compatibility.
 */
export declare const CURRENT_SCHEMA_VERSION: "1";
/**
 * Core metrics that every measurement reports.
 *
 * `recall` is the most-cited number externally. `precision` and `fp_rate`
 * anchor the recall claim against overclaim risk.
 *
 * All four values are fractions documented as lying in [0, 1] (not
 * percentages). The formulas below are also expressed in terms of the
 * `Confusion` counts (`tp`, `fp`, `tn`, `fn`).
 *
 * NOTE(review): whether `parseMeasurement()` enforces the [0, 1] range, or
 * cross-checks these values against `confusion`, is not visible in this
 * declaration file — confirm against the implementation.
 */
export interface Metrics {
    /**
     * True-positive rate: matched_attacks / total_attacks, i.e.
     * tp / (tp + fn). Range [0, 1].
     */
    recall: number;
    /**
     * matched / (matched + false_positives), i.e. tp / (tp + fp).
     * Range [0, 1].
     */
    precision: number;
    /**
     * Harmonic mean of precision and recall:
     * 2 * precision * recall / (precision + recall). Range [0, 1].
     * NOTE(review): the value to report when precision + recall is 0 is not
     * specified here — confirm the convention used by producers.
     */
    f1: number;
    /**
     * false_positives / total_benign, i.e. fp / (fp + tn). Range [0, 1].
     * May be 0 if the corpus has no benign samples (the ratio is otherwise
     * undefined in that case).
     */
    fp_rate: number;
}
/**
 * Confusion matrix: raw sample counts behind the `Metrics` ratios.
 * Strongly recommended; CI does not require it.
 *
 * By the definitions below, `tp + fn` is the number of adversarial samples
 * and `fp + tn` is the number of benign samples.
 * NOTE(review): presumably the four counts sum to `Measurement.samples` —
 * confirm whether the parser checks this.
 */
export interface Confusion {
    /** True positives — adversarial sample matched by a rule. */
    tp: number;
    /** False positives — benign sample matched by a rule. */
    fp: number;
    /** True negatives — benign sample NOT matched by any rule. */
    tn: number;
    /** False negatives — adversarial sample NOT matched (the recall gap). */
    fn: number;
}
/**
 * Engine latency profile. Optional on a measurement.
 *
 * Every field is a duration in milliseconds.
 * NOTE(review): what a single timed operation covers (presumably one sample
 * evaluated against the loaded rules) is not stated here — confirm with the
 * producer of the measurement.
 */
export interface LatencyMs {
    /** 50th-percentile (median) latency, in ms. */
    p50: number;
    /** 95th-percentile latency, in ms. */
    p95: number;
    /** 99th-percentile latency, in ms. */
    p99: number;
    /** Arithmetic mean latency, in ms. */
    mean: number;
    /** Largest observed latency, in ms. */
    max: number;
}
/**
 * Source-defined bespoke breakdown. Each source uses this shape differently:
 *   - garak: by attack family (dan, latentinjection, sysprompt_extraction, …)
 *   - skill-benchmark: by layer (layer_a, layer_b, layer_c)
 *   - mega-scan: by severity (critical, high, medium, low)
 *   - eval-harness: by category and by difficulty
 *
 * Kept as `Record<string, unknown>` because the structure is source-defined:
 * this type guarantees only string keys, and says nothing about the values.
 * Consumers of this field must validate per-source before reading any value.
 */
export type Breakdown = Record<string, unknown>;
/**
 * A single measurement run: one corpus (`source` at `source_version`)
 * evaluated by one ATR build (`atr_version` at `atr_commit`).
 *
 * Required fields are the minimum for a public-citable claim. Fields marked
 * with `?` are optional and may be absent.
 */
export interface Measurement {
    /** Schema version. Currently `"1"` (the value of `CURRENT_SCHEMA_VERSION`). */
    schema_version: typeof CURRENT_SCHEMA_VERSION;
    /** Stable source identifier. Lowercase, hyphen-separated. Example: `"garak"`, `"pint"`, `"hh-rlhf"`. */
    source: string;
    /** Upstream version of the source. Example: `"v0.10.3"`, `"corpus-2026-04-15"`. */
    source_version: string;
    /** Optional canonical URL for the exact upstream release/commit. */
    source_url?: string;
    /** Optional upstream git SHA (for git-pinnable sources). */
    source_commit?: string;
    /** ATR version at measurement time. Read from `package.json`. */
    atr_version: string;
    /** ATR git commit at measurement time (short SHA). */
    atr_commit: string;
    /** Total rule count in the engine at measurement time. */
    rules_loaded: number;
    /**
     * When the measurement was taken, as an ISO 8601 UTC timestamp.
     * Example: `"2026-05-23T03:57:58.869Z"`. Also an input to
     * `measurementFilename()`.
     */
    measured_at: string;
    /** Total sample count in the evaluated corpus. */
    samples: number;
    /** Core metrics. All four required. */
    metrics: Metrics;
    /** Confusion matrix. Optional; strongly recommended. */
    confusion?: Confusion;
    /** Engine latency profile, in milliseconds. Optional. */
    latency_ms?: LatencyMs;
    /**
     * Source-defined per-category / per-family / per-severity breakdown.
     * Optional. Its structure is not fixed by this schema; see `Breakdown`.
     */
    breakdown?: Breakdown;
    /** Free-text context. Optional. Keep brief. */
    notes?: string;
}
/**
 * `latest.json` per source. Points to the most recent measurement file and
 * mirrors a few of its fields so that dashboards and aggregators can read
 * headline numbers without opening the measurement itself.
 *
 * The pointer is a relative path from the `<source>/` directory.
 * Consumers should NOT cache; the file is small and re-reading is cheap.
 *
 * All fields are required.
 */
export interface LatestPointer {
    /**
     * Source identifier.
     * NOTE(review): presumably identical to the pointed-to measurement's
     * `source` and to the `<source>/` directory name — confirm.
     */
    source: string;
    /** Filename of the latest measurement, relative to the `<source>/` directory. */
    file: string;
    /** Mirror of the measurement's `measured_at`, for fast inspection. */
    measured_at: string;
    /** Mirror of `metrics` for fast aggregation without reading the underlying file. */
    metrics: Metrics;
    /** Mirror of the measurement's `source_version`, for fast inspection. */
    source_version: string;
    /** Mirror of the measurement's `atr_version`, for fast inspection. */
    atr_version: string;
    /** Mirror of `samples`. */
    samples: number;
}
/**
 * Error thrown when a measurement file fails schema validation.
 * Includes the field path and reason for fast debugging; both are exposed as
 * separate read-only properties in addition to whatever `Error` provides.
 *
 * NOTE(review): the syntax of `path` (e.g. dotted such as `metrics.recall`)
 * and how `message` is built from `path` and `reason` are not visible in
 * this declaration file — confirm against the implementation.
 */
export declare class MeasurementSchemaError extends Error {
    /** Location of the offending field within the parsed value. */
    readonly path: string;
    /** Human-readable explanation of why the field was rejected. */
    readonly reason: string;
    /**
     * @param path - Location of the offending field.
     * @param reason - Why the value at `path` was rejected.
     */
    constructor(path: string, reason: string);
}
/**
 * Parse a `Measurement` from an arbitrary value (typically the result of
 * `JSON.parse` on a measurement file).
 *
 * Unknown extra top-level keys are allowed but ignored (forward-compat); they
 * are NOT preserved in the returned object. Any other deviation from the
 * schema is an error.
 *
 * NOTE(review): whether unknown keys inside nested objects (`metrics`,
 * `confusion`, `latency_ms`) are likewise tolerated is not stated here —
 * confirm against the implementation.
 *
 * @param raw - Untrusted input of any type.
 * @returns A strongly-typed `Measurement`.
 * @throws MeasurementSchemaError on any deviation from the schema other than
 *   unknown extra top-level keys.
 */
export declare function parseMeasurement(raw: unknown): Measurement;
/**
 * Parse a `LatestPointer` (the content of a per-source `latest.json`) from an
 * arbitrary value. Throws on schema violation.
 *
 * NOTE(review): presumably throws `MeasurementSchemaError` and treats unknown
 * extra keys the same way `parseMeasurement()` does — neither is stated in
 * this declaration file; confirm against the implementation.
 *
 * @param raw - Untrusted input of any type.
 * @returns A strongly-typed `LatestPointer`.
 */
export declare function parseLatestPointer(raw: unknown): LatestPointer;
/**
 * Compute the canonical filename for a measurement.
 *
 * Format: `<YYYY-MM-DD>_<source>-<source_version>_atr-<atr_version>.json`
 *
 * `source_version` and `atr_version` are slugified (lowercase, non-alphanumeric
 * → `-`, leading/trailing `-` removed, collapsed runs of `-`).
 *
 * NOTE(review): the date part is presumably the calendar date of
 * `measured_at` (the only timestamp among the inputs), and `source` is
 * presumably inserted as-is since it is not listed as slugified — confirm
 * both against the implementation.
 *
 * @param m - The four measurement fields the filename is built from; a full
 *   `Measurement` is also accepted.
 * @returns The filename only (no directory component), ending in `.json`.
 */
export declare function measurementFilename(m: Pick<Measurement, "measured_at" | "source" | "source_version" | "atr_version">): string;
//# sourceMappingURL=schema.d.ts.map