/**
 * src/measurement/schema.ts
 *
 * Schema for ATR benchmark measurement files.
 *
 * Every public ATR recall / precision / FP-rate claim must reference a
 * measurement file conforming to this schema. The contract is documented in
 * `data/measurements/README.md`.
 *
 * Design constraints:
 *   - No external dependencies (no zod, no ajv). Schemas evolve slowly; the
 *     dependency surface should not.
 *   - Strict at the boundary. `parseMeasurement()` throws on any deviation;
 *     it does not silently coerce, drop fields, or accept missing required
 *     fields.
 *   - Forward-compatible. `schema_version` is mandatory. Future readers can
 *     decide how to handle older versions.
 */

/** Bump this when the schema breaks backward compatibility. */
export declare const CURRENT_SCHEMA_VERSION: "1";

/**
 * Core metrics that every measurement reports.
 *
 * `recall` is the most-cited number externally. `precision` and `fp_rate`
 * anchor the recall claim against overclaim risk.
 */
export interface Metrics {
  /** True-positive rate. matched_attacks / total_attacks. Range [0, 1]. */
  recall: number;
  /** matched / (matched + false_positives). Range [0, 1]. */
  precision: number;
  /** 2 * precision * recall / (precision + recall). Range [0, 1]. */
  f1: number;
  /** false_positives / total_benign. Range [0, 1]. May be 0 if the corpus has no benign samples. */
  fp_rate: number;
}

/** Confusion matrix. Strongly recommended; CI does not require it. */
export interface Confusion {
  /** True positives — adversarial sample matched by a rule. */
  tp: number;
  /** False positives — benign sample matched by a rule. */
  fp: number;
  /** True negatives — benign sample NOT matched by any rule. */
  tn: number;
  /** False negatives — adversarial sample NOT matched (the recall gap). */
  fn: number;
}

/** Engine latency profile in milliseconds. Optional. */
export interface LatencyMs {
  p50: number;
  p95: number;
  p99: number;
  mean: number;
  max: number;
}

/**
 * Source-defined bespoke breakdown. Each source uses this shape differently:
 *   - garak: by attack family (dan, latentinjection, sysprompt_extraction, …)
 *   - skill-benchmark: by layer (layer_a, layer_b, layer_c)
 *   - mega-scan: by severity (critical, high, medium, low)
 *   - eval-harness: by category and by difficulty
 *
 * Kept as `Record<string, unknown>` because the structure is source-defined.
 * Consumers of this field must validate per-source.
 */
export type Breakdown = Record<string, unknown>;

/**
 * A single measurement run.
 *
 * Required fields are the minimum for a public-citable claim.
 */
export interface Measurement {
  /** Schema version. Currently `"1"`. */
  schema_version: typeof CURRENT_SCHEMA_VERSION;
  /** Stable source identifier. Lowercase, hyphen-separated. Example: `"garak"`, `"pint"`, `"hh-rlhf"`. */
  source: string;
  /** Upstream version. Example: `"v0.10.3"`, `"corpus-2026-04-15"`. */
  source_version: string;
  /** Optional canonical URL for the exact upstream release/commit. */
  source_url?: string;
  /** Optional upstream git SHA (for git-pinnable sources). */
  source_commit?: string;
  /** ATR version at measurement time. Read from `package.json`. */
  atr_version: string;
  /** ATR git commit at measurement time (short SHA). */
  atr_commit: string;
  /** Total rule count in the engine at measurement time. */
  rules_loaded: number;
  /** ISO 8601 UTC timestamp. Example: `"2026-05-23T03:57:58.869Z"`. */
  measured_at: string;
  /** Total sample count in the evaluated corpus. */
  samples: number;
  /** Core metrics. All four required. */
  metrics: Metrics;
  /** Confusion matrix. Optional; strongly recommended. */
  confusion?: Confusion;
  /** Engine latency profile. Optional. */
  latency_ms?: LatencyMs;
  /** Source-defined per-category / per-family / per-severity breakdown. Optional. */
  breakdown?: Breakdown;
  /** Free-text context. Optional. Keep brief. */
  notes?: string;
}

/**
 * `latest.json` per source. Points to the most recent measurement file.
 *
 * The pointer is a relative path from the `<source>/` directory.
 * Consumers should NOT cache; the file is small and re-reading is cheap.
 */
export interface LatestPointer {
  source: string;
  /** Filename of the latest measurement, relative to the `<source>/` directory. */
  file: string;
  /** Mirror of the measurement's `measured_at`, for fast inspection. */
  measured_at: string;
  /** Mirror of `metrics` for fast aggregation without reading the underlying file. */
  metrics: Metrics;
  /** Mirror of `source_version` and `atr_version` for fast inspection. */
  source_version: string;
  atr_version: string;
  /** Mirror of `samples`. */
  samples: number;
}

/**
 * Error thrown when a measurement file fails schema validation.
 * Includes the field path and reason for fast debugging.
 */
export declare class MeasurementSchemaError extends Error {
  /** Path of the field that failed validation. */
  readonly path: string;
  /** Why the field was rejected. */
  readonly reason: string;
  constructor(path: string, reason: string);
}

/**
 * Parse a `Measurement` from an arbitrary value.
 *
 * Unknown extra top-level keys are allowed but ignored (forward-compat); they
 * are NOT preserved in the returned object.
 *
 * @param raw - Untrusted value, e.g. the result of `JSON.parse`.
 * @returns A strongly-typed `Measurement`.
 * @throws MeasurementSchemaError on any deviation from the schema.
 */
export declare function parseMeasurement(raw: unknown): Measurement;

/**
 * Parse a `LatestPointer`. Throws on schema violation.
 *
 * @param raw - Untrusted value, e.g. the result of `JSON.parse`.
 * @returns A strongly-typed `LatestPointer`.
 */
export declare function parseLatestPointer(raw: unknown): LatestPointer;

/**
 * Compute the canonical filename for a measurement.
 *
 * Format: `<date>_<source>-<source_version>_atr-<atr_version>.json`
 *
 * `source_version` and `atr_version` are slugified (lowercase, non-alphanumeric
 * → `-`, leading/trailing `-` removed, collapsed runs of `-`).
 *
 * NOTE(review): the placeholder names in the format string and the exact key
 * list of the `Pick` below were reconstructed — the generic arguments and
 * angle-bracket placeholders were missing from this declaration. Confirm both
 * against the implementation in `schema.ts` before relying on them.
 *
 * @param m - The measurement fields the filename is derived from.
 * @returns The filename, including the `.json` extension.
 */
export declare function measurementFilename(
  m: Pick<Measurement, "source" | "source_version" | "atr_version" | "measured_at">,
): string;
//# sourceMappingURL=schema.d.ts.map