/**
 * Evaluation Harness -- orchestrates running the corpus through the ATR engine
 * and produces a structured EvalReport.
 *
 * Supports:
 * - Regex-only evaluation (Tier 2)
 * - Regex + Embedding evaluation (Tier 2 + 2.5)
 * - Full pipeline evaluation (all tiers)
 * - Per-sample latency measurement
 * - Regression check against baseline thresholds
 *
 * @module agent-threat-rules/eval/eval-harness
 */
import type { CorpusSample } from './corpus.js';
import { getCorpusStats } from './corpus.js';
import type { EvalReport, BaselineThresholds, RegressionCheck } from './metrics.js';
import type { RuleQualityReport } from './rule-metrics.js';

/**
 * Options accepted by {@link runEval}. Only `rulesDir` is required; every
 * other field is optional and all fields are read-only.
 */
export interface EvalConfig {
    /** Directory containing ATR YAML rules */
    readonly rulesDir: string;
    /** Path to attack embeddings JSON (optional, for Tier 2.5) */
    readonly embeddingsPath?: string;
    /** Custom corpus (defaults to built-in EVAL_CORPUS) */
    readonly corpus?: readonly CorpusSample[];
    /** Baseline thresholds for regression check */
    readonly thresholds?: BaselineThresholds;
    /** Path to save report JSON */
    readonly outputPath?: string;
    /** Enable Tier 2.5 embedding evaluation (default: auto-detect) */
    readonly enableEmbedding?: boolean;
}

/**
 * Run the full evaluation harness.
 *
 * @param config - Harness options; see {@link EvalConfig}.
 * @returns A promise resolving to an object with:
 * - `report`: the structured {@link EvalReport};
 * - `regression`: the {@link RegressionCheck} result;
 * - `corpusStats`: whatever `getCorpusStats` returns (type derived from that
 *   function rather than redeclared here);
 * - `tiersUsed`: read-only list of tier labels (presumably the tiers that
 *   actually ran -- confirm against the implementation);
 * - `ruleQuality`: the {@link RuleQualityReport}.
 */
export declare function runEval(config: EvalConfig): Promise<{
    report: EvalReport;
    regression: RegressionCheck;
    // `ReturnType` is generic and must be given a function type; the value
    // import of `getCorpusStats` above exists to supply it via `typeof`.
    corpusStats: ReturnType<typeof getCorpusStats>;
    tiersUsed: readonly string[];
    ruleQuality: RuleQualityReport;
}>;

/**
 * Command-line entry point for the evaluation harness.
 *
 * NOTE(review): the resolved type is declared as `void` on the assumption that
 * the CLI reports through side effects only -- confirm against the
 * implementation in eval-harness.ts.
 */
export declare function runEvalCLI(): Promise<void>;
//# sourceMappingURL=eval-harness.d.ts.map