/**
 * SKILL.md Benchmark Harness
 *
 * Evaluates the ATR scanSkill() method against a labeled corpus of
 * malicious and benign SKILL.md files. Produces per-layer recall,
 * overall precision, and a detailed per-sample report.
 *
 * Corpus: data/skill-benchmark/manifest.json
 * Samples: data/skill-benchmark/malicious/ and data/skill-benchmark/benign/
 *
 * Layers:
 *   A = obvious payload (curl|bash, base64 exec, reverse shell)
 *   B = obfuscated (bash expansion, paste service relay, encoded)
 *   C = semantic (natural language instructions, social engineering)
 *
 * @module agent-threat-rules/eval/skill-benchmark
 */
/**
 * Outcome recorded for one corpus sample after it has been scanned.
 *
 * All members are read-only. Semantics noted as "presumably" are inferred
 * from the field names and the module header; the implementation is not
 * visible from this declaration file — confirm before relying on them.
 */
interface SampleResult {
    /** Identifier of the sample file (presumably a path inside the corpus directory). */
    readonly file: string;
    /** Ground-truth label assigned to the sample in the labeled corpus. */
    readonly label: 'malicious' | 'benign';
    /** Corpus layer of the sample; the module header lists layers A, B and C. */
    readonly layer: string;
    /** Attack classification of the sample (free-form string). */
    readonly attack_type: string;
    /** Whether the scan flagged the sample (presumably: at least one rule fired). */
    readonly detected: boolean;
    /** Identifiers of the rules that fired for this sample. */
    readonly rules_fired: ReadonlyArray<string>;
    /** Whether the verdict is right (presumably: `detected` agrees with `label`). */
    readonly correct: boolean;
    /** Time taken for this sample, in milliseconds. */
    readonly latency_ms: number;
    /** Whether the rules that fired matched the rules expected for the sample. */
    readonly expected_rules_matched: boolean;
    /** Whether the detected category matched the expected category. */
    readonly category_correct: boolean;
}
/**
 * Detection metrics for a single corpus layer (the report carries one
 * instance each for layers A, B and C).
 */
interface LayerMetrics {
    /** Number of samples counted for this layer. */
    readonly total: number;
    /** Number of those samples that were detected. */
    readonly detected: number;
    /** Recall for this layer (presumably detected / total — confirm against the implementation). */
    readonly recall: number;
}
/**
 * Full result of one benchmark run: corpus sizes, aggregate quality
 * metrics, per-layer recall, confusion-matrix counts, latency figures and
 * the per-sample results.
 *
 * All members are read-only. How each metric is computed is not visible
 * from this declaration file; consult the implementation for exact formulas.
 */
interface SkillBenchmarkReport {
    /** When the report was produced (string form; exact format not shown here). */
    readonly timestamp: string;

    // --- Corpus composition ---
    /** Total number of samples in the corpus. */
    readonly corpus_size: number;
    /** Number of samples labeled malicious. */
    readonly malicious_count: number;
    /** Number of samples labeled benign. */
    readonly benign_count: number;

    // --- Aggregate quality metrics ---
    readonly overall_recall: number;
    readonly overall_precision: number;
    readonly overall_f1: number;
    /** False-positive rate. */
    readonly fp_rate: number;

    // --- Per-layer metrics (layers A, B, C as described in the module header) ---
    readonly layer_a: LayerMetrics;
    readonly layer_b: LayerMetrics;
    readonly layer_c: LayerMetrics;

    // --- Confusion-matrix counts ---
    readonly true_positives: number;
    readonly false_positives: number;
    readonly true_negatives: number;
    readonly false_negatives: number;

    // --- Rule / category agreement ---
    readonly expected_rules_accuracy: number;
    readonly category_accuracy: number;

    // --- Latency, in milliseconds ---
    readonly avg_latency_ms: number;
    readonly max_latency_ms: number;

    // --- Per-sample detail ---
    /** One entry per evaluated sample. */
    readonly results: ReadonlyArray<SampleResult>;
    /** Sample results reported as missed attacks (presumably malicious samples that went undetected). */
    readonly missed_attacks: ReadonlyArray<SampleResult>;
    /** Sample results reported as false alarms (presumably benign samples that were flagged). */
    readonly false_alarms: ReadonlyArray<SampleResult>;
}
/**
 * Run the SKILL.md benchmark and resolve with the resulting report.
 *
 * Every option is optional, and the `options` object itself may be omitted.
 * The defaults applied in that case are not visible from this declaration
 * file — presumably the corpus location named in the module header; confirm
 * against the implementation.
 *
 * @param options - Optional overrides.
 *   `rulesDir`: directory to load rules from.
 *   `corpusDir`: directory containing the benchmark corpus.
 *   `outputPath`: output location (presumably where the report is written — confirm).
 * @returns A promise that resolves with the benchmark report.
 */
export declare function runSkillBenchmark(options?: {
    readonly rulesDir?: string;
    readonly corpusDir?: string;
    readonly outputPath?: string;
}): Promise<SkillBenchmarkReport>;
/**
 * Write the standardized, version-pinned Measurement file for a
 * SkillBenchmark report.
 *
 * Kept separate from runSkillBenchmark so unit tests can exercise the
 * benchmark logic without mutating data/measurements/ on disk (which would
 * make the CI 'sync-stats --check' drift gate flake).
 *
 * Called from the CLI block at the bottom of the implementation file and
 * from any external script that wants to persist the measurement. Safe to
 * call repeatedly on the same day — uses force=true.
 *
 * @param report - The benchmark report to persist as a measurement.
 */
export declare function writeSkillBenchmarkMeasurement(report: SkillBenchmarkReport): void;
/**
 * Print a benchmark report.
 *
 * NOTE(review): the output destination and format are not visible from this
 * declaration file — presumably a human-readable summary on the console;
 * confirm against the implementation.
 *
 * @param report - The benchmark report to print.
 */
export declare function printReport(report: SkillBenchmarkReport): void;
export {};
//# sourceMappingURL=skill-benchmark.d.ts.map