/**
 * Golden Dataset for Drift Detection Evaluation
 *
 * This dataset contains labeled test cases for evaluating the accuracy
 * of semantic comparison in drift detection. Each case specifies whether
 * two texts should be considered semantically equivalent.
 *
 * Categories:
 * - TRUE POSITIVES: Different phrasing, same meaning (should match)
 * - TRUE NEGATIVES: Different meaning (should not match)
 * - EDGE CASES: Boundary conditions and special scenarios
 *
 * To add new test cases:
 * 1. Add to the appropriate section below
 * 2. Run `bellwether eval` to verify accuracy
 * 3. If a test fails unexpectedly, either fix the algorithm or adjust the test case
 */
import type { GoldenTestCase } from './types.js';

/**
 * Dataset version history:
 * - 1.0.0: Initial 50 test cases
 * - 2.0.0: Phase 3 expansion with 150+ additional cases
 */
export declare const DATASET_VERSION = "2.0.0";

/**
 * Full golden dataset combining core and expanded cases.
 * Total: 150+ labeled test cases for comprehensive evaluation.
 */
export declare const GOLDEN_DATASET: GoldenTestCase[];

/**
 * Test cases exported under the "security" name.
 * NOTE(review): presumably the security-category cases; the selection
 * rule lives in the implementation, which is not visible from this
 * declaration file.
 */
export declare const SECURITY_CASES: GoldenTestCase[];

/**
 * Test cases exported under the "limitation" name.
 * NOTE(review): presumably the limitation-category cases; confirm against
 * the implementation.
 */
export declare const LIMITATION_CASES: GoldenTestCase[];

/**
 * Test cases exported under the "assertion" name.
 * NOTE(review): presumably the assertion-category cases; confirm against
 * the implementation.
 */
export declare const ASSERTION_CASES: GoldenTestCase[];

/**
 * Get comprehensive statistics about the golden dataset.
 *
 * @returns An object of counts describing the dataset: a version string,
 * overall and per-category totals, per-tag totals, and a nested `expanded`
 * summary with the same kind of breakdown.
 */
export declare function getDatasetStatistics(): {
    version: string;
    totalCases: number;
    coreCases: number;
    expandedCases: number;
    truePositives: number;
    trueNegatives: number;
    byCategory: {
        security: number;
        limitation: number;
        assertion: number;
        edge: number;
    };
    // NOTE(review): `Record` needs two type arguments to compile. They are
    // restored here as string keys mapping to numeric counts, matching the
    // sibling count fields; confirm against the implementation.
    byTag: Record<string, number>;
    expanded: {
        totalCases: number;
        byCategory: Record<string, number>;
        byTag: Record<string, number>;
        truePositives: number;
        trueNegatives: number;
    };
};
//# sourceMappingURL=golden-dataset.d.ts.map