/**
 * Golden Dataset for Drift Detection Evaluation
 *
 * This dataset contains labeled test cases for evaluating the accuracy
 * of semantic comparison in drift detection. Each case specifies whether
 * two texts should be considered semantically equivalent.
 *
 * Categories:
 * - TRUE POSITIVES: Different phrasing, same meaning (should match)
 * - TRUE NEGATIVES: Different meaning (should not match)
 * - EDGE CASES: Boundary conditions and special scenarios
 *
 * To add new test cases:
 * 1. Add the case to the appropriate section below
 * 2. Run `bellwether eval` to verify accuracy
 * 3. If a test fails unexpectedly, either fix the algorithm or adjust the test case
 */
import type { GoldenTestCase } from './types.js';
/**
 * Version identifier of the golden dataset, declared with the literal
 * type "2.0.0".
 *
 * Dataset version history:
 * - 1.0.0: Initial 50 test cases
 * - 2.0.0: Phase 3 expansion with 150+ additional cases
 */
export declare const DATASET_VERSION = "2.0.0";
/**
 * Full golden dataset combining core and expanded cases.
 * Total: 150+ labeled test cases for comprehensive evaluation.
 *
 * Each element is a `GoldenTestCase` (type imported from './types.js').
 *
 * NOTE(review): the version history in this file lists 50 initial cases plus
 * 150+ additional ones, which would put the total at 200+ — confirm which
 * figure is current.
 */
export declare const GOLDEN_DATASET: GoldenTestCase[];
/**
 * Array of `GoldenTestCase` entries exported under the "security" name.
 *
 * NOTE(review): presumably the security-category subset of the golden
 * dataset (compare `byCategory.security` in `getDatasetStatistics`); only the
 * element type is visible in this declaration — confirm against the
 * implementation.
 */
export declare const SECURITY_CASES: GoldenTestCase[];
/**
 * Array of `GoldenTestCase` entries exported under the "limitation" name.
 *
 * NOTE(review): presumably the limitation-category subset of the golden
 * dataset (compare `byCategory.limitation` in `getDatasetStatistics`); only
 * the element type is visible in this declaration — confirm against the
 * implementation.
 */
export declare const LIMITATION_CASES: GoldenTestCase[];
/**
 * Array of `GoldenTestCase` entries exported under the "assertion" name.
 *
 * NOTE(review): presumably the assertion-category subset of the golden
 * dataset (compare `byCategory.assertion` in `getDatasetStatistics`); only
 * the element type is visible in this declaration — confirm against the
 * implementation.
 */
export declare const ASSERTION_CASES: GoldenTestCase[];
/**
 * Get comprehensive statistics about the golden dataset.
 *
 * Takes no arguments and returns a plain object of counts. This declaration
 * fixes only the field names and types; the per-field descriptions below are
 * inferred from those names and should be confirmed against the
 * implementation.
 *
 * @returns An object with a version string, overall counts, per-category and
 *   per-tag breakdowns, and a nested `expanded` summary.
 */
export declare function getDatasetStatistics(): {
    // Dataset version string (presumably DATASET_VERSION — confirm).
    version: string;
    // Count of all cases (presumably core + expanded — confirm).
    totalCases: number;
    // Count of cases in the core set.
    coreCases: number;
    // Count of cases in the expanded set.
    expandedCases: number;
    // Count of cases labeled as true positives (per the file header: should match).
    truePositives: number;
    // Count of cases labeled as true negatives (per the file header: should not match).
    trueNegatives: number;
    // Case counts for a fixed set of four categories.
    byCategory: {
        security: number;
        limitation: number;
        assertion: number;
        edge: number;
    };
    // Case counts keyed by tag name; the set of tags is open-ended (string keys).
    byTag: Record<string, number>;
    // Separate summary for the expanded cases only (presumably — confirm).
    expanded: {
        totalCases: number;
        // Unlike the top-level byCategory, keys here are arbitrary strings.
        byCategory: Record<string, number>;
        byTag: Record<string, number>;
        truePositives: number;
        trueNegatives: number;
    };
};
//# sourceMappingURL=golden-dataset.d.ts.map