/**
 * Sentori Benchmark — Dual-Metric Guardrail Evaluation
 *
 * Measures per-language, per-model:
 *   unsafe_pass_rate: harmful prompts that passed (false negative rate)
 *   safe_block_rate:  safe prompts that were blocked (false positive rate)
 */
/**
 * Label attached to a benchmark prompt: either `'unsafe'` or `'safe'`.
 */
export type PromptLabel =
    | 'unsafe'
    | 'safe';
/**
 * Provider identifier: `'openai'`, `'anthropic'`, or `'unknown'`.
 *
 * NOTE(review): `'unknown'` is presumably the fallback for models that match
 * no known provider — confirm against the code that produces this value.
 */
export type ModelProvider =
    | 'openai'
    | 'anthropic'
    | 'unknown';
/**
 * A single labelled prompt belonging to a benchmark dataset.
 */
export interface BenchmarkPrompt {
    /** Identifier of the prompt (uniqueness is not enforced by this type). */
    id: string;
    /** The prompt text itself. */
    text: string;
    /** Whether the prompt is labelled `'unsafe'` or `'safe'`. */
    label: PromptLabel;
    /** Language of the prompt; the expected code format is not specified here. */
    lang: string;
    /** Optional category string for the prompt. */
    category?: string;
}
/**
 * A versioned collection of benchmark prompts.
 */
export interface BenchmarkDataset {
    /** Version string of the dataset; the versioning scheme is not specified here. */
    version: string;
    /** The prompts contained in this dataset. */
    prompts: Array<BenchmarkPrompt>;
}
/**
 * Outcome recorded for one benchmark prompt.
 */
export interface PromptResult {
    /** The prompt this result belongs to. */
    prompt: BenchmarkPrompt;
    /** Whether the prompt was recorded as blocked. */
    blocked: boolean;
    /** Response text recorded for the prompt. */
    response: string;
    /** Optional error string; presumably set when evaluation failed (confirm with the producer). */
    error?: string;
    /** Duration in milliseconds (per the field name); the timed interval is not specified here. */
    durationMs: number;
}
/**
 * Guardrail statistics for a single language.
 *
 * The two rates are the dual metrics of the benchmark: the false-negative
 * rate on unsafe prompts and the false-positive rate on safe prompts.
 * NOTE(review): whether rates are fractions or percentages is not visible
 * in this file — confirm against the code that computes them.
 */
export interface LanguageStats {
    /** Language these statistics describe. */
    lang: string;

    /** Number of prompts labelled unsafe. */
    unsafe_total: number;
    /** Number of unsafe prompts that passed (were not blocked). */
    unsafe_pass: number;
    /** Rate of harmful prompts that passed (false negative rate). */
    unsafe_pass_rate: number;

    /** Number of prompts labelled safe. */
    safe_total: number;
    /** Number of safe prompts that were blocked. */
    safe_block: number;
    /** Rate of safe prompts that were blocked (false positive rate). */
    safe_block_rate: number;
}
/**
 * Report for one benchmark run: per-language statistics plus overall figures.
 */
export interface BenchmarkReport {
    /** Model the report refers to. */
    model: string;
    /** Timestamp string for the report; its format is not specified here. */
    timestamp: string;
    /** Languages listed in the report. */
    langs: Array<string>;
    /** Statistics entries, one `LanguageStats` object each. */
    stats: Array<LanguageStats>;
    /** Figures for the run as a whole. */
    overall: {
        /** Total number of prompts. */
        total_prompts: number;
        /** Duration in milliseconds (per the field name). */
        duration_ms: number;
        /** Overall rate of harmful prompts that passed (false negative rate). */
        unsafe_pass_rate: number;
        /** Overall rate of safe prompts that were blocked (false positive rate). */
        safe_block_rate: number;
    };
}
/**
 * Options for a benchmark run.
 */
export interface BenchmarkOptions {
    /** Model to benchmark. */
    model: string;
    /** Languages to include. */
    langs: Array<string>;
    /** Optional API key string. */
    apiKey?: string;
    /** Optional dry-run flag; its exact effect is defined by the consumer of these options. */
    dryRun?: boolean;
    /** Optional verbosity flag; its exact effect is defined by the consumer of these options. */
    verbose?: boolean;
}
//# sourceMappingURL=types.d.ts.map