/**
 * Sentori Benchmark — Dual-Metric Guardrail Evaluation
 *
 * Type declarations for a benchmark that measures, per language and per model:
 *   - unsafe_pass_rate: harmful prompts that passed (false negative rate)
 *   - safe_block_rate:  safe prompts that were blocked (false positive rate)
 */

/** Ground-truth classification attached to a benchmark prompt. */
export type PromptLabel = 'unsafe' | 'safe';

/** Known model providers; `'unknown'` is the remaining member of the union. */
export type ModelProvider = 'openai' | 'anthropic' | 'unknown';

/** A single labelled prompt in a benchmark dataset. */
export interface BenchmarkPrompt {
  /** Identifier of the prompt. */
  id: string;
  /** Language of the prompt (format not specified here — presumably a language code; confirm against the dataset). */
  lang: string;
  /** Whether the prompt is labelled as unsafe or safe. */
  label: PromptLabel;
  /** The prompt text itself. */
  text: string;
  /** Optional category tag for the prompt. */
  category?: string;
}

/** A versioned collection of benchmark prompts. */
export interface BenchmarkDataset {
  /** Version string of the dataset. */
  version: string;
  /** All prompts contained in the dataset. */
  prompts: BenchmarkPrompt[];
}

/** Outcome recorded for one prompt. */
export interface PromptResult {
  /** The prompt this result belongs to. */
  prompt: BenchmarkPrompt;
  /** Response text recorded for the prompt. */
  response: string;
  /** Whether the prompt was blocked. */
  blocked: boolean;
  /** Duration for this prompt (milliseconds, going by the field name). */
  durationMs: number;
  /** Optional error text (presumably set when evaluating the prompt failed — confirm against the runner). */
  error?: string;
}

/** Counts and rates for a single language. */
export interface LanguageStats {
  /** Language these statistics cover. */
  lang: string;
  /** Number of prompts labelled unsafe. */
  unsafe_total: number;
  /** Number of prompts labelled safe. */
  safe_total: number;
  /** Number of unsafe prompts that passed (false negatives). */
  unsafe_pass: number;
  /** Number of safe prompts that were blocked (false positives). */
  safe_block: number;
  /** False negative rate (NOTE(review): scale — fraction vs. percentage — is not visible here). */
  unsafe_pass_rate: number;
  /** False positive rate (NOTE(review): scale — fraction vs. percentage — is not visible here). */
  safe_block_rate: number;
}

/** Full benchmark report for one model. */
export interface BenchmarkReport {
  /** Model the report was produced for. */
  model: string;
  /** Timestamp of the report (string format not specified here). */
  timestamp: string;
  /** Languages included in the report. */
  langs: string[];
  /** Per-language statistics. */
  stats: LanguageStats[];
  /** Figures summarising the whole run. */
  overall: {
    /** Overall false negative rate. */
    unsafe_pass_rate: number;
    /** Overall false positive rate. */
    safe_block_rate: number;
    /** Total number of prompts. */
    total_prompts: number;
    /** Total duration (milliseconds, going by the field name). */
    duration_ms: number;
  };
}

/** Options accepted by a benchmark run. */
export interface BenchmarkOptions {
  /** Model to benchmark. */
  model: string;
  /** Languages to benchmark. */
  langs: string[];
  /** Optional dry-run flag (presumably skips real model calls — confirm against the runner). */
  dryRun?: boolean;
  /** Optional API key. */
  apiKey?: string;
  /** Optional verbosity flag. */
  verbose?: boolean;
}
//# sourceMappingURL=types.d.ts.map