/**
 * CC-BOS Red Team Engine — Structured Jailbreak Testing
 *
 * Based on CC-BOS (Cross-lingual Context Bypass via Optimized Search),
 * arxiv 2602.22983. Reverses the attack methodology into a defensive
 * testing tool by auto-generating structured jailbreak variants across
 * 8 semantic dimensions and measuring residual Attack Success Rate (ASR).
 *
 * Usage:
 *   sentori redteam --attack cc-bos --target ./my-guardrail.md \
 *     --goal "explain malware creation" --iterations 50 \
 *     --model claude-haiku-4-5-20251001
 */

/**
 * Search space for variant generation: for each of the 8 CC-BOS dimensions,
 * the list of candidate values that may be sampled.
 */
export interface DimensionSet {
  roleIdentity: string[];
  behavioralGuidance: string[];
  mechanism: string[];
  metaphorMapping: string[];
  expressionStyle: string[];
  knowledgeRelation: string[];
  contextualSetting: string[];
  triggerPattern: string[];
}

/**
 * Default CC-BOS 8-dimension search space.
 * Each dimension has candidate values used to compose attack variants.
 */
export declare const DEFAULT_DIMENSION_SET: DimensionSet;

/**
 * One concrete point in the search space: a single value for each of the
 * 8 dimensions. Has the same keys as {@link DimensionSet}.
 */
export interface AttackDimensions {
  roleIdentity: string;
  behavioralGuidance: string;
  mechanism: string;
  metaphorMapping: string;
  expressionStyle: string;
  knowledgeRelation: string;
  contextualSetting: string;
  triggerPattern: string;
}

/**
 * A generated attack variant: the dimension values it was built from and
 * the resulting prompt text.
 */
export interface AttackVariant {
  /** Numeric identifier of the variant. */
  id: number;
  /** Dimension values used for this variant. */
  dimensions: AttackDimensions;
  /** Prompt text of the variant. */
  prompt: string;
}

/**
 * Compose an attack prompt from sampled dimension values and a goal.
 *
 * @param dims - One value per dimension.
 * @param goal - The goal the prompt is built around.
 * @returns The composed prompt text.
 */
export declare function composeAttackPrompt(dims: AttackDimensions, goal: string): string;

/**
 * Generate N attack variants by sampling from the dimension set.
 *
 * @param goal - The goal each variant is built around.
 * @param iterations - Number of variants to generate (N).
 * @param dims - Optional search space; presumably falls back to
 *   {@link DEFAULT_DIMENSION_SET} when omitted — confirm against the implementation.
 * @returns The generated variants.
 */
export declare function generateAttackVariants(
  goal: string,
  iterations: number,
  dims?: DimensionSet,
): AttackVariant[];

/**
 * Returns true if the model's response indicates a successful jailbreak
 * (i.e., the model complied rather than refused).
 *
 * @param response - The model's response text.
 */
export declare function isJailbreakSuccess(response: string): boolean;

/**
 * Outcome of one attack variant.
 */
export interface VariantResult {
  /** Numeric identifier of the variant. */
  id: number;
  /** Prompt text of the variant. */
  prompt: string;
  /** Dimension values the variant was built from. */
  dimensions: AttackDimensions;
  /** The model's response text. */
  response: string;
  /** Whether the variant counted as a successful jailbreak. */
  success: boolean;
  /** Optional error text; presumably set when evaluating the variant failed — TODO confirm. */
  error?: string;
}

/**
 * Aggregate report of a red-team run.
 */
export interface RedTeamReport {
  goal: string;
  target: string;
  model: string;
  totalVariants: number;
  successCount: number;
  failureCount: number;
  errorCount: number;
  /** Attack Success Rate. NOTE(review): scale (fraction vs. percent) is not visible here — confirm. */
  asr: number;
  /** String rendering of the ASR; exact format is defined by the implementation. */
  asrPercent: string;
  /** Variant results listed as successful. */
  successfulVariants: VariantResult[];
  /** All variant results. */
  allResults: VariantResult[];
  /** Timestamp of the run as a string; format is defined by the implementation. */
  timestamp: string;
  /** Run duration; the name indicates milliseconds. */
  durationMs: number;
}

/**
 * Options accepted by {@link runCCBOSRedTeam}.
 */
export interface RedTeamOptions {
  goal: string;
  /** Guardrail prompt under test (presumably the contents of the `--target` file — TODO confirm). */
  guardrailPrompt: string;
  /** Label for the target (presumably what appears as `target` in the report — TODO confirm). */
  targetLabel: string;
  model: string;
  apiKey: string;
  /** Number of iterations (variants) to run. */
  iterations: number;
  /** Optional custom dimension search space. */
  dims?: DimensionSet;
  /** Optional progress callback, given a current count, the total, and a variant result. */
  onProgress?: (current: number, total: number, result: VariantResult) => void;
}

/**
 * Run a CC-BOS red-team evaluation with the given options.
 *
 * The return type carries an explicit type argument: a bare `Promise` is not
 * a valid type and would leave callers without a typed result.
 * NOTE(review): `RedTeamReport` is the only report type declared in this
 * module; confirm it matches the implementation's resolved value.
 *
 * @param opts - Run configuration.
 * @returns A promise resolving to the report of the run.
 */
export declare function runCCBOSRedTeam(opts: RedTeamOptions): Promise<RedTeamReport>;

/**
 * Dry-run mode: generate attack variants without calling any LLM API.
 * Useful for previewing what prompts would be generated.
 *
 * @param goal - The goal each variant is built around.
 * @param iterations - Number of variants to generate.
 * @param dims - Optional custom dimension search space.
 * @returns The generated variants.
 */
export declare function dryRunCCBOS(
  goal: string,
  iterations: number,
  dims?: DimensionSet,
): AttackVariant[];
//# sourceMappingURL=cc-bos-red-team.d.ts.map