/**
 * Prompt Injection Detection — scans input content for manipulation attempts.
 *
 * Detects patterns in user messages, file contents, web fetches, and knowledge
 * base documents that try to override the agent's instructions.
 *
 * Two attack categories:
 * - Direct: the user explicitly tries to override ("ignore previous instructions")
 * - Indirect: embedded in external content (files, web pages) that the agent reads
 *
 * Detection is pattern-based (fast, no LLM call). It is not exhaustive, but it
 * catches the obvious attacks with low false-positive rates.
 */

/** One rule used to recognise a prompt-injection attempt. */
export interface InjectionPattern {
  /** Unique identifier of the rule. */
  id: string;

  /** Human-readable description of what the rule looks for. */
  description: string;

  /** Regular expression to look for (case-insensitive). */
  pattern: RegExp;

  /** How certain a hit is: low (suspicious), medium (likely), high (definite). */
  severity: 'low' | 'medium' | 'high';

  /** Kind of attack this rule belongs to. */
  category:
    | 'instruction-override'
    | 'role-hijack'
    | 'system-prompt-leak'
    | 'data-exfiltration';
}

/**
 * The built-in set of prompt-injection patterns.
 * Ordered by severity, with high-severity patterns first.
 */
export declare const INJECTION_PATTERNS: InjectionPattern[];

/** Outcome of scanning content for injection attempts. */
export interface InjectionDetectionResult {
  /** Whether any injection was detected. */
  detected: boolean;

  /** Every match that was found. */
  matches: InjectionMatch[];

  /** The highest severity found. */
  maxSeverity: 'none' | 'low' | 'medium' | 'high';

  /** Summary message intended for the user/agent. */
  summary: string;
}

/** A single hit of an injection pattern. */
export interface InjectionMatch {
  patternId: string;
  description: string;
  severity: 'low' | 'medium' | 'high';
  category: string;

  /** The text that matched. */
  matchedText: string;

  /** Where the content came from, if known. */
  source?: string;
}

/**
 * Scan text content for prompt injection patterns.
 *
 * @param content - Text to scan
 * @param source - Optional label for where the content came from (e.g., "file: README.md")
 * @param patterns - Optional custom patterns (defaults to INJECTION_PATTERNS)
 * @returns Detection result with all matches
 */
export declare function detectInjection(
  content: string,
  source?: string,
  patterns?: InjectionPattern[],
): InjectionDetectionResult;

/**
 * Scan multiple content sources and aggregate the results.
 *
 * @param sources - Entries to scan, each pairing the text (`content`) with a `label`
 * @returns A single aggregated detection result
 */
export declare function detectInjectionMultiple(
  sources: { content: string; label: string }[],
): InjectionDetectionResult;