/**
 * Semantic comparison utilities for drift detection.
 *
 * This module provides robust comparison that handles LLM non-determinism
 * by normalizing text and extracting structured facts rather than comparing
 * raw prose strings.
 */
import type { ChangeConfidence } from './types.js';

/**
 * Security finding categories (normalized).
 * These map to common vulnerability patterns.
 * Extended to include additional security categories like XXE, timing attacks, etc.
 */
export declare const SECURITY_CATEGORIES: readonly [
    "path_traversal",
    "command_injection",
    "sql_injection",
    "xss",
    "xxe",
    "ssrf",
    "deserialization",
    "timing_attack",
    "race_condition",
    "file_upload",
    "access_control",
    "authentication",
    "authorization",
    "information_disclosure",
    "denial_of_service",
    "input_validation",
    "output_encoding",
    "cryptography",
    "session_management",
    "error_handling",
    "logging",
    "configuration",
    "prototype_pollution",
    "open_redirect",
    "clickjacking",
    "cors",
    "csp_bypass",
    "other"
];

/** Union of every string literal listed in {@link SECURITY_CATEGORIES}. */
export type SecurityCategory = typeof SECURITY_CATEGORIES[number];

/**
 * Limitation categories (normalized).
 */
export declare const LIMITATION_CATEGORIES: readonly [
    "size_limit",
    "rate_limit",
    "timeout",
    "encoding",
    "format",
    "permission",
    "platform",
    "dependency",
    "concurrency",
    "memory",
    "network",
    "other"
];

/** Union of every string literal listed in {@link LIMITATION_CATEGORIES}. */
export type LimitationCategory = typeof LIMITATION_CATEGORIES[number];

/**
 * Structured security finding.
 */
export interface StructuredSecurityFinding {
    /** Normalized category of the finding. */
    category: SecurityCategory;
    /** Name of the tool the finding refers to. */
    tool: string;
    /** Severity level of the finding. */
    severity: 'low' | 'medium' | 'high' | 'critical';
    /** Free-text description of the finding. */
    description: string;
}

/**
 * Structured limitation.
 */
export interface StructuredLimitation {
    /** Normalized category of the limitation. */
    category: LimitationCategory;
    /** Name of the tool the limitation refers to. */
    tool: string;
    /** Optional constraint text associated with the limitation. */
    constraint?: string;
    /** Free-text description of the limitation. */
    description: string;
}

/**
 * Normalized assertion for comparison.
 */
export interface NormalizedAssertion {
    /** Name of the tool the assertion refers to. */
    tool: string;
    /** Aspect of the tool the assertion is about. */
    aspect: string;
    /** Normalized fingerprint used when comparing assertions. */
    fingerprint: string;
    /** Free-text description of the assertion. */
    description: string;
    /** Polarity flag of the assertion. */
    isPositive: boolean;
}

/**
 * Extract security category from text.
 *
 * @param text - Text to classify.
 * @returns One of the {@link SECURITY_CATEGORIES} values.
 */
export declare function extractSecurityCategory(text: string): SecurityCategory;

/**
 * Extract limitation category from text.
 *
 * @param text - Text to classify.
 * @returns One of the {@link LIMITATION_CATEGORIES} values.
 */
export declare function extractLimitationCategory(text: string): LimitationCategory;

/**
 * Extract severity from text.
 * Now uses negation-aware extraction to handle phrases like "not critical".
 *
 * @param text - Text to inspect.
 * @returns The extracted severity level.
 */
export declare function extractSeverity(text: string): 'low' | 'medium' | 'high' | 'critical';

/**
 * Create a normalized fingerprint from assertion text.
 * This extracts key semantic elements for comparison.
 *
 * For assertions about limitations or security, we primarily use
 * the category to ensure semantic equivalence (e.g., "10MB limit" and
 * "files larger than 10 megabytes" both get category 'size_limit').
 *
 * @param tool - Name of the tool the assertion refers to.
 * @param aspect - Aspect of the tool the assertion is about.
 * @param text - Raw assertion text.
 * @returns The fingerprint string.
 */
export declare function createFingerprint(tool: string, aspect: string, text: string): string;

/**
 * Convert raw security notes to structured findings.
 *
 * @param tool - Name of the tool the notes refer to.
 * @param notes - Raw security note strings.
 * @returns The structured findings.
 */
export declare function structureSecurityNotes(tool: string, notes: string[]): StructuredSecurityFinding[];

/**
 * Convert raw limitations to structured limitations.
 *
 * @param tool - Name of the tool the limitations refer to.
 * @param limitations - Raw limitation strings.
 * @returns The structured limitations.
 */
export declare function structureLimitations(tool: string, limitations: string[]): StructuredLimitation[];

/**
 * Compare two structured security findings.
 * Returns true if they represent the same finding.
 */
export declare function securityFindingsMatch(a: StructuredSecurityFinding, b: StructuredSecurityFinding): boolean;

/**
 * Compare two structured security findings with confidence.
 * Returns a confidence score indicating how similar they are.
 *
 * ENHANCED (v1.1.0): Uses multi-category detection and relationship scoring
 * to improve recall. Categories that are related (e.g., authentication and
 * authorization) now get partial credit instead of 0%.
 *
 * ENHANCED (v1.2.0): Added qualifier comparison to prevent false positives from:
 * - Negation mismatches ("Critical vulnerability found" vs "Not a critical vulnerability")
 * - Database type mismatches (SQL injection vs NoSQL injection)
 *
 * ENHANCED (v1.3.0): Improved recall by:
 * - Adding synonym-based similarity detection
 * - Relaxing severity mismatch (no longer blocks matching)
 * - Lowering thresholds when shared security terms are found
 * - Better handling of abbreviations (SQLi, XSS, SSRF)
 */
export declare function securityFindingsMatchWithConfidence(a: StructuredSecurityFinding, b: StructuredSecurityFinding): {
    matches: boolean;
    confidence: ChangeConfidence;
};

/**
 * Compare two structured limitations.
 * Returns true if they represent the same limitation.
 */
export declare function limitationsMatch(a: StructuredLimitation, b: StructuredLimitation): boolean;

/**
 * Compare two structured limitations with confidence.
 * Returns a confidence score indicating how similar they are.
 *
 * ENHANCED (v1.1.0): Uses multi-category detection and relationship scoring
 * to improve recall for limitation paraphrases.
 *
 * ENHANCED (v1.2.0): Added qualifier comparison to prevent false positives from:
 * - Direction mismatches (upload limit vs download limit)
 * - Timeout type mismatches (connection timeout vs read timeout)
 * - Rate time unit mismatches (per minute vs per hour)
 *
 * ENHANCED (v1.3.0): Improved recall by:
 * - Adding synonym-based similarity for limitation descriptions
 * - Time expression normalization (30s = 30 seconds)
 * - Relaxed matching thresholds while maintaining constraint validation
 *
 * IMPORTANT: Two limitations with the same category but significantly different
 * constraint values (e.g., 10MB vs 100MB) are NOT considered matching.
 */
export declare function limitationsMatchWithConfidence(a: StructuredLimitation, b: StructuredLimitation): {
    matches: boolean;
    confidence: ChangeConfidence;
};

/**
 * Compare two normalized assertions.
 * Returns true if they have the same fingerprint.
 */
export declare function assertionsMatch(a: NormalizedAssertion, b: NormalizedAssertion): boolean;

/**
 * Compare two normalized assertions with confidence.
 * Returns a confidence score indicating how similar they are.
 *
 * ENHANCED (v1.2.0): Added qualifier comparison to prevent false positives from:
 * - Opposite terms (synchronous vs asynchronous, enabled vs disabled)
 * - Status code differences (200 vs 201)
 *
 * ENHANCED (v1.3.0): Improved recall by:
 * - Adding synonym-based similarity for behavioral descriptions
 * - Relaxed fingerprint matching (partial matches now count)
 * - Better polarity detection that handles paraphrasing
 * - Lower thresholds while blocking only clear semantic conflicts
 */
export declare function assertionsMatchWithConfidence(a: NormalizedAssertion, b: NormalizedAssertion): {
    matches: boolean;
    confidence: ChangeConfidence;
};

/**
 * Find matching item in array using matcher function.
 *
 * @typeParam T - Element type shared by `item`, `array` and `matcher`.
 * @param item - Item to look for.
 * @param array - Candidates to search.
 * @param matcher - Predicate deciding whether two items match.
 * @returns A matching element of type `T`, or `undefined`.
 */
export declare function findMatch<T>(item: T, array: T[], matcher: (a: T, b: T) => boolean): T | undefined;

/**
 * Compare two arrays using semantic matching.
 * Returns items that are only in first array (removed) and only in second (added).
 *
 * @typeParam T - Element type of both arrays.
 * @param previous - Items from the earlier version.
 * @param current - Items from the later version.
 * @param matcher - Predicate deciding whether two items match.
 */
export declare function compareArraysSemantic<T>(previous: T[], current: T[], matcher: (a: T, b: T) => boolean): {
    added: T[];
    removed: T[];
};

/**
 * Result of a semantic comparison with confidence.
 *
 * @typeParam T - Type of the items that were compared.
 */
export interface SemanticComparisonResult<T> {
    /** Items in current but not in previous */
    added: Array<{
        item: T;
        confidence: ChangeConfidence;
    }>;
    /** Items in previous but not in current */
    removed: Array<{
        item: T;
        confidence: ChangeConfidence;
    }>;
    /** Items that match between versions */
    matched: Array<{
        previous: T;
        current: T;
        confidence: ChangeConfidence;
    }>;
}

/**
 * Compare two arrays using semantic matching with confidence scores.
 * Returns detailed comparison results including confidence for each item.
 *
 * @typeParam T - Element type of both arrays.
 * @param previous - Items from the earlier version.
 * @param current - Items from the later version.
 * @param matcherWithConfidence - Matcher returning both a match flag and a confidence.
 */
export declare function compareArraysSemanticWithConfidence<T>(previous: T[], current: T[], matcherWithConfidence: (a: T, b: T) => {
    matches: boolean;
    confidence: ChangeConfidence;
}): SemanticComparisonResult<T>;

/**
 * Calculate overall confidence for a semantic comparison operation.
 *
 * @param before - Text from before the change.
 * @param after - Text from after the change.
 * @param categoryMatch - Category-match flag supplied by the caller.
 * @returns The confidence for the comparison.
 */
export declare function calculateComparisonConfidence(before: string, after: string, categoryMatch: boolean): ChangeConfidence;
//# sourceMappingURL=semantic.d.ts.map