/**
 * Content Deduplicator for MAMA OS Standalone
 *
 * Keeps the same content from being injected into a system prompt more than
 * once. Duplicates are detected with a SHA-256 content hash combined with
 * realpath normalization, which covers symlinks, repeated file paths, and
 * identical content arriving from different sources.
 */

/**
 * A single content item that survived deduplication.
 */
export interface ContentEntry {
  /** File path exactly as the caller supplied it. */
  path: string;
  /** Real path of the file, with symlinks resolved. */
  realPath: string;
  /** The content text itself. */
  content: string;
  /** Semantic distance from the query; a lower value is a closer match. */
  distance: number;
  /** Prefix of the SHA-256 hash of the content. */
  hash: string;
}

/**
 * Collapses content entries that share a SHA-256 content hash, using
 * realpath normalization for their paths.
 *
 * If two entries hash to the same value, the one with the smaller distance
 * (the closer semantic match) wins. As a result, a file reached through
 * several paths (symlinks, for instance) contributes only its most relevant
 * instance.
 *
 * @example
 * ```typescript
 * const dedup = new ContentDeduplicator();
 * dedup.add('/path/to/file.ts', 'const x = 1;', 0.3);
 * dedup.add('/symlink/to/file.ts', 'const x = 1;', 0.5); // duplicate, ignored (higher distance)
 * dedup.add('/other/file.ts', 'const y = 2;', 0.1);
 * const entries = dedup.getEntries(); // 2 entries, sorted by distance
 * ```
 */
export declare class ContentDeduplicator {
  /**
   * Internal record of the content hashes seen so far.
   *
   * NOTE(review): the member's type is erased in this declaration file;
   * check the implementation for the concrete container.
   */
  private seenHashes;

  /**
   * Offer a piece of content to the deduplicator.
   *
   * New (previously unseen) content yields `true`. When the content hash
   * matches an entry that is already tracked, the entry with the closest
   * distance is the one that is kept.
   *
   * NOTE(review): what is returned when a closer duplicate displaces the
   * stored entry cannot be determined from this declaration; confirm it
   * against the implementation.
   *
   * @param path - File path the content came from
   * @param content - Raw content string
   * @param distance - Semantic distance from the query (lower = better)
   * @returns `true` when the content was added as new, `false` for a duplicate
   */
  add(path: string, content: string, distance: number): boolean;

  /**
   * List every unique entry, closest match first.
   *
   * @returns The deduplicated content entries in ascending order of distance
   */
  getEntries(): ContentEntry[];

  /**
   * Return the deduplicator to its initial state so it can be reused.
   * All tracked hashes and entries are cleared.
   */
  reset(): void;

  /**
   * Compute the SHA-256 hash of raw content and keep only its first
   * 16 hex characters, for efficiency.
   *
   * 16 hex characters carry 64 bits, which is enough to avoid collisions
   * for the amount of content typically injected into a system prompt
   * (fewer than 10,000 entries).
   *
   * @param content - Raw content string to hash
   * @returns The first 16 hex characters of the SHA-256 digest
   */
  private hashContent;

  /**
   * Resolve a file's real path, following symlinks.
   *
   * If realpath resolution fails (for example, the file does not exist yet
   * or permission is denied), the original path is returned instead.
   *
   * @param filePath - File path to resolve
   * @returns The resolved real path, or the original path on error
   */
  private safeRealpath;
}
//# sourceMappingURL=content-dedup.d.ts.map