/** * Bloom Filters Implementation * * Implements probabilistic existence checks for query pruning. * Bloom filters are space-efficient data structures that can quickly * determine if an element is definitely NOT in a set. * * Synthesis from r2-reverse-hostname indexing exploration: * - Bloom filters for high-cardinality columns (IDs, emails, etc.) * - 1% false positive rate provides 99% accurate pruning * - Hierarchical filters propagate to parent levels * * @module bloom */ /** * Bloom filter configuration */ export interface BloomFilterConfig { /** Enable bloom filters (default: true) */ enabled: boolean; /** High-cardinality columns to filter */ columns: string[]; /** Target false positive rate (default: 0.01) */ falsePositiveRate: number; } /** * Bloom filter metadata (without bit array) */ export interface BloomFilterMeta { /** Path identifier */ path: string; /** Column name */ columnName: string; /** Number of bits in filter */ numBits: number; /** Number of hash functions */ numHashes: number; /** Number of items added */ itemCount: number; /** Target false positive rate */ falsePositiveRate: number; /** Last update timestamp */ updatedAt: number; } /** * Bloom filter with bit array */ export interface BloomFilter extends BloomFilterMeta { /** Bit array as Uint8Array */ bits: Uint8Array; } /** * Simple bloom filter interface for query pruning * This interface is used by query planners for file pruning */ export interface BloomFilterPruning { /** * Tests if a value might be contained in this bloom filter. * * @param value - Value to test for membership * @returns False if definitely not in set; true if possibly in set */ mightContain(value: unknown): boolean; } /** * Bloom filter row from SQLite */ export interface BloomFilterRow { path: string; column_name: string; filter_data: ArrayBuffer; num_bits: number; num_hashes: number; item_count: number; false_positive_rate: number; updated_at: number; } /** * SQLite database interface for BloomFilterManager */ export interface BloomFilterSQLiteDatabase { exec(sql: string): void; prepare(sql: string): BloomFilterSQLiteStatement; } export interface BloomFilterSQLiteStatement { bind(...params: unknown[]): BloomFilterSQLiteStatement; run(): { changes: number; }; get(): T | undefined; all(): T[]; } export declare const BLOOM_FILTERS_TABLE_SQL = "\n CREATE TABLE IF NOT EXISTS _bloom_filters (\n path TEXT NOT NULL,\n column_name TEXT NOT NULL,\n filter_data BLOB NOT NULL,\n num_bits INTEGER NOT NULL,\n num_hashes INTEGER NOT NULL,\n item_count INTEGER NOT NULL,\n false_positive_rate REAL NOT NULL,\n updated_at INTEGER NOT NULL,\n PRIMARY KEY (path, column_name)\n )\n"; export declare const BLOOM_FILTERS_INDEXES_SQL: string[]; /** * Calculate optimal bloom filter parameters * * @param expectedItems - Expected number of items * @param falsePositiveRate - Target false positive rate (e.g., 0.01 for 1%) * @returns Optimal number of bits and hash functions */ export declare function calculateBloomParams(expectedItems: number, falsePositiveRate: number): { numBits: number; numHashes: number; }; /** * Create an empty bloom filter */ export declare function createBloomFilter(path: string, columnName: string, expectedItems: number, falsePositiveRate: number): BloomFilter; /** * Add an item to a bloom filter */ export declare function bloomAdd(filter: BloomFilter, item: string): void; /** * Check if an item might be in the bloom filter * * @returns true if item might be present, false if definitely not present */ export declare function bloomMightContain(filter: BloomFilter, item: string): boolean; /** * Merge two bloom filters (union) * Both filters must have the same parameters */ export declare function bloomMerge(a: BloomFilter, b: BloomFilter): BloomFilter; /** * Estimate current false positive rate based on fill ratio */ export declare function estimateFalsePositiveRate(filter: BloomFilter): number; /** * Manages bloom filters for query pruning */ export declare class BloomFilterManager { private readonly db; private readonly config; private initialized; /** In-memory cache of bloom filters */ private cache; constructor(db: BloomFilterSQLiteDatabase, config: BloomFilterConfig); /** * Initialize bloom filters table and indexes */ initialize(): void; /** * Get or create a bloom filter for a path/column */ getOrCreate(path: string, columnName: string, expectedItems?: number): BloomFilter; /** * Add an item to a bloom filter */ add(path: string, columnName: string, item: string): void; /** * Add multiple items to a bloom filter */ addMany(path: string, columnName: string, items: string[]): void; /** * Check if an item might exist at a path */ mightContain(path: string, columnName: string, item: string): boolean; /** * Save a bloom filter to the database */ save(filter: BloomFilter): void; /** * Flush all cached filters to database */ flushCache(): void; /** * Update bloom filters from a batch of documents */ updateFromDocuments(path: string, documents: Record[], columns?: string[]): void; /** * Check if a path can be pruned for equality check * * @returns true if path definitely doesn't contain the value */ canPruneEquality(path: string, columnName: string, value: string): boolean; /** * Check if a path can be pruned for IN clause * * @returns true if path definitely doesn't contain any of the values */ canPruneIn(path: string, columnName: string, values: string[]): boolean; /** * Get paths that might contain a value */ getMatchingPaths(pathPrefix: string, columnName: string, value: string): string[]; /** * Merge filters from child paths into parent */ mergeIntoParent(parentPath: string, childPaths: string[]): void; /** * Delete bloom filter for a path/column */ delete(path: string, columnName: string): void; /** * Delete all bloom filters under a path prefix */ deleteUnder(pathPrefix: string): number; /** * Clear the in-memory cache */ clearCache(): void; /** * Get filter statistics */ getStats(path: string, columnName: string): BloomFilterMeta | null; /** * Get nested value from object */ private getNestedValue; } /** * Create and initialize a BloomFilterManager */ export declare function createBloomFilterManager(db: BloomFilterSQLiteDatabase, config: BloomFilterConfig): BloomFilterManager; export declare const DEFAULT_BLOOM_FILTERS_CONFIG: BloomFilterConfig; //# sourceMappingURL=bloom.d.ts.map