/**
 * TurboQuant — WASM SIMD vector compression (3 bits/dim)
 *
 * Architecture:
 *   JS Float32Array -> [Zig WASM + relaxed SIMD] -> compressed bytes
 *
 * The Zig engine (turboquant.wasm) compresses vectors using polar + QJL
 * quantization with Gaussian QR rotation, all SIMD-accelerated.
 *
 * Browser requirements:
 *   - WASM SIMD128: Chrome 91+, Firefox 89+, Safari 16.4+
 *   - WASM Relaxed SIMD: Chrome 114+, Firefox 128+, Safari 18+
 *
 * Usage:
 *   import { TurboQuant } from "turboquant";
 *   const tq = await TurboQuant.init({ dim: 1024, seed: 42 });
 *   const compressed = tq.encode(myFloat32Array);
 *   const score = tq.dot(queryVector, compressed);
 *   tq.destroy();
 */

/**
 * Linear memory plus the `tq_*` entry points the wrapper classes call into.
 * All pointers, lengths and handles cross this boundary as plain numbers.
 * NOTE(review): presumably the export object of turboquant.wasm — confirm
 * against the module's actual export list.
 */
interface TurboQuantExports {
  memory: WebAssembly.Memory;

  // Engine lifecycle.
  tq_engine_create(dim: number, seed: number): number;
  tq_engine_destroy(handle: number): void;

  // Single-vector encode / decode / scoring.
  tq_encode(handle: number, inputPtr: number, dim: number, outLenPtr: number): number;
  tq_decode(handle: number, compPtr: number, compLen: number, outLenPtr: number): number;
  tq_dot(
    handle: number,
    queryPtr: number,
    dim: number,
    compPtr: number,
    compLen: number,
  ): number;
  tq_dot_batch(
    handle: number,
    queryPtr: number,
    dim: number,
    compPtr: number,
    bytesPerVector: number,
    numVectors: number,
    outScoresPtr: number,
  ): void;
  tq_rotate_query(handle: number, queryPtr: number, dim: number, outPtr: number): void;

  // Allocation helpers (byte buffers and f32 buffers).
  tq_alloc(len: number): number;
  tq_free(ptr: number, len: number): void;
  tq_alloc_f32(count: number): number;
  tq_free_f32(ptr: number, count: number): void;

  // Streaming buffer.
  tq_stream_create(engineHandle: number, maxPositions: number): number;
  tq_stream_destroy(handle: number): void;
  tq_stream_append(handle: number, vectorPtr: number, dim: number): number;
  tq_stream_append_batch(
    handle: number,
    vectorsPtr: number,
    dim: number,
    count: number,
  ): number;
  tq_stream_get_compressed(handle: number, outLenPtr: number): number;
  tq_stream_decode_position(
    handle: number,
    position: number,
    outPtr: number,
    dim: number,
  ): number;
  tq_stream_rewind(handle: number, position: number): void;
  tq_stream_length(handle: number): number;
  tq_stream_bytes_per_vector(handle: number): number;
}

export interface TurboQuantConfig {
  /** Vector dimension (must be a power of 2). */
  dim: number;
  /** Deterministic seed for rotation matrix. */
  seed: number;
  /** Optional external WASM source (Response, Promise<Response>, or ArrayBuffer). */
  wasm?: Response | Promise<Response> | BufferSource;
}

export declare class TurboQuant {
  #private;
  readonly dim: number;
  private constructor();

  /**
   * Create a TurboQuant engine.
   *
   * ```ts
   * const tq = await TurboQuant.init({ dim: 1024, seed: 42 });
   * ```
   *
   * @param config - Dimension, seed, and optional external WASM source
   * @returns Promise resolving to the ready-to-use engine
   */
  static init(config: TurboQuantConfig): Promise<TurboQuant>;

  /**
   * Compress a float32 vector (~3 bits/dim).
   * @param vector - Float32Array of length `dim`
   * @returns Compressed bytes
   */
  encode(vector: Float32Array): Uint8Array;

  /**
   * Decompress back to a float32 vector.
   * @param compressed - Bytes from `encode()`
   * @returns Reconstructed Float32Array
   */
  decode(compressed: Uint8Array): Float32Array;

  /**
   * Estimate dot product between a query and compressed vector.
   * Faster than decode + manual dot — operates directly on compressed bytes.
   *
   * @param query - Float32Array of length `dim`
   * @param compressed - Bytes from `encode()`
   * @returns Estimated inner product
   */
  dot(query: Float32Array, compressed: Uint8Array): number;

  /**
   * Batch dot product: compute dot(query, vectors[i]) for all vectors.
   * Much faster than calling dot() in a loop — one WASM call, query rotated once.
   *
   * @param query - Float32Array of length `dim`
   * @param compressedConcat - All compressed vectors concatenated into one Uint8Array
   * @param bytesPerVector - Size of each compressed vector (from encode().length)
   * @returns Promise resolving to a Float32Array of scores, one per vector.
   *   The returned array is reused across calls — copy with .slice() if you need to retain it.
   */
  dotBatch(
    query: Float32Array,
    compressedConcat: Uint8Array,
    bytesPerVector: number,
  ): Promise<Float32Array>;

  /**
   * Rotate a query vector into TQ's internal rotation space.
   * Used by WebGPU path: the rotated query is uploaded as a GPU uniform,
   * then the compute shader computes dot products directly on compressed data.
   */
  rotateQuery(query: Float32Array): Float32Array;

  /**
   * Create a streaming compressed vector buffer.
   * Vectors are TQ-compressed on append, decompressed buffer maintained for readback.
   *
   * @param maxPositions - Initial capacity (grows automatically)
   */
  createStream(maxPositions: number): TQStream;

  /** Release engine resources. Call when done. Safe to call multiple times. */
  destroy(): void;
}

/**
 * Streaming compressed vector buffer. Compress-only storage.
 * Use dotBatch on getCompressed() for scoring — never decompress for search.
 * Use decodePosition() only when you need individual float values.
 */
export declare class TQStream {
  #private;
  readonly dim: number;
  readonly bytesPerVector: number;

  /** @internal — use TurboQuant.createStream() */
  constructor(ex: TurboQuantExports, handle: number, dim: number, bpv: number);

  /** Append a single vector. Compresses and stores. No decompression. */
  append(vector: Float32Array): void;

  /** Append multiple vectors at once. */
  appendBatch(vectors: Float32Array, count?: number): void;

  /** Get full compressed store as a copy. Use with dotBatch for scoring. */
  getCompressed(): Uint8Array;

  /** Decode a single position. Only use when you need individual float values. */
  decodePosition(position: number): Float32Array;

  /** Number of vectors currently stored. */
  get length(): number;

  /** Truncate stream to given position. */
  rewind(position: number): void;

  /** Release resources. */
  destroy(): void;
}

export default TurboQuant;
//# sourceMappingURL=index.d.ts.map