/**
 * Embedding Engine
 *
 * Implements local vector generation using @huggingface/transformers v3.
 * Supports dual models: BGE-small for code (384 dims) and BGE-base for docs (768 dims).
 * Handles model download on first use and batch processing for efficiency.
 */
import { type ComputeDevice, type DeviceInfo } from './deviceDetection.js';

/**
 * Model name for code embedding.
 * Using BGE-small for a good balance of quality and speed.
 */
export declare const CODE_MODEL_NAME = "Xenova/bge-small-en-v1.5";

/**
 * Prompt type for embedding operations.
 * - 'document': Used when indexing content (no prefix for BGE models)
 * - 'query': Used when searching (adds instruction prefix for better retrieval)
 */
export type PromptType = 'document' | 'query';

/**
 * Model-specific prompt configuration.
 * BGE models benefit from instruction prefixes for queries but not for documents.
 * Based on BGE model documentation: https://huggingface.co/BAAI/bge-small-en-v1.5
 */
export interface ModelPromptConfig {
    /** Prefix for document/passage embedding (usually empty for BGE) */
    documentPrefix: string;
    /** Prefix for query embedding (instruction for BGE models) */
    queryPrefix: string;
}

/**
 * Prompt configurations for supported embedding models, keyed by model name.
 * BGE models use an instruction prefix for queries to improve retrieval quality.
 * Documents are embedded without prefix as per BGE documentation.
 *
 * Lookups for unknown model names go through getPromptPrefix(), which is
 * documented to fall back to an empty prefix.
 */
export declare const MODEL_PROMPTS: Record<string, ModelPromptConfig>;

/**
 * Dimension of code embedding vectors.
 * BGE-small produces 384-dimensional vectors.
 */
export declare const CODE_EMBEDDING_DIMENSION = 384;

/**
 * Model name for docs embedding.
 * Using BGE-base for higher quality on prose content.
 */
export declare const DOCS_MODEL_NAME = "Xenova/bge-base-en-v1.5";

/**
 * Dimension of docs embedding vectors.
 * BGE-base produces 768-dimensional vectors.
 */
export declare const DOCS_EMBEDDING_DIMENSION = 768;

/**
 * @deprecated Use CODE_MODEL_NAME instead. Kept for backward compatibility.
 * Model name for the embedding model.
 * Same value as CODE_MODEL_NAME (BGE-small).
 */
export declare const MODEL_NAME = "Xenova/bge-small-en-v1.5";

/**
 * @deprecated Use CODE_EMBEDDING_DIMENSION instead. Kept for backward compatibility.
 * Dimension of the embedding vectors.
 * Same value as CODE_EMBEDDING_DIMENSION (BGE-small produces 384-dimensional vectors).
 */
export declare const EMBEDDING_DIMENSION = 384;

/**
 * Batch size for processing multiple texts on CPU.
 * 32 is a good balance between speed and memory usage.
 */
export declare const BATCH_SIZE = 32;

/**
 * Batch size for processing multiple texts on GPU.
 * GPU can handle larger batches efficiently due to parallelism.
 */
export declare const GPU_BATCH_SIZE = 64;

/**
 * Result of embedding a single text.
 */
export interface EmbeddingResult {
    /** The original text that was embedded */
    text: string;
    /** The embedding vector; its length matches the dimension of the engine's model (384 for code, 768 for docs) */
    vector: number[];
    /** Whether embedding succeeded */
    success: boolean;
}

/**
 * Result of a batch embedding operation.
 */
export interface BatchEmbeddingResult {
    /** Successfully embedded vectors in order (skips failures) */
    vectors: number[][];
    /** Indices (into the input array) of texts that successfully embedded */
    successIndices: number[];
    /** Number of embeddings that failed */
    failedCount: number;
}

/**
 * Progress callback for batch embedding operations.
 */
export type EmbeddingProgressCallback = (completed: number, total: number) => void;

/**
 * Progress callback for model download.
 */
export type DownloadProgressCallback = (progress: {
    status: string;
    name?: string;
    file?: string;
    progress?: number;
    loaded?: number;
    total?: number;
}) => void;

/**
 * Configuration for the embedding engine.
 */
export interface EmbeddingEngineConfig {
    /** The model name to use (e.g., 'Xenova/bge-small-en-v1.5') */
    modelName: string;
    /** The dimension of embedding vectors produced by this model */
    dimension: number;
    /** Human-readable display name for logging */
    displayName: string;
    /**
     * Compute device to use for embedding generation.
     * - 'webgpu': Use GPU acceleration (browser only, requires WebGPU support)
     * - 'dml': Use DirectML GPU acceleration (Windows Node.js only)
     * - 'cpu': Use CPU with WASM backend
     * - undefined: Auto-detect best available device
     */
    device?: ComputeDevice;
}

/**
 * Get the prompt prefix for a given model and prompt type.
 * Falls back to empty string if model is not in the configuration.
 *
 * @param modelName - The model name (e.g., 'Xenova/bge-small-en-v1.5')
 * @param promptType - The type of embedding ('document' or 'query')
 * @returns The prefix string to prepend to the text
 */
export declare function getPromptPrefix(modelName: string, promptType: PromptType): string;

/**
 * Default configuration for code embedding.
 */
export declare const CODE_ENGINE_CONFIG: EmbeddingEngineConfig;

/**
 * Default configuration for docs embedding.
 */
export declare const DOCS_ENGINE_CONFIG: EmbeddingEngineConfig;

/**
 * Embedding Engine for generating vector embeddings from text.
 *
 * Supports configurable models for different use cases:
 * - Code search: BGE-small (384 dims) - fast and efficient
 * - Docs search: BGE-base (768 dims) - higher quality for prose
 *
 * @example
 * ```typescript
 * // Use the code embedding engine
 * const codeEngine = getCodeEmbeddingEngine();
 * await codeEngine.initialize();
 * const codeVector = await codeEngine.embed('function hello() {}');
 *
 * // Use the docs embedding engine
 * const docsEngine = getDocsEmbeddingEngine();
 * await docsEngine.initialize();
 * const docsVector = await docsEngine.embed('# README');
 * ```
 */
export declare class EmbeddingEngine {
    private pipeline;
    private initializationPromise;
    private config;
    /** The compute device being used (set after initialization) */
    private deviceInfo;
    /** Whether a fallback from GPU to CPU occurred */
    private didFallback;
    /** Reason for fallback if one occurred */
    private fallbackReason;

    /**
     * Create a new EmbeddingEngine with the specified configuration.
     * @param config - The configuration for this engine (defaults to code engine config)
     */
    constructor(config?: EmbeddingEngineConfig);

    /**
     * Get the compute device being used by this engine.
     * Returns null if the engine has not been initialized yet.
     * @returns Device info or null if not initialized
     */
    getDeviceInfo(): DeviceInfo | null;

    /**
     * Get the compute device type being used.
     * @returns The active device ('webgpu', 'dml' or 'cpu'), or undefined if not initialized
     */
    getDevice(): ComputeDevice | undefined;

    /**
     * Check if a fallback from GPU to CPU occurred during initialization.
     * @returns True if fallback occurred
     */
    didFallbackToCPU(): boolean;

    /**
     * Get the reason for fallback if one occurred.
     * @returns Fallback reason string or null
     */
    getFallbackReason(): string | null;

    /**
     * Get the effective batch size based on the compute device.
     * GPU (WebGPU or DirectML) can handle larger batches efficiently.
     * @returns Batch size to use
     */
    getEffectiveBatchSize(): number;

    /**
     * Check if GPU acceleration is being used.
     * @returns True if using WebGPU or DirectML
     */
    isUsingGPU(): boolean;

    /**
     * Check if an error is a DirectML GPU memory/allocation error.
     * These errors occur when the GPU runs out of memory or fails to allocate resources.
     * @param error - The error to check
     * @returns True if this is a recoverable DirectML error that should trigger CPU fallback
     */
    private isDirectMLAllocationError;

    /**
     * Detect if this is a hybrid GPU system (multiple GPUs from different vendors).
     * On hybrid systems, DirectML may select the wrong GPU (weak integrated instead of discrete).
     * @returns True if multiple GPUs detected (hybrid system)
     */
    private detectHybridGPU;

    /**
     * Fallback to CPU at runtime when GPU fails during embedding.
     * This re-initializes the pipeline with CPU and logs the transition.
     * @returns True if fallback succeeded
     */
    private fallbackToCPUAtRuntime;

    /**
     * Initialize the embedding model.
     *
     * Downloads the model on first use (~90MB to ~/.cache/huggingface/).
     * This operation is idempotent - calling it multiple times is safe.
     *
     * BUG #9 FIX: Uses atomic state transitions to ensure consistent state
     * after failures. The initializationPromise is only cleared if the
     * pipeline was not successfully set, allowing proper retry behavior.
     *
     * @param onProgress - Optional callback for download progress
     * @returns A promise that settles once initialization has finished
     * @throws MCPError with MODEL_DOWNLOAD_FAILED if download fails
     */
    initialize(onProgress?: DownloadProgressCallback): Promise<void>;

    /**
     * Load the embedding model with GPU support and automatic fallback to CPU.
     *
     * Device selection priority:
     * 1. If config.device is specified, use that device
     * 2. Otherwise, auto-detect the best available device:
     *    - Browser: WebGPU > CPU
     *    - Windows Node.js: DirectML > CPU
     *    - macOS/Linux Node.js: CPU only
     *
     * If GPU initialization fails, automatically falls back to CPU.
     */
    private loadModel;

    /**
     * Initialize the pipeline with a specific device.
     * Handles shader compilation detection for WebGPU and DirectML initialization.
     * Suppresses ONNX runtime warnings that pollute console output.
     */
    private initializePipelineWithDevice;

    /**
     * Check if the model is initialized and ready to use.
     */
    isInitialized(): boolean;

    /**
     * Get the model name being used by this engine.
     * @returns The model name (e.g., 'Xenova/bge-small-en-v1.5')
     */
    getModelName(): string;

    /**
     * Get the dimension of embedding vectors.
     * @returns The embedding dimension for this engine's model
     */
    getDimension(): number;

    /**
     * Get the display name for this engine.
     * @returns Human-readable display name (e.g., 'Code (BGE-small)')
     */
    getDisplayName(): string;

    /**
     * Embed a single text string into a vector.
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * @param text - The text to embed
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns A vector with dimensions matching the configured model
     * @throws MCPError with MODEL_DOWNLOAD_FAILED if model not initialized
     */
    embed(text: string, promptType?: PromptType): Promise<number[]>;

    /**
     * Embed multiple texts in batches for efficiency.
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * Batch size is optimized based on compute device:
     * - GPU: 64 texts per batch (higher parallelism)
     * - CPU: 32 texts per batch (balance speed and memory)
     *
     * SECURITY (SMCP-054): This method returns ONLY successful embeddings.
     * Use embedBatchWithStats to get detailed information about which texts
     * succeeded and which failed. Never inserts zero vectors.
     *
     * @param texts - Array of texts to embed
     * @param onProgress - Optional callback for progress updates
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns BatchEmbeddingResult with only successful embeddings, their indices, and failure count
     */
    embedBatch(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<BatchEmbeddingResult>;

    /**
     * Embed multiple texts with failure tracking (MCP-13).
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * Returns detailed statistics about failures and only includes
     * successfully embedded vectors.
     * NOTE(review): the declared return type is identical to embedBatch;
     * confirm what differs at runtime (possibly only the logging below).
     *
     * Performance logging includes:
     * - Compute device being used (WebGPU/CPU)
     * - Chunks per second throughput
     * - Total processing time
     *
     * @param texts - Array of texts to embed
     * @param onProgress - Optional callback for progress updates
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns BatchEmbeddingResult with vectors, success indices, and failure count
     */
    embedBatchWithStats(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<BatchEmbeddingResult>;

    /**
     * Embed texts and return full results with original text.
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * SECURITY (SMCP-054): Returns only successful embeddings.
     * Failed embeddings are excluded from results (no zero vectors).
     *
     * @param texts - Array of texts to embed
     * @param onProgress - Optional callback for progress updates
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns Array of EmbeddingResult objects for successful embeddings only
     */
    embedWithResults(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<EmbeddingResult[]>;
}

/**
 * Set the preferred compute device for embedding generation.
 * Must be called BEFORE getCodeEmbeddingEngine() or getDocsEmbeddingEngine()
 * to take effect. If engines are already created, call resetEmbeddingEngine() first.
 *
 * @param device - The device to use: 'cpu', 'dml' (DirectML GPU), 'webgpu'
 *                 (browser GPU), or undefined for auto-detect
 *
 * @example
 * ```typescript
 * // Force CPU usage (slower but doesn't impact system responsiveness)
 * setPreferredDevice('cpu');
 *
 * // Force DirectML GPU (faster but may cause system stuttering)
 * setPreferredDevice('dml');
 *
 * // Auto-detect best device (default behavior)
 * setPreferredDevice(undefined);
 * ```
 */
export declare function setPreferredDevice(device: ComputeDevice | undefined): void;

/**
 * Get the currently configured preferred device.
 * @returns The preferred device or undefined if auto-detect is enabled
 */
export declare function getPreferredDevice(): ComputeDevice | undefined;

/**
 * Get the singleton code embedding engine instance.
 *
 * Uses BGE-small model (384 dimensions) optimized for code search.
 * Creates a new instance if one doesn't exist.
 * The instance must be initialized before use via initialize().
 *
 * @returns The singleton EmbeddingEngine instance for code
 */
export declare function getCodeEmbeddingEngine(): EmbeddingEngine;

/**
 * Get the singleton docs embedding engine instance.
 *
 * Uses BGE-base model (768 dimensions) optimized for prose/documentation search.
 * Creates a new instance if one doesn't exist.
 * The instance must be initialized before use via initialize().
 *
 * @returns The singleton EmbeddingEngine instance for docs
 */
export declare function getDocsEmbeddingEngine(): EmbeddingEngine;

/**
 * @deprecated Use getCodeEmbeddingEngine() or getDocsEmbeddingEngine() instead.
 * Get the singleton embedding engine instance.
 *
 * For backward compatibility, returns the code embedding engine.
 * Creates a new instance if one doesn't exist.
 * The instance must be initialized before use via initialize().
 *
 * @returns The singleton EmbeddingEngine instance (code engine)
 */
export declare function getEmbeddingEngine(): EmbeddingEngine;

/**
 * Reset the code embedding engine singleton instance.
 * Mainly used for testing purposes.
 */
export declare function resetCodeEmbeddingEngine(): void;

/**
 * Reset the docs embedding engine singleton instance.
 * Mainly used for testing purposes.
 */
export declare function resetDocsEmbeddingEngine(): void;

/**
 * Reset all singleton instances.
 * Mainly used for testing purposes.
 */
export declare function resetEmbeddingEngine(): void;

/**
 * Embed a single text string using the singleton engine.
 *
 * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
 * - Use 'document' when indexing content (no prefix for BGE models)
 * - Use 'query' when searching (adds instruction prefix for BGE models)
 *
 * @param text - The text to embed
 * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
 *                     Defaults to 'document' for backward compatibility.
 * @returns A 384-dimensional vector
 */
export declare function embedText(text: string, promptType?: PromptType): Promise<number[]>;

/**
 * Embed multiple texts using the singleton engine.
 *
 * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
 * - Use 'document' when indexing content (no prefix for BGE models)
 * - Use 'query' when searching (adds instruction prefix for BGE models)
 *
 * SECURITY (SMCP-054): Returns BatchEmbeddingResult with only successful embeddings.
 * No zero vectors are inserted for failed embeddings.
 *
 * @param texts - Array of texts to embed
 * @param onProgress - Optional callback for progress updates
 * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
 *                     Defaults to 'document' for backward compatibility.
 * @returns BatchEmbeddingResult with successful embeddings, their indices, and failure count
 */
export declare function embedBatch(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<BatchEmbeddingResult>;
//# sourceMappingURL=embedding.d.ts.map