/**
 * Embedding Engine
 *
 * Implements local vector generation using @huggingface/transformers v3.
 * Supports dual models: BGE-small for code (384 dims) and BGE-base for docs (768 dims).
 * Handles model download on first use and batch processing for efficiency.
 */
import { type ComputeDevice, type DeviceInfo } from './deviceDetection.js';
/**
 * Model name for code embedding.
 * Uses BGE-small for a good balance of quality and speed.
 * Vectors from this model have CODE_EMBEDDING_DIMENSION (384) dimensions.
 */
export declare const CODE_MODEL_NAME = "Xenova/bge-small-en-v1.5";
/**
 * Prompt type for embedding operations.
 * - 'document': Used when indexing content (no prefix for BGE models)
 * - 'query': Used when searching (adds instruction prefix for better retrieval)
 *
 * The embed APIs in this module treat an omitted prompt type as 'document'.
 * Index-time and search-time calls should use the matching type so that
 * query vectors are compared against document vectors as the model expects.
 */
export type PromptType = 'document' | 'query';
/**
 * Model-specific prompt configuration.
 * BGE models benefit from instruction prefixes for queries but not for documents.
 * Based on BGE model documentation: https://huggingface.co/BAAI/bge-small-en-v1.5
 *
 * Each prefix is a plain string that is prepended to the text before embedding;
 * an empty string means "no prefix".
 */
export interface ModelPromptConfig {
    /** Prefix for document/passage embedding (usually empty for BGE) */
    documentPrefix: string;
    /** Prefix for query embedding (instruction for BGE models) */
    queryPrefix: string;
}
/**
 * Prompt configurations for supported embedding models.
 * BGE models use an instruction prefix for queries to improve retrieval quality.
 * Documents are embedded without prefix as per BGE documentation.
 *
 * NOTE(review): presumably keyed by model name (e.g. CODE_MODEL_NAME,
 * DOCS_MODEL_NAME); the declared type only guarantees string keys, so a lookup
 * for an unknown model yields undefined at runtime. Prefer getPromptPrefix(),
 * which documents an empty-string fallback.
 */
export declare const MODEL_PROMPTS: Record<string, ModelPromptConfig>;
/**
 * Dimension of code embedding vectors.
 * BGE-small (CODE_MODEL_NAME) produces 384-dimensional vectors.
 * Vectors of this size are not interchangeable with docs vectors
 * (DOCS_EMBEDDING_DIMENSION, 768).
 */
export declare const CODE_EMBEDDING_DIMENSION = 384;
/**
 * Model name for docs embedding.
 * Uses BGE-base for higher quality on prose content.
 * Vectors from this model have DOCS_EMBEDDING_DIMENSION (768) dimensions.
 */
export declare const DOCS_MODEL_NAME = "Xenova/bge-base-en-v1.5";
/**
 * Dimension of docs embedding vectors.
 * BGE-base (DOCS_MODEL_NAME) produces 768-dimensional vectors.
 * Vectors of this size are not interchangeable with code vectors
 * (CODE_EMBEDDING_DIMENSION, 384).
 */
export declare const DOCS_EMBEDDING_DIMENSION = 768;
/**
 * Model name for the embedding model.
 * Same value as CODE_MODEL_NAME (BGE-small, not MiniLM), chosen for a good
 * balance of quality and speed.
 *
 * @deprecated Use CODE_MODEL_NAME instead. Kept for backward compatibility.
 */
export declare const MODEL_NAME = "Xenova/bge-small-en-v1.5";
/**
 * Dimension of the embedding vectors.
 * Same value as CODE_EMBEDDING_DIMENSION: BGE-small produces 384-dimensional
 * vectors. Does not apply to the docs model, which produces 768 dimensions.
 *
 * @deprecated Use CODE_EMBEDDING_DIMENSION instead. Kept for backward compatibility.
 */
export declare const EMBEDDING_DIMENSION = 384;
/**
 * Batch size (number of texts per batch) for processing multiple texts on CPU.
 * 32 is a good balance between speed and memory usage.
 * See GPU_BATCH_SIZE for the value used with GPU devices.
 */
export declare const BATCH_SIZE = 32;
/**
 * Batch size (number of texts per batch) for processing multiple texts on GPU.
 * GPU can handle larger batches efficiently due to parallelism.
 * See BATCH_SIZE for the value used on CPU.
 */
export declare const GPU_BATCH_SIZE = 64;
/**
 * Result of embedding a single text.
 */
export interface EmbeddingResult {
    /** The original text that was embedded */
    text: string;
    /**
     * The embedding vector. Its length matches the dimension of the model that
     * produced it (384 for the code model, 768 for the docs model).
     */
    vector: number[];
    /** Whether embedding succeeded */
    success: boolean;
}
/**
 * Result of batch embedding operation.
 *
 * Failed texts are omitted rather than represented by placeholder vectors, so
 * `vectors` may be shorter than the input array.
 */
export interface BatchEmbeddingResult {
    /** Successfully embedded vectors in order (skips failures) */
    vectors: number[][];
    /**
     * Indices of texts that successfully embedded.
     * NOTE(review): presumably positions in the input `texts` array, parallel to
     * `vectors` (vectors[i] belongs to texts[successIndices[i]]) — confirm
     * against the implementation.
     */
    successIndices: number[];
    /** Number of embeddings that failed */
    failedCount: number;
}
/**
 * Progress callback for batch embedding operations.
 *
 * @param completed - Progress count so far (presumably number of texts processed — confirm)
 * @param total - Total count the operation will reach (presumably the number of input texts — confirm)
 */
export type EmbeddingProgressCallback = (completed: number, total: number) => void;
/**
 * Progress callback for model download.
 *
 * Receives a single progress object. Only `status` is always present; every
 * other field is optional and must be checked before use.
 *
 * NOTE(review): the field semantics are not documented in this file. They
 * appear to mirror the progress events emitted by the underlying model loader
 * (`loaded`/`total` look like byte counts, `progress` like a percentage) —
 * confirm against the loader's documentation before relying on units.
 */
export type DownloadProgressCallback = (progress: {
    status: string;
    name?: string;
    file?: string;
    progress?: number;
    loaded?: number;
    total?: number;
}) => void;
/**
 * Configuration for the embedding engine.
 *
 * `modelName` and `dimension` must describe the same model: the engine reports
 * `dimension` to callers as the length of the vectors it produces.
 */
export interface EmbeddingEngineConfig {
    /** The model name to use (e.g., 'Xenova/bge-small-en-v1.5') */
    modelName: string;
    /** The dimension of embedding vectors produced by this model (e.g., 384 for BGE-small, 768 for BGE-base) */
    dimension: number;
    /** Human-readable display name for logging */
    displayName: string;
    /**
     * Compute device to use for embedding generation.
     * - 'webgpu': Use GPU acceleration (browser only, requires WebGPU support)
     * - 'dml': Use DirectML GPU acceleration (Windows Node.js only)
     * - 'cpu': Use CPU with WASM backend
     * - undefined: Auto-detect best available device
     */
    device?: ComputeDevice;
}
/**
 * Get the prompt prefix for a given model and prompt type.
 * Falls back to empty string if model is not in the configuration, so the
 * result can always be prepended to the text without a null check.
 *
 * @param modelName - The model name (e.g., 'Xenova/bge-small-en-v1.5')
 * @param promptType - The type of embedding ('document' or 'query')
 * @returns The prefix string to prepend to the text (may be empty)
 */
export declare function getPromptPrefix(modelName: string, promptType: PromptType): string;
/**
 * Default configuration for code embedding.
 * NOTE(review): presumably built from CODE_MODEL_NAME and
 * CODE_EMBEDDING_DIMENSION (BGE-small, 384 dims) — the values are not visible
 * in this declaration.
 */
export declare const CODE_ENGINE_CONFIG: EmbeddingEngineConfig;
/**
 * Default configuration for docs embedding.
 * NOTE(review): presumably built from DOCS_MODEL_NAME and
 * DOCS_EMBEDDING_DIMENSION (BGE-base, 768 dims) — the values are not visible
 * in this declaration.
 */
export declare const DOCS_ENGINE_CONFIG: EmbeddingEngineConfig;
/**
 * Embedding Engine for generating vector embeddings from text.
 *
 * Supports configurable models for different use cases:
 * - Code search: BGE-small (384 dims) - fast and efficient
 * - Docs search: BGE-base (768 dims) - higher quality for prose
 *
 * Typical lifecycle: construct (or obtain a singleton), await initialize(),
 * then call embed()/embedBatch(). Device information is only available after
 * initialization.
 *
 * @example
 * ```typescript
 * // Use the code embedding engine
 * const codeEngine = getCodeEmbeddingEngine();
 * await codeEngine.initialize();
 * const codeVector = await codeEngine.embed('function hello() {}');
 *
 * // Use the docs embedding engine
 * const docsEngine = getDocsEmbeddingEngine();
 * await docsEngine.initialize();
 * const docsVector = await docsEngine.embed('# README');
 * ```
 */
export declare class EmbeddingEngine {
    /** Loaded model pipeline (presumably unset until initialize() succeeds — confirm) */
    private pipeline;
    /** In-flight or completed initialization promise; see initialize() for when it is cleared */
    private initializationPromise;
    /** Engine configuration supplied to the constructor */
    private config;
    /** The compute device being used (set after initialization) */
    private deviceInfo;
    /** Whether a fallback from GPU to CPU occurred */
    private didFallback;
    /** Reason for fallback if one occurred */
    private fallbackReason;
    /**
     * Create a new EmbeddingEngine with the specified configuration.
     * Construction alone does not load the model; call initialize() before embedding.
     * @param config - The configuration for this engine (defaults to code engine config)
     */
    constructor(config?: EmbeddingEngineConfig);
    /**
     * Get the compute device being used by this engine.
     * Returns null if the engine has not been initialized yet.
     * @returns Device info or null if not initialized
     */
    getDeviceInfo(): DeviceInfo | null;
    /**
     * Get the compute device type being used.
     * @returns The active device ('webgpu', 'dml', or 'cpu'), or undefined if not initialized
     */
    getDevice(): ComputeDevice | undefined;
    /**
     * Check if a fallback from GPU to CPU occurred during initialization.
     * @returns True if fallback occurred
     */
    didFallbackToCPU(): boolean;
    /**
     * Get the reason for fallback if one occurred.
     * @returns Fallback reason string or null
     */
    getFallbackReason(): string | null;
    /**
     * Get the effective batch size based on the compute device.
     * GPU (WebGPU or DirectML) can handle larger batches efficiently.
     * @returns Batch size to use (GPU_BATCH_SIZE on GPU, BATCH_SIZE on CPU, per the batch docs below)
     */
    getEffectiveBatchSize(): number;
    /**
     * Check if GPU acceleration is being used.
     * @returns True if using WebGPU or DirectML
     */
    isUsingGPU(): boolean;
    /**
     * Check if an error is a DirectML GPU memory/allocation error.
     * These errors occur when the GPU runs out of memory or fails to allocate resources.
     * @param error - The error to check
     * @returns True if this is a recoverable DirectML error that should trigger CPU fallback
     */
    private isDirectMLAllocationError;
    /**
     * Detect if this is a hybrid GPU system (multiple GPUs from different vendors).
     * On hybrid systems, DirectML may select the wrong GPU (weak integrated instead of discrete).
     * @returns True if multiple GPUs detected (hybrid system)
     */
    private detectHybridGPU;
    /**
     * Fallback to CPU at runtime when GPU fails during embedding.
     * This re-initializes the pipeline with CPU and logs the transition.
     * @returns True if fallback succeeded
     */
    private fallbackToCPUAtRuntime;
    /**
     * Initialize the embedding model.
     *
     * Downloads the model on first use (~90MB to ~/.cache/huggingface/).
     * NOTE(review): the ~90MB figure is presumably for one specific model; the
     * download size likely differs between the code and docs models — confirm.
     * This operation is idempotent - calling it multiple times is safe.
     *
     * BUG #9 FIX: Uses atomic state transitions to ensure consistent state
     * after failures. The initializationPromise is only cleared if the
     * pipeline was not successfully set, allowing proper retry behavior.
     *
     * @param onProgress - Optional callback for download progress
     * @throws MCPError with MODEL_DOWNLOAD_FAILED if download fails
     */
    initialize(onProgress?: DownloadProgressCallback): Promise<void>;
    /**
     * Load the embedding model with GPU support and automatic fallback to CPU.
     *
     * Device selection priority:
     * 1. If config.device is specified, use that device
     * 2. Otherwise, auto-detect the best available device:
     *    - Browser: WebGPU > CPU
     *    - Windows Node.js: DirectML > CPU
     *    - macOS/Linux Node.js: CPU only
     *
     * If GPU initialization fails, automatically falls back to CPU.
     */
    private loadModel;
    /**
     * Initialize the pipeline with a specific device.
     * Handles shader compilation detection for WebGPU and DirectML initialization.
     * Suppresses ONNX runtime warnings that pollute console output.
     */
    private initializePipelineWithDevice;
    /**
     * Check if the model is initialized and ready to use
     * @returns True once the engine can serve embed calls
     */
    isInitialized(): boolean;
    /**
     * Get the model name being used by this engine
     * @returns The model name (e.g., 'Xenova/bge-small-en-v1.5')
     */
    getModelName(): string;
    /**
     * Get the dimension of embedding vectors
     * @returns The embedding dimension for this engine's model
     */
    getDimension(): number;
    /**
     * Get the display name for this engine
     * @returns Human-readable display name (e.g., 'Code (BGE-small)')
     */
    getDisplayName(): string;
    /**
     * Embed a single text string into a vector.
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * @param text - The text to embed
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns A vector with dimensions matching the configured model
     * @throws MCPError with MODEL_DOWNLOAD_FAILED if model not initialized
     */
    embed(text: string, promptType?: PromptType): Promise<number[]>;
    /**
     * Embed multiple texts in batches for efficiency.
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * Batch size is optimized based on compute device:
     * - GPU: 64 texts per batch (higher parallelism)
     * - CPU: 32 texts per batch (balance speed and memory)
     *
     * SECURITY (SMCP-054): This method returns ONLY successful embeddings.
     * Use embedBatchWithStats to get detailed information about which texts
     * succeeded and which failed. Never inserts zero vectors.
     *
     * Because failures are skipped, callers must map results back to inputs via
     * `successIndices` rather than assuming `vectors[i]` corresponds to `texts[i]`.
     *
     * @param texts - Array of texts to embed
     * @param onProgress - Optional callback for progress updates
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns BatchEmbeddingResult with only successful embeddings, their indices, and failure count
     */
    embedBatch(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<BatchEmbeddingResult>;
    /**
     * Embed multiple texts with failure tracking (MCP-13)
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * Unlike embedBatch, this method returns detailed statistics about failures
     * and only includes successfully embedded vectors.
     * NOTE(review): both methods declare the same BatchEmbeddingResult return
     * type, so the difference described here is not visible in the signatures —
     * confirm what distinguishes them in the implementation.
     *
     * Performance logging includes:
     * - Compute device being used (WebGPU/CPU)
     * - Chunks per second throughput
     * - Total processing time
     *
     * @param texts - Array of texts to embed
     * @param onProgress - Optional callback for progress updates
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns BatchEmbeddingResult with vectors, success indices, and failure count
     */
    embedBatchWithStats(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<BatchEmbeddingResult>;
    /**
     * Embed texts and return full results with original text.
     *
     * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
     * - Use 'document' when indexing content (no prefix for BGE models)
     * - Use 'query' when searching (adds instruction prefix for BGE models)
     *
     * SECURITY (SMCP-054): Returns only successful embeddings.
     * Failed embeddings are excluded from results (no zero vectors), so the
     * returned array may be shorter than `texts`.
     *
     * @param texts - Array of texts to embed
     * @param onProgress - Optional callback for progress updates
     * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
     *                     Defaults to 'document' for backward compatibility.
     * @returns Array of EmbeddingResult objects for successful embeddings only
     */
    embedWithResults(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<EmbeddingResult[]>;
}
/**
 * Set the preferred compute device for embedding generation.
 * Must be called BEFORE getCodeEmbeddingEngine() or getDocsEmbeddingEngine()
 * to take effect. If engines are already created, call resetEmbeddingEngine() first.
 *
 * @param device - The device to use: any ComputeDevice value such as 'cpu' or
 *                 'dml' (DirectML GPU), or undefined for auto-detect.
 *                 NOTE(review): the engine config also documents 'webgpu' as a
 *                 browser-only device; it is presumably accepted here too — confirm.
 *
 * @example
 * ```typescript
 * // Force CPU usage (slower but doesn't impact system responsiveness)
 * setPreferredDevice('cpu');
 *
 * // Force DirectML GPU (faster but may cause system stuttering)
 * setPreferredDevice('dml');
 *
 * // Auto-detect best device (default behavior)
 * setPreferredDevice(undefined);
 * ```
 */
export declare function setPreferredDevice(device: ComputeDevice | undefined): void;
/**
 * Get the currently configured preferred device.
 * This is the preference set via setPreferredDevice(); the device an engine
 * actually ended up on is reported by the engine's own getDevice().
 * @returns The preferred device or undefined if auto-detect is enabled
 */
export declare function getPreferredDevice(): ComputeDevice | undefined;
/**
 * Get the singleton code embedding engine instance.
 *
 * Uses BGE-small model (384 dimensions) optimized for code search.
 * Creates a new instance if one doesn't exist; later calls return that same
 * instance until resetCodeEmbeddingEngine() or resetEmbeddingEngine() is called.
 * The instance must be initialized before use via initialize().
 *
 * @returns The singleton EmbeddingEngine instance for code
 */
export declare function getCodeEmbeddingEngine(): EmbeddingEngine;
/**
 * Get the singleton docs embedding engine instance.
 *
 * Uses BGE-base model (768 dimensions) optimized for prose/documentation search.
 * Creates a new instance if one doesn't exist; later calls return that same
 * instance until resetDocsEmbeddingEngine() or resetEmbeddingEngine() is called.
 * The instance must be initialized before use via initialize().
 *
 * @returns The singleton EmbeddingEngine instance for docs
 */
export declare function getDocsEmbeddingEngine(): EmbeddingEngine;
/**
 * Get the singleton embedding engine instance.
 *
 * For backward compatibility, returns the code embedding engine.
 * Creates a new instance if one doesn't exist.
 * The instance must be initialized before use via initialize().
 *
 * @returns The singleton EmbeddingEngine instance (code engine)
 * @deprecated Use getCodeEmbeddingEngine() or getDocsEmbeddingEngine() instead.
 */
export declare function getEmbeddingEngine(): EmbeddingEngine;
/**
 * Reset the code embedding engine singleton instance.
 * The docs engine singleton is handled separately by resetDocsEmbeddingEngine().
 * Mainly used for testing purposes.
 */
export declare function resetCodeEmbeddingEngine(): void;
/**
 * Reset the docs embedding engine singleton instance.
 * The code engine singleton is handled separately by resetCodeEmbeddingEngine().
 * Mainly used for testing purposes.
 */
export declare function resetDocsEmbeddingEngine(): void;
/**
 * Reset all singleton instances (both the code and docs engines).
 * Also the documented way to make a later setPreferredDevice() call take
 * effect once engines have already been created.
 * Mainly used for testing purposes.
 */
export declare function resetEmbeddingEngine(): void;
/**
 * Embed a single text string using the singleton engine.
 * NOTE(review): presumably the code engine, given the documented
 * 384-dimensional result — confirm. For docs content, use
 * getDocsEmbeddingEngine() directly.
 *
 * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
 * - Use 'document' when indexing content (no prefix for BGE models)
 * - Use 'query' when searching (adds instruction prefix for BGE models)
 *
 * @param text - The text to embed
 * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
 *                     Defaults to 'document' for backward compatibility.
 * @returns A 384-dimensional vector
 */
export declare function embedText(text: string, promptType?: PromptType): Promise<number[]>;
/**
 * Embed multiple texts using the singleton engine.
 * NOTE(review): presumably the code engine, matching embedText() — confirm.
 * For docs content, use getDocsEmbeddingEngine() directly.
 *
 * SMCP-096: Supports domain-specific prompts for improved retrieval quality.
 * - Use 'document' when indexing content (no prefix for BGE models)
 * - Use 'query' when searching (adds instruction prefix for BGE models)
 *
 * SECURITY (SMCP-054): Returns BatchEmbeddingResult with only successful embeddings.
 * No zero vectors are inserted for failed embeddings, so map results back to
 * inputs via `successIndices` instead of assuming positional alignment.
 *
 * @param texts - Array of texts to embed
 * @param onProgress - Optional callback for progress updates
 * @param promptType - The type of embedding: 'document' for indexing, 'query' for searching.
 *                     Defaults to 'document' for backward compatibility.
 * @returns BatchEmbeddingResult with successful embeddings, their indices, and failure count
 */
export declare function embedBatch(texts: string[], onProgress?: EmbeddingProgressCallback, promptType?: PromptType): Promise<BatchEmbeddingResult>;
//# sourceMappingURL=embedding.d.ts.map