/**
 * VectorIndex
 *
 * Builds and queries a LanceDB vector index over the call graph functions.
 * Each function is represented by a document combining its signature, docstring,
 * file path, language, and topological metadata (fanIn/fanOut, hub, entry point).
 *
 * Storage: <outputDir>/vector-index/  (LanceDB database folder)
 * Table name: "functions"
 *
 * Usage:
 *   // Build (after openlore analyze --embed)
 *   await VectorIndex.build(outputDir, nodes, signatures, hubIds, entryPointIds, embedSvc);
 *
 *   // Search
 *   const results = await VectorIndex.search(outputDir, "authenticate user with JWT", embedSvc);
 */
import type { FunctionNode } from './call-graph.js';
import type { FileSignatureMap } from './signature-extractor.js';
import type { EmbeddingService } from './embedding-service.js';
/**
 * One indexed function: the descriptive fields that make up its document
 * (signature, docstring, file path, language, topological metadata), the
 * concatenated text that gets embedded, and the embedding vector itself.
 *
 * NOTE(review): `VectorIndex.build` documents that keyword-only (BM25) indexes
 * are written without a `vector` column, yet `vector` is required here —
 * confirm how rows read back from a BM25-only index are typed.
 */
export interface FunctionRecord {
    /** Identifier of the function (presumably the call-graph node id — confirm). */
    id: string;
    /** Function name. */
    name: string;
    /** Path of the file that contains the function. */
    filePath: string;
    /** Name of the enclosing class (presumably empty for free functions — confirm). */
    className: string;
    /** Source language of the function. */
    language: string;
    /** Function signature text. */
    signature: string;
    /** Docstring text associated with the function. */
    docstring: string;
    /** Call-graph fan-in (presumably the number of callers — confirm). */
    fanIn: number;
    /** Call-graph fan-out (presumably the number of callees — confirm). */
    fanOut: number;
    /** Whether the function is flagged as a hub in the call graph. */
    isHub: boolean;
    /** Whether the function is flagged as an entry point. */
    isEntryPoint: boolean;
    /** Concatenated text used for embedding */
    text: string;
    /** Embedding vector */
    vector: number[];
}
/**
 * A single hit returned by `VectorIndex.search`.
 */
export interface SearchResult {
    /** The matched function record, with the embedding vector stripped. */
    record: Omit<FunctionRecord, 'vector'>;
    /**
     * Relevance score.  The direction depends on the search mode, so scores
     * from different modes must not be compared with each other:
     *  - hybrid search (default): RRF score, higher = more relevant.
     *  - dense-only search: cosine distance from LanceDB, lower = more similar.
     */
    score: number;
}
/**
 * Metadata describing how an index was built (the "meta sidecar" referred to
 * by `VectorIndex.build`).
 */
export interface VectorIndexMeta {
    /** False for a keyword-only (BM25) index that was built without embeddings. */
    hasEmbeddings: boolean;
    /** Presumably the embedding vector dimensionality — confirm (value for BM25-only indexes not visible here). */
    dim: number;
    /** Presumably the embedding model identifier; `null` presumably when no embeddings exist — confirm. */
    model: string | null;
    /** Build time as a string (presumably an ISO-8601 timestamp — confirm). */
    builtAt: string;
    /** Version number of the index/meta schema. */
    schemaVersion: number;
}
/**
 * Pre-computed corpus statistics consumed by `bm25Score`; produced by
 * `buildBm25Corpus`.
 */
export interface Bm25Corpus {
    /** Per-document entries; `bm25Score` addresses a document by index (presumably into this array — confirm). */
    docs: Array<{
        /** Identifier of the source record. */
        id: string;
        /** Presumably term → occurrence count within this document — confirm. */
        tfMap: Map<string, number>;
        /** Presumably the document length in tokens — confirm. */
        length: number;
    }>;
    /** term → number of documents containing it */
    df: Map<string, number>;
    /** Presumably the mean of `docs[].length` — confirm. */
    avgLength: number;
    /** Presumably the total number of documents in the corpus — confirm. */
    N: number;
}
/**
 * Splits `text` into a list of string tokens.
 *
 * Declaration only: the tokenization rules (casing, separators, filtering) are
 * defined by the implementation and are not visible in this file.
 *
 * @param text Text to tokenize.
 * @returns The tokens extracted from `text`.
 */
export declare function tokenize(text: string): string[];
/**
 * Builds the BM25 corpus statistics for a set of records.
 *
 * Only `id` and `text` are required of each record, so any object carrying
 * those two fields can be passed.
 *
 * @param records Records to index, identified by `id`, with `text` as content.
 * @returns The corpus statistics as a `Bm25Corpus`.
 */
export declare function buildBm25Corpus(records: Array<{
    id: string;
    text: string;
}>): Bm25Corpus;
/**
 * Computes the BM25 score of one corpus document for a tokenized query.
 *
 * @param corpus Corpus statistics from `buildBm25Corpus`.
 * @param queryTokens Query already split into tokens (presumably via `tokenize` — confirm).
 * @param docIdx Index of the document to score (presumably into `corpus.docs`;
 *   out-of-range behaviour is not visible here — confirm).
 * @returns The score as a number.
 */
export declare function bm25Score(corpus: Bm25Corpus, queryTokens: string[], docIdx: number): number;
/**
 * Test-only: clear in-memory BM25 + LanceDB caches to force cold path.
 *
 * Not intended for production callers; the leading underscore marks it as
 * internal.
 */
export declare function _resetVectorIndexCachesForTesting(): void;
/**
 * Static API for building, incrementally updating, and searching the function
 * index stored under an output directory. All members are static; the class is
 * used as a namespace and is not meant to be instantiated.
 */
export declare class VectorIndex {
    /**
     * Build (or rebuild) the vector index from call graph nodes + signatures.
     *
     * When `incremental` is true and an existing index is found, only functions
     * whose text has changed since the last build are re-embedded.  Unchanged
     * functions reuse their cached vectors.  Pass `incremental` as false (or
     * omit it) to do a full rebuild.
     *
     * Returns a summary of how many functions were embedded vs reused.
     *
     * When `embedSvc` is null, builds a **keyword-only (BM25)** index: the corpus
     * rows are written without a `vector` column and the meta sidecar records
     * `hasEmbeddings: false`. Search then serves BM25 results and never attempts
     * ANN. Re-building a previously-embedded index with `embedSvc=null` downgrades
     * it to BM25-only (overwrite + meta update), and vice-versa upgrades it.
     *
     * @param outputDir Directory under which the index is stored.
     * @param nodes Call graph function nodes to index.
     * @param signatures Extracted per-file signatures.
     * @param hubIds Ids of functions considered hubs.
     * @param entryPointIds Ids of functions considered entry points.
     * @param embedSvc Embedding service, or null for a keyword-only index.
     * @param fileContents Optional map of filePath → source content.
     * @param incremental When true, reuse cached vectors for unchanged functions.
     * @returns Counts of embedded, reused and total functions, plus whether the
     *   resulting index has embeddings.
     */
    static build(outputDir: string, nodes: FunctionNode[], signatures: FileSignatureMap[], hubIds: Set<string>, entryPointIds: Set<string>, embedSvc: EmbeddingService | null, 
    /** Optional map of filePath → source content for skeleton-based body indexing */
    fileContents?: Map<string, string>, 
    /** When true, reuse cached vectors for unchanged functions */
    incremental?: boolean): Promise<{
        embedded: number;
        reused: number;
        total: number;
        hasEmbeddings: boolean;
    }>;
    /**
     * Watch-mode incremental update (Spec 13.1). Replace only the rows for the
     * changed files with freshly-built records — a row-level delete+add instead of
     * the full-corpus read+overwrite that build() performs. The cold build() path
     * is untouched, protecting the `analyze --embed` contract (G7).
     *
     *  - Embedded index: reuse existing vectors for rows whose embed-text is
     *    unchanged (queried for the changed files only, not the whole corpus),
     *    embed just the new/changed texts, then delete the changed files' old rows
     *    and add the rebuilt ones. The LanceDB table handle in _tableCache stays
     *    valid across row ops, so search() does not pay a reconnect.
     *  - BM25-only index: delete+add the changed files' documents and patch the
     *    cached BM25 corpus in place rather than dropping the whole corpus cache.
     *
     * @param outputDir Directory under which the index is stored.
     * @param nodes Call graph function nodes (NOTE(review): whether this must be
     *   the full graph or only the changed files' nodes is not visible here).
     * @param changedFilePaths Paths of the files whose rows are to be replaced.
     * @param signatures Extracted per-file signatures.
     * @param hubIds Ids of functions considered hubs.
     * @param entryPointIds Ids of functions considered entry points.
     * @param embedSvc Embedding service; may be null or undefined (presumably
     *   for the BM25-only case — confirm).
     * @param fileContents Optional map of filePath → source content.
     * @returns The same summary shape as build(): embedded, reused, total,
     *   hasEmbeddings.
     */
    static updateFiles(outputDir: string, nodes: FunctionNode[], changedFilePaths: Set<string>, signatures: FileSignatureMap[], hubIds: Set<string>, entryPointIds: Set<string>, embedSvc: EmbeddingService | null | undefined, fileContents?: Map<string, string>): Promise<{
        embedded: number;
        reused: number;
        total: number;
        hasEmbeddings: boolean;
    }>;
    /**
     * Hybrid search over the index: dense (ANN) + sparse (BM25) merged via RRF.
     *
     * Dense recall fetches top `limit*5` candidates from the vector index.
     * Sparse recall scores the full corpus with BM25 (cached per session).
     * Reciprocal Rank Fusion (RRF) combines both rankings into a single list.
     *
     * Set `hybrid: false` to use dense-only search (original behaviour).
     * Returns up to `limit` results sorted by relevance (highest first).
     *
     * NOTE(review): SearchResult.score is documented as a cosine distance
     * (lower = more similar) in dense-only mode, so "highest first" cannot mean
     * "largest score first" there — confirm the ordering callers should expect.
     *
     * @param outputDir Directory under which the index is stored.
     * @param query Free-text query.
     * @param embedSvc Embedding service; when none is available the BM25-only
     *   path is used.
     * @param opts Optional settings: `limit` caps the number of results;
     *   `language` and `minFanIn` are presumably result filters — confirm;
     *   `hybrid` toggles dense+sparse fusion.
     * @returns The matching records with their scores.
     */
    static search(outputDir: string, query: string, embedSvc: EmbeddingService | null | undefined, opts?: {
        limit?: number;
        language?: string;
        minFanIn?: number;
        /** Enable hybrid dense+sparse retrieval via RRF (default: true when embedSvc available) */
        hybrid?: boolean;
    }): Promise<SearchResult[]>;
    /**
     * BM25-only search: used when no embedding service is available.
     * Scores the full corpus with BM25 and returns the top `limit` results.
     * Private helper; its signature is not emitted in this declaration.
     */
    private static _bm25Only;
    /**
     * Returns true if a vector index has been built for this output directory.
     *
     * Synchronous, unlike the other public methods (its return type is a plain
     * boolean rather than a Promise).
     *
     * @param outputDir Directory under which the index would be stored.
     */
    static exists(outputDir: string): boolean;
}
//# sourceMappingURL=vector-index.d.ts.map