/**
 * @license
 * Copyright 2026 Steven Roussey <sroussey@gmail.com>
 * SPDX-License-Identifier: Apache-2.0
 */
import type { ITextIndex, TextFields, TextSearchOptions, TextSearchResult } from "./ITextIndex";
import type { Tokenizer } from "./Tokenizer";
/**
 * Default per-field weights for {@link BM25Index} when indexing
 * {@link ChunkRecord}-shaped data. Tuned for hierarchical chunks — title and
 * section headings are heavier than the body to reflect their navigational
 * value, summaries are slightly above body, parent summaries are de-weighted
 * because they are inherited context rather than direct content.
 *
 * Typed as a read-only map from string key to numeric weight. It is the
 * documented default for {@link BM25IndexOptions.fieldWeights}. The concrete
 * keys and values live in the implementation and are not visible in this
 * declaration.
 */
export declare const DEFAULT_CHUNK_FIELD_WEIGHTS: Readonly<Record<string, number>>;
/**
 * Construction options for {@link BM25Index}. Every property is optional and
 * the options object itself may be omitted when constructing the index.
 */
export interface BM25IndexOptions {
    /** Tokenizer used at index- and query-time. Defaults to {@link createDefaultTokenizer}. */
    readonly tokenizer?: Tokenizer;
    /**
     * Per-field weight map. Fields not in this map are ignored at index time.
     * Defaults to {@link DEFAULT_CHUNK_FIELD_WEIGHTS}.
     */
    readonly fieldWeights?: Readonly<Record<string, number>>;
    /**
     * BM25 term-saturation parameter. Typical values 1.2–2.0.
     *
     * This is the `k1` term in the documented scoring formula
     * `score(t,d) = idf(t) * tilde_tf(t,d) / (k1 + tilde_tf(t,d))`.
     * The default used when omitted is not visible in this declaration.
     */
    readonly k1?: number;
    /**
     * BM25 length-normalisation parameter. Typical values 0.5–0.75.
     *
     * This is the `b` term in the documented per-field normaliser
     * `(1 - b + b * len_f(d) / avgLen_f)`.
     * The default used when omitted is not visible in this declaration.
     */
    readonly b?: number;
}
/**
 * JSON-serialisable snapshot of a {@link BM25Index}, as returned by
 * `BM25Index.toJSON()`. Not exported from this module.
 *
 * The tokenizer is not part of this shape; only plain numbers, strings,
 * arrays and string-keyed records appear here.
 *
 * NOTE(review): the meaning of the string keys in the nested records below
 * (field name vs. term vs. chunk id, and their nesting order) is not
 * established by this declaration — confirm against the implementation
 * before relying on it.
 */
interface SerialisedBM25State {
    /** Format version tag; the only value this type admits is the literal `1`. */
    readonly version: 1;
    /** BM25 term-saturation parameter captured at serialisation time. */
    readonly k1: number;
    /** BM25 length-normalisation parameter captured at serialisation time. */
    readonly b: number;
    /** Weight map captured at serialisation time (presumably keyed by field name). */
    readonly fieldWeights: Record<string, number>;
    /**
     * Aggregate counters per key (presumably per field): a document count and
     * a total length. Presumably these are the inputs to the average field
     * length used in scoring — confirm.
     */
    readonly fieldStats: Record<string, {
        docCount: number;
        totalLength: number;
    }>;
    /**
     * Two-level string-keyed map whose leaves are posting lists. Each posting
     * pairs a `chunkId` with a numeric `tf` (presumably the raw term
     * frequency for that chunk).
     */
    readonly postings: Record<string, Record<string, Array<{
        chunkId: string;
        tf: number;
    }>>>;
    /**
     * Two-level string-keyed map to a number (presumably the token count of
     * one field of one chunk — confirm the key order).
     */
    readonly docLengths: Record<string, Record<string, number>>;
    /** String-to-string map (by its name, chunk id to owning document id). */
    readonly chunkToDoc: Record<string, string>;
}
/**
 * In-memory BM25F text index. State is JSON-serialisable via
 * {@link toJSON} / {@link fromJSON}. The serialised state captures all
 * scoring inputs — `k1`, `b`, `fieldWeights`, postings, and per-document
 * stats — so a `fromJSON` round-trip reproduces search results exactly. The
 * tokenizer is *not* serialised; callers must restore an index configured
 * with the same tokenizer that produced the state, otherwise query
 * tokenisation will diverge from indexed terms.
 *
 * Scoring formula (Lucene-style BM25F):
 *
 * ```
 * idf(t)        = ln(1 + (N - df(t) + 0.5) / (df(t) + 0.5))
 * tilde_tf(t,d) = sum_f weight(f) * tf(t,d,f) / (1 - b + b * len_f(d) / avgLen_f)
 * score(t,d)    = idf(t) * tilde_tf(t,d) / (k1 + tilde_tf(t,d))
 * ```
 *
 * Document frequency `df(t)` is the number of distinct chunks containing `t`
 * in *any* field. `N` is the total number of chunks. `tf(t,d,f)` is raw
 * term frequency in field `f` of chunk `d`. `len_f(d)` is the field length
 * (token count) for that chunk, and `avgLen_f` is the average length over
 * chunks that have field `f` populated.
 */
export declare class BM25Index implements ITextIndex {
    /** Tokenizer for this index; the only private member declared `readonly`. Not serialised. */
    private readonly tokenizer;
    /**
     * Scoring parameters. Declared without `readonly`, presumably so that
     * `fromJSON` can replace them with the serialised values — confirm.
     */
    private fieldWeights;
    private k1;
    private b;
    /**
     * Internal state sharing names with the properties of the serialised
     * state returned by `toJSON`. The in-memory representation is not visible
     * in this declaration.
     */
    private postings;
    private docLengths;
    private fieldStats;
    private chunkToDoc;
    /**
     * Internal state with no same-named property in the serialised shape.
     * NOTE(review): presumably derived lookups (document-to-chunks, per-chunk
     * postings, per-term document frequency) — confirm in the implementation.
     */
    private docToChunks;
    private chunkPostings;
    private termDf;
    /**
     * @param options Optional configuration; see {@link BM25IndexOptions}.
     */
    constructor(options?: BM25IndexOptions);
    /**
     * @returns A count for the index — presumably the number of indexed
     * chunks (`N` in the scoring formula); confirm in the implementation.
     */
    size(): number;
    /**
     * Adds one chunk's text fields to the index.
     *
     * Behaviour when `chunkId` is already present is not visible here.
     *
     * @param chunkId Identifier of the chunk being indexed.
     * @param docId Identifier of the document the chunk belongs to.
     * @param fields Text fields of the chunk. Per {@link BM25IndexOptions},
     * fields absent from the weight map are ignored at index time.
     */
    add(chunkId: string, docId: string, fields: TextFields): void;
    /**
     * Removes a single chunk by id. Behaviour for an unknown id is not
     * visible here.
     *
     * @param chunkId Identifier of the chunk to remove.
     */
    remove(chunkId: string): void;
    /**
     * Removes by document id — presumably every chunk previously added with
     * this `docId`; confirm in the implementation.
     *
     * @param docId Identifier of the document whose chunks are removed.
     */
    removeByDocument(docId: string): void;
    /** Clears the index — presumably discarding all indexed state; confirm. */
    clear(): void;
    /**
     * Runs a text query against the index.
     *
     * @param query Raw query string. Per the class notes, it is tokenised at
     * query time with the index's tokenizer.
     * @param options Optional search options; see {@link TextSearchOptions}.
     * @returns An array of results. Ordering and limits are not visible in
     * this declaration.
     */
    search(query: string, options?: TextSearchOptions): TextSearchResult[];
    /**
     * @returns A JSON-serialisable snapshot of the index state. Per the class
     * notes, it captures all scoring inputs but not the tokenizer.
     */
    toJSON(): SerialisedBM25State;
    /**
     * Restores state into this instance from a previously serialised
     * snapshot. Returns nothing.
     *
     * The parameter is typed `unknown`. Whether and how the input is
     * validated, and what happens on malformed input, is not visible here.
     *
     * @param state Value expected to be the output of {@link toJSON}, produced
     * by an index using the same tokenizer as this one.
     */
    fromJSON(state: unknown): void;
}
export {};
//# sourceMappingURL=BM25Index.d.ts.map