/**
 * BM25-based lexical full-text ranking over artifact bodies.
 *
 * Per node, the artifact index keeps only metadata and a 500-char summary
 * (see `MetadataEntry`), which makes `aiwg index query` metadata-scoped.
 * This module contributes a lexical full-text vector on top of that:
 * `aiwg index query "..." --fulltext` reads the source bodies of the
 * candidate nodes and ranks them with Okapi BM25.
 *
 * It is intentionally separate from the semantic/embedding vector
 * (`embedding-index.ts`, `--semantic`). Full-text provides lexical recall
 * (exact term matches in the body); semantic is conceptual. The two are
 * composable.
 *
 * Nothing is persisted as an inverted index; candidate bodies are read when
 * the query runs. That keeps regenerable full-text bloat out of the on-disk
 * index (the anti-bloat posture of #1488) and leaves this a pure query-side
 * feature that requires no index-build change.
 *
 * @implements #1494
 * @tests @test/unit/artifacts/fulltext.test.ts
 */

/** Input to the ranker: an opaque identifier paired with the body text to search. */
export interface FullTextDoc {
  id: string;
  text: string;
}

/**
 * One ranked result: the doc id, its normalized score (the top hit is 1.0),
 * and the query terms that matched.
 */
export interface FullTextHit {
  id: string;
  /** BM25 score after normalization, in (0, 1]; 1.0 for the top hit. */
  score: number;
  /** BM25 score before normalization — handy for thresholding/debugging. */
  rawScore: number;
  /** The distinct query terms found in this doc's body. */
  matchedTerms: string[];
}

/** Optional BM25 tuning parameters. */
export interface Bm25Options {
  /** Term-frequency saturation; higher values let repeated terms keep mattering. Default 1.5. */
  k1?: number;
  /** Length normalization, from 0 (none) to 1 (full). Default 0.75. */
  b?: number;
}

/**
 * Turn body or query text into lowercased terms.
 *
 * Text is split on non-alphanumeric characters (intra-word hyphens are
 * kept); stop-words and single-character tokens are dropped.
 */
export declare function tokenizeText(text: string): string[];

/**
 * Score and order documents against a query with Okapi BM25.
 *
 * Only docs matching at least one query term are returned, ordered by score
 * descending. Because BM25 is otherwise unbounded, scores are normalized so
 * the top hit is 1.0 — the same 0–1 score convention the rest of the query
 * surface uses. The un-normalized value is available as `rawScore`.
 */
export declare function bm25Rank(
  docs: FullTextDoc[],
  query: string,
  options?: Bm25Options,
): FullTextHit[];
//# sourceMappingURL=fulltext.d.ts.map