/**
 * Runner-native RAG retrieval seam (P1).
 *
 * KERN doctrine: "own the meaning, borrow the calculator." The RAG *contract*
 * layer (grounding/citation/eval rules) already exists; this module supplies a
 * real, deterministic retrieval *engine* behind the existing seam signature
 * `RagContractRetriever = (query, options?) => RetrieveResult` so that
 * `evaluateRagEvalContract` can run against actual cosine retrieval instead of
 * the lexical-Jaccard reference corpus.
 *
 * Two pieces:
 * - {@link DeterministicHashEmbedder}: a zero-dependency, byte-reproducible
 *   embedder (feature-hashed token presence, L2-normalised). It is a
 *   *determinism substrate*, NOT a quality model — semantic embedders plug in
 *   behind the same {@link Embedder} seam later (P1.5).
 * - {@link EmbeddingRagIndex}: cosine ranking over embedded chunks, mirroring
 *   the ordering/filtering/citation-defaulting contract of
 *   `InMemoryRagCorpus` so downstream eval behaviour is unchanged except for
 *   the scoring function.
 *
 * Determinism (so eval is reproducible across runs and, later, across emitted
 * targets): pinned tokenisation (`tokenizeForRetrieval`, NFKC + casefold),
 * FNV-1a-32 feature hashing with a fixed offset basis, left-fold accumulation,
 * and integerised fixed-point score rounding with signed-zero normalisation.
 *
 * NOTE(review): the generic type arguments in this declaration file
 * (`Promise<…>`, `Iterable<…>`, `Pick<…>`) were restored from the surrounding
 * declarations after having been stripped; confirm them against the
 * implementation module before publishing.
 */
import {
    type AsyncRagContractRetriever,
    type RagChunkInput,
    type RagContractRetriever,
    type RetrieveOptions,
    type RetrieveResult
} from './rag-runtime.js';

/** Pluggable text→vector embedder. Implementations must be pure + deterministic. */
export interface Embedder {
    /** Stable identity (model + version) recorded for reproducibility, e.g. `local-hash-v1`. */
    readonly id: string;
    /** Vector dimensionality. */
    readonly dims: number;
    /** Embed `text` into a fixed-length vector. Same input → identical output. */
    embed(text: string): Float64Array;
}

/** Async text→vector embedder for provider-backed models. */
export interface AsyncEmbedder {
    /** Stable identity (provider + model + version) recorded for reproducibility. */
    readonly id: string;
    /** Vector dimensionality. */
    readonly dims: number;
    /** Embed `text` into a fixed-length vector. */
    embed(text: string): Promise<Float64Array>;
    /** Embed a batch of texts into fixed-length vectors, preserving input order. */
    embedMany?(texts: readonly string[]): Promise<Float64Array[]>;
}

/** Fields that identify an embedding space: provider, model, dimensionality and metric. */
export interface EmbeddingFingerprintInput {
    readonly provider: string;
    readonly model: string;
    readonly dims: number;
    readonly metric: 'cosine';
}

/** Default vector dimensionality. */
export declare const DEFAULT_EMBEDDING_DIMS = 256;
/** Default {@link Embedder.id} for the hash embedder. */
export declare const DEFAULT_HASH_EMBEDDER_ID = "local-hash-v1";
/** Default {@link Embedder.id} for the local semantic embedder. */
export declare const DEFAULT_LOCAL_SEMANTIC_EMBEDDER_ID = "local-semantic-v1";
/** Scores are rounded to this many decimals (integerised fixed-point) for stable equality. */
export declare const EMBEDDING_SCORE_DECIMALS = 6;
/** Version tag carried by {@link RagVectorStoreSnapshot.version}. */
export declare const RAG_VECTOR_STORE_SNAPSHOT_VERSION = "kern-rag-vector-store-v1";

/** Deterministic 32-bit FNV-1a over the UTF-8 bytes of `token`. Never the platform `hash()`. */
export declare function fnv1a32(token: string): number;

/**
 * Zero-dependency determinism-substrate embedder: feature-hash the pinned token
 * set into `dims` buckets (presence), then L2-normalise. Pure string/integer +
 * float math, so it reproduces byte-identically wherever it is reimplemented.
 */
export declare class DeterministicHashEmbedder implements Embedder {
    readonly id: string;
    readonly dims: number;
    constructor(options?: {
        readonly dims?: number;
        readonly id?: string;
    });
    embed(text: string): Float64Array;
}

/**
 * Small zero-network semantic embedder for runner-native eval.
 *
 * This is deliberately versioned separately from `local-hash-v1`: known
 * synonym/domain terms project into shared semantic axes, while unknown words
 * still contribute deterministic hashed lexical features. It is not a neural
 * model, but it is a real semantic lookup embedder with deterministic OOV
 * fallback and a stable upgrade path for a future provider/model embedder.
 */
export declare class LocalSemanticEmbedder implements Embedder {
    readonly id: string;
    readonly dims: number;
    constructor(options?: {
        readonly id?: string;
    });
    embed(text: string): Float64Array;
}

/** Adapt a synchronous {@link Embedder} to the {@link AsyncEmbedder} seam. */
export declare function asAsyncEmbedder(embedder: Embedder): AsyncEmbedder;

/**
 * Construction options for {@link OpenAIEmbeddingAdapter}. `model` and `dims`
 * are required; `apiKey`, `endpoint` and `fetch` are optional overrides whose
 * defaults are not visible in this declaration.
 */
export interface OpenAIEmbeddingAdapterOptions {
    readonly model: string;
    readonly dims: number;
    readonly apiKey?: string;
    readonly endpoint?: string;
    readonly fetch?: typeof fetch;
}

/**
 * Provider-backed {@link AsyncEmbedder}.
 * NOTE(review): presumably calls an OpenAI-style embeddings HTTP endpoint
 * through the configured `fetch` — confirm against the implementation.
 */
export declare class OpenAIEmbeddingAdapter implements AsyncEmbedder {
    readonly id: string;
    readonly dims: number;
    private readonly model;
    private readonly apiKey?;
    private readonly endpoint;
    private readonly fetchImpl;
    constructor(options: OpenAIEmbeddingAdapterOptions);
    embed(text: string): Promise<Float64Array>;
    embedMany(texts: readonly string[]): Promise<Float64Array[]>;
}

/**
 * Cosine similarity of two vectors, rounded to a stable fixed-point score in
 * [0, 1]. For non-negative (feature-hash) vectors cosine is already in [0, 1];
 * the clamp guards float error, and signed zero is normalised to `+0`.
 */
export declare function embeddingCosine(a: Float64Array, b: Float64Array): number;

/**
 * Cosine-ranked retrieval index implementing the `RagContractRetriever` seam.
 * Ordering, filtering and citation defaulting mirror `InMemoryRagCorpus`; only
 * the scoring function (cosine over embeddings vs. Jaccard over tokens) differs.
 */
export declare class EmbeddingRagIndex {
    private readonly embedder;
    private readonly entries;
    constructor(chunks?: Iterable<RagChunkInput>, options?: {
        readonly embedder?: Embedder;
    });
    get size(): number;
    /** Embedder identity, recorded in eval provenance for reproducibility. */
    get embedderId(): string;
    add(chunk: RagChunkInput): void;
    retrieve(query: string, options?: RetrieveOptions): RetrieveResult;
}

/** A chunk together with its embedding vector and the fingerprint it is stored under. */
export interface StoredVectorChunk {
    readonly chunk: RagChunkInput;
    readonly vector: Float64Array;
    readonly fingerprint: string;
}

/** Kinds of vector store. */
export type RagVectorStoreKind = 'memory' | 'local-persistent';

/** Similarity metrics a vector store can declare; only cosine is defined. */
export type RagVectorStoreMetric = 'cosine';

/** Variant of {@link StoredVectorChunk} whose vector is a plain `number[]` rather than a typed array. */
export interface SerializedVectorChunk {
    readonly chunk: RagChunkInput;
    readonly vector: readonly number[];
    readonly fingerprint: string;
}

/** One entry for {@link RagVectorStoreAdapter.upsertMany}; `fingerprint` is optional. */
export interface RagVectorStoreUpsert {
    readonly chunk: RagChunkInput;
    readonly vector: Float64Array;
    readonly fingerprint?: string;
}

/** Versioned dump of a vector store as returned by {@link RagVectorStoreAdapter.snapshot}. */
export interface RagVectorStoreSnapshot {
    readonly version: typeof RAG_VECTOR_STORE_SNAPSHOT_VERSION;
    readonly fingerprint: string;
    readonly dims: number;
    readonly metric: RagVectorStoreMetric;
    readonly entries: readonly SerializedVectorChunk[];
}

/**
 * Storage seam for embedded chunks: upsert vectors, search with a query
 * vector, snapshot, clear and close. Each store declares the fingerprint,
 * dimensionality and metric it holds.
 */
export interface RagVectorStoreAdapter {
    readonly kind: RagVectorStoreKind;
    readonly fingerprint: string;
    readonly dims: number;
    readonly metric: RagVectorStoreMetric;
    upsert(chunk: RagChunkInput, vector: Float64Array, fingerprint?: string): void;
    upsertMany(entries: Iterable<RagVectorStoreUpsert>): void;
    search(query: string, queryVector: Float64Array, options?: RetrieveOptions, fingerprint?: string): RetrieveResult;
    snapshot(): RagVectorStoreSnapshot;
    clear(): void;
    close(): void;
}

/**
 * {@link RagVectorStoreAdapter} implementation constructed from a fingerprint
 * and a dimensionality.
 * NOTE(review): the name suggests an in-memory stand-in with pgvector-like
 * semantics, and `assertCompatible` presumably rejects mismatched
 * fingerprints/dimensions — confirm against the implementation.
 */
export declare class InMemoryPgVectorRagStore implements RagVectorStoreAdapter {
    readonly fingerprint: string;
    readonly dims: number;
    readonly kind: RagVectorStoreKind;
    readonly metric: RagVectorStoreMetric;
    private readonly entries;
    constructor(fingerprint: string, dims: number);
    upsert(chunk: RagChunkInput, vector: Float64Array, fingerprint?: string): void;
    upsertMany(entries: Iterable<RagVectorStoreUpsert>): void;
    search(query: string, queryVector: Float64Array, options?: RetrieveOptions, fingerprint?: string): RetrieveResult;
    snapshot(): RagVectorStoreSnapshot;
    clear(): void;
    close(): void;
    private assertCompatible;
}

/**
 * Async counterpart of {@link EmbeddingRagIndex} driven by an
 * {@link AsyncEmbedder}. The constructor is private; instances are obtained
 * through the async {@link AsyncEmbeddingRagIndex.create} factory.
 */
export declare class AsyncEmbeddingRagIndex {
    private readonly embedder;
    private readonly store;
    private readonly fingerprint;
    private readonly queryVectorByText;
    private constructor();
    static create(chunks: Iterable<RagChunkInput> | undefined, options: {
        readonly embedder: AsyncEmbedder;
        readonly metric?: 'cosine';
    }): Promise<AsyncEmbeddingRagIndex>;
    get embedderId(): string;
    get embeddingFingerprint(): string;
    add(chunk: RagChunkInput): Promise<void>;
    retrieve(query: string, options?: RetrieveOptions): Promise<RetrieveResult>;
}

/** Adapt an {@link EmbeddingRagIndex} to the `RagContractRetriever` seam consumed by eval. */
export declare function createEmbeddingRetriever(index: EmbeddingRagIndex): RagContractRetriever;

/** Adapt an {@link AsyncEmbeddingRagIndex} to the `AsyncRagContractRetriever` seam. */
export declare function createAsyncEmbeddingRetriever(index: AsyncEmbeddingRagIndex): AsyncRagContractRetriever;

/** Fingerprint string for an embedder's `id` and `dims` under the given metric. */
export declare function embedderFingerprint(embedder: Pick<Embedder, 'id' | 'dims'>, metric: 'cosine'): string;

/** Fingerprint string for an explicit {@link EmbeddingFingerprintInput}. */
export declare function embeddingFingerprint(input: EmbeddingFingerprintInput): string;