/**
 * Types for the optional PDF-context module.
 *
 * A PdfIndex is built once at the start of the polish pipeline (when
 * --context-pdf is supplied), then queried per LLM call to inject small
 * retrieval-selected excerpts. The index lives entirely in-memory after load;
 * the on-disk cache only avoids re-extracting unchanged PDFs.
 */

/** One retrievable unit of PDF text together with its precomputed tokens. */
export type PdfChunk = {
  /** Identifier for this chunk (presumably unique within an index — confirm against the builder). */
  id: string;
  /** Origin of the chunk (presumably the PDF it was extracted from — confirm against the builder). */
  source: string;
  /** Optional section label for the chunk; absent when none is recorded. */
  section?: string;
  /** The chunk's text content. */
  text: string;
  /** Lowercased alphanumeric tokens (length >= 2, stopwords stripped). */
  tokens: string[];
  /** Total token count — equals tokens.length. Cached for BM25 dl factor. */
  length: number;
};

/** In-memory index over all chunks, holding the corpus statistics used for scoring. */
export type PdfIndex = {
  /** Every chunk in the index. */
  chunks: PdfChunk[];
  /** term -> document frequency across chunks */
  df: Map<string, number>;
  /** average chunk length (in tokens) */
  avgdl: number;
  /** total chunks (== chunks.length) */
  totalDocs: number;
  /** PDF sources that contributed at least one chunk (for logging) */
  sources: string[];
};

/** Result of querying the index: the selected excerpts plus bookkeeping. */
export type RetrievedExcerpts = {
  /** Excerpt strings selected for the query. */
  excerpts: string[];
  /** Sources associated with the returned excerpts (presumably the PDFs they came from — confirm). */
  sources: string[];
  /** Total characters of excerpt text returned (excluding header tags). */
  totalChars: number;
};
//# sourceMappingURL=types.d.ts.map