/**
 * PDF text extraction.
 *
 * Wraps `unpdf` (pure-ESM, zero native deps) behind a narrow, well-typed
 * boundary so the Read tool never touches pdfjs directly. Swapping the
 * backend later is a one-file change.
 *
 * Responsibilities:
 *   - Parse the caller's `pages` spec and clamp it to the document and
 *     the per-call page cap.
 *   - Extract per-page text.
 *   - Detect "no extractable text" (scanned / image-only PDFs) and
 *     return a structured signal rather than silently-empty output.
 *   - Render the extracted text with `[Page N]` markers so the caller
 *     can line-number it exactly like any other file content.
 *
 * Pure-ish: does no filesystem I/O. Callers are expected to have
 * already loaded the PDF bytes.
 */

/** Input to {@link extractPdfText}. */
export interface PdfExtractionRequest {
    /** PDF bytes. Accepts a Node Buffer or any Uint8Array. */
    data: Buffer | Uint8Array;
    /**
     * Page spec: `"N"`, `"N-M"`, or undefined. When undefined, extracts
     * the first `maxPages` pages starting at page 1.
     */
    pagesSpec?: string | undefined;
    /**
     * Upper bound on how many pages may be extracted in one call.
     * NOTE(review): presumably falls back to `DEFAULT_MAX_PAGES` when
     * omitted; confirm against the implementation.
     */
    maxPages?: number;
}

/** Successful extraction: at least some text was recovered. */
export interface PdfExtractionOk {
    kind: 'ok';
    /** Page count of the document. */
    totalPages: number;
    /** First page extracted (1-based, inclusive). */
    firstPage: number;
    /** Last page extracted (1-based, inclusive). */
    lastPage: number;
    /** Per-page text. `pages[i].pageNumber` is 1-based. */
    pages: Array<{
        pageNumber: number;
        text: string;
    }>;
    /**
     * Full rendered text with `[Page N]` markers separating pages, ready
     * to be handed to the same line-numbering pipeline Read uses for
     * plain text files.
     */
    rendered: string;
}

/**
 * Structured "no extractable text" signal (e.g. scanned / image-only
 * PDFs), returned instead of silently-empty output.
 */
export interface PdfExtractionEmpty {
    kind: 'empty';
    /** Page count of the document. */
    totalPages: number;
    /** First page of the range (presumably 1-based, inclusive, as in `PdfExtractionOk`). */
    firstPage: number;
    /** Last page of the range (presumably 1-based, inclusive, as in `PdfExtractionOk`). */
    lastPage: number;
    /** Human-readable explanation for the caller to surface. */
    message: string;
}

/**
 * The requested page range could not be honoured.
 * NOTE(review): presumably produced when `parsePagesSpec` rejects
 * `pagesSpec`; confirm against the implementation.
 */
export interface PdfExtractionInvalidRange {
    kind: 'invalid-range';
    /** Page count of the document. */
    totalPages: number;
    /** Human-readable explanation for the caller to surface. */
    message: string;
}

/** Any other failure, reported as data rather than thrown. */
export interface PdfExtractionError {
    kind: 'error';
    /** Human-readable explanation for the caller to surface. */
    message: string;
}

/**
 * Discriminated union of every outcome of {@link extractPdfText}.
 * Switch on `kind` to narrow.
 */
export type PdfExtractionResult =
    | PdfExtractionOk
    | PdfExtractionEmpty
    | PdfExtractionInvalidRange
    | PdfExtractionError;

/**
 * Default cap on pages extracted per call. Matches the advertised
 * schema default for Read's `pages` param ("max 20 pages per request").
 */
export declare const DEFAULT_MAX_PAGES = 20;

/**
 * Outcome of {@link parsePagesSpec}: either a validated inclusive page
 * range or a failure message.
 */
export type PagesSpecResult =
    | {
          ok: true;
          first: number;
          last: number;
      }
    | {
          ok: false;
          message: string;
      };

/**
 * Parse a pages-spec string against a document's page count and the
 * per-call cap. Enforces:
 *   - Format: `"N"` or `"N-M"` (base 10, whitespace tolerant)
 *   - 1 <= first <= last <= totalPages
 *   - (last - first + 1) <= maxPages
 *
 * Returns `{ ok: false }` with an actionable message on any failure.
 *
 * @param spec - The raw pages spec supplied by the caller.
 * @param totalPages - Page count of the document being read.
 * @param maxPages - Maximum number of pages allowed in one call.
 * @returns The validated range, or a failure with a message.
 */
export declare function parsePagesSpec(
    spec: string,
    totalPages: number,
    maxPages: number,
): PagesSpecResult;

/**
 * Extract text from a PDF buffer. Never throws — all failure modes are
 * returned as structured results so the caller can render them as
 * tool-content messages.
 *
 * @param req - PDF bytes plus the optional page spec and page cap.
 * @returns A promise resolving to one variant of `PdfExtractionResult`;
 *   discriminate on `kind`.
 */
export declare function extractPdfText(req: PdfExtractionRequest): Promise<PdfExtractionResult>;
//# sourceMappingURL=pdf-extractor.d.ts.map