import type {
    DocumentInitParameters,
    PDFPageProxy,
    TextItem,
} from 'pdfjs-dist/types/src/display/api.js';
import type { RequireExactlyOne } from 'type-fest';

export type { DocumentInitParameters } from 'pdfjs-dist/types/src/display/api.js';

/*
 * NOTE(review): the generic type parameters/arguments in this declaration file had been stripped
 * (e.g. `Promise;`, `NonNullable;`, `Partial>;`, and an undeclared `T`), leaving it unparseable.
 * They were reconstructed from the surrounding declarations and doc comments; confirm each
 * reconstructed argument against the implementation before publishing.
 */

/** A single page within a PDF file. */
export type PdfPage = {
    /** The lines of text on this page. */
    lines: string[];
};

/** Progress updates sent by the `pdfjs-dist` dependency. */
export type PdfProgressData = {
    loaded: number;
    total: number;
};

/**
 * Same as the built-in Partial type but also allows each property to be undefined. Compatible with
 * PartialAndUndefined from @augment-vir/common.
 *
 * @typeParam T The object type whose properties become optional and explicitly undefined-able.
 */
export type PartialWithUndefined<T> = {
    [Prop in keyof T]?: T[Prop] | undefined;
};

/**
 * All options accepted by the PDF text reading functions. Most are optional, but exactly one of
 * `filePath`, `url`, or `data` must be provided as the PDF source.
 */
export type ReadPdfTextParams = PartialWithUndefined<{
    /** Password used to open a PDF that's password protected. */
    password: string;
    /** This callback will be periodically called while PDF reading is in progress. */
    progressCallback: (progressData: PdfProgressData) => void;
    /**
     * Set this as an absolute path to the `pdfjs-dist` directory in your `node_modules` directory.
     * This is not required for proper operation, but may help with a warning like the following:
     *
     * Warning: fetchStandardFontData: failed to fetch file "" with
     * "UnknownErrorException: The standard font "baseUrl" parameter must be specified, ensure that
     * the "standardFontDataUrl" API parameter is provided.".
     *
     * For more details see https://github.com/mozilla/pdf.js/issues/4244
     *
     * Example: /home/ubuntu/this-repo/node_modules/pdfjs-dist
     */
    pathToPdfJsDistNodeModule: string;
    /**
     * All options that the Mozilla's `pdfjs-dist` package supports. This will override any options
     * that this package passes to `pdfjs-dist`.
     *
     * NOTE(review): the nested type argument was reconstructed; `data` and `url` are presumably
     * excluded because they are supplied through the dedicated source properties below — confirm.
     */
    options: Partial<Omit<DocumentInitParameters, 'data' | 'url'>>;
}> &
    RequireExactlyOne<{
        /** File path to the PDF file to read. */
        filePath: NonNullable<string>;
        /** URL to the PDF. */
        url: NonNullable<DocumentInitParameters['url']>;
        /** PDF file data that has already been read from a PDF file. */
        data: NonNullable<DocumentInitParameters['data']>;
    }>;

/**
 * Read a PDF and convert it into lines of text.
 *
 * If a URL is used to fetch the PDF data a standard XMLHttpRequest(XHR) is used, which means it
 * must follow the same origin rules that any XHR does e.g. No cross domain requests without CORS.
 */
export declare function readPdfPages({
    data,
    filePath,
    password,
    pathToPdfJsDistNodeModule,
    progressCallback,
    url,
    options,
}: ReadPdfTextParams): Promise<PdfPage[]>;

/** Reads a PDF into a single string. */
export declare function readPdfText(params: ReadPdfTextParams): Promise<string>;

/** Combine all PDF pages into a single string. */
export declare function combinePagesIntoSingleString(pages: PdfPage[]): string;

/** Parse a single PDF page. */
export declare function parsePage(pdfPage: PDFPageProxy): Promise<PdfPage>;

/**
 * Parses individual text items generated by pdf.js. This allows lower level control of what
 * actually gets parsed. For example, a consumer of this function may remove entire sections of the
 * pdf text prior to passing items in here. See the parsePage function above for example usage.
 *
 * @param pdfItems An array of TextItem items.
 */
export declare function parsePageItems(pdfItems: TextItem[]): PdfPage;