/**
 * CMap parser for PDF text extraction.
 *
 * Parses /ToUnicode CMap programs to build character code → Unicode mappings.
 * This is essential for extracting text from PDFs that use CIDFonts or
 * custom encodings.
 *
 * Supports:
 * - beginbfchar / endbfchar (single character mappings)
 * - beginbfrange / endbfrange (range mappings, including array form)
 * - begincodespacerange / endcodespacerange
 * - Multi-byte character codes (1-4 bytes)
 * - UTF-16BE encoded target strings (including surrogate pairs)
 *
 * The declarations below are ambient (`declare`): they describe the public
 * shape only and carry no implementation.
 *
 * @see PDF Reference 1.7, §5.9 - ToUnicode CMaps
 * @see Adobe Technical Note #5411 - CMap Resources
 */

/**
 * A parsed CMap that maps character codes to Unicode strings.
 *
 * Populated through the `add*` methods and queried with {@link CMap.lookup}.
 */
export declare class CMap {
  // Private storage. These members are declared without types here, so their
  // concrete shapes cannot be determined from this declaration.
  private codeSpaceRanges;
  private bfChars;
  private bfRanges;

  /** Number of bytes per character code (detected from codespace ranges) */
  bytesPerCode: number;

  /** Creates a CMap; takes no arguments. */
  constructor();

  /**
   * Look up the Unicode string for a character code.
   * Uses binary search over sorted bfRanges for efficient lookup.
   *
   * @param code - The character code to resolve.
   * @returns The mapped Unicode string, or `undefined` (presumably when no
   *   mapping covers `code` — confirm against the implementation).
   */
  lookup(code: number): string | undefined;

  /**
   * Add a code space range.
   *
   * @param low - Lower bound of the range (NOTE(review): presumably
   *   inclusive — confirm).
   * @param high - Upper bound of the range (NOTE(review): presumably
   *   inclusive — confirm).
   * @param bytes - NOTE(review): presumably the byte length of the codes in
   *   this range — confirm against the implementation.
   */
  addCodeSpaceRange(low: number, high: number, bytes: number): void;

  /**
   * Add a bfchar mapping.
   *
   * @param code - The character code being mapped.
   * @param unicode - The Unicode string the code maps to.
   */
  addBfChar(code: number, unicode: string): void;

  /**
   * Add a bfrange mapping.
   *
   * @param low - First character code of the range.
   * @param high - Last character code of the range.
   * @param mapping - Either a single string or an array of strings (the
   *   "array form" of bfrange). NOTE(review): how a single string is
   *   advanced across the range, and how array entries line up with codes,
   *   is not visible from this declaration.
   */
  addBfRange(low: number, high: number, mapping: string | string[]): void;

  /**
   * Sort bfRanges by low value for binary search.
   * Should be called after all ranges have been added.
   *
   * NOTE(review): `lookup` is documented as searching sorted bfRanges, so
   * this presumably needs to run before lookups — confirm.
   */
  sortRanges(): void;

  /**
   * Determine the code length (in bytes) for a given first byte,
   * using the codespace ranges. When multiple ranges match (e.g. a 1-byte
   * range covering 0x00-0xFF and a 2-byte range whose first byte overlaps),
   * returns the longest match per the PDF spec's greedy matching rule.
   * Falls back to bytesPerCode if no range matches.
   *
   * @param firstByte - The first byte of the character code being read.
   * @returns The number of bytes that make up the character code.
   */
  getCodeLength(firstByte: number): number;

  /**
   * Check if this CMap has any mappings.
   */
  get isEmpty(): boolean;

  /**
   * Check if this CMap has codespace ranges defined.
   */
  get hasCodeSpaceRanges(): boolean;
}

/**
 * Parse a CMap program (typically from a /ToUnicode stream).
 *
 * @param data - The bytes of the CMap program (NOTE(review): presumably the
 *   already-decoded stream contents — confirm with callers).
 * @returns The parsed {@link CMap}. NOTE(review): whether the result is
 *   already sorted for lookup is not visible from this declaration.
 */
export declare function parseCMap(data: Uint8Array): CMap;