/**
 * PDF font decoder for text extraction.
 *
 * Handles the mapping from character codes in content streams to Unicode
 * strings. Supports all major PDF font types:
 *
 * - Type 1 fonts (standard 14 + custom with /Encoding)
 * - TrueType fonts (with /Encoding and /ToUnicode)
 * - Type 0 (CID) composite fonts (with /ToUnicode CMap)
 * - Type 3 fonts (with /Encoding and /ToUnicode)
 *
 * @see PDF Reference 1.7, Chapter 5 - Text
 * @see PDF Reference 1.7, §5.5 - Character Encoding
 */
import type { CMap } from "./cmap-parser.js";
import type { PdfDocument } from "./pdf-document.js";
import type { PdfDictValue } from "./pdf-parser.js";

/**
 * A resolved font used for text extraction.
 *
 * Produced by {@link resolveFont} and consumed by {@link decodeText} and
 * {@link getCharWidth}.
 */
export interface ResolvedFont {
  /** Font name */
  name: string;
  /** Font subtype: Type1, TrueType, Type0, Type3, CIDFontType0, CIDFontType2, MMType1 */
  subtype: string;
  /** ToUnicode CMap, or `null` when the font does not provide one */
  toUnicode: CMap | null;
  /**
   * Encoding lookup: char code → Unicode string.
   *
   * The value is a string rather than a single code point so that one
   * character code can map to a multi-character sequence.
   */
  encoding: Map<number, string>;
  /** Number of bytes per character code (1 for simple fonts, 1-2 for CID fonts) */
  bytesPerCode: number;
  /** Base font name */
  baseFontName: string;
  /** Whether this is a symbolic font */
  isSymbolic: boolean;
  /** Character widths (code → width in thousandths of text space units) */
  widths: Map<number, number>;
  /** Default width */
  defaultWidth: number;
  /** Missing width for characters not in the widths table */
  missingWidth: number;
  /** Whether the font uses Identity-H or Identity-V encoding (codes are Unicode code points) */
  isIdentityEncoding: boolean;
  /** Writing mode: 0 = horizontal, 1 = vertical */
  wmode: number;
}

/**
 * Resolve a PDF font dictionary into a ResolvedFont for text extraction.
 *
 * @param fontDict - The font dictionary to resolve.
 * @param doc - The document the font dictionary belongs to.
 *   NOTE(review): presumably used to dereference indirect objects such as
 *   /ToUnicode and /Encoding — confirm against the implementation.
 * @returns The resolved font description.
 */
export declare function resolveFont(fontDict: PdfDictValue, doc: PdfDocument): ResolvedFont;

/**
 * Decode character codes to Unicode text using a resolved font.
 *
 * @param codes - Raw character-code bytes.
 *   NOTE(review): presumably split into codes of `font.bytesPerCode` bytes —
 *   confirm against the implementation.
 * @param font - The resolved font whose mappings are used for decoding.
 * @returns The decoded Unicode text.
 */
export declare function decodeText(codes: Uint8Array, font: ResolvedFont): string;

/**
 * Get the character width for a given code.
 *
 * @param code - The character code to look up.
 * @param font - The resolved font providing the width tables.
 * @returns The width for `code`.
 *   NOTE(review): presumably in the same units as `font.widths` (thousandths
 *   of text space units), falling back to `missingWidth`/`defaultWidth` when
 *   the code is absent — confirm against the implementation.
 */
export declare function getCharWidth(code: number, font: ResolvedFont): number;