/**
 * PDF image extraction.
 *
 * Extracts images from PDF pages including:
 * - Inline images (BI/ID/EI operators)
 * - XObject images (/Subtype /Image)
 * - Images with various color spaces and filters
 *
 * Supported image formats:
 * - JPEG (DCTDecode) — extracted as-is
 * - JPEG2000 (JPXDecode) — extracted as-is
 * - Raw/Flate-compressed pixel data — extracted with metadata
 * - CCITT fax — extracted as-is
 *
 * NOTE(review): `ExtractedImage.format` also admits "jbig2", which this list
 * does not mention. How JBIG2 streams are handled is not visible from this
 * declaration file — confirm against the implementation and document it here.
 *
 * @see PDF Reference 1.7, §4.8 - Images
 */
import type { PdfDocument } from "./pdf-document.js";
import type { PdfDictValue } from "./pdf-parser.js";

/**
 * An extracted image from a PDF page.
 *
 * This is the element type of the array returned by
 * {@link extractImagesFromPage}.
 */
export interface ExtractedImage {
  /** Image index within the page (0-based) */
  index: number;
  /** Image width in pixels */
  width: number;
  /** Image height in pixels */
  height: number;
  /** Bits per component */
  bitsPerComponent: number;
  /** Color space name */
  colorSpace: string;
  /** Number of color components (1=gray, 3=RGB, 4=CMYK) */
  components: number;
  /**
   * Image data format:
   * - "jpeg" — raw JPEG data (can be written directly as .jpg)
   * - "jpx" — JPEG 2000 data
   * - "raw" — raw pixel data (RGB/CMYK/Gray, decompressed)
   * - "ccitt" — CCITT fax compressed data
   * - "jbig2" — presumably JBIG2-compressed data passed through undecoded;
   *   TODO confirm against the implementation (previously undocumented)
   */
  format: "jpeg" | "jpx" | "raw" | "ccitt" | "jbig2";
  /** The image data; interpret it according to `format` */
  data: Uint8Array;
  /**
   * Alpha mask data (if present) — same dimensions, 1 component, 8 bits.
   * `null` when the image has no mask.
   */
  alphaMask: Uint8Array | null;
  /** Filter name from the original stream */
  filter: string;
  /**
   * XObject name (if it was a named XObject).
   * NOTE(review): the value used for inline images is not visible here —
   * presumably an empty string; confirm.
   */
  name: string;
}

/**
 * Extract all images from a PDF page.
 *
 * @param pageDict - Dictionary value of the page to extract images from.
 * @param doc - The PDF document; presumably the one that owns `pageDict` and
 *   is used to resolve referenced objects — confirm against the implementation.
 * @returns The images found on the page as {@link ExtractedImage} records.
 *   NOTE(review): result ordering and behavior for pages without images
 *   (presumably an empty array) are not visible from this declaration.
 */
export declare function extractImagesFromPage(pageDict: PdfDictValue, doc: PdfDocument): ExtractedImage[];