/**
 * PDF Image Extractor - Universal Browser-Compatible Implementation
 *
 * Optimized for universal browser compatibility:
 * - Uses the Canvas API for image processing in the browser (no Node.js dependencies)
 * - Browser-native compression via canvas.toBlob()
 * - Direct PDF.js object processing for maximum compatibility
 * - Supports multiple image formats and color spaces
 * - When running in Node.js, uses zlib compression for efficient PNG creation
 */
import type { ExtractedImage } from '../types/image.types.js';

/**
 * Extracts the images embedded in a PDF page.
 *
 * The only public entry point is {@link PdfImageExtractor.extractImagesFromPage};
 * every other member is a private implementation detail whose type is not
 * exposed by this declaration file.
 */
export declare class PdfImageExtractor {
    /**
     * Memory-safe pixel threshold for auto-scaling.
     * Reduced from 8M to 2M pixels for better memory management on large PDFs.
     */
    private static readonly MAX_SAFE_PIXELS;

    /**
     * Upper bound on image dimensions.
     * NOTE(review): value and units are defined in the implementation - confirm there.
     */
    private static readonly MAX_DIMENSION;

    /**
     * Maximum images per page, to prevent memory explosion.
     */
    private static readonly MAX_IMAGES_PER_PAGE;

    /**
     * Browser-compatible image processing using the Canvas API.
     */
    private static imageToBlob;

    /**
     * Extract images from a PDF page using comprehensive detection methods.
     *
     * @param page The PDF page to extract images from (presumably a PDF.js page
     *   proxy; it is typed `any` here, so no shape is enforced by the compiler).
     * @returns A promise resolving to the array of images extracted from the page.
     *   The original notes describe the image payload as base64 strings.
     */
    extractImagesFromPage(page: any): Promise<ExtractedImage[]>;

    /**
     * Extract images by analyzing the PDF operator list.
     * This detects inline images and Do (XObject) operations.
     */
    private extractFromOperatorList;

    /**
     * Convert a Uint8Array to a base64 string.
     */
    private arrayToBase64;

    /**
     * Check whether an object is an image, based on PDF.js patterns.
     */
    private static isImageObject;

    /**
     * Create an ExtractedImage from a PDF.js object.
     * Uses the exact working logic from the successful
     * test-exact-browser-implementation.js.
     */
    private static createExtractedImageFromObject;

    /**
     * Create an extracted image from an object, with position information taken
     * from the transform matrix.
     */
    private static createExtractedImageFromObjectWithTransform;

    /**
     * Create RGB data from raw pixel data with proper format detection and
     * auto-scaling. Exact logic from the successful test implementation.
     */
    private static createImageDataFromPixelData;

    /**
     * Process RGBA pixel data with no conversion - the RGBA format is kept.
     * Based on the working implementation.
     */
    private static processRGBADataNoConversion;

    /**
     * Process RGB pixel data with memory-safe scaling support.
     * Based on the working implementation.
     */
    private static processRGBDataWithScaling;

    /**
     * CRC32 calculation.
     */
    private static calculateCRC32;

    /**
     * Convert a Uint8Array to base64 (universal implementation).
     */
    private static uint8ArrayToBase64;

    /**
     * Remove duplicate images based on dimensions and content.
     */
    private static removeDuplicateImages;

    /**
     * Create a simple data URL using raw pixel data.
     * Uses zlib compression in Node.js for smaller output and less memory.
     */
    private static createSimpleDataUrl;

    /**
     * Create a compressed PNG using zlib (Node.js only).
     * Much smaller output size and uses streaming for better memory management.
     */
    private static createCompressedPNG;

    /**
     * Create a minimal but valid uncompressed PNG (zero-dependencies approach).
     * The PNG format supports uncompressed data, which suits universal compatibility.
     */
    private static createMinimalValidPNG;

    /**
     * Create an uncompressed deflate stream manually (zero dependencies).
     * This creates a valid deflate format without using compression libraries.
     */
    private static createUncompressedDeflateStream;

    /**
     * Calculate the Adler-32 checksum (required for the deflate format).
     */
    private static calculateAdler32;

    /**
     * Create a simple PNG chunk.
     */
    private static createSimplePNGChunk;
}
//# sourceMappingURL=PdfImageExtractor.d.ts.map