/// <reference types="node" />
import { PdfDocument } from '../core/PdfDocument.js';
import type { PdfDecomposerOptions, PdfDecomposerState, PdfDecomposerError } from '../types/decomposer.types.js';
import type { DecomposeResult } from '../types/decompose.types.js';
import type { DataOptions, DataResult } from '../types/data.types.js';
import type { SliceOptions, SliceResult } from '../types/slice.types.js';
import type { PdfPageRenderer } from '../types/renderer.types.js';
import '../utils/DOMMatrixPolyfill.js';
import type { ScreenshotOptions, ScreenshotResult } from '../types/screenshot.types.js';

/**
 * Construction options for PdfDecomposer.
 *
 * `renderer` is optional. When omitted, screenshots use the default
 * node-canvas (Node) / HTMLCanvasElement (browser) path. When provided,
 * `screenshot()` and `data()` (when generating images) delegate page
 * rasterization to the renderer. Useful for swapping in PuppeteerRenderer to
 * avoid node-canvas's GetImageData OOM on large PDFs.
 *
 * Existing consumers passing no options continue to work unchanged.
 */
export interface PdfDecomposerConstructorOptions {
    /** Pluggable page rasterizer; the default canvas path is used when omitted. */
    renderer?: PdfPageRenderer;
}

/**
 * Enhanced PDF Decomposer Class - Load Once, Use Many Times
 *
 * Provides a unified interface for PDF processing operations:
 * - Load PDF once, use multiple times
 * - Decompose PDF to extract text, images, and structure
 * - Generate page screenshots
 * - Memory efficient and universal (Node.js + Browser)
 *
 * @example
 * ```typescript
 * // Basic usage
 * const pdf = new PdfDecomposer(buffer)
 * await pdf.initialize()
 *
 * // Multiple operations on same PDF
 * const pages = await pdf.decompose({ elementComposer: true })
 * const screenshots = await pdf.screenshot({ imageWidth: 1024 })
 *
 * // Access PDF info
 * console.log(`Pages: ${pdf.numPages}, Fingerprint: ${pdf.fingerprint}`)
 * ```
 */
export declare class PdfDecomposer {
    private pdfDocument;
    private buffer;
    private isInitialized;
    private isDisposed;
    private readonly renderer;
    private observable;
    private currentProgress;

    /**
     * Callbacks that accept a PdfDecomposerError.
     * NOTE(review): presumably the observers invoked by notifyDecomposeError();
     * confirm against the implementation.
     */
    decomposeError: Array<(error: PdfDecomposerError) => void>;

    /**
     * Accessor returning a PdfDocument.
     * NOTE(review): presumably the internally held document; confirm against
     * the implementation.
     */
    get pdfDoc(): PdfDocument;

    /**
     * Create a new PDF decomposer instance.
     *
     * @param input PDF buffer (Buffer, ArrayBuffer, or Uint8Array)
     * @param options Optional construction options. `renderer` swaps the
     *   default node-canvas screenshot path for a pluggable renderer (e.g.
     *   PuppeteerRenderer for large PDFs).
     */
    constructor(input: Buffer | ArrayBuffer | Uint8Array, options?: PdfDecomposerConstructorOptions);

    /**
     * Factory method to create and initialize PDF decomposer in one step.
     *
     * @param input PDF buffer (Buffer, ArrayBuffer, or Uint8Array)
     * @param options See constructor.
     * @returns Promise resolving to initialized PdfDecomposer instance
     */
    static create(input: Buffer | ArrayBuffer | Uint8Array, options?: PdfDecomposerConstructorOptions): Promise<PdfDecomposer>;

    /**
     * Initialize the PDF document by loading and processing it.
     * This must be called before using decompose() or screenshot().
     *
     * @returns Promise that settles once loading has finished
     */
    initialize(): Promise<void>;

    /**
     * Decompose PDF to extract content and structure.
     *
     * @param options Optional configuration for decomposition
     * @returns Promise resolving to array of PdfPageContent or PdfData objects
     *   based on options.pdfData
     */
    decompose(options?: PdfDecomposerOptions): Promise<DecomposeResult>;

    /**
     * Generate screenshots for PDF pages.
     *
     * When a custom renderer was passed in the constructor, rasterization is
     * delegated to it (e.g. PuppeteerRenderer renders inside Chromium). When no
     * renderer is set, the default node-canvas / browser canvas path runs.
     *
     * @param options Optional configuration for screenshot generation
     * @returns Promise resolving to ScreenshotResult object
     */
    screenshot(options?: ScreenshotOptions): Promise<ScreenshotResult>;

    /**
     * Generate pdfData structure compatible with pwa-admin.
     *
     * @param options Optional configuration for pdfData generation
     * @returns Promise resolving to DataResult with pdfData and pages
     */
    data(options?: DataOptions): Promise<DataResult>;

    /**
     * Slice PDF to include only specified number of pages and replace internal
     * document.
     *
     * @param options Configuration for slicing operation
     * @returns Promise resolving to SliceResult with sliced PDF data and metadata
     */
    slice(options?: SliceOptions): Promise<SliceResult>;

    /**
     * Replace internal PDF document with new buffer and reinitialize.
     *
     * @param newBuffer New PDF buffer to replace current document
     * @private
     */
    private replaceInternalDocument;

    /**
     * Get total number of pages in the PDF.
     */
    get numPages(): number;

    /**
     * Get PDF document fingerprint for caching.
     */
    get fingerprint(): string | undefined;

    /**
     * Check if PDF is initialized.
     */
    get initialized(): boolean;

    /**
     * Check if dispose() has been called on this instance.
     */
    get disposed(): boolean;

    /**
     * Release all pdf.js resources held by this decomposer and terminate the
     * underlying PDF.js document worker. After dispose(), the instance is
     * unusable; create a new one if more work is needed.
     *
     * Use this instead of nulling the reference and re-creating the instance —
     * that pattern can't release pdf.js worker state, which is what causes the
     * "RSS keeps climbing across pages" pattern in long-running consumers.
     *
     * @returns Promise that settles once resources have been released
     */
    dispose(): Promise<void>;

    /**
     * Release pdf.js worker-side state for a specific page or range while
     * keeping the decomposer instance alive and usable. Useful for consumers
     * that process pages in a loop and want bounded RSS growth.
     *
     * Pages are rebuilt automatically on next access; this is a hint to drop
     * worker-side caches, not a hard delete.
     *
     * @param startPage Optional start of the page range to release
     * @param endPage Optional end of the page range to release
     * @returns Promise that settles once the release request has completed
     */
    releasePages(startPage?: number, endPage?: number): Promise<void>;

    /**
     * Ensure PDF is initialized before operations.
     */
    private ensureInitialized;

    /**
     * Update progress with message.
     */
    private update;

    /**
     * Subscribe to progress updates.
     *
     * @param fn Callback that receives a PdfDecomposerState
     */
    subscribe(fn: (state: PdfDecomposerState) => void): void;

    /**
     * Notify progress observers.
     */
    private notify;

    /**
     * Notify decompose error observers.
     */
    private notifyDecomposeError;

    /**
     * Get PDF and page fingerprints.
     *
     * NOTE(review): `pdfHash` and `total` are emitted as `any`; tighten the
     * types in the implementation source rather than in this declaration.
     *
     * @returns Promise resolving to the document hash, the per-page hashes,
     *   and a total
     */
    getFingerprints(): Promise<{
        pdfHash: any;
        pageHashes: string[];
        total: any;
    }>;
}
//# sourceMappingURL=PdfDecomposer.d.ts.map