/**
 * Puppeteer-based page renderer.
 *
 * Renders PDF pages inside a headless Chromium browser using the same
 * `document.createElement('canvas')` + pdf.js pipeline that pdf-decomposer's
 * BROWSER code path uses (and that flexpdf relies on at scale). The OOM that
 * node-canvas's `Context2d::GetImageData` hits never occurs here because the
 * canvas backing store lives inside Chromium, where canvas memory is managed
 * by the browser engine.
 *
 * Trade-offs vs the default node-canvas renderer:
 *   - Cold-start cost: spawning Chromium adds ~500-2000ms per PdfDecomposer
 *     lifetime (one-time, not per page).
 *   - Disk footprint: requires Chromium (~300 MB) — already present in the
 *     drone-jobs Cloud Function image via puppeteer.
 *   - Reliability: handles 100+ page documents without external-memory OOM.
 *
 * PDF byte transfer:
 *   The PDF is served to Chromium over a tiny localhost HTTP server bound to
 *   a random port on 127.0.0.1. Earlier attempts passed the bytes through
 *   `page.evaluate` as base64, but that round-trips 100-200 MB strings
 *   through CDP's JSON serializer and crashes the tab with OOM. Serving via
 *   HTTP lets pdf.js fetch the file using the same code path it uses in
 *   production browsers, with zero protocol-level overhead.
 *
 * Failure modes designed around:
 *   - "Blank image" issue from past attempts: caused by `page.screenshot()`
 *     capturing viewport-only or by racing pdf.js's async render. This renderer
 *     uses `canvas.toDataURL()` inside `page.evaluate`, after `await
 *     renderTask.promise`. No viewport size dependence; no race condition.
 *   - pdf.js worker memory leakage (mozilla/pdf.js#10730): mitigated by calling
 *     `pageProxy.cleanup()` after every render and `doc.destroy()` on dispose.
 */
import type { PdfPageRenderResult, PdfPageRenderOptions, PdfPageRenderer } from '../types/renderer.types.js';

/** Construction-time settings for {@link PuppeteerRenderer}. All fields are optional. */
export interface PuppeteerRendererOptions {
    /**
     * Path to a Chromium/Chrome executable. Defaults to puppeteer's bundled
     * Chromium. Useful for Cloud Functions where the runtime image already
     * ships with Chromium at a known path.
     */
    executablePath?: string;
    /**
     * Extra `--flag` arguments for the Chromium launch. The renderer always
     * passes `--no-sandbox --disable-setuid-sandbox` for container compat.
     */
    launchArgs?: string[];
    /**
     * Verbose console logging from the browser context. Useful for debugging
     * blank-screenshot or render-failure scenarios.
     */
    debug?: boolean;
}

/**
 * {@link PdfPageRenderer} implementation that rasterizes pages in headless
 * Chromium (see the file header for the design rationale and trade-offs).
 *
 * NOTE(review): the `Promise` return types below had lost their type
 * arguments (bare `Promise` does not type-check). `Promise<PdfPageRenderResult>`
 * is implied by the otherwise-unused import; the two `Promise<void>` results
 * are the presumed originals — confirm against `PdfPageRenderer`.
 */
export declare class PuppeteerRenderer implements PdfPageRenderer {
    // Private state. Declaration output omits the types of private members.
    private readonly options;
    private browser;
    private page;
    private server;
    private serverUrl;
    private initialized;
    private numPages;

    /**
     * @param options Optional launch/debug settings; see
     *   {@link PuppeteerRendererOptions}.
     */
    constructor(options?: PuppeteerRendererOptions);

    /**
     * Prepare the renderer for a document.
     *
     * @param pdfData Raw bytes of the PDF to render.
     * @returns A promise that settles once setup has finished.
     */
    initialize(pdfData: Uint8Array): Promise<void>;

    /**
     * Render a single page of the document passed to {@link initialize}.
     *
     * @param pageNumber Page to render (indexing base is not visible from this
     *   declaration — TODO confirm against the implementation).
     * @param opts Optional per-render settings.
     * @returns A promise resolving to the render result for that page.
     */
    renderPage(pageNumber: number, opts?: PdfPageRenderOptions): Promise<PdfPageRenderResult>;

    /**
     * Release the resources held by this renderer. Per the file header,
     * `doc.destroy()` is called on dispose to mitigate pdf.js worker leakage.
     *
     * @returns A promise that settles once teardown has finished.
     */
    dispose(): Promise<void>;

    /**
     * Spin up a localhost HTTP server that exposes the PDF + pdf.js worker so
     * the headless Chromium can fetch them as same-origin resources.
     */
    private startServer;

    /**
     * Load puppeteer dynamically so it stays an optional dependency. Returns
     * null if puppeteer isn't installed in the consumer's project.
     */
    private loadPuppeteer;

    /**
     * Locate and read pdfjs-dist's legacy build from the consumer's
     * node_modules. Uses CommonJS `require.resolve` directly — pdf-decomposer's
     * tsconfig targets `module: commonjs`, so `require` is module-scoped and
     * available.
     */
    private readBundledPdfJs;

    /**
     * Optional pdf.js worker file. If pdfjs-dist's worker build isn't reachable
     * we return an empty string and pdf.js falls back to its "fake worker"
     * (main-thread) mode. Slower but still correct.
     */
    private readBundledPdfJsWorker;

    /** Internal-use helper for tests/diagnostics. */
    get _pageCount(): number;
}
//# sourceMappingURL=PuppeteerRenderer.d.ts.map