/**
 * PDF reader — public API.
 *
 * Provides a high-level, zero-dependency interface for reading PDF files.
 * Supports:
 * - Text extraction with multilingual support (WinAnsi, MacRoman, CJK via
 *   ToUnicode CMap, Identity-H/V, Symbol, ZapfDingbats)
 * - Image extraction (JPEG, JPEG2000, raw/Flate, CCITT, JBIG2)
 * - Annotation extraction (links, comments, highlights, stamps, etc.)
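 * - Form field extraction (AcroForm: text inputs, checkboxes, radio buttons, dropdowns)
 * - Bookmark extraction (document outline / table of contents)
 * - Table extraction from text positioning heuristics (opt-in via `extractTables`)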
 * - Metadata reading (Info dictionary + XMP)
 * - Encrypted PDFs:
 *   - RC4 (40-bit and 128-bit) — tested via roundtrip
 *   - AES-128 (V=4, R=4) — implemented, requires external test fixtures
 *   - AES-256 (V=5, R=5) — implemented, requires external test fixtures
 * - Cross-reference tables and streams (PDF 1.5+)
 * - Incremental updates and xref recovery
 *
 * @example Text extraction:
 * ```typescript
 * import { readPdf } from "excelts/pdf";
 *
 * const pdf = await readPdf(pdfBytes);
 * console.log(pdf.text);           // All text from all pages
 * console.log(pdf.pages[0].text);  // Text from page 1
 * ```
 *
 * @example Image extraction:
 * ```typescript
 * const pdf = await readPdf(pdfBytes);
 * for (const image of pdf.pages[0].images) {
 *   console.log(image.format, image.width, image.height);
 *   fs.writeFileSync(`image.${image.format}`, image.data);
 * }
 * ```
 *
 * @example Metadata:
 * ```typescript
 * const pdf = await readPdf(pdfBytes);
 * console.log(pdf.metadata.title);
 * console.log(pdf.metadata.author);
 * console.log(pdf.metadata.pageCount);
 * ```
 *
 * @example Encrypted PDF:
 * ```typescript
 * const pdf = await readPdf(pdfBytes, { password: "secret" });
 * ```
 */
import type { PdfAnnotation } from "./annotation-extractor.js";
import type { PdfBookmark } from "./bookmark-extractor.js";
import type { TextFragment } from "./content-interpreter.js";
import type { PdfFormField } from "./form-extractor.js";
import type { ExtractedImage } from "./image-extractor.js";
import type { PdfMetadata } from "./metadata-reader.js";
import type { PdfTable } from "./table-extractor.js";
import type { TextLine } from "./text-reconstruction.js";
/**
 * Options accepted by {@link readPdf}.
 *
 * Every field is optional. Each `extract*` flag switches one kind of
 * extraction on or off; all of them default to `true` except
 * `extractTables`, which is opt-in.
 */
export interface ReadPdfOptions {
    /**
     * Password for encrypted PDFs.
     * Can be either the user password or owner password.
     * @default ""
     */
    password?: string;
    /**
     * Which pages to extract (1-based).
     * If omitted, all pages are extracted.
     *
     * NOTE(review): handling of out-of-range, duplicate, or unsorted page
     * numbers is not visible from this declaration — confirm against the
     * implementation.
     * @example [1, 3, 5] — extract pages 1, 3, and 5
     */
    pages?: number[];
    /**
     * Whether to extract text.
     * @default true
     */
    extractText?: boolean;
    /**
     * Whether to extract images.
     * @default true
     */
    extractImages?: boolean;
    /**
     * Whether to extract metadata.
     * @default true
     */
    extractMetadata?: boolean;
    /**
     * Whether to extract annotations (links, comments, highlights, etc.).
     * @default true
     */
    extractAnnotations?: boolean;
    /**
     * Whether to extract form fields (AcroForm: text inputs, checkboxes, dropdowns, etc.).
     * @default true
     */
    extractFormFields?: boolean;
    /**
     * Whether to extract bookmarks (document outline / table of contents).
     * @default true
     */
    extractBookmarks?: boolean;
    /**
     * Whether to extract tables from pages using text positioning heuristics.
     * Opt-in since table detection is heavier than plain text extraction.
     *
     * NOTE(review): table detection is described as working from text
     * positions, so it presumably needs text extraction to be enabled as
     * well — confirm how this interacts with `extractText: false`.
     * @default false
     */
    extractTables?: boolean;
}
/**
 * A single page from a read PDF.
 *
 * Holds everything extracted at page level: text in three levels of detail
 * (plain string, positioned lines, raw fragments), images, annotations,
 * detected tables, the page dimensions, and any non-fatal warnings.
 *
 * NOTE(review): all fields are required, so a disabled extraction step
 * presumably yields an empty string / empty array rather than `undefined` —
 * confirm against the implementation.
 */
export interface ReadPdfPage {
    /** 1-based page number */
    pageNumber: number;
    /** Extracted text content of this page as a single string */
    text: string;
    /** Structured text lines with position information */
    textLines: TextLine[];
    /** Raw text fragments with exact positions */
    textFragments: TextFragment[];
    /** Extracted images */
    images: ExtractedImage[];
    /** Extracted annotations (links, comments, highlights, etc.) */
    annotations: PdfAnnotation[];
    /** Tables detected from text fragment positioning (opt-in via extractTables) */
    tables: PdfTable[];
    /** Page width in points */
    width: number;
    /** Page height in points */
    height: number;
    /** Warnings encountered during extraction (non-fatal errors) */
    warnings: string[];
}
/**
 * Result of reading a PDF, as resolved by `readPdf`.
 *
 * Combines per-page results (`pages`) with document-level data that is not
 * tied to a single page: `metadata`, `formFields`, and `bookmarks`.
 */
export interface ReadPdfResult {
    /**
     * All text from all pages concatenated.
     *
     * NOTE(review): the separator placed between pages is not visible from
     * this declaration — confirm before relying on it to split pages; use
     * `pages[i].text` for per-page text instead.
     */
    text: string;
    /**
     * Per-page results.
     *
     * NOTE(review): when `ReadPdfOptions.pages` is set this presumably
     * contains only the selected pages, so an array index need not equal
     * `pageNumber - 1` — confirm against the implementation.
     */
    pages: ReadPdfPage[];
    /** Document metadata */
    metadata: PdfMetadata;
    /** Form fields extracted from AcroForm (document-level) */
    formFields: PdfFormField[];
    /** Bookmarks (document outline) extracted from the outline tree */
    bookmarks: PdfBookmark[];
}
/**
 * Read a PDF file and extract its content: text, images, annotations,
 * form fields, bookmarks, metadata, and (opt-in) tables.
 * Which of these are extracted is controlled by the `extract*` flags on
 * `ReadPdfOptions`.
 * Yields to the event loop between pages to avoid blocking.
 *
 * @param data - Raw PDF file bytes
 * @param options - Extraction options; when omitted, the defaults documented
 *   on `ReadPdfOptions` apply
 * @returns Promise of extracted content (per-page results plus
 *   document-level metadata, form fields, and bookmarks)
 * @throws {PdfStructureError} If the PDF structure is invalid
 * @throws {PdfError} If decryption fails (wrong password)
 */
export declare function readPdf(data: Uint8Array, options?: ReadPdfOptions): Promise<ReadPdfResult>;