/**
 * PDF content stream interpreter for text extraction.
 *
 * Implements a full PDF graphics state machine that processes content stream
 * operators to extract positioned text fragments. These fragments are then
 * assembled into readable text by the text reconstruction module.
 *
 * Supported operator categories:
 * - Text state: Tf, Tc, Tw, Tz, TL, Ts, Tr
 * - Text positioning: Td, TD, Tm, T*
 * - Text showing: Tj, TJ, ', "
 * - Text objects: BT, ET
 * - Graphics state: q, Q, cm, gs, i, M, ri, W, W*
 * - Color: CS, cs, SC, sc, SCN, scn
 * - Marked content: BDC, BMC, EMC, MP, DP
 * - Type3 glyph: d0, d1
 * - Shading: sh
 * - Inline images: BI/ID/EI
 * - XObject invocation: Do (for form XObjects containing text)
 *
 * @see PDF Reference 1.7, Chapter 5 - Text
 * @see PDF Reference 1.7, Chapter 4 - Graphics
 */
import type { PdfDocument } from "./pdf-document.js";
import type { PdfDictValue } from "./pdf-parser.js";
/**
 * A text fragment extracted from a PDF page.
 *
 * Contains the text string, its position in page coordinates, and the font
 * and spacing values recorded alongside it. Every field is required.
 */
export interface TextFragment {
    /** The extracted text */
    text: string;
    /** X position in page coordinates (points, origin = bottom-left) */
    x: number;
    /** Y position in page coordinates (same coordinate space as `x`) */
    y: number;
    /** Font size in points */
    fontSize: number;
    /**
     * Font name.
     * NOTE(review): the declaration does not say whether this is the page
     * resource name or the font's base name — confirm against the implementation.
     */
    fontName: string;
    /**
     * Width of the text in points.
     * NOTE(review): presumably measured along the writing direction — confirm.
     */
    width: number;
    /** Character spacing (presumably the value set by the `Tc` operator — confirm) */
    charSpacing: number;
    /** Word spacing (presumably the value set by the `Tw` operator — confirm) */
    wordSpacing: number;
    /**
     * Horizontal scaling factor (100 = normal).
     * Presumably the value set by the `Tz` operator — confirm.
     */
    horizontalScaling: number;
    /** Whether the text is vertical (WMode=1) */
    isVertical: boolean;
    /** Whether the text is right-to-left (Arabic, Hebrew, etc.) */
    isRtl: boolean;
}
/**
 * Extract text fragments from a page's content stream(s).
 *
 * @param pageDict - Dictionary of the page whose content stream(s) are read.
 * @param doc - The PDF document the page belongs to.
 *   NOTE(review): presumably used to resolve objects referenced from the
 *   page (fonts, form XObjects) — confirm against the implementation.
 * @returns The text fragments found on the page.
 *   NOTE(review): this declaration does not specify the ordering of the
 *   fragments or the result for a page with no text — verify before
 *   relying on either.
 */
export declare function extractTextFromPage(pageDict: PdfDictValue, doc: PdfDocument): TextFragment[];