/**
 * Lexer for raw PDF data.
 *
 * Walks the bytes of a PDF file and emits typed tokens one at a time.
 * Every PDF token category is covered: numbers, literal and hex strings,
 * names, booleans, null, keywords, and the structural delimiters.
 *
 * @see PDF Reference 1.7, §3.1 - Lexical Conventions
 */

/** Category tag carried by every {@link Token}. */
export declare const enum TokenType {
  /** An integer or a real number. */
  Number = 0,
  /** A literal string, written between parentheses: `(...)`. */
  LiteralString = 1,
  /** A hex string, written between angle brackets: `<...>`. */
  HexString = 2,
  /** A name object, introduced by `/`. */
  Name = 3,
  /** One of the booleans `true` / `false`. */
  Boolean = 4,
  /** The keyword `null`. */
  Null = 5,
  /** A keyword: obj, endobj, stream, endstream, xref, trailer, startxref, R. */
  Keyword = 6,
  /** Dictionary opener `<<`. */
  DictBegin = 7,
  /** Dictionary closer `>>`. */
  DictEnd = 8,
  /** Array opener `[`. */
  ArrayBegin = 9,
  /** Array closer `]`. */
  ArrayEnd = 10,
  /** No more input. */
  EOF = 11
}

/**
 * A single lexical unit produced by the tokenizer.
 *
 * Only `type` and `offset` are always present; the value fields are
 * optional and are filled in according to the token's type.
 */
export interface Token {
  /** Which category of token this is. */
  type: TokenType;
  /** Parsed number; used by Number tokens. */
  numValue?: number;
  /** Textual value; used by String, Name, Keyword and Boolean tokens. */
  strValue?: string;
  /** Undecoded bytes; used by LiteralString and HexString tokens. */
  rawBytes?: Uint8Array;
  /** Parsed boolean; used by Boolean tokens. */
  boolValue?: boolean;
  /** Byte offset at which the token begins. */
  offset: number;
}

/**
 * Tokenizer operating directly on PDF bytes.
 *
 * Call `next()` repeatedly to pull tokens out of the input. Internally a
 * mutable position pointer is kept and moved forward through the bytes
 * as tokens are consumed.
 */
export declare class PdfTokenizer {
  private data;
  pos: number;

  // NOTE(review): `offset` is presumably the initial read position — confirm
  // against the implementation.
  constructor(data: Uint8Array, offset?: number);

  /** The current position. */
  get position(): number;

  /** Move the current position to `offset`. */
  set position(offset: number);

  /** The underlying data being tokenized. */
  get bytes(): Uint8Array;

  /** Look at the byte under the current position; nothing is consumed. */
  peek(): number;

  /** Produce the next token. */
  next(): Token;

  skipWhitespaceAndComments(): void;

  private readLiteralString;
  private readHexString;
  private readName;
  private readNumber;
  private readKeyword;

  /**
   * Forward search for a byte sequence, beginning at the current position.
   * The position itself is NOT advanced.
   *
   * NOTE(review): `from` presumably overrides the starting offset — confirm.
   *
   * @returns Offset at which the sequence starts, or -1 when it is absent.
   */
  findSequence(seq: Uint8Array, from?: number): number;

  /**
   * Backward search for a byte sequence, beginning at `from` (or at the end
   * of the data).
   *
   * @returns Offset at which the sequence starts, or -1 when it is absent.
   */
  findSequenceBackward(seq: Uint8Array, from?: number): number;

  /**
   * Read one line of text from the current position, moving past the line
   * ending.
   */
  readLine(): string;

  /** Return a slice of the underlying data. */
  slice(start: number, end: number): Uint8Array;

  /**
   * Read the content of a stream that follows a `stream` keyword.
   *
   * The tokenizer is expected to sit immediately after the `stream` keyword
   * when this is called.
   *
   * NOTE(review): `length` is presumably the stream's byte length — confirm.
   *
   * @returns The raw stream bytes (between stream\n and endstream).
   */
  readStreamContent(length: number): Uint8Array;
}