/**
 * XmlLexer
 * ─────────
 * Phase 1 of the XML pipeline: converts a raw XML string into a flat,
 * ordered stream of typed tokens.
 *
 * The parser consumes this token stream — it never touches raw characters.
 * This separation keeps each layer testable and replaceable independently.
 *
 * Token types
 * ───────────
 *   XML_DECL         <?xml ... ?>
 *   DOCTYPE          <!DOCTYPE ... >
 *   OPEN_TAG         <name
 *   ATTR_NAME        attribute name inside an open tag
 *   ATTR_VALUE       attribute value inside an open tag
 *   TAG_END          >
 *   TAG_SELF_CLOSE   />
 *   CLOSE_TAG        </name>
 *   TEXT             raw text between tags
 *   CDATA            <![CDATA[ ... ]]>
 *   COMMENT          <!-- ... -->
 *   PI               <?target ... ?>
 *   EOF              end of input
 *
 * NOTE(review): the syntax examples above were reconstructed from the token
 * names; what each token's `value` actually contains is decided by the
 * implementation, which is not visible from this declaration file.
 */

/** Discriminator identifying the kind of a lexed {@link Token}. */
export type TokenType =
  | 'XML_DECL'
  | 'DOCTYPE'
  | 'OPEN_TAG'
  | 'ATTR_NAME'
  | 'ATTR_VALUE'
  | 'TAG_END'
  | 'TAG_SELF_CLOSE'
  | 'CLOSE_TAG'
  | 'TEXT'
  | 'CDATA'
  | 'COMMENT'
  | 'PI'
  | 'EOF';

/** A single lexical unit produced by {@link XmlLexer}. */
export interface Token {
  /** Kind of token. */
  type: TokenType;
  /** Textual payload of the token. */
  value: string;
  /** Source line associated with the token. */
  line: number;
  /** Source column associated with the token. */
  col: number;
  /** For ATTR_VALUE: the raw (un-decoded) value */
  raw?: string;
}

/** Optional limits and extensions accepted by the {@link XmlLexer} constructor. */
export interface LexerOptions {
  /** Maximum number of attributes allowed on a single element (default: 256) */
  maxAttributes?: number;
  /** Maximum text node character length (default: 10_000_000) */
  maxTextLength?: number;
  /** Maximum total node count in the document (default: 1_000_000) */
  maxNodeCount?: number;
  /**
   * Additional named entity map.
   *
   * NOTE(review): the generic arguments were missing from this declaration
   * (`Readonly>`); reconstructed as a read-only string-to-string record —
   * confirm against the implementation source.
   */
  entityMap?: Readonly<Record<string, string>>;
}

/**
 * Lexer for XML input: constructed from the raw document string and optional
 * {@link LexerOptions}, it exposes the token stream either eagerly
 * ({@link XmlLexer.tokenize}) or lazily ({@link XmlLexer.tokenizeStream}).
 */
export declare class XmlLexer {
  private r;
  private opts;
  private nodeCount;

  /**
   * @param input - Raw XML text to tokenize.
   * @param options - Optional limits and extra entity definitions.
   */
  constructor(input: string, options?: LexerOptions);

  /**
   * Tokenize the entire input and return an array of tokens ending with EOF.
   * For large documents prefer `tokenizeStream()` which yields tokens lazily.
   */
  tokenize(): Token[];

  /**
   * Generator that yields tokens one at a time.
   * The parser drives this generator rather than buffering everything.
   */
  tokenizeStream(): Generator<Token>;

  // Private implementation members; their types are erased in declaration output.
  private _lexXmlDecl;
  private _lexDoctype;
  private _lexComment;
  private _lexCData;
  private _lexPI;
  private _lexOpenTag;
  private _lexCloseTag;
  private _lexText;
  private _bumpNodeCount;
  private _lexName;
  private _loc;
  private _tok;
}
//# sourceMappingURL=XmlLexer.d.ts.map