/**
* XmlLexer
* ─────────
* Phase 1 of the XML pipeline: converts a raw XML string into a flat,
* ordered stream of typed tokens.
*
* The parser consumes this token stream — it never touches raw characters.
* This separation keeps each layer testable and replaceable independently.
*
* Token types
* ───────────
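* XML_DECL        <?xml ... ?>
* DOCTYPE         <!DOCTYPE ... >
* OPEN_TAG        <name
* ATTR_NAME       attribute name inside an open tag
* ATTR_VALUE      attribute value inside an open tag
* TAG_END         >
* TAG_SELF_CLOSE  />
* CLOSE_TAG       </name>
* TEXT            raw text between tags
* CDATA           <![CDATA[ ... ]]>
* COMMENT         <!-- ... -->
* PI              <?target ... ?>
* EOF             end of input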
*/
/**
 * Discriminator for every kind of token the lexer can emit.
 * Members are listed in the order they would typically appear while
 * scanning a document: prolog, tag pieces, content, then EOF.
 */
export type TokenType =
    | 'XML_DECL'
    | 'DOCTYPE'
    | 'OPEN_TAG'
    | 'ATTR_NAME'
    | 'ATTR_VALUE'
    | 'TAG_END'
    | 'TAG_SELF_CLOSE'
    | 'CLOSE_TAG'
    | 'TEXT'
    | 'CDATA'
    | 'COMMENT'
    | 'PI'
    | 'EOF';
/**
 * A single lexical unit produced by the lexer, tagged with its kind and
 * the source position it was reported at.
 */
export interface Token {
    /** Which kind of token this is. */
    type: TokenType;

    /** Text payload carried by the token. */
    value: string;

    /** Line position associated with the token. */
    line: number;

    /** Column position associated with the token. */
    col: number;

    /** For ATTR_VALUE: the raw (un-decoded) value */
    raw?: string;
}
/**
 * Optional limits and extensions accepted by the lexer.
 * Every field may be omitted; the documented default then applies.
 */
export interface LexerOptions {
    /** Maximum number of attributes allowed on a single element (default: 256) */
    maxAttributes?: number;

    /** Maximum text node character length (default: 10_000_000) */
    maxTextLength?: number;

    /** Maximum total node count in the document (default: 1_000_000) */
    maxNodeCount?: number;

    /**
     * Additional named entity map, keyed by entity name.
     * NOTE(review): values are presumably the replacement text for each
     * entity — confirm against the lexer implementation.
     */
    entityMap?: Readonly<Record<string, string>>;
}
/**
 * Converts a raw XML string into an ordered stream of typed tokens.
 *
 * Construct with the input text (and optional limits), then call either
 * `tokenize()` for a fully-buffered array or `tokenizeStream()` for lazy,
 * one-at-a-time consumption.
 */
export declare class XmlLexer {
    private r;
    private opts;
    private nodeCount;
    /**
     * @param input   The XML source text to tokenize.
     * @param options Optional limits and extra entity definitions.
     */
    constructor(input: string, options?: LexerOptions);
    /**
     * Tokenize the entire input and return an array of tokens ending with EOF.
     * For large documents prefer `tokenizeStream()` which yields tokens lazily.
     */
    tokenize(): Token[];
    /**
     * Generator that yields tokens one at a time.
     * The parser drives this generator rather than buffering everything.
     *
     * The yield type is declared as `Token` so consumers get typed tokens
     * instead of `unknown` (the default for a bare `Generator`).
     */
    tokenizeStream(): Generator<Token>;
    private _lexXmlDecl;
    private _lexDoctype;
    private _lexComment;
    private _lexCData;
    private _lexPI;
    private _lexOpenTag;
    private _lexCloseTag;
    private _lexText;
    private _bumpNodeCount;
    private _lexName;
    private _loc;
    private _tok;
}
//# sourceMappingURL=XmlLexer.d.ts.map