/**
 * SAX XML Parser
 *
 * High-performance streaming SAX parser for XML.
 * Zero external dependencies. Optimized for common XML patterns.
 *
 * Migrated from the excel module's parse-sax.ts into the standalone XML module.
 * Enhancements over the original:
 * - Uses @xml/types for event types (SaxTag, SaxEvent, SaxHandlers)
 * - Uses @xml/errors for parse errors (XmlParseError)
 * - Exposes CDATA, comment, and processing-instruction event handlers
 * - Provides both callback API and async-generator API
 *
 * Based on XML 1.0 specification with fast-path optimizations for ASCII.
 */
import type { SaxEventAny, SaxOptions, SaxTag } from "./types.js";

/**
 * Streaming SAX XML parser.
 *
 * Feed XML text via {@link write}() in chunks; events fire synchronously
 * as elements are encountered. Call {@link close}() when done.
 *
 * @example
 * ```ts
 * const parser = new SaxParser();
 * parser.on("opentag", tag => console.log("open:", tag.name));
 * parser.on("text", text => console.log("text:", text));
 * parser.on("closetag", tag => console.log("close:", tag.name));
 * parser.write('<root>hello</root>');
 * parser.close();
 * ```
 */
declare class SaxParser {
  // Private members are listed without types, as is usual for emitted
  // declaration files; their meaning is defined by the implementation.
  private trackPosition;
  private fileName?;
  private fragment;
  private xmlns;
  private maxDepth;
  private maxEntityExpansions;
  private invalidCharHandling;
  private _entityExpansionCount;
  private _nsStack;
  private state;
  private chunk;
  private i;
  private prevI;
  private text;
  private name;
  private q;
  private tags;
  private tag;
  private attribList;
  private entity;
  private entityReturnState;
  private openWakaBang;
  private sawRoot;
  private closedRoot;
  private carriedFromPrevious?;
  private _bomStripped;
  private _closed;
  private reportedTextBeforeRoot;
  private reportedTextAfterRoot;

  // NOTE(review): presumably the current line/column of the parser in the
  // input (see the private `trackPosition` option) — confirm base (0 vs 1).
  line: number;
  column: number;

  private positionAtNewLine;
  private chunkPosition;

  /**
   * Entity table used by the parser.
   *
   * NOTE(review): the type arguments were missing from this declaration;
   * `Record<string, string>` (entity name → replacement text) is assumed —
   * confirm against the implementation.
   */
  ENTITIES: Record<string, string>;

  private _handlers;

  /**
   * @param options - Parser options.
   */
  constructor(options?: SaxOptions);

  /** Whether the parser has been closed. */
  get closed(): boolean;

  /** Current position of the parser (numeric offset; exact unit is defined by the implementation). */
  get position(): number;

  private _init;

  /**
   * Register the handler for an event.
   *
   * @param name - Event name.
   * @param handler - Callback invoked synchronously when the event fires.
   */
  on(name: "opentag", handler: (tag: SaxTag) => void): void;
  on(name: "text", handler: (text: string) => void): void;
  on(name: "closetag", handler: (tag: SaxTag) => void): void;
  on(name: "cdata", handler: (text: string) => void): void;
  on(name: "comment", handler: (text: string) => void): void;
  on(name: "pi", handler: (target: string, body: string) => void): void;
  on(name: "error", handler: (err: Error) => void): void;

  /**
   * Remove the handler registered for an event.
   *
   * @param name - Event name.
   */
  off(name: string): void;

  private makeError;

  /**
   * Report a parse failure with the given message.
   *
   * @param message - Description of the failure.
   * @returns This parser, for chaining.
   */
  fail(message: string): this;

  /**
   * Feed a chunk of XML text to the parser.
   *
   * @param chunk - XML text. NOTE(review): a `null` chunk presumably signals
   *   end of input — confirm against the implementation.
   * @returns This parser, for chaining.
   */
  write(chunk: string | null): this;

  /**
   * Signal that no more input will be written.
   *
   * @returns This parser, for chaining.
   */
  close(): this;

  /**
   * Handle an invalid XML character according to the configured strategy.
   *
   * Used by `handleTextInRoot()` fast path which manages its own text accumulation
   * and cannot use the `getCode()` loop approach.
   *
   * - `"error"`: call `fail()` and return the original code.
   * - `"skip"`: return `REPLACEMENT_CHAR` as a sentinel (caller handles skip).
   * - `"replace"`: return `REPLACEMENT_CHAR`.
   *
   * Note: For `getCode()`, invalid char handling is inlined to avoid recursion.
   *
   * @param code - The invalid character code point.
   * @param kind - Optional description (e.g. "lone surrogate") for error messages.
   * @returns The code point to use.
   */
  private handleInvalidChar;

  /**
   * Handle an invalid character inside the `handleTextInRoot()` fast loop.
   *
   * Unlike `handleInvalidChar()` (which returns a code point for `getCode()`),
   * this method manages the text accumulation state (`this.text`, `start`) that
   * the fast text loop relies on.
   *
   * - `"error"`: call `fail()`, leave text accumulation unchanged (char stays in output).
   * - `"skip"`: flush text up to the invalid char, skip it, return new `start`.
   * - `"replace"`: flush text up to the invalid char, append U+FFFD, return new `start`.
   *
   * @returns The updated `start` index for the text accumulation loop.
   */
  private handleInvalidCharInText;

  /**
   * Handle an invalid character inside `sAttribValueQuoted()`.
   *
   * Same pattern as `handleInvalidCharInText()` but for attribute value
   * accumulation (always uses `this.text`, no conditional handler check).
   *
   * @returns The updated `start` index.
   */
  private handleInvalidCharInAttr;

  private getCode;
  private unget;
  private processState;
  private sText;
  private handleTextInRoot;
  private handleTextOutsideRoot;
  private sOpenWaka;
  private sOpenWakaBang;
  private sOpenTag;
  private sOpenTagSlash;
  private sAttrib;
  private sAttribName;
  private sAttribNameSawWhite;
  private sAttribValue;
  private sAttribValueQuoted;
  private sAttribValueClosed;
  private sCloseTag;
  private sCloseTagSawWhite;
  private sComment;
  private sCommentEnding;
  private sCommentEnded;
  private sCData;
  private sCDataEnding;
  private sCDataEnding2;
  private sPI;
  private sPIEnding;
  private sDoctype;
  private sDoctypeQuote;
  private sDoctypeDTD;
  private sDoctypeDTDQuoted;
  private sEntity;
  private parseEntity;
  private skipSpaces;

  /** Split "prefix:local" into [prefix, local]. Returns ["", name] if no prefix. */
  private splitQName;

  /** Look up the URI for a namespace prefix by walking the stack top-down. */
  private resolveNs;

  /** Extract xmlns declarations from tag attributes and populate tag namespace fields. */
  private applyNamespaces;

  private openTag;
  private openSelfClosingTag;
  private closeTag;
  private end;
}

/**
 * Parse an async-iterable of XML chunks as a stream of SAX event batches.
 *
 * Yields an array of {@link SaxEvent} per input chunk. This is the
 * primary integration point for streaming XML parsing.
 *
 * @param iterable - Async iterable of string or Uint8Array chunks.
 * @param options - Parser options.
 *
 * @example
 * ```ts
 * for await (const events of parseSax(stream)) {
 *   for (const event of events) {
 *     if (event.eventType === "opentag") console.log(event.value.name);
 *   }
 * }
 * ```
 */
declare function parseSax(
  iterable: AsyncIterable<string | Uint8Array> | Iterable<string | Uint8Array>,
  options?: SaxOptions
): AsyncGenerator<SaxEventAny[]>;

/**
 * High-performance direct SAX streaming: feed an async-iterable to a
 * pre-configured SaxParser **without** creating intermediate event objects.
 *
 * The caller registers callbacks on the parser before calling this function.
 * This eliminates per-event `{ eventType, value }` object allocation and the
 * overhead of the async generator protocol, making it significantly faster
 * for hot inner loops like worksheet/shared-string parsing.
 *
 * @param parser - A SaxParser with handlers already registered via `.on()`.
 * @param iterable - Async iterable of string or Uint8Array chunks.
 *
 * @example
 * ```ts
 * const parser = new SaxParser();
 * parser.on("opentag", tag => handleOpen(tag));
 * parser.on("text", text => handleText(text));
 * parser.on("closetag", tag => handleClose(tag));
 * await saxStream(parser, stream);
 * ```
 */
declare function saxStream(
  parser: SaxParser,
  iterable: AsyncIterable<string | Uint8Array> | Iterable<string | Uint8Array>
): Promise<void>;

export { SaxParser, parseSax, saxStream };