import { Tokenizer, Token } from './'
import { BS } from './bs'
import { Parser } from './parser'

/**
 * One entry of the stack of currently open elements, linked through `parent`.
 */
type OpenElement = {
  /** Parser whose callbacks are invoked for events inside this element. */
  parser: Parser.IParser
  /** Enclosing entry; absent on the outermost one. */
  parent?: OpenElement
  /** Context value handed to the parser callbacks. */
  ctx: any
}

/**
 * Pull-style document parser: tokens are read from a tokenizer and dispatched
 * to the registered parsers.
 * @public
 */
export class DocumentParser {
  /** Innermost open element; `undefined` until a parser has been registered. */
  private openElement?: OpenElement

  constructor(private tokenizer: Tokenizer) {}

  /**
   * Registers a parser for a set of nodes.
   * @public
   * @param path - a node name or a path to a node (similar to a file path; this is not an XPath)
   * @param parser - the parser to apply to the matching nodes
   * @returns the parser now attached to the current open element
   */
  on(path: string, parser: Parser.IParser) {
    const current = this.openElement

    if (!current) {
      // Nothing registered yet: this registration becomes the initial entry.
      this.openElement = {
        parser: Parser.PassThrough.on(path, parser),
        ctx: undefined,
      }
      return this.openElement.parser
    }

    if (current.parser instanceof Parser.Switch) {
      // A switch is already in place: just append one more case.
      current.parser.case(Parser.PassThrough.on(path, parser))
    } else {
      // Promote the single existing parser to a switch holding both cases.
      current.parser = Parser.Switch.create()
        .case(current.parser)
        .case(Parser.PassThrough.on(path, parser))
    }
    return current.parser
  }

  /**
   * Like `on()`, but defines how the whole document should be parsed.
   * Replaces any previously registered entry.
   * @param parser - the parser returned for every child start tag of the root entry
   */
  onRoot(parser: Parser.IParser) {
    const rootParser: Parser.IParser = {
      onChild: (_startTag: Token.StartTag) => parser,
    }
    this.openElement = { parser: rootParser, ctx: undefined }
  }

  /**
   * Writes an xml chunk to the underlying tokenizer.
   * @remarks
   * Before writing a new chunk the current one should be exhausted (all tokens
   * have been processed, which means {@link DocumentParser.next} has returned `undefined`).
   * @param chunk - byte sequence (utf8)
   */
  write(chunk: BS | string | Buffer | Uint8Array) {
    this.tokenizer.write(chunk)
  }

  /**
   * Processes tokens until a parser yields a truthy result.
   * @returns the truthy value produced by an `onEnd` handler, or `undefined`
   * when the tokenizer has no more tokens or no element is open
   */
  next(): any {
    while (this.openElement) {
      const token = this.tokenizer.nextToken()
      if (!token) {
        break
      }

      if (token instanceof Token.StartTag) {
        // Only a self-closing tag can yield a result at this point.
        const result = this.resolveTag(this.openElement, token)
        if (result) {
          return result
        }
        continue
      }

      if (token instanceof Token.EndTag) {
        const closing = this.openElement
        const result = closing.parser.onEnd?.(closing.ctx, closing.parent?.ctx)
        // Pop the stack; the field is re-read on purpose, after the handler ran.
        this.openElement = this.openElement.parent
        if (result) {
          return result
        }
        continue
      }

      if (token instanceof Token.Text || token instanceof Token.CDATA) {
        const { parser, ctx } = this.openElement
        parser.onText?.(token, ctx)
      }
      // Any other token kind is ignored.
    }
    return undefined
  }

  /**
   * Handles a start tag: selects the parser for the new element, notifies it
   * and, unless the tag is self-closing, pushes a new open element.
   * @param openElement - the element the start tag was found in
   * @param startTag - the start tag token
   * @returns the `onEnd` result for a self-closing tag, otherwise `undefined`
   */
  private resolveTag(openElement: OpenElement, startTag: Token.StartTag) {
    const enclosing = openElement.parser
    const parser = enclosing.onChild ? enclosing.onChild(startTag) : enclosing

    if (!parser) {
      // No parser for this element: skip its content. Nothing is pushed here,
      // so the `true` flag presumably also consumes the end tag - confirm in Tokenizer.
      if (!startTag.selfClosing) {
        this.tokenizer.skipChildNodes(true)
      }
      return undefined
    }

    const ctx = parser.onStart?.(startTag, openElement.ctx)

    if (startTag.selfClosing) {
      // No end tag will follow, so the element is closed immediately.
      return parser.onEnd?.(ctx, openElement.ctx)
    }

    // `onStart` returning `false`, or the parser's own flag, skips the child nodes.
    if (ctx === false || parser.skipChildNodes) {
      this.tokenizer.skipChildNodes()
    }

    // A falsy context falls back to the enclosing element's context.
    this.openElement = {
      parent: openElement,
      parser,
      ctx: ctx || openElement.ctx,
    }
    return undefined
  }
}