import { BS } from './bs'

/** An attribute as raw bytes: fully-qualified name (prefix included) and value. */
export type Attribute = { name: BS; value: BS }

/**
 * A namespace declaration (`xmlns` / `xmlns:prefix`).
 * `name` is the declared prefix; it is absent for the default namespace.
 * `uriString` and `uri` hold the namespace URI as a string and as bytes.
 */
export type Ns = { name?: BS; uriString: string; uri: BS }

/**
 * Definition of the tokens emitted by the parser
 * @public
 */
export namespace Token {
  /** @internal */
  export type Token = Token.EndTag | Token.StartTag | Token.Text | Token.CDATA | Token.Comment

  /** Accepted digit sequences for numeric character references. */
  const DEC_DIGITS = /^[0-9]+$/
  const HEX_DIGITS = /^[0-9a-fA-F]+$/

  /**
   * Represent a start-tag, ie, `<a>`
   * @public
   */
  export class StartTag {
    /**
     * @param name - fully-qualified tag name (prefix included)
     * @param atts - attributes; only stored when non-empty
     * @param ns - namespace declarations; only stored when non-empty
     * @param selfclosing - `true` for a self-closing tag
     */
    constructor(readonly name: BS, atts: Attribute[], ns: Ns[], selfclosing?: true) {
      // Empty collections are left undefined so that "no attributes" is a simple falsy check.
      if (atts.length) this.atts = atts
      if (ns.length) this.ns = ns
      if (selfclosing) this.selfClosing = true
    }

    /** @internal */
    readonly atts?: { name: BS; value: BS }[]

    /** @internal */
    readonly ns?: Ns[]

    /** self-closing start-tag, like `<a/>` */
    readonly selfClosing?: true

    /**
     * namespace URI
     *
     * with `<x:a xmlns:x="//uri/x">`, aStartTag.namespaceUri === '//uri/x'
     *
     * Only the namespace declarations stored on this tag are searched.
     * Returns `undefined` when no matching declaration is found.
     */
    get namespaceUri(): string | undefined {
      if (!this.ns) return undefined
      const coIndex = this.name.indexOf(0x3a /*:*/)
      // Prefixed tag: look for the declaration of that prefix
      // (NOTE(review): `isStartOf` is defined in ./bs; presumably it compares the
      // prefix with the first `coIndex` bytes of the tag name - confirm).
      // Unprefixed tag: look for the default namespace (declaration without a name).
      const ns =
        coIndex > -1
          ? this.ns.find(({ name }) => name?.isStartOf(this.name, coIndex))
          : this.ns.find(({ name }) => !name)
      return ns ? ns.uriString : undefined
    }

    /**
     * with `<x:a>`, aStartTag.tagName === 'x:a'
     */
    get tagName(): string {
      return this.name.toString()
    }

    /**
     * with `<x:a>`, aStartTag.localName === 'a'
     */
    get localName(): string {
      const coIndex = this.name.indexOf(0x3a /*:*/)
      return coIndex > -1 ? this.name.subarray(coIndex + 1).toString() : this.name.toString()
    }

    /**
     * with `<a att="value">`, aStartTag.getAttribute('att') === 'value'
     *
     * @param name - fully-qualified attribute name (prefix included, if any)
     * @returns the attribute value, or `undefined` when the attribute is absent
     */
    getAttribute(name: string): string | undefined {
      return this.getAttributeFQN(BS.create(name))
    }

    /** @internal */
    getAttributeFQN(fqName: BS): string | undefined {
      const att = this.atts && this.atts.find(({ name }) => name.equals(fqName))
      return att && att.value.toString()
    }

    /**
     * with `<a xmlns:x="//uri/x" x:att="value">`, aStartTag.getAttributeNS('//uri/x', 'att') === 'value'
     *
     * The first namespace declaration of this tag whose URI equals `nsUri` is used.
     * When there is none, or when it is the default (unprefixed) namespace,
     * the attribute is looked up by its bare local name.
     *
     * @param nsUri - namespace URI of the attribute
     * @param localName - attribute name without prefix
     * @returns the attribute value, or `undefined` when the attribute is absent
     */
    getAttributeNS(nsUri: string, localName: string): string | undefined {
      if (!this.atts) return undefined
      const localNameBS = BS.create(localName)
      const ns = this.ns && this.ns.find(({ uriString }) => uriString === nsUri)
      const prefix = ns && ns.name
      if (!prefix) return this.getAttributeFQN(localNameBS)

      // Expected attribute name layout: <prefix> ':' <localName>
      const prefixLen = prefix.length + 1
      const att = this.atts.find(({ name }) => {
        return (
          localNameBS.length + prefixLen === name.length &&
          // The separator must really be a colon, otherwise `x-att` would match `x:att`.
          name[prefix.length] === 0x3a /*:*/ &&
          prefix.every((b, i) => name[i] === b) &&
          localNameBS.every((b, i) => name[i + prefixLen] === b)
        )
      })
      return att && att.value.toString()
    }

    /**
     * Number of bytes of the serialized tag as produced by {@link StartTag.bs}.
     * @internal
     */
    get length(): number {
      // '<' + name + '>'
      let length = 1 + this.name.length + 1
      if (this.atts) {
        // ' ' + name + '=' + '"' + value + '"'
        for (const { name, value } of this.atts) length += name.length + value.length + 4
      }
      if (this.ns) {
        // ' ' + 'xmlns' + (':' + name)? + '=' + '"' + uri + '"'
        for (const { name, uri } of this.ns) length += (name ? 6 + name.length : 5) + uri.length + 4
      }
      if (this.selfClosing) length++ // '/'
      return length
    }

    /**
     * Serialize the tag to bytes: name, then attributes, then namespace declarations,
     * each separated by a single space and with values wrapped in double quotes.
     * @internal
     */
    get bs(): BS {
      const length = this.length
      const bs = new BS(length)
      bs[0] = 0x3c /*<*/
      bs.set(this.name, 1)
      let at = this.name.length + 1

      if (this.atts) {
        for (const { name, value } of this.atts) {
          bs[at++] = 0x20 /* */
          bs.set(name, at)
          at += name.length
          bs[at++] = 0x3d /*=*/
          bs[at++] = 0x22 /*"*/
          bs.set(value, at)
          at += value.length
          bs[at++] = 0x22 /*"*/
        }
      }

      if (this.ns) {
        for (const { name, uri } of this.ns) {
          bs[at++] = 0x20 /* */
          bs[at++] = 0x78 /*x*/
          bs[at++] = 0x6d /*m*/
          bs[at++] = 0x6c /*l*/
          bs[at++] = 0x6e /*n*/
          bs[at++] = 0x73 /*s*/
          if (name) {
            bs[at++] = 0x3a /*:*/
            bs.set(name, at)
            at += name.length
          }
          bs[at++] = 0x3d /*=*/
          bs[at++] = 0x22 /*"*/
          bs.set(uri, at)
          at += uri.length
          bs[at++] = 0x22 /*"*/
        }
      }

      // `length` already accounts for the optional '/' and the closing '>'.
      if (this.selfClosing) bs[length - 2] = 0x2f /*/*/
      bs[length - 1] = 0x3e /*>*/
      return bs
    }

    /** return this tag as a string (extra space-like characters omitted) */
    toString(): string {
      return this.bs.toString()
    }
  }

  /**
   * Represent a CDATA node, ie, `<![CDATA[content]]>`
   * @public
   */
  export class CDATA {
    constructor(readonly content: BS) {}

    /** return text as a string */
    get textContent(): string {
      return this.content.toString()
    }

    /** return this tag as a string, CDATA delimiters included */
    toString(): string {
      return `<![CDATA[${this.content.toString()}]]>`
    }
  }

  /**
   * Represent a Text node
   * @public
   */
  export class Text {
    constructor(readonly content: BS) {}

    /** return the raw text as a string (xml entities are left as written) */
    toString(): string {
      return this.content.toString()
    }

    /**
     * return text as a string, decoding xml entities
     *
     * @throws Error on an unknown named entity or an invalid numeric character reference
     */
    get textContent(): string {
      return this.decode()
    }

    /**
     * Replace every `&...;` sequence of the content by the character it denotes.
     *
     * Named entities are resolved through {@link Text.entities}; `&#N;` and `&#xH;`
     * are resolved as decimal / hexadecimal code points. An `&` that is not followed
     * by any `;` ends the scan and the remaining bytes are kept as they are.
     */
    private decode(): string {
      const bs = this.content
      const chunks: string[] = []
      let at = 0

      for (;;) {
        const amp = bs.indexOf(0x26 /*&*/, at)
        if (amp < 0) break
        const semi = bs.indexOf(0x3b /*;*/, amp + 1)
        if (semi < 0) break

        // Literal text preceding the entity.
        chunks.push(bs.subarray(at, amp).toString())

        const entityLength = semi - amp - 1
        const entity = Text.entities.find(e => {
          return e.bs.length === entityLength && e.bs.every((b, i) => b === bs[amp + 1 + i])
        })

        if (entity) {
          chunks.push(entity.c)
        } else if (bs[amp + 1] === 0x23 /*#*/) {
          const isHex = bs[amp + 2] === 0x78 /*x*/
          const digits = bs.subarray(amp + (isHex ? 3 : 2), semi).toString()
          let decoded: string | undefined
          // Validate the digits first: parseInt would otherwise accept trailing
          // garbage ("65abc") or a "0x" prefix in the decimal form.
          if ((isHex ? HEX_DIGITS : DEC_DIGITS).test(digits)) {
            try {
              decoded = String.fromCodePoint(parseInt(digits, isHex ? 16 : 10))
            } catch {
              // code point out of range: reported below
            }
          }
          if (decoded === undefined) {
            throw new Error(
              isHex
                ? `Invalid entity codepoint (hex) ${bs.subarray(amp, semi).toString()}`
                : `Invalid entity codepoint (dec) ${bs.subarray(amp, semi + 1).toString()}`
            )
          }
          chunks.push(decoded)
        } else {
          throw new Error(`Unknown entity ${bs.subarray(amp, semi).toString()}`)
        }

        at = semi + 1
      }

      // No entity decoded: the content can be converted as a whole.
      if (chunks.length === 0) return bs.toString()
      chunks.push(bs.subarray(at, bs.length).toString())
      return chunks.join('')
    }

    /** Supported entities (in addition to hex and dec codepoints)
     *
     * @remarks
     *
     * Default entities: `&amp;`, `&gt;`, `&lt;`, `&quot;` and `&apos;`
     *
     * `c` is the decoded character, `bs` the bytes of the entity name
     * (without the leading `&` and the trailing `;`).
     */
    static entities = [
      { c: '&', bs: [0x61, 0x6d, 0x70] },
      { c: '<', bs: [0x6c, 0x74] },
      { c: '>', bs: [0x67, 0x74] },
      { c: '"', bs: [0x71, 0x75, 0x6f, 0x74] },
      { c: "'", bs: [0x61, 0x70, 0x6f, 0x73] },
    ]
  }

  /**
   * Represent a comment node, ie, `<!-- content -->`
   * @public
   */
  export class Comment {
    constructor(readonly content: BS) {}

    /** return this comment as a string, comment delimiters included */
    toString(): string {
      return `<!--${this.content.toString()}-->`
    }

    /** return comment as a string */
    get textContent(): string {
      return this.content.toString()
    }
  }

  /**
   * Represent an end-tag, ie `</a>`
   * @public
   */
  export class EndTag {
    constructor(readonly name: BS) {}

    /** return this tag as a string */
    toString(): string {
      return `</${this.name.toString()}>`
    }
  }
}