// HtmlElement represents an HTML document as a data structure on which JQuery-like operations can
// be applied and from which all changes can be reflected via messages to a remote client.
// Copyright (c) 2023 by Thorsten von Eicken, MIT License
import { parse, HTMLElement, TextNode, Node } from "node-html-parser"
// Parse an HTML string and return its top-level nodes as an HtmlArray of HtmlElement trees.
// A falsy input (empty string, null, undefined) produces an empty HtmlArray.
// Nodes that convert() cannot represent (it returns null for them) are dropped.
export function parse_html(html: string): HtmlArray {
  if (!html) return new HtmlArray()
  // trimming whitespace is controversial, but `\n...\n` is a pain without it
  // (presumably this refers to the removeWhitespace() call below)
  const root = parse(html, {
    lowerCaseTagName: true,
    comment: false,
    fixNestedATags: true,
    parseNoneClosedTags: true,
  }).removeWhitespace()
  const topLevel: HtmlElement[] = []
  for (const node of root.childNodes) {
    const converted = convert(node)
    if (converted !== null) topLevel.push(converted)
  }
  return new HtmlArray(...topLevel)
}
// Convert an (already parsed) JSON structure into an HtmlElement tree.
//   - null/undefined         -> null
//   - a string               -> a text span holding that string
//   - { tag: "span", text }  -> a text span (attrs and children are ignored for text spans)
//   - { tag, attrs?, children? } -> an element; children are converted recursively and
//                                   null children are dropped
// Throws if json is neither a string nor an object, or if `children` is present but is not an
// array. Further validation of tag/attrs is left to the HtmlElement constructor.
export function fromJSON(json: JSON): HtmlElement | null {
  if (json == null) return null
  if (typeof json == "string") return new HtmlElement("span", json)
  if (typeof json != "object") throw new Error("json must be string or object")
  const j: any = json // let HtmlElement constructor do the checking...
  let children: HtmlElement[] = []
  if ("children" in j) {
    if (!Array.isArray(j.children)) throw new Error("children must be an array")
    children = j.children
      .map((child: JSON) => fromJSON(child))
      .filter((el: HtmlElement | null) => el != null) as HtmlElement[]
  }
  // Only a span that actually carries text is a text node. A regular <span> element (no text
  // field) must keep its attrs and children, otherwise a toJSON()/fromJSON() round trip would
  // silently strip them.
  if (j.tag == "span" && j.text != null) return new HtmlElement("span", j.text)
  return new HtmlElement(j.tag, j.attrs, children)
}
// HtmlArray is an array of HtmlElement with methods added to select a subset of elements using
// JQuery-style finders and perform mutations using JQuery-style methods.
// HtmlArray is called from plain javascript, thus the type annotations of methods are no
// guarantee of the actual types of the arguments; the methods therefore validate at runtime.
export class HtmlArray extends Array {
  [index: number]: HtmlElement

  // new HtmlArray(el, el, ...) creates an array holding the given HtmlElements and throws if
  // any argument is not an HtmlElement.
  // new HtmlArray(n) with a single number behaves like new Array(n); built-in Array methods
  // (map, filter, flatMap, ...) construct their result this way via the species constructor.
  constructor(...args: any) {
    if (args.length == 1 && typeof args[0] == "number") {
      super(args[0])
      return
    }
    // the instanceof check already rejects null/undefined, no separate filtering is needed
    if (!args.every((a: any) => a instanceof HtmlElement)) throw new Error("HtmlElements required")
    super(...args)
  }

  // select all nodes in the subtrees of all elements in the array
  // x may be a DOM selector (tag#id.class.class[attr=val][attr=val]) or
  // x may be a predicate function (el: HtmlElement => boolean)
  // An element reachable from several array members is returned only once.
  select(x: any): HtmlArray {
    if (typeof x !== "string" && typeof x !== "function")
      throw new Error("selector must be a string or a predicate function")
    const found = new Set<HtmlElement>()
    for (const el of this) {
      for (const hit of el.select(x)) found.add(hit)
    }
    return new HtmlArray(...found)
  }

  // get/set the element IDs
  id(): string[] // get returns a plain array with the ID of each element
  id(id: string): this // set returns the array for chaining
  id(id?: any): any {
    // Array.from yields a plain array; this.map would produce an HtmlArray holding strings
    if (id === undefined) return Array.from(this, el => el.id)
    if (typeof id !== "string") throw new Error("id must be a string")
    this.forEach(el => {
      el.id = id
    })
    return this
  }

  // add one or multiple classes to the elements, returns the array for chaining
  addClass(...classes: string[]): this {
    if (!classes.every((c: any) => typeof c == "string")) throw new Error("classes must be strings")
    this.forEach(el => el.addClass(...classes))
    return this
  }

  // check whether any of the elements has a class, returns a boolean
  hasClass(cls: string): boolean {
    return this.some(el => el.hasClass(cls))
  }

  // remove one or multiple classes from the elements, returns the array for chaining
  removeClass(...classes: string[]): this {
    if (!classes.every((c: any) => typeof c == "string")) throw new Error("classes must be strings")
    this.forEach(el => el.removeClass(...classes))
    return this
  }

  // With one argument: get the attribute of every element in the array as a plain array (this
  // differs from JQuery, which only returns the attribute of the first element).
  // With two arguments: set the attribute on all elements and return the array for chaining.
  attr(name: string, value?: string): (AttrValue | undefined)[] | this {
    if (typeof name !== "string") throw new Error("attr name must be a string")
    if (value === undefined) return Array.from(this, el => el.attr(name))
    if (typeof value !== "string") throw new Error("attr value must be a string")
    this.forEach(el => el.attr(name, value))
    return this
  }

  // check whether any of the elements has an attribute, returns a boolean (doesn't exist in JQuery)
  hasAttr(name: string): boolean {
    return this.some(el => el.hasAttr(name))
  }

  // operations on the inner content of all elements in the array

  // Without argument: return the concatenated text of all elements in the array.
  // With a string: set the text of every element and return the array for chaining.
  text(text?: string): string | this {
    if (text === undefined) return Array.from(this, el => el.text()).join("")
    if (typeof text !== "string") throw new Error("text must be a string")
    this.forEach(el => el.text(text))
    return this
  }

  // Replace the content of every element in the array with the parsed html (the parsed
  // elements go into the first element, the others receive clones) and return the array.
  // There is no getter variant: html must be a string.
  html(html: string): string[] | this {
    if (typeof html !== "string") throw new Error("html must be a string")
    const els = html ? parse_html(html) : []
    this.empty()
    this.append(...els)
    return this
  }

  // operations on the child nodes of all elements in the array

  // append one or multiple elements to the children of all elements in the array
  // the given elements themselves are appended to the first element in the array, every other
  // element in the array receives clones
  append(...children: HtmlElement[]): this {
    if (!children.every(c => c instanceof HtmlElement)) throw new Error("HtmlElements required")
    this.forEach((el, ix) => {
      if (ix == 0) el.append(...children)
      else el.append(...children.map(c => c.clone()))
    })
    return this
  }

  // prepend one or multiple elements to the children of all elements in the array
  // the given elements themselves are prepended to the first element in the array, every other
  // element in the array receives clones
  prepend(...children: HtmlElement[]): this {
    if (!children.every(c => c instanceof HtmlElement)) throw new Error("HtmlElements required")
    this.forEach((el, ix) => {
      if (ix == 0) el.prepend(...children)
      else el.prepend(...children.map(c => c.clone()))
    })
    return this
  }

  // remove all children from all elements in the array
  empty(): this {
    this.forEach(el => el.empty())
    return this
  }

  // operations on all elements of the array

  // replace all elements in the array with one or multiple elements
  // the first element is replaced by the given elements, the others by clones
  replaceWith(...elements: HtmlElement[]): this {
    if (!elements.every(c => c instanceof HtmlElement)) throw new Error("HtmlElements required")
    this.forEach((target, ix) => {
      if (ix == 0) target.replaceWith(...elements)
      else target.replaceWith(...elements.map(replacement => replacement.clone()))
    })
    return this
  }

  // remove all elements in the array from their parents
  remove(): this {
    this.forEach(el => el.remove())
    return this
  }
}
// AttrValue is the type of a single attribute value.
// In HTML, attribute values must be strings, but Vue converts string arrays for us, and that is
// a good match for the class attribute, hence string arrays are allowed as well.
type AttrValue = string | string[] // not sure whether also allowing `| Record` would be useful
// Verify that v is a legal attribute value, i.e. a string or an array of strings.
// Throws an Error whose message starts with `what` otherwise; returns nothing on success.
function assertAttrValue(v: any, what: string) {
  if (typeof v == "string") return
  if (Array.isArray(v)) {
    const hasNonString = v.some((item: any) => typeof item != "string")
    if (hasNonString) throw new Error(`${what} must be array of strings`)
    return
  }
  throw new Error(`${what} must be string or array of strings`)
}
// Attrs is the attribute map of an HtmlElement: attribute name -> AttrValue.
interface Attrs {
// element id; HtmlElement.normalize() deletes the entry when it is empty
id?: string
// class list; HtmlElement.normalize() turns a space-separated string into an array
class?: string[]
// NOTE(review): not read or written through attrs anywhere in this file; HtmlElement keeps its
// text in a separate _text field -- confirm whether this entry is still needed
_text?: string
// any other attribute
[name: string]: AttrValue | undefined
}
// HtmlElement is one node of the HTML tree. A node is either
//   - a regular element with a tag, attributes and child elements, or
//   - a text node, represented as a "span" whose _text field holds the text.
// A span that has no _text is a regular <span> element.
export class HtmlElement {
  tag: string
  children: HtmlElement[]
  attrs: Attrs = {}
  _text?: string // only set on text nodes
  parent: HtmlElement | null = null

  // new HtmlElement(tag, id, classes, attrs, children?)
  //   id/classes, when truthy, override the "id"/"class" entries of attrs
  // new HtmlElement(tag, attrs?, children?)
  //   attrs may be omitted, i.e. new HtmlElement(tag, children) is accepted too
  // new HtmlElement("span", text)
  //   creates a text node
  // attrs are shallow-copied and validated, the children array is stored as passed in.
  // Throws on an empty/non-string tag, on attrs that are not an object of string or string[]
  // values, and on children that are not an array of HtmlElements.
  constructor(
    tag: string,
    id: string | null,
    classes: AttrValue | null,
    attrs: Attrs,
    children?: HtmlElement[]
  )
  constructor(tag: string, attrs?: Attrs, children?: HtmlElement[])
  constructor(tag: "span", text: string)
  constructor(tag: string, ...args: any[]) {
    if (!tag || typeof tag != "string") throw new Error("tag must be non-empty string")
    this.tag = tag
    if (tag == "span" && args.length == 1 && typeof args[0] == "string") {
      // new HtmlElement("span", text)
      this._text = args[0]
      this.children = []
    } else if (args.length <= 2) {
      // new HtmlElement(tag, attrs?, children?)
      let rest = args
      if (rest.length > 0 && !Array.isArray(rest[0])) {
        const raw = rest[0]
        if (raw != null && typeof raw !== "object") throw new Error("attrs must be object")
        this.attrs = HtmlElement.checkedAttrs(raw || {})
        this.normalize()
        rest = rest.slice(1)
      }
      // a null/undefined children argument means "no children", same as in the 5-argument form
      this.children = rest.length == 1 ? HtmlElement.checkedChildren(rest[0]) : []
    } else {
      // new HtmlElement(tag, id, classes, attrs, children?)
      const [id, classes, attrs] = args
      if (attrs == null || typeof attrs !== "object") throw new Error("attrs must be object")
      this.attrs = HtmlElement.checkedAttrs(attrs)
      // explicit id and classes take precedence over what attrs contains
      if (id) this.attrs["id"] = id
      if (classes) this.attrs["class"] = classes
      this.normalize()
      this.children = args.length == 4 ? HtmlElement.checkedChildren(args[3]) : []
    }
    this.setParent()
  }

  // return a validated shallow copy of a raw attrs object
  private static checkedAttrs(raw: any): Attrs {
    const attrs: Attrs = { ...raw }
    for (const k in attrs) assertAttrValue(attrs[k], "attrs")
    return attrs
  }

  // validate a children argument and return it; null/undefined yields an empty array
  private static checkedChildren(children: any): HtmlElement[] {
    if (children == null) return []
    if (!Array.isArray(children)) throw new Error("children must be array")
    if (!children.every(c => c instanceof HtmlElement))
      throw new Error("children must be array of HtmlElements")
    return children
  }

  // produce a plain-object representation: { tag, attrs, children, text? }
  // text is only present on text nodes
  toJSON(): any {
    const ret: any = {
      tag: this.tag,
      attrs: this.attrs,
      children: this.children.map(c => {
        if (c instanceof HtmlElement) return c.toJSON()
        // should not happen: a non-HtmlElement child is logged and ends up as undefined
        else console.log("$$$$$", c)
      }),
    }
    // test the value rather than `"_text" in this`: whether an unassigned class field exists
    // as a property depends on the compiler's class-field emit
    if (this._text !== undefined) ret.text = this._text
    return ret
  }

  // clone an HtmlElement tree: attrs (including array values), text and all descendants are
  // copied, the clone has no parent
  clone(): HtmlElement {
    const attrs: Attrs = {}
    for (const [k, v] of Object.entries(this.attrs)) attrs[k] = Array.isArray(v) ? [...v] : v
    const copy = new HtmlElement(
      this.tag,
      attrs,
      Array.from(this.children, c => c.clone())
    )
    // the (tag, attrs, children) constructor form cannot carry text, copy it explicitly so
    // that cloned text nodes keep their content
    if (this._text !== undefined) copy._text = this._text
    return copy
  }

  // normalize id and class attributes:
  //   - an empty or null id is deleted, a non-string id throws
  //   - class is turned into an array of non-empty strings, a string value is split on
  //     whitespace; a null class is deleted, any other type throws
  normalize(): void {
    if ("id" in this.attrs) {
      const id = this.attrs.id
      if (id == null || id == "") delete this.attrs.id
      else if (typeof id != "string") throw new Error("id must be string")
    }
    if ("class" in this.attrs) {
      const c = this.attrs.class as any // we don't know what it really is
      if (c == null) {
        delete this.attrs.class
      } else {
        let list: string[]
        if (typeof c === "string") list = c.split(/\s+/)
        else if (Array.isArray(c) && c.every((s: any) => typeof s == "string")) list = c
        else throw new Error("class attr must be string or array of strings")
        this.attrs.class = list.filter(s => s != "")
      }
    }
  }

  // set the parent property on all children
  setParent(): void {
    for (const c of this.children) c.parent = this
  }

  // the element's id, "" if it has none
  get id(): string {
    return (this.attrs.id as string | undefined) || ""
  }
  set id(id: string) {
    this.attrs.id = id
    this.normalize()
  }

  // return an array of all HtmlElements in the subtree (including this element) that match a
  // selector (x: string) or a predicate (x: HtmlElement => boolean); any other argument type
  // yields an empty array
  select(x: string): HtmlArray
  select(predicate: (el: HtmlElement) => boolean): HtmlArray
  select(x: any): HtmlArray {
    if (typeof x === "string") return new HtmlArray(...this.find_by_selector(x))
    if (typeof x === "function") return new HtmlArray(...this.find_by_predicate(x))
    return new HtmlArray()
  }

  // find all nodes in the subtree (this element first, then depth-first through the children)
  // that match the predicate
  find_by_predicate(predicate: (el: HtmlElement) => boolean): HtmlElement[] {
    const res: HtmlElement[] = predicate(this) ? [this] : []
    return res.concat(this.children.flatMap(child => child.find_by_predicate(predicate)))
  }

  // return the list of subtree elements that match a selector
  // The selector consists of whitespace-separated parts, each of the form understood by
  // matcher(). The first part is matched against this element and all its descendants, each
  // following part against the strict descendants of the previous matches (descendant
  // combinator). Every element is returned at most once.
  find_by_selector(selector: string): HtmlElement[] {
    // trim + split on whitespace runs so that extra spaces do not produce empty parts
    const [first, ...rest] = selector.trim().split(/\s+/)
    let current = this.find_by_predicate(matcher(first))
    for (const part of rest) {
      const predicate = matcher(part)
      // nested matches of the previous part reach the same descendants: de-duplicate
      const next = new Set<HtmlElement>()
      for (const el of current) {
        for (const child of el.children) {
          for (const hit of child.find_by_predicate(predicate)) next.add(hit)
        }
      }
      current = [...next]
    }
    return current
  }

  // check whether this element matches a (single-part) selector
  matches(selector: string): boolean {
    return matcher(selector)(this)
  }

  // operations on classes

  // add classes that are not yet present, returns this for chaining
  addClass(...classes: string[]): this {
    if (!("class" in this.attrs)) this.attrs.class = []
    for (const c of classes) if (!this.attrs.class!.includes(c)) this.attrs.class!.push(c)
    return this
  }
  // check whether the element has a class
  hasClass(cls: string): boolean {
    return "class" in this.attrs && this.attrs.class!.includes(cls)
  }
  // remove the given classes, returns this for chaining
  removeClass(...classes: string[]): this {
    if (!("class" in this.attrs)) return this
    this.attrs.class = this.attrs.class!.filter(c => !classes.includes(c))
    return this
  }

  // operations on attributes

  // attr(name) returns the attribute value (undefined if not set),
  // attr(name, value) sets it and returns this for chaining
  attr(name: string): AttrValue | undefined
  attr(name: string, value: string): this
  attr(name: string, value?: string): AttrValue | undefined | this {
    if (value === undefined) return this.attrs[name]
    this.attrs[name] = value
    // keep the invariants the class methods rely on: class is an array, id is non-empty
    if (name == "id" || name == "class") this.normalize()
    return this
  }
  // check whether the attribute is set
  hasAttr(name: string): boolean {
    return name in this.attrs
  }

  // inner content

  // text() returns the text of a text node, or the concatenated text of all children
  // text(str) sets the text: a span holds the text itself, any other element gets its
  // children replaced by a single text node; returns this for chaining
  text(text?: string): string | this {
    if (text === undefined) {
      // only a span that carries text is a text node, a regular <span> delegates to children
      if (this._text !== undefined) return this._text
      return this.children.map(el => el.text()).join("")
    }
    for (const c of this.children) c.parent = null
    if (this.tag == "span") {
      // drop element children too, otherwise they linger next to the new text
      this.children = []
      this._text = text
    } else {
      this.children = [new HtmlElement("span", text)]
      this.setParent()
    }
    return this
  }

  // replace the children with the elements parsed from an html string
  html(html: string): this {
    for (const c of this.children) c.parent = null
    // spread into a plain array: children is declared as HtmlElement[], not HtmlArray
    this.children = [...parse_html(html)]
    this.setParent()
    return this
  }

  // append a child or children
  append(...children: HtmlElement[]): this {
    this.assertElArray(children)
    this.children.push(...children)
    this.setParent()
    return this
  }

  // prepend a child or children
  prepend(...children: HtmlElement[]): this {
    this.assertElArray(children)
    this.children.unshift(...children)
    this.setParent()
    return this
  }

  // remove all children
  empty(): this {
    for (const c of this.children) c.parent = null
    this.children = []
    return this
  }

  // replace this element in its parent's children with the given elements and detach this
  // element; does nothing if this element has no parent
  replaceWith(...elements: HtmlElement[]): this {
    this.assertElArray(elements)
    if (this.parent) {
      const siblings = this.parent.children
      const idx = siblings.indexOf(this)
      // idx is -1 if the tree is inconsistent; splice(-1, ...) would hit the last sibling
      if (idx >= 0) {
        siblings.splice(idx, 1, ...elements)
        this.parent.setParent()
      }
      this.parent = null
    }
    return this
  }

  // remove this element from its parent
  remove(): this {
    if (this.parent) {
      const siblings = this.parent.children
      const idx = siblings.indexOf(this)
      // idx is -1 if the tree is inconsistent; splice(-1, 1) would remove the last sibling
      if (idx >= 0) siblings.splice(idx, 1)
      this.parent = null
    }
    return this
  }

  // throw unless el is an HtmlElement
  assertEl(el: any): void {
    if (!(el instanceof HtmlElement)) throw new Error(`not an HtmlElement, got ${el}`)
  }
  // throw unless all els are HtmlElements
  assertElArray(els: any[]): void {
    for (const el of els) this.assertEl(el)
  }
}
// produce a matcher to match HtmlElement tags, #ids, .classes, and [attributes=val]
// i.e. return a function that takes an HtmlElement and returns true if it matches the selector
// The selector is a single compound selector of the form tag#id.class.class[attr=val][attr]:
//   [foo]     -> "foo" in attrs
//   [foo=]    -> attrs["foo"] == ""
//   [foo=bar] -> attrs["foo"] == "bar"; the value may be wrapped in single or double quotes
//                and may itself contain "=", "." or "#"
// An array-valued attribute is compared through its string conversion (elements joined by ",").
function matcher(selector: string): (el: HtmlElement) => boolean {
  // strip one pair of matching quotes around an attribute value
  const unquote = (v: string): string =>
    v.length >= 2 && (v[0] == '"' || v[0] == "'") && v[v.length - 1] == v[0] ? v.slice(1, -1) : v

  // Pull the [attr] / [attr=val] parts out first, so that "." and "#" inside attribute values
  // are not mistaken for class or id markers below. Capturing name and value separately keeps
  // any "=" that is part of the value.
  const attrs: [string, string | undefined][] = []
  const bare = selector.replace(
    /\[([^\]=]+)(?:=([^\]]*))?\]/g,
    (_whole: string, name: string, value?: string) => {
      attrs.push([name, value === undefined ? undefined : unquote(value)])
      return ""
    }
  )

  const tag = bare.match(/^[^#:.[ ]+/)
  const idMatch = bare.match(/#[^#.[ ]+/)
  const id = idMatch ? idMatch[0].substring(1) : ""
  const classes = (bare.match(/\.[^.#[ ]+/g) || []).map(c => c.substring(1))

  return (el: HtmlElement) => {
    if (tag && tag[0] != el.tag) return false
    if (id && id != el.attrs.id) return false
    if (classes.length > 0 && !classes.every(c => el.attrs.class?.includes(c))) return false
    for (const [k, v] of attrs) {
      if (v === undefined) {
        if (!(k in el.attrs)) return false
      } else {
        const actual = el.attrs[k]
        if (actual == null || String(actual) !== v) return false
      }
    }
    return true
  }
}
// Import a node-html-parser Node into our own representation:
//   - an HTMLElement becomes an HtmlElement with the same tag, id, classes and attributes,
//     whose children are the recursively converted child nodes
//   - a TextNode becomes a text span holding the node's text content
//   - any other kind of node is not representable and yields null (callers drop those)
function convert(el: Node): HtmlElement | null {
  if (el instanceof HTMLElement) {
    const kids: HtmlElement[] = []
    for (const childNode of el.childNodes) {
      const kid = convert(childNode)
      if (kid !== null) kids.push(kid)
    }
    const ret = new HtmlElement(el.rawTagName, el.id, el.classList.value, el.attributes, kids)
    ret.setParent()
    return ret
  }
  return el instanceof TextNode ? new HtmlElement("span", el.textContent) : null
}