import { Parser } from 'htmlparser2';
import {
  DomHandler,
  Element,
  Text,
  Comment,
  ProcessingInstruction,
  Node as DomNode,
} from 'domhandler';
import { ElementType } from 'domelementtype';

// Union of the DOM node kinds that the type guards below distinguish.
type Node = Element | Text | Comment | ProcessingInstruction;

/**
 * Tag names that are serialized without an end tag (HTML void elements).
 * Any other element always gets an explicit `</name>`.
 */
const VOID_ELEMENTS: ReadonlySet<string> = new Set([
  'area',
  'base',
  'br',
  'col',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'param',
  'source',
  'track',
  'wbr',
]);

/**
 * Normalizes an HTML string by parsing it and re-serializing the resulting
 * DOM with {@link cleanHTML}.
 *
 * @param html - Markup to normalize.
 * @returns The normalized markup: attributes sorted by name, runs of
 *   whitespace collapsed to a single space, comments and directives removed.
 */
function normalizeHTML(html: string): string {
  const handler = new DomHandler();
  const parser = new Parser(handler);
  parser.write(html);
  parser.end();
  return cleanHTML(handler.dom);
}

/**
 * Recursively serializes a list of DOM nodes into normalized HTML.
 *
 * - Elements are written as `<name attrs>children</name>` with attributes
 *   sorted by name; childless void elements are written as `<name attrs>`.
 * - Text is emitted with `&`, `<` and `>` escaped.
 * - Comments, directives and every other node type produce no output.
 * - At every nesting level the joined result has each whitespace run replaced
 *   by one space and is then trimmed. The replacement runs over the whole
 *   serialized string, so whitespace inside attribute values is collapsed too.
 *
 * The input nodes are not modified.
 *
 * NOTE(review): `isElement` only matches `ElementType.Tag`. If the parser
 * reports `<script>` / `<style>` with their own node types, those elements
 * (and their contents) are dropped here — confirm that this is intended.
 *
 * @param dom - Sibling nodes to serialize.
 * @returns The normalized HTML for `dom`.
 * @throws TypeError if `dom` is not an array.
 */
function cleanHTML(dom: DomNode[]): string {
  if (!Array.isArray(dom)) {
    throw new TypeError("Expected 'dom' to be an array");
  }

  const serialized = dom
    .map((node) => {
      if (isElement(node)) {
        // Sort into a fresh object so the caller's node keeps its own attribs.
        const sortedAttribs = Object.fromEntries(
          Object.entries(node.attribs).sort(([a], [b]) => a.localeCompare(b))
        );
        const openTag = `<${node.name}${formatAttributes(sortedAttribs)}>`;

        // Void elements have no end tag; the length check keeps any children
        // of a hand-built DOM from being silently discarded.
        if (VOID_ELEMENTS.has(node.name) && node.children.length === 0) {
          return openTag;
        }

        const childrenHTML = cleanHTML(node.children as DomNode[]);
        // The end tag keeps differently nested inputs distinguishable.
        return `${openTag}${childrenHTML}</${node.name}>`;
      }
      if (isText(node)) {
        return escapeText(node.data);
      }
      if (isComment(node) || isProcessingInstruction(node)) {
        // Comments and directives are deliberately left out of the output.
        return '';
      }
      // Any other node type is ignored.
      return '';
    })
    .join('');

  return serialized.replace(/\s+/g, ' ').trim();
}

/**
 * Formats an attribute map as ` key="value" key2="value2"`, in the map's own
 * iteration order, with values escaped for use inside double quotes.
 *
 * @param attribs - Attribute names mapped to their values.
 * @returns The attribute string with a leading space, or `''` when `attribs`
 *   has no entries.
 */
function formatAttributes(attribs: { [key: string]: string }): string {
  const attributes = Object.entries(attribs)
    .map(([key, value]) => `${key}="${escapeAttribute(value)}"`)
    .join(' ');
  return attributes ? ' ' + attributes : '';
}

/**
 * Escapes text-node data so it cannot be read back as markup.
 *
 * NOTE(review): this assumes the parser hands over entity-decoded text
 * (htmlparser2's default); if decoding is disabled, existing entities would
 * be escaped a second time.
 */
function escapeText(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/** Escapes an attribute value for output between double quotes. */
function escapeAttribute(value: string): string {
  return value.replace(/&/g, '&amp;').replace(/"/g, '&quot;');
}

// Type guards keyed on the node's `type` discriminator.

/** True when `node.type` is `ElementType.Tag`. */
function isElement(node: DomNode): node is Element {
  return node.type === ElementType.Tag;
}

/** True when `node.type` is `ElementType.Text`. */
function isText(node: DomNode): node is Text {
  return node.type === ElementType.Text;
}

/** True when `node.type` is `ElementType.Comment`. */
function isComment(node: DomNode): node is Comment {
  return node.type === ElementType.Comment;
}

/** True when `node.type` is `ElementType.Directive`. */
function isProcessingInstruction(node: DomNode): node is ProcessingInstruction {
  return node.type === ElementType.Directive;
}

export { normalizeHTML };