/**
* DomParser.js
*
* Released under LGPL License.
* Copyright (c) 1999-2017 Ephox Corp. All rights reserved
*
* License: http://www.tinymce.com/license
* Contributing: http://www.tinymce.com/contributing
*/
import LegacyFilter from '../../html/LegacyFilter';
import * as ParserFilters from '../../html/ParserFilters';
import { paddEmptyNode, isPaddedWithNbsp, hasOnlyChild, isEmpty, isLineBreakNode } from '../../html/ParserUtils';
import Node from './Node';
import SaxParser from './SaxParser';
import Schema from './Schema';
import Tools from '../util/Tools';
export type ParserArgs = any;
export type ParserFilterCallback = (nodes: Node[], name: string, args: ParserArgs) => void;
export interface ParserFilter {
name: string;
callbacks: ParserFilterCallback[];
}
/**
* This class parses HTML code into a DOM like structure of nodes it will remove redundant whitespace and make
* sure that the node tree is valid according to the specified schema.
* So for example:
a
b
c will become a
b
c
*
* @example
* var parser = new tinymce.html.DomParser({validate: true}, schema);
* var rootNode = parser.parse('content
');
*
* @class tinymce.html.DomParser
* @version 3.4
*/
const makeMap = Tools.makeMap, each = Tools.each, explode = Tools.explode, extend = Tools.extend;
export default function (settings?, schema = Schema()) {
const nodeFilters = {};
const attributeFilters = [];
let matchedNodes = {};
let matchedAttributes = {};
settings = settings || {};
settings.validate = 'validate' in settings ? settings.validate : true;
settings.root_name = settings.root_name || 'body';
const fixInvalidChildren = function (nodes) {
let ni, node, parent, parents, newParent, currentNode, tempNode, childNode, i;
let nonEmptyElements, whitespaceElements, nonSplitableElements, textBlockElements, specialElements, sibling, nextNode;
nonSplitableElements = makeMap('tr,td,th,tbody,thead,tfoot,table');
nonEmptyElements = schema.getNonEmptyElements();
whitespaceElements = schema.getWhiteSpaceElements();
textBlockElements = schema.getTextBlockElements();
specialElements = schema.getSpecialElements();
for (ni = 0; ni < nodes.length; ni++) {
node = nodes[ni];
// Already removed or fixed
if (!node.parent || node.fixed) {
continue;
}
// If the invalid element is a text block and the text block is within a parent LI element
// Then unwrap the first text block and convert other sibling text blocks to LI elements similar to Word/Open Office
if (textBlockElements[node.name] && node.parent.name === 'li') {
// Move sibling text blocks after LI element
sibling = node.next;
while (sibling) {
if (textBlockElements[sibling.name]) {
sibling.name = 'li';
sibling.fixed = true;
node.parent.insert(sibling, node.parent);
} else {
break;
}
sibling = sibling.next;
}
// Unwrap current text block
node.unwrap(node);
continue;
}
// Get list of all parent nodes until we find a valid parent to stick the child into
parents = [node];
for (parent = node.parent; parent && !schema.isValidChild(parent.name, node.name) &&
!nonSplitableElements[parent.name]; parent = parent.parent) {
parents.push(parent);
}
// Found a suitable parent
if (parent && parents.length > 1) {
// Reverse the array since it makes looping easier
parents.reverse();
// Clone the related parent and insert that after the moved node
newParent = currentNode = filterNode(parents[0].clone());
// Start cloning and moving children on the left side of the target node
for (i = 0; i < parents.length - 1; i++) {
if (schema.isValidChild(currentNode.name, parents[i].name)) {
tempNode = filterNode(parents[i].clone());
currentNode.append(tempNode);
} else {
tempNode = currentNode;
}
for (childNode = parents[i].firstChild; childNode && childNode !== parents[i + 1];) {
nextNode = childNode.next;
tempNode.append(childNode);
childNode = nextNode;
}
currentNode = tempNode;
}
if (!isEmpty(schema, nonEmptyElements, whitespaceElements, newParent)) {
parent.insert(newParent, parents[0], true);
parent.insert(node, newParent);
} else {
parent.insert(node, parents[0], true);
}
// Check if the element is empty by looking through it's contents and special treatment for
parent = parents[0];
if (isEmpty(schema, nonEmptyElements, whitespaceElements, parent) || hasOnlyChild(parent, 'br')) {
parent.empty().remove();
}
} else if (node.parent) {
// If it's an LI try to find a UL/OL for it or wrap it
if (node.name === 'li') {
sibling = node.prev;
if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
sibling.append(node);
continue;
}
sibling = node.next;
if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
sibling.insert(node, sibling.firstChild, true);
continue;
}
node.wrap(filterNode(new Node('ul', 1)));
continue;
}
// Try wrapping the element in a DIV
if (schema.isValidChild(node.parent.name, 'div') && schema.isValidChild('div', node.name)) {
node.wrap(filterNode(new Node('div', 1)));
} else {
// We failed wrapping it, then remove or unwrap it
if (specialElements[node.name]) {
node.empty().remove();
} else {
node.unwrap();
}
}
}
}
};
/**
* Runs the specified node though the element and attributes filters.
*
* @method filterNode
* @param {tinymce.html.Node} Node the node to run filters on.
* @return {tinymce.html.Node} The passed in node.
*/
const filterNode = (node: Node): Node => {
let i, name, list;
// Run element filters
if (name in nodeFilters) {
list = matchedNodes[name];
if (list) {
list.push(node);
} else {
matchedNodes[name] = [node];
}
}
// Run attribute filters
i = attributeFilters.length;
while (i--) {
name = attributeFilters[i].name;
if (name in node.attributes.map) {
list = matchedAttributes[name];
if (list) {
list.push(node);
} else {
matchedAttributes[name] = [node];
}
}
}
return node;
};
/**
* Adds a node filter function to the parser, the parser will collect the specified nodes by name
* and then execute the callback ones it has finished parsing the document.
*
* @example
* parser.addNodeFilter('p,h1', function(nodes, name) {
* for (var i = 0; i < nodes.length; i++) {
* console.log(nodes[i].name);
* }
* });
* @method addNodeFilter
* @method {String} name Comma separated list of nodes to collect.
* @param {function} callback Callback function to execute once it has collected nodes.
*/
const addNodeFilter = (name: string, callback: (nodes: Node[], name: string, args: ParserArgs) => void) => {
each(explode(name), function (name) {
let list = nodeFilters[name];
if (!list) {
nodeFilters[name] = list = [];
}
list.push(callback);
});
};
const getNodeFilters = (): ParserFilter[] => {
const out = [];
for (const name in nodeFilters) {
if (nodeFilters.hasOwnProperty(name)) {
out.push({ name, callbacks: nodeFilters[name] });
}
}
return out;
};
/**
* Adds a attribute filter function to the parser, the parser will collect nodes that has the specified attributes
* and then execute the callback ones it has finished parsing the document.
*
* @example
* parser.addAttributeFilter('src,href', function(nodes, name) {
* for (var i = 0; i < nodes.length; i++) {
* console.log(nodes[i].name);
* }
* });
* @method addAttributeFilter
* @method {String} name Comma separated list of nodes to collect.
* @param {function} callback Callback function to execute once it has collected nodes.
*/
const addAttributeFilter = (name: string, callback: (nodes: Node[], name: string, args: ParserArgs) => void) => {
each(explode(name), function (name) {
let i;
for (i = 0; i < attributeFilters.length; i++) {
if (attributeFilters[i].name === name) {
attributeFilters[i].callbacks.push(callback);
return;
}
}
attributeFilters.push({ name, callbacks: [callback] });
});
};
const getAttributeFilters = (): ParserFilter[] => [].concat(attributeFilters);
/**
* Parses the specified HTML string into a DOM like node tree and returns the result.
*
* @example
* var rootNode = new DomParser({...}).parse('text');
* @method parse
* @param {String} html Html string to sax parse.
* @param {Object} args Optional args object that gets passed to all filter functions.
* @return {tinymce.html.Node} Root node containing the tree.
*/
const parse = (html: string, args?: ParserArgs): Node => {
let parser, nodes, i, l, fi, fl, list, name;
let blockElements;
const invalidChildren = [];
let isInWhiteSpacePreservedElement;
let node: Node;
args = args || {};
matchedNodes = {};
matchedAttributes = {};
blockElements = extend(makeMap('script,style,head,html,body,title,meta,param'), schema.getBlockElements());
const nonEmptyElements = schema.getNonEmptyElements();
const children = schema.children;
const validate = settings.validate;
const rootBlockName = 'forced_root_block' in args ? args.forced_root_block : settings.forced_root_block;
const whiteSpaceElements = schema.getWhiteSpaceElements();
const startWhiteSpaceRegExp = /^[ \t\r\n]+/;
const endWhiteSpaceRegExp = /[ \t\r\n]+$/;
const allWhiteSpaceRegExp = /[ \t\r\n]+/g;
const isAllWhiteSpaceRegExp = /^[ \t\r\n]+$/;
const addRootBlocks = function () {
let node = rootNode.firstChild, next, rootBlockNode;
// Removes whitespace at beginning and end of block so:
// x
-> x
const trim = function (rootBlockNode) {
if (rootBlockNode) {
node = rootBlockNode.firstChild;
if (node && node.type === 3) {
node.value = node.value.replace(startWhiteSpaceRegExp, '');
}
node = rootBlockNode.lastChild;
if (node && node.type === 3) {
node.value = node.value.replace(endWhiteSpaceRegExp, '');
}
}
};
// Check if rootBlock is valid within rootNode for example if P is valid in H1 if H1 is the contentEditabe root
if (!schema.isValidChild(rootNode.name, rootBlockName.toLowerCase())) {
return;
}
while (node) {
next = node.next;
if (node.type === 3 || (node.type === 1 && node.name !== 'p' &&
!blockElements[node.name] && !node.attr('data-mce-type'))) {
if (!rootBlockNode) {
// Create a new root block element
rootBlockNode = createNode(rootBlockName, 1);
rootBlockNode.attr(settings.forced_root_block_attrs);
rootNode.insert(rootBlockNode, node);
rootBlockNode.append(node);
} else {
rootBlockNode.append(node);
}
} else {
trim(rootBlockNode);
rootBlockNode = null;
}
node = next;
}
trim(rootBlockNode);
};
const createNode = function (name, type) {
const node = new Node(name, type);
let list;
if (name in nodeFilters) {
list = matchedNodes[name];
if (list) {
list.push(node);
} else {
matchedNodes[name] = [node];
}
}
return node;
};
const removeWhitespaceBefore = function (node) {
let textNode, textNodeNext, textVal, sibling;
const blockElements = schema.getBlockElements();
for (textNode = node.prev; textNode && textNode.type === 3;) {
textVal = textNode.value.replace(endWhiteSpaceRegExp, '');
// Found a text node with non whitespace then trim that and break
if (textVal.length > 0) {
textNode.value = textVal;
return;
}
textNodeNext = textNode.next;
// Fix for bug #7543 where bogus nodes would produce empty
// text nodes and these would be removed if a nested list was before it
if (textNodeNext) {
if (textNodeNext.type === 3 && textNodeNext.value.length) {
textNode = textNode.prev;
continue;
}
if (!blockElements[textNodeNext.name] && textNodeNext.name !== 'script' && textNodeNext.name !== 'style') {
textNode = textNode.prev;
continue;
}
}
sibling = textNode.prev;
textNode.remove();
textNode = sibling;
}
};
const cloneAndExcludeBlocks = function (input) {
let name;
const output = {};
for (name in input) {
if (name !== 'li' && name !== 'p') {
output[name] = input[name];
}
}
return output;
};
parser = SaxParser({
validate,
allow_script_urls: settings.allow_script_urls,
allow_conditional_comments: settings.allow_conditional_comments,
// Exclude P and LI from DOM parsing since it's treated better by the DOM parser
self_closing_elements: cloneAndExcludeBlocks(schema.getSelfClosingElements()),
cdata (text) {
node.append(createNode('#cdata', 4)).value = text;
},
text (text, raw) {
let textNode;
// Trim all redundant whitespace on non white space elements
if (!isInWhiteSpacePreservedElement) {
text = text.replace(allWhiteSpaceRegExp, ' ');
if (isLineBreakNode(node.lastChild, blockElements)) {
text = text.replace(startWhiteSpaceRegExp, '');
}
}
// Do we need to create the node
if (text.length !== 0) {
textNode = createNode('#text', 3);
textNode.raw = !!raw;
node.append(textNode).value = text;
}
},
comment (text) {
node.append(createNode('#comment', 8)).value = text;
},
pi (name, text) {
node.append(createNode(name, 7)).value = text;
removeWhitespaceBefore(node);
},
doctype (text) {
let newNode;
newNode = node.append(createNode('#doctype', 10));
newNode.value = text;
removeWhitespaceBefore(node);
},
start (name, attrs, empty) {
let newNode, attrFiltersLen, elementRule, attrName, parent;
elementRule = validate ? schema.getElementRule(name) : {};
if (elementRule) {
newNode = createNode(elementRule.outputName || name, 1);
newNode.attributes = attrs;
newNode.shortEnded = empty;
node.append(newNode);
// Check if node is valid child of the parent node is the child is
// unknown we don't collect it since it's probably a custom element
parent = children[node.name];
if (parent && children[newNode.name] && !parent[newNode.name]) {
invalidChildren.push(newNode);
}
attrFiltersLen = attributeFilters.length;
while (attrFiltersLen--) {
attrName = attributeFilters[attrFiltersLen].name;
if (attrName in attrs.map) {
list = matchedAttributes[attrName];
if (list) {
list.push(newNode);
} else {
matchedAttributes[attrName] = [newNode];
}
}
}
// Trim whitespace before block
if (blockElements[name]) {
removeWhitespaceBefore(newNode);
}
// Change current node if the element wasn't empty i.e not
or
if (!empty) {
node = newNode;
}
// Check if we are inside a whitespace preserved element
if (!isInWhiteSpacePreservedElement && whiteSpaceElements[name]) {
isInWhiteSpacePreservedElement = true;
}
}
},
end (name) {
let textNode, elementRule, text, sibling, tempNode;
elementRule = validate ? schema.getElementRule(name) : {};
if (elementRule) {
if (blockElements[name]) {
if (!isInWhiteSpacePreservedElement) {
// Trim whitespace of the first node in a block
textNode = node.firstChild;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(startWhiteSpaceRegExp, '');
// Any characters left after trim or should we remove it
if (text.length > 0) {
textNode.value = text;
textNode = textNode.next;
} else {
sibling = textNode.next;
textNode.remove();
textNode = sibling;
// Remove any pure whitespace siblings
while (textNode && textNode.type === 3) {
text = textNode.value;
sibling = textNode.next;
if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) {
textNode.remove();
textNode = sibling;
}
textNode = sibling;
}
}
}
// Trim whitespace of the last node in a block
textNode = node.lastChild;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(endWhiteSpaceRegExp, '');
// Any characters left after trim or should we remove it
if (text.length > 0) {
textNode.value = text;
textNode = textNode.prev;
} else {
sibling = textNode.prev;
textNode.remove();
textNode = sibling;
// Remove any pure whitespace siblings
while (textNode && textNode.type === 3) {
text = textNode.value;
sibling = textNode.prev;
if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) {
textNode.remove();
textNode = sibling;
}
textNode = sibling;
}
}
}
}
// Trim start white space
// Removed due to: #5424
/*textNode = node.prev;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(startWhiteSpaceRegExp, '');
if (text.length > 0)
textNode.value = text;
else
textNode.remove();
}*/
}
// Check if we exited a whitespace preserved element
if (isInWhiteSpacePreservedElement && whiteSpaceElements[name]) {
isInWhiteSpacePreservedElement = false;
}
if (elementRule.removeEmpty && isEmpty(schema, nonEmptyElements, whiteSpaceElements, node)) {
// Leave nodes that have a name like
if (!node.attributes.map.name && !node.attr('id')) {
tempNode = node.parent;
if (blockElements[node.name]) {
node.empty().remove();
} else {
node.unwrap();
}
node = tempNode;
return;
}
}
if (elementRule.paddEmpty && (isPaddedWithNbsp(node) || isEmpty(schema, nonEmptyElements, whiteSpaceElements, node))) {
paddEmptyNode(settings, args, blockElements, node);
}
node = node.parent;
}
}
}, schema);
const rootNode = node = new Node(args.context || settings.root_name, 11);
parser.parse(html);
// Fix invalid children or report invalid children in a contextual parsing
if (validate && invalidChildren.length) {
if (!args.context) {
fixInvalidChildren(invalidChildren);
} else {
args.invalid = true;
}
}
// Wrap nodes in the root into block elements if the root is body
if (rootBlockName && (rootNode.name === 'body' || args.isRootContent)) {
addRootBlocks();
}
// Run filters only when the contents is valid
if (!args.invalid) {
// Run node filters
for (name in matchedNodes) {
list = nodeFilters[name];
nodes = matchedNodes[name];
// Remove already removed children
fi = nodes.length;
while (fi--) {
if (!nodes[fi].parent) {
nodes.splice(fi, 1);
}
}
for (i = 0, l = list.length; i < l; i++) {
list[i](nodes, name, args);
}
}
// Run attribute filters
for (i = 0, l = attributeFilters.length; i < l; i++) {
list = attributeFilters[i];
if (list.name in matchedAttributes) {
nodes = matchedAttributes[list.name];
// Remove already removed children
fi = nodes.length;
while (fi--) {
if (!nodes[fi].parent) {
nodes.splice(fi, 1);
}
}
for (fi = 0, fl = list.callbacks.length; fi < fl; fi++) {
list.callbacks[fi](nodes, list.name, args);
}
}
}
}
return rootNode;
};
const exports = {
schema,
addAttributeFilter,
getAttributeFilters,
addNodeFilter,
getNodeFilters,
filterNode,
parse
};
ParserFilters.register(exports, settings);
LegacyFilter.register(exports, settings);
return exports;
}