import { Tag, TagPlacement } from "src/compilation/tag";
import { xml, XmlGeneralNode, XmlNode, XmlNodeType, XmlTextNode } from "src/xml";
import { OmlAttribute, OmlNode } from "./omlNode";
//
// Wordprocessing Markup Language (WML) intro:
//
// In Word, text nodes are contained in "run" nodes (which specify text
// properties such as font and color). The "run" nodes in turn are
// contained in paragraph nodes, which are the core unit of content.
//
// Example:
//
// <w:p>                            <-- paragraph
//   <w:r>                          <-- run
//     <w:rPr>                      <-- run properties
//       <w:b/>                     <-- bold
//     </w:rPr>
//     <w:t>This is text.</w:t>     <-- actual text
//   </w:r>
// </w:p>
//
// - For an easy introduction, see: http://officeopenxml.com/WPcontentOverview.php
// - For the complete specification, see: https://ecma-international.org/publications-and-standards/standards/ecma-376/
//
/**
 * Entry point for the Office Markup Language (OML) utilities.
 *
 * "Office Markup Language" is used here as a generic name for the markup
 * languages found in Office Open XML documents - including, but not limited
 * to, Wordprocessing Markup Language, Drawing Markup Language and
 * Spreadsheet Markup Language.
 */
export class OfficeMarkup {

    /**
     * Utilities for querying Office Markup nodes.
     */
    public readonly query: Query;

    /**
     * Utilities for modifying Office Markup nodes.
     */
    public readonly modify: Modify;

    constructor() {
        // Assigned in declaration order, one fresh helper instance each.
        this.query = new Query();
        this.modify = new Modify();
    }
}
/**
 * Office Markup Language (OML) query utilities.
 *
 * Most predicates accept both the Wordprocessing (`OmlNode.W`) and the
 * Drawing (`OmlNode.A`) name of a node; the table, list-numbering and
 * inline-drawing checks have no `OmlNode.A` counterpart here.
 *
 * Sibling methods are reached through the `officeMarkup` singleton instead
 * of `this`, so a method keeps working when it is handed over as a bare
 * callback (e.g. `xml.query.findParent(node, officeMarkup.query.isRunNode)`).
 */
class Query {

    /**
     * True if the node is an Office text node (`OmlNode.W.Text` or `OmlNode.A.Text`).
     */
    public isTextNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.W.Text || node.nodeName === OmlNode.A.Text;
    }

    /**
     * True if the node is a run node (`OmlNode.W.Run` or `OmlNode.A.Run`).
     */
    public isRunNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.W.Run || node.nodeName === OmlNode.A.Run;
    }

    /**
     * True if the node is a run properties node
     * (`OmlNode.W.RunProperties` or `OmlNode.A.RunProperties`).
     */
    public isRunPropertiesNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.W.RunProperties || node.nodeName === OmlNode.A.RunProperties;
    }

    /**
     * True if the node is a Wordprocessing table node (`OmlNode.W.Table`).
     */
    public isTableNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.W.Table;
    }

    /**
     * True if the node is a Wordprocessing table cell node (`OmlNode.W.TableCell`).
     */
    public isTableCellNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.W.TableCell;
    }

    /**
     * True if the node is a paragraph node
     * (`OmlNode.W.Paragraph` or `OmlNode.A.Paragraph`).
     */
    public isParagraphNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.W.Paragraph || node.nodeName === OmlNode.A.Paragraph;
    }

    /**
     * True if the node is a paragraph properties node
     * (`OmlNode.W.ParagraphProperties` or `OmlNode.A.ParagraphProperties`).
     */
    public isParagraphPropertiesNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.W.ParagraphProperties || node.nodeName === OmlNode.A.ParagraphProperties;
    }

    /**
     * True if the paragraph's properties node contains a
     * `OmlNode.W.NumberProperties` node.
     *
     * A paragraph that has no properties node at all is not a list paragraph.
     *
     * @throws Error if `paragraphNode` is not a paragraph node.
     */
    public isListParagraph(paragraphNode: XmlNode): boolean {
        const paragraphProperties = officeMarkup.query.findParagraphPropertiesNode(paragraphNode);

        // No properties node means there is nothing to search - don't hand a
        // missing root to `findByPath`.
        if (!paragraphProperties)
            return false;

        const listNumberProperties = xml.query.findByPath(paragraphProperties, XmlNodeType.General, OmlNode.W.NumberProperties);
        return !!listNumberProperties;
    }

    /**
     * True if the node is an `OmlNode.Wp.Inline` node whose direct parent is
     * an `OmlNode.W.Drawing` node.
     */
    public isInlineDrawingNode(node: XmlNode): boolean {
        return node.nodeName === OmlNode.Wp.Inline && node.parentNode?.nodeName === OmlNode.W.Drawing;
    }

    /**
     * Find the paragraph properties child of the given paragraph node, using
     * `xml.query.findChild` with the `isParagraphPropertiesNode` predicate.
     *
     * @throws Error if `paragraphNode` is not a paragraph node.
     */
    public findParagraphPropertiesNode(paragraphNode: XmlNode): XmlNode {
        if (!officeMarkup.query.isParagraphNode(paragraphNode))
            throw new Error(`Expected paragraph node but received a '${paragraphNode.nodeName}' node.`);

        return xml.query.findChild(paragraphNode, officeMarkup.query.isParagraphPropertiesNode);
    }

    /**
     * Search for the first direct child **Office** text node (i.e. a `<w:t>`
     * or `<a:t>` node) of a run node.
     *
     * @returns The text node, or `null` when `node` is missing, is not a run
     * node, or has no such child.
     */
    public firstTextNodeChild(node: XmlNode): XmlNode {
        if (!node)
            return null;

        if (!officeMarkup.query.isRunNode(node))
            return null;

        if (!node.childNodes)
            return null;

        for (const child of node.childNodes) {
            if (officeMarkup.query.isTextNode(child))
                return child;
        }

        return null;
    }

    /**
     * Search **upwards** for the first **Office** text node (i.e. a `<w:t>` or
     * `<a:t>` node).
     *
     * @returns `null` when `node` is missing, otherwise the result of
     * `xml.query.findParent` with the `isTextNode` predicate.
     * @throws Error if `node` is not an XML text node.
     */
    public containingTextNode(node: XmlTextNode): XmlGeneralNode {
        if (!node)
            return null;

        if (!xml.query.isTextNode(node))
            throw new Error(`Invalid argument node. Expected a XmlTextNode.`);

        return xml.query.findParent(node, officeMarkup.query.isTextNode);
    }

    /**
     * Search **upwards** for the first run node.
     */
    public containingRunNode(node: XmlNode): XmlGeneralNode {
        return xml.query.findParent(node, officeMarkup.query.isRunNode);
    }

    /**
     * Search **upwards** for the first paragraph node.
     */
    public containingParagraphNode(node: XmlNode): XmlGeneralNode {
        return xml.query.findParent(node, officeMarkup.query.isParagraphNode);
    }

    /**
     * Search **upwards** for the first "table row" node.
     */
    public containingTableRowNode(node: XmlNode): XmlGeneralNode {
        return xml.query.findParentByName(node, OmlNode.W.TableRow);
    }

    /**
     * Search **upwards** for the first "table cell" node.
     */
    public containingTableCellNode(node: XmlNode): XmlGeneralNode {
        return xml.query.findParent(node, officeMarkup.query.isTableCellNode);
    }

    /**
     * Search **upwards** for the first "table" node.
     */
    public containingTableNode(node: XmlNode): XmlGeneralNode {
        return xml.query.findParentByName(node, OmlNode.W.Table);
    }

    /**
     * Search **upwards** for the first `w:sdtContent` node.
     */
    public containingStructuredTagContentNode(node: XmlNode): XmlGeneralNode {
        return xml.query.findParentByName(node, OmlNode.W.StructuredTagContent);
    }

    //
    // Advanced queries
    //

    /**
     * True if the Office text node has no children, or if its first child is
     * an XML text node with empty content.
     *
     * @throws Error if `node` is not an Office text node, or if its first
     * child is not an XML text node.
     */
    public isEmptyTextNode(node: XmlNode): boolean {
        if (!officeMarkup.query.isTextNode(node))
            throw new Error(`Text node expected but '${node.nodeName}' received.`);

        if (!node.childNodes?.length)
            return true;

        const xmlTextNode = node.childNodes[0];
        if (!xml.query.isTextNode(xmlTextNode))
            throw new Error("Invalid XML structure. 'w:t' node should contain a single text node only.");

        return !xmlTextNode.textContent;
    }

    /**
     * True if every child of the run is either a run properties node or an
     * empty Office text node. A run without children is empty.
     *
     * @throws Error if `node` is not a run node.
     */
    public isEmptyRun(node: XmlNode): boolean {
        if (!officeMarkup.query.isRunNode(node))
            throw new Error(`Run node expected but '${node.nodeName}' received.`);

        for (const child of (node.childNodes ?? [])) {

            // Properties carry no content
            if (officeMarkup.query.isRunPropertiesNode(child))
                continue;

            // Neither do empty text nodes
            if (officeMarkup.query.isTextNode(child) && officeMarkup.query.isEmptyTextNode(child))
                continue;

            // Anything else counts as content
            return false;
        }

        return true;
    }
}
/**
 * Office Markup Language (OML) modify utilities.
 *
 * Sibling helpers are reached through the `officeMarkup` singleton rather
 * than through `this`.
 */
class Modify {

    /**
     * Split the text node into two text nodes, each with its own wrapping node.
     * Returns the newly created text node.
     *
     * The wrapping Office text node is deep-cloned, the clone is inserted next
     * to the original, and the text is divided between the two at `splitIndex`.
     *
     * @param textNode The XML text node to split.
     * @param splitIndex Index at which the text content is cut; the first node
     * keeps `substring(0, splitIndex)`, the second gets `substring(splitIndex)`.
     * @param addBefore Should the new node be added before or after the original node.
     */
    public splitTextNode(textNode: XmlTextNode, splitIndex: number, addBefore: boolean): XmlTextNode {

        let firstXmlTextNode: XmlTextNode;
        let secondXmlTextNode: XmlTextNode;

        // Split nodes
        const wordTextNode = officeMarkup.query.containingTextNode(textNode);
        const newWordTextNode = xml.create.cloneNode(wordTextNode, true);

        // Set space preserve to prevent display differences after splitting
        // (otherwise if there was a space in the middle of the text node and it
        // is now at the beginning or end of the text node it will be ignored)
        officeMarkup.modify.setSpacePreserveAttribute(wordTextNode);
        officeMarkup.modify.setSpacePreserveAttribute(newWordTextNode);

        if (addBefore) {

            // Insert new node before existing one
            xml.modify.insertBefore(newWordTextNode, wordTextNode);

            firstXmlTextNode = xml.query.lastTextChild(newWordTextNode);
            secondXmlTextNode = textNode;

        } else {

            // Insert new node after existing one
            const curIndex = wordTextNode.parentNode.childNodes.indexOf(wordTextNode);
            xml.modify.insertChild(wordTextNode.parentNode, newWordTextNode, curIndex + 1);

            firstXmlTextNode = textNode;
            secondXmlTextNode = xml.query.lastTextChild(newWordTextNode);
        }

        // Edit text (both nodes still hold the full original text at this point)
        const firstText = firstXmlTextNode.textContent;
        const secondText = secondXmlTextNode.textContent;
        firstXmlTextNode.textContent = firstText.substring(0, splitIndex);
        secondXmlTextNode.textContent = secondText.substring(splitIndex);

        // The new node is the first one when added before, the second otherwise
        return (addBefore ? firstXmlTextNode : secondXmlTextNode);
    }

    /**
     * Split the paragraph around the specified text node.
     *
     * The run containing the text node is split first, then the paragraph. In
     * both cases the original node becomes the `right` part and a shallow
     * clone (plus a deep copy of the properties node, if any) becomes the
     * `left` part. Runs left empty by the split are removed.
     *
     * All validation happens before the first modification, so a rejected call
     * leaves the document untouched.
     *
     * @param paragraph The paragraph that contains `textNode`.
     * @param textNode The XML text node to split around.
     * @param removeTextNode Whether to remove the text node's wrapping Office
     * text node from the `right` part.
     * @returns Two paragraphs - `left` and `right`. If the `removeTextNode` argument is
     * `false` then the original text node is the first text node of `right`.
     * @throws Error if `textNode` is not contained in `paragraph`, if its
     * wrapping text node is not a direct child of a run, or if that run is not
     * a direct child of the paragraph.
     */
    public splitParagraphByTextNode(paragraph: XmlNode, textNode: XmlTextNode, removeTextNode: boolean): [XmlNode, XmlNode] {

        // Input validation
        const containingParagraph = officeMarkup.query.containingParagraphNode(textNode);
        if (containingParagraph !== paragraph)
            throw new Error(`Node 'textNode' is not contained in the specified paragraph.`);

        const runNode = officeMarkup.query.containingRunNode(textNode);
        const wordTextNode = officeMarkup.query.containingTextNode(textNode);

        // The loops below only walk direct children. Without these checks a
        // nested run (or text wrapper) would never be reached, and the loop
        // would run past the last child after the tree was already modified.
        if (!runNode || !wordTextNode || wordTextNode.parentNode !== runNode)
            throw new Error(`Node 'textNode' is not wrapped by a text node that is a direct child of a run node.`);
        if (runNode.parentNode !== containingParagraph)
            throw new Error(`The run containing 'textNode' is not a direct child of the specified paragraph.`);

        // 1. Split the run

        // Create run clone (left) and keep the original run (right).
        const leftRun = xml.create.cloneNode(runNode, false);
        const rightRun = runNode;
        xml.modify.insertBefore(leftRun, rightRun);

        // Copy props from original run node (preserve style)
        const runProps = rightRun.childNodes.find((child: XmlNode) => officeMarkup.query.isRunPropertiesNode(child));
        if (runProps) {
            const leftRunProps = xml.create.cloneNode(runProps, true);
            xml.modify.appendChild(leftRun, leftRunProps);
        }

        // Move all text nodes up to the specified text node, to the new run.
        // (Assumes the properties node, when present, is the first child.)
        const firstRunChildIndex = (runProps ? 1 : 0);
        let curChild = rightRun.childNodes[firstRunChildIndex];
        while (curChild !== wordTextNode) {
            xml.modify.remove(curChild);
            xml.modify.appendChild(leftRun, curChild);
            curChild = rightRun.childNodes[firstRunChildIndex];
        }

        // Remove text node
        if (removeTextNode) {
            xml.modify.removeChild(rightRun, firstRunChildIndex);
        }

        // 2. Split the paragraph

        // Create paragraph clone (left) and keep the original paragraph (right).
        const leftPara = xml.create.cloneNode(containingParagraph, false);
        const rightPara = containingParagraph;
        xml.modify.insertBefore(leftPara, rightPara);

        // Copy props from original paragraph (preserve style)
        const paragraphProps = rightPara.childNodes.find((child: XmlNode) => officeMarkup.query.isParagraphPropertiesNode(child));
        if (paragraphProps) {
            const leftParagraphProps = xml.create.cloneNode(paragraphProps, true);
            xml.modify.appendChild(leftPara, leftParagraphProps);
        }

        // Move all run nodes up to the original run (right), to the new paragraph (left).
        // (Same first-child assumption as for the run properties above.)
        const firstParaChildIndex = (paragraphProps ? 1 : 0);
        curChild = rightPara.childNodes[firstParaChildIndex];
        while (curChild !== rightRun) {
            xml.modify.remove(curChild);
            xml.modify.appendChild(leftPara, curChild);
            curChild = rightPara.childNodes[firstParaChildIndex];
        }

        // Clean paragraphs - remove empty runs
        if (officeMarkup.query.isEmptyRun(leftRun))
            xml.modify.remove(leftRun);
        if (officeMarkup.query.isEmptyRun(rightRun))
            xml.modify.remove(rightRun);

        return [leftPara, rightPara];
    }

    /**
     * Move all text between the 'from' and 'to' nodes to the 'from' node.
     *
     * Walks the sibling chain from the run containing `from` to the run
     * containing `to`, collects the text of every Office text node on the
     * way, removes those text nodes (except the first one) and finally writes
     * the concatenated text into the first one.
     *
     * @throws Error if the two runs do not share the same parent.
     */
    public joinTextNodesRange(from: XmlTextNode, to: XmlTextNode): void {

        // Find run nodes
        const firstRunNode = officeMarkup.query.containingRunNode(from);
        const secondRunNode = officeMarkup.query.containingRunNode(to);

        const paragraphNode = firstRunNode.parentNode;
        if (secondRunNode.parentNode !== paragraphNode)
            throw new Error('Can not join text nodes from separate paragraphs.');

        // Find "word text nodes"
        const firstWordTextNode = officeMarkup.query.containingTextNode(from);
        const secondWordTextNode = officeMarkup.query.containingTextNode(to);
        const totalText: string[] = [];

        // Iterate runs
        let curRunNode: XmlNode = firstRunNode;
        while (curRunNode) {

            // Iterate text nodes
            let curWordTextNode: XmlNode;
            if (curRunNode === firstRunNode) {
                // In the first run, start at 'from' rather than at the run's first text node
                curWordTextNode = firstWordTextNode;
            } else {
                curWordTextNode = officeMarkup.query.firstTextNodeChild(curRunNode);
            }
            while (curWordTextNode) {

                // Skip siblings that are not text nodes
                if (!officeMarkup.query.isTextNode(curWordTextNode)) {
                    curWordTextNode = curWordTextNode.nextSibling;
                    continue;
                }

                // Move text to first node
                const curXmlTextNode = xml.query.lastTextChild(curWordTextNode);
                totalText.push(curXmlTextNode.textContent);

                // Next text node (resolved before the removal below)
                const textToRemove = curWordTextNode;
                if (curWordTextNode === secondWordTextNode) {
                    curWordTextNode = null;
                } else {
                    curWordTextNode = curWordTextNode.nextSibling;
                }

                // Remove current text node
                if (textToRemove !== firstWordTextNode) {
                    xml.modify.remove(textToRemove);
                }
            }

            // Next run (resolved before the removal below)
            const runToRemove = curRunNode;
            if (curRunNode === secondRunNode) {
                curRunNode = null;
            } else {
                curRunNode = curRunNode.nextSibling;
            }

            // Remove current run
            // NOTE(review): this is not limited to run nodes - any visited
            // sibling that has no children is removed as well. Confirm that
            // this is intended.
            if (!runToRemove.childNodes || !runToRemove.childNodes.length) {
                xml.modify.remove(runToRemove);
            }
        }

        // Set the text content
        const firstXmlTextNode = xml.query.lastTextChild(firstWordTextNode);
        firstXmlTextNode.textContent = totalText.join('');
    }

    /**
     * Take all runs from 'second' and move them to 'first'.
     *
     * Children of `second` that are not run nodes stay where they are.
     * Does nothing when both arguments are the same node.
     */
    public joinParagraphs(first: XmlNode, second: XmlNode): void {
        if (first === second)
            return;

        let childIndex = 0;
        while (second.childNodes && childIndex < second.childNodes.length) {
            const curChild = second.childNodes[childIndex];
            if (officeMarkup.query.isRunNode(curChild)) {
                // Removal shifts the next child into `childIndex`, so don't advance
                xml.modify.removeChild(second, childIndex);
                xml.modify.appendChild(first, curChild);
            } else {
                childIndex++;
            }
        }
    }

    /**
     * Set the `OmlAttribute.SpacePreserve` attribute of the node to
     * `'preserve'`, creating the attributes object if needed.
     * An existing non-empty value is left as is.
     */
    public setSpacePreserveAttribute(node: XmlGeneralNode): void {
        if (!node.attributes) {
            node.attributes = {};
        }
        if (!node.attributes[OmlAttribute.SpacePreserve]) {
            node.attributes[OmlAttribute.SpacePreserve] = 'preserve';
        }
    }

    /**
     * Remove the tag from the document.
     *
     * - `TagPlacement.TextNode`: removes the Office text node that wraps the
     *   tag's XML text node, and the containing run if that leaves it empty.
     * - `TagPlacement.Attribute`: removes the first occurrence of the tag's
     *   raw text from the attribute value, and deletes the attribute if the
     *   value becomes empty. Does nothing if the attribute does not exist.
     *
     * @throws Error for any other tag placement.
     */
    public removeTag(tag: Tag): void {

        if (tag.placement === TagPlacement.TextNode) {
            const wordTextNode = officeMarkup.query.containingTextNode(tag.xmlTextNode);
            const runNode = officeMarkup.query.containingRunNode(tag.xmlTextNode);

            // Remove the word text node
            xml.modify.remove(wordTextNode);

            // Remove the run node if it's empty
            if (officeMarkup.query.isEmptyRun(runNode)) {
                xml.modify.remove(runNode);
            }
            return;
        }

        if (tag.placement === TagPlacement.Attribute) {
            if (!tag.xmlNode.attributes || !(tag.attributeName in tag.xmlNode.attributes)) {
                return;
            }

            // Remove the tag from the attribute value
            tag.xmlNode.attributes[tag.attributeName] = tag.xmlNode.attributes[tag.attributeName].replace(tag.rawText, "");

            // Remove the attribute if it's empty
            if (tag.xmlNode.attributes[tag.attributeName] === "") {
                delete tag.xmlNode.attributes[tag.attributeName];
            }
            return;
        }

        // Cast so the unexpected value can still be reported
        const anyTag = tag as any;
        throw new Error(`Unexpected tag placement "${anyTag.placement}" for tag "${anyTag.rawText}".`);
    }
}
/**
 * Shared instance of the Office Markup Language utilities.
 *
 * The `Query` and `Modify` classes in this file call back into this instance
 * (`officeMarkup.query.*`, `officeMarkup.modify.*`) instead of using `this`.
 */
export const officeMarkup = new OfficeMarkup();