import { Aligner, AlignType, SeqNode } from 'sequence-align'

/** Namespace in which every `<app>` and `<rdg>` element of this module is created. */
const MEI_NS = 'http://www.music-encoding.org/ns/mei'

/**
 * Generates a random token that is used as part of generated @xml:id
 * attributes.
 *
 * 46656 equals 36^3, so each half is rendered as exactly three base-36
 * digits (left-padded with zeros).
 *
 * @returns a six-character base-36 string
 */
const generateUid = (): string => {
    const firstPart = (Math.random() * 46656) | 0
    const secondPart = (Math.random() * 46656) | 0
    const first = ('000' + firstPart.toString(36)).slice(-3)
    const second = ('000' + secondPart.toString(36)).slice(-3)
    // Concatenate the padded strings. Adding the two raw numbers would
    // collapse the id space and make collisions far more likely.
    return first + second
}

/**
 * Compares two nodes excluding their xml:id attributes.
 *
 * NOTE(review): the attribute loop of this function was damaged in the
 * file (everything between `i<` and the next `>` was lost) and has been
 * reconstructed from the remaining fragments and this description --
 * please confirm against the upstream version. Element names and text
 * content are not compared here.
 *
 * @param el1 the first element
 * @param el2 the second element
 * @returns true if both elements carry the same attributes (ignoring
 * xml:id) and all their child elements are pairwise equal
 */
const isEqualNode = (el1: Element, el2: Element): boolean => {
    if (el1.attributes.length !== el2.attributes.length) return false

    for (const attr of Array.from(el1.attributes)) {
        if (attr.name === 'xml:id') continue
        if (el2.getAttribute(attr.name) !== attr.value) return false
    }

    // Without this guard a surplus child on either side would be ignored
    // or would lead to a comparison against a missing element.
    if (el1.children.length !== el2.children.length) return false

    return Array.from(el1.children).every((child, index) => {
        const counterpart = el2.children.item(index)
        return counterpart !== null && isEqualNode(child, counterpart)
    })
}

/**
 * Assigns `${prefix}-${index}` as xml:id to every element of the document
 * that matches the selector, numbered in document order.
 *
 * @param selector CSS selector of the elements to label
 * @param prefix prefix of the generated ids
 * @param d the document to modify
 */
const addXmlIdsTo = (selector: string, prefix: string, d: Document) => {
    for (const [index, el] of [...d.querySelectorAll(selector)].entries()) {
        el.setAttribute("xml:id", `${prefix}-${index}`)
    }
}

/**
 * Finds the first descendant element with the given xml:id.
 *
 * @param doc document or element to search in
 * @param xmlId the id to look for
 * @returns the matching element or undefined
 */
const findByXmlId = (doc: Document | Element, xmlId: string): Element | undefined => {
    const allElements = [...doc.querySelectorAll('*')]
    return allElements.find((element: Element) => element.getAttribute("xml:id") === xmlId)
}

/**
 * Reads the source name of a document, i.e. the xml:id of its first
 * `manifestation` element.
 *
 * @param doc the document to inspect
 * @returns the id, or undefined if there is no manifestation or its
 * xml:id is missing or empty
 */
const sourceNameOfDocument = (doc: Document): string | undefined => {
    const manifestation = doc.querySelector('manifestation')
    return manifestation?.getAttribute('xml:id') || undefined
}

/** How new material relates to the `<app>` it is placed into. */
type InsertMode = 'append' | 'prepend' | 'create'

/**
 * Creates a detached `<rdg>` element carrying the given source attribute.
 */
const createRdg = (document: Document, source: string): Element => {
    const rdg = document.createElementNS(MEI_NS, 'rdg')
    rdg.setAttribute('source', source)
    return rdg
}

/**
 * Adds a source id to the whitespace-separated source list of a `<rdg>`.
 * A missing or empty source attribute is replaced rather than extended,
 * so that the list never starts with the text "null" or a blank.
 */
const addSource = (rdg: Element, sourceId: string) => {
    const existing = rdg.getAttribute('source')
    rdg.setAttribute('source', existing ? `${existing} ${sourceId}` : sourceId)
}

/**
 * Picks the `<app>` that new material next to `anchor` should go into:
 * an `<app>` directly following the anchor (material is prepended), an
 * `<app>` directly preceding it (material is appended), or otherwise a
 * freshly created, still detached `<app>`.
 */
const locateApp = (document: Document, anchor: Element): { app: Element, mode: InsertMode } => {
    const following = anchor.nextElementSibling
    if (following && following.nodeName === 'app') {
        return { app: following, mode: 'prepend' }
    }

    const preceding = anchor.previousElementSibling
    if (preceding && preceding.nodeName === 'app') {
        return { app: preceding, mode: 'append' }
    }

    return { app: document.createElementNS(MEI_NS, 'app'), mode: 'create' }
}

/**
 * This function creates a critical apparatus given two different readings.
 * In case it is immediately followed or preceded by an existing `<app>`, it
 * tries to unify with it.
 *
 * NOTE(review): existing readings are looked up with `[source*=...]`, which
 * is a substring match -- a source id that is contained in another one
 * (e.g. "A1" in "A10") will match as well.
 *
 * @param document the target document
 * @param sourceName1 the source id of the first reading
 * @param reading1 the first reading as DOM element
 * @param sourceName2 the source id of the second reading
 * @param reading2 the second reading as DOM element. Can be undefined.
 * @returns nothing, the document is modified in place
 */
const createCriticalApparatus = (document: Document, sourceName1: string, reading1: Element, sourceName2: string, reading2: Element | undefined) => {
    console.log('creating critical apparatus for', reading1.tagName, ' – different reading in', sourceName2)

    if (!reading2 && reading1.closest('app')) {
        console.log('creating empty <rdg> for sources', sourceName1, 'and', sourceName2)
    }

    const { app, mode } = locateApp(document, reading1)

    // find or create the <rdg> elements where to append the new material.
    // The first reading is resolved (and, if necessary, attached) before
    // the second one is looked up.
    let rdg1: Element | null = null
    if (mode !== 'create') {
        rdg1 = app.querySelector(`rdg[source*="${sourceName1}"]`)
    }
    if (!rdg1) {
        rdg1 = createRdg(document, sourceName1)
        app.appendChild(rdg1)
    }

    let rdg2: Element | null = null
    if (mode !== 'create') {
        rdg2 = app.querySelector(`rdg[source*="${sourceName2}"]`)
    }
    if (!rdg2) {
        rdg2 = createRdg(document, sourceName2)
        app.appendChild(rdg2)
    }

    // a new <app> takes the place of the first reading ...
    if (mode === 'create') {
        reading1.parentNode?.insertBefore(app, reading1)
    }

    // ... and the readings are moved into their <rdg> elements
    if (mode === 'prepend') {
        rdg1.prepend(reading1)
        if (reading2) rdg2.prepend(reading2)
    }
    else {
        rdg1.appendChild(reading1)
        if (reading2) rdg2.appendChild(reading2)
    }
}

/**
 * Attributes the given material to a source inside an existing `<app>`:
 * every `<rdg>` except the first one whose child elements are exactly
 * equal to the material gets the source id added to its source list. If
 * there is no such `<rdg>`, a new one containing the material is appended.
 *
 * @param app the `<app>` element to work on
 * @param sourceId the source id the material stems from
 * @param material the elements of the reading
 */
const findOrAppendRdg = (app: Element, sourceId: string, material: Element[]) => {
    if (!app) {
        console.log('<rdg> may only appear inside of <app>')
        return
    }

    const rdgs = Array.from(app.querySelectorAll('rdg'))

    let done = false
    for (const rdg of rdgs.slice(1)) {
        // Check the reading. We need to be strict here: everything,
        // including the sub-elements must be exactly equal.
        // TODO: this could be based on the score -- in case of a match
        // with score 1 we'd still have to proceed ...
        if (rdg.childElementCount !== material.length) continue

        const equal = material.every((el, index) => isEqualNode(el, rdg.children[index]))
        if (equal) {
            addSource(rdg, sourceId)
            done = true
        }
    }

    if (!done) {
        const rdg = app.ownerDocument.createElementNS(MEI_NS, 'rdg')
        rdg.setAttribute('source', sourceId)
        material.forEach((el) => {
            rdg.appendChild(el)
        })
        app.appendChild(rdg)
    }
}

/**
 * Places a reading that exists only in the second source into an `<app>`
 * next to the given position. If the position lives inside a `<rdg>`, its
 * enclosing `<app>` is used as position instead. An `<app>` adjacent to
 * the position is reused, otherwise a new one is inserted right after it.
 *
 * NOTE(review): existing readings are looked up with `[source*=...]`, which
 * is a substring match; for the first reading only the first id of
 * sourceName1 is used for the lookup.
 *
 * @param document the target document
 * @param sourceName1 the source id(s) of the first reading
 * @param position the element after which the apparatus is placed
 * @param sourceName2 the source id of the second reading
 * @param reading2 the second reading as DOM element
 * @returns the `<rdg>` of the first reading, or undefined if the position
 * is inside a `<rdg>` without an enclosing `<app>`
 */
const createCriticalApparatusAfter = (document: Document, sourceName1: string, position: Element, sourceName2: string, reading2: Element): Element | undefined => {
    let anchor = position
    if (anchor.closest('rdg')) {
        const enclosingApp = anchor.closest('app')
        if (!enclosingApp) {
            console.log('<rdg> may only appear inside of <app>')
            return undefined
        }
        anchor = enclosingApp
    }

    const { app, mode } = locateApp(document, anchor)

    // find or create the <rdg> elements where to append the new material.
    // Both lookups take place before anything is added to the <app>.
    let rdg1: Element | null = null
    let rdg2: Element | null = null
    if (mode !== 'create') {
        rdg1 = app.querySelector(`rdg[source*="${sourceName1.split(' ')[0]}"]`)
        rdg2 = app.querySelector(`rdg[source*="${sourceName2}"]`)
        if (!rdg1) {
            console.log('... not found')
        }
    }
    if (!rdg1) {
        rdg1 = createRdg(document, sourceName1)
        app.appendChild(rdg1)
    }
    if (!rdg2) {
        rdg2 = createRdg(document, sourceName2)
        app.appendChild(rdg2)
    }

    if (mode === 'create') {
        anchor.parentNode?.insertBefore(app, anchor.nextSibling)
    }

    if (mode === 'prepend') {
        rdg2.prepend(reading2)
    }
    else {
        rdg2.appendChild(reading2)
    }

    return rdg1
}

type ApparatusGenerator = () => void
type NodeGenerator = (e: Element) => number[]

export type CollationRule = {
    [name: string]: [NodeGenerator, CollationRule] | NodeGenerator
}

/**
 * Collects the second-source elements that were aligned to the content
 * of one `<rdg>` until it can be decided where they belong.
 */
type AppAlignmentCache = {
    rdgPointer: Element
    identical: boolean
    elements: Element[]
}

/**
 * Collator for multiple (TEI-like) XML documents.
 */
export class XMLCollator {
    rule: CollationRule
    // stored by the constructor; not invoked anywhere in this class
    appGenerator: ApparatusGenerator
    documents: Document[] | undefined
    gapOpen: number = -3
    gapExt: number = -0.8
    // filled by accumulateTag(); not read anywhere in this class
    accumulativeTags: string[]

    /**
     * @param rule the collation rules, keyed by element selector
     * @param appGenerator generator for the apparatus
     */
    constructor(rule: CollationRule, appGenerator: ApparatusGenerator) {
        this.accumulativeTags = []
        this.rule = rule
        this.appGenerator = appGenerator
    }

    /**
     * Sets the penalty for opening a new gap in the alignment
     * (rather than aligning it to a non-equal value)
     * @param gapOpen negative number
     */
    setGapOpen(gapOpen: number) {
        if (gapOpen >= 0) {
            throw new Error("gap open must be a negative number")
        }
        this.gapOpen = gapOpen
    }

    /**
     * Sets the penalty for extending an existing gap in the alignment
     * process.
     * @param gapExt negative number
     */
    setGapExt(gapExt: number) {
        if (gapExt >= 0) {
            throw new Error("gap ext must be a negative number")
        }
        this.gapExt = gapExt
    }

    /**
     *
     * @returns the result of alignment as a DOM object, i.e. the first
     * of the collated documents, or undefined if nothing was collated
     */
    dom(): Document | undefined {
        if (this.documents && this.documents.length) {
            return this.documents[0]
        }
        return undefined
    }

    /**
     * Registers a tag name in the list of accumulative tags.
     * @param tagName the tag name to register
     */
    accumulateTag(tagName: string) {
        this.accumulativeTags.push(tagName)
    }

    /**
     * collates multiple TEI-like documents. The result can
     * later be retrieved using e.g. the serialize() method.
     *
     * @param docs set of XML documents to collate
     */
    collate(docs: Document[]) {
        this.documents = docs
        this.applyRule(this.rule, docs.map(d => d.documentElement))

        // nothing to finish when no documents were given
        const firstDocument = docs[0]
        const lastDocument = docs[docs.length - 1]
        if (!firstDocument || !lastDocument) return

        // add last manifestation
        const manifestationList = firstDocument.querySelector('manifestationList')
        if (manifestationList) {
            const manifestation = lastDocument.querySelector('manifestation')
            if (manifestation) {
                console.log('appended', manifestation.getAttribute('xml:id'), '\n')
                manifestationList.appendChild(manifestation)
            }
        }
    }

    /**
     * Internal method. It recursively iterates over the
     * user-defined rules and tries to apply them to the given
     * DOM elements. It functions accumulative: The first document
     * will be expanded with the readings from the other documents.
     *
     * @param rule the current rule to apply
     * @param contexts the DOM elements to apply the rule to
     */
    private applyRule(rule: CollationRule, contexts: Element[]) {
        const targetDocument = this.documents?.[0]
        if (!targetDocument) return

        for (const name in rule) {
            const rightSide = rule[name]
            const nodeGenerator: NodeGenerator = Array.isArray(rightSide) ? rightSide[0] : rightSide

            // label all candidates so that the aligner output (which only
            // carries labels) can be mapped back to DOM elements
            contexts.forEach((e: Element) => addXmlIdsTo(name, `${name}-${generateUid()}`, e.ownerDocument))

            const generateSeqNode = (e: Element) => {
                const xmlId = e.getAttribute('xml:id')
                return new SeqNode(`${xmlId}`, nodeGenerator(e))
            }

            // one sequence per context. Elements inside an <app> only take
            // part if they live in the first <rdg> of that <app>.
            const columns: SeqNode[][] = contexts.map((e: Element) => {
                return Array.from(e.querySelectorAll(name))
                    .filter((candidate: Element) => {
                        const parentApp = candidate.closest('app')
                        return !parentApp || parentApp.querySelector('rdg') === candidate.parentElement
                    })
                    .map(generateSeqNode)
            })

            columns.slice(1).forEach((column: SeqNode[], index) => {
                // NOTE(review): `index` counts from the second column, so
                // contexts[index] is the context *preceding* the one that
                // is aligned below (contexts[index+1]) -- confirm that
                // this is intended.
                const element = contexts[index].querySelector('manifestation')
                const parentName = element?.parentElement?.nodeName
                if (element && parentName) {
                    const hook = contexts[0].ownerDocument.querySelector(parentName)
                    if (hook) {
                        console.log('appended', element.getAttribute('xml:id'), '\n')
                        hook.appendChild(element)
                    }
                }

                const aligner = new Aligner(AlignType.Global)
                aligner.align(columns[0], column, {
                    gapOpen: this.gapOpen,
                    gapExt: this.gapExt
                })
                const allPaths = aligner.retrieveAlignments()

                // TODO how to choose the position if the alignment
                // starts with a gap?
                let lastValidElement: Element | undefined
                let leftGapCache: Element[] = []
                let cache: AppAlignmentCache | null = null

                const path = allPaths.paths[0]
                path.reverse().forEach((pair: any, pathIndex: number) => {
                    const isLastPair = pathIndex === path.length - 1

                    const el1 = findByXmlId(contexts[0].ownerDocument, pair[0])
                    const el2 = findByXmlId(contexts[index + 1].ownerDocument, pair[1])

                    const seqNode1 = columns[0].find((node: SeqNode) => node.label === pair[0])
                    const seqNode2 = column.find((node: SeqNode) => node.label === pair[1])
                    const score = (seqNode1 && seqNode2) ? seqNode1.computeSimilarity(seqNode2) : -1

                    const sourceName1 = Array.from(contexts[0].ownerDocument.querySelectorAll('manifestation'))
                        .map((manifestation: Element) => manifestation.getAttribute('xml:id') || 'unknown-source')
                        .join(' ')
                    const sourceName2 = sourceNameOfDocument(contexts[index + 1].ownerDocument) || 'unknown-source1'

                    if (!el1 && el2) {
                        if (lastValidElement) {
                            leftGapCache.push(el2)
                        }

                        // postpone working on that particular alignment
                        // in case another alignment is following. If this
                        // is the last alignment, make sure that the cache is still
                        // being processed
                        if (!isLastPair) return
                    }

                    // reasons to stop putting things into the left gap alignment cache
                    // and start processing it:
                    // - left is not a gap anymore (el1 is defined)
                    // - we have reached the end of an alignment sequence
                    if (leftGapCache.length > 0 && (el1 || isLastPair)) {
                        const anchor = lastValidElement
                        if (anchor) {
                            const following = anchor.nextElementSibling
                            if (following?.nodeName === 'app') {
                                findOrAppendRdg(following, sourceName2, leftGapCache)
                            }
                            else {
                                // NOTE(review): the first call creates an <app>
                                // after the anchor, every further call then
                                // finds that <app> as next sibling and prepends
                                // to it -- check whether runs of more than one
                                // element end up in reverse order.
                                leftGapCache.forEach((e: Element) => {
                                    createCriticalApparatusAfter(targetDocument, sourceName1, anchor, sourceName2, e)
                                })
                            }
                            leftGapCache = []
                        }
                    }

                    // saving the last valid element is necessary in order
                    // to know where to place an <app> for an
                    // alignment that has material only in the second reading.
                    if (el1) {
                        lastValidElement = el1
                    }

                    // does the aligned element already live inside <rdg>?
                    // in that case first collect all the subsequent
                    // elements that aligned to an element inside this <rdg>,
                    // then compare it to other <rdg>s and decide
                    // whether to append the present material in the
                    // cache to an existing <rdg> or to create a new one
                    const rdgPointer = el1?.closest('rdg')
                    if (el1 && rdgPointer) {
                        // create a new cache ...
                        if (!cache) {
                            cache = {
                                rdgPointer,
                                elements: el2 ? [el2] : [],
                                identical: el2 ? (score === 1) : (rdgPointer.childElementCount === 0)
                            }
                        }
                        // ... or fill the existing one
                        else if (!el2) {
                            // TODO is this correct?
                            cache.identical = false
                        }
                        else {
                            cache.elements.push(el2)
                            cache.identical = cache.identical && (score === 1)
                        }

                        // ... and postpone working on that particular alignment
                        // in case another alignment is following. If this
                        // is the last alignment, make sure that the cache is still
                        // being processed
                        if (!isLastPair) return
                    }

                    // process the cache
                    // NOTE(review): the alignment that ends the cache (el1
                    // outside of any <rdg>) is not processed any further
                    // because of the return below -- confirm.
                    if (cache && (!rdgPointer || isLastPair)) {
                        if (cache.identical) {
                            addSource(cache.rdgPointer, sourceName2)
                        }
                        else {
                            // there still might be another <rdg>
                            // to which our one is identical.
                            const enclosingApp = cache.rdgPointer.closest('app')
                            if (!enclosingApp) {
                                console.log('<rdg> may only appear inside of <app>')
                                return
                            }
                            findOrAppendRdg(enclosingApp, sourceName2, cache.elements)
                        }
                        cache = null
                        return
                    }

                    // right gap
                    if (el1 && !el2) {
                        // create new <app>
                        createCriticalApparatus(targetDocument, sourceName1, el1, sourceName2, undefined)
                    }
                    // no gap
                    else if (el1 && el2) {
                        if (score !== 1 && (!el1.querySelector('app'))) {
                            createCriticalApparatus(targetDocument, sourceName1, el1, sourceName2, el2)
                        }
                        else if (Array.isArray(rightSide)) {
                            // apply subsequent rules only if an apparatus
                            // has not been inserted yet
                            this.applyRule(rightSide[1], [el1, el2])
                        }
                    }
                })
            })
        }
    }
}