/**
 * Atomizer Pipeline
 *
 * Main orchestration for the atomizer-based document comparison.
 * Integrates atomization, LCS comparison, move detection, format detection,
 * and document reconstruction.
 */
import type { CompareResult, CompareStats, ReconstructionMode } from '../../compare-types.js';
import type { ComparisonUnitAtom, MoveDetectionSettings, FormatDetectionSettings } from '../../core-types.js';
import { type NumberingIntegrationOptions } from './numberingIntegration.js';
export { hasFldCharInsideDel, validateFieldStructure, type FieldStory, } from '../../shared/field-structure.js';
import { type FieldStory } from '../../shared/field-structure.js';

/**
 * Options for the atomizer pipeline.
 *
 * Every field is optional; the documented defaults apply when a field is omitted.
 */
export interface AtomizerOptions {
    /** Author name for track changes. Default: "Comparison" */
    author?: string;
    /** Timestamp for track changes. Default: current time */
    date?: Date;
    /** Move detection settings (any subset of `MoveDetectionSettings`). */
    moveDetection?: Partial<MoveDetectionSettings>;
    /** Format detection settings (any subset of `FormatDetectionSettings`). */
    formatDetection?: Partial<FormatDetectionSettings>;
    /** Numbering integration settings (any subset of `NumberingIntegrationOptions`). */
    numbering?: Partial<NumberingIntegrationOptions>;
    /**
     * Pre-compare normalization: merge adjacent siblings with identical formatting.
     *
     * This reduces overly-fragmented diffs without relying on atom-level cross-run text merging,
     * and can improve revision grouping in Word.
     *
     * Default: true.
     */
    premergeRuns?: boolean;
    /**
     * How to reconstruct the output:
     * - 'rebuild': rebuild document.xml from atoms (best reject/accept idempotency)
     * - 'inplace': modify the revised document AST in place (experimental)
     *
     * Default: 'rebuild'
     */
    reconstructionMode?: ReconstructionMode;
}

/**
 * Split a docx into per-story XML fragments for field-closure validation.
 *
 * Each footnote/endnote entry is treated as an isolated story: a complex
 * field whose `begin` and `end` markers straddle stories breaks Word's
 * field state machine. We therefore validate each `<w:footnote>` and
 * `<w:endnote>` entry independently rather than treating the whole
 * `footnotes.xml`/`endnotes.xml` as one stream.
 *
 * Accepts arrays of sidecar XMLs (one per source archive) so callers can
 * validate the union of entries from every archive that may contribute to the
 * final result. Step 12 of `compareDocumentsAtomizer` merges entries from a
 * mode-dependent source archive into the base archive; passing both archives'
 * sidecars guarantees that whichever path the merge takes, the entries it
 * could publish have already been screened. Duplicates (same `w:id` in both
 * archives) yield redundant but harmless validation work.
 *
 * Header/footer stories are not yet covered — they require relationship
 * walking to enumerate `headerN.xml`/`footerN.xml`.
 *
 * NOTE(review): the sidecar element type is restored as `string` by analogy
 * with `documentXml` — confirm against the implementation (e.g. whether
 * missing parts are represented as `null`/`undefined` entries).
 *
 * @param documentXml - XML text of the main document part
 * @param footnotesXmls - `footnotes.xml` contents, one entry per source archive
 * @param endnotesXmls - `endnotes.xml` contents, one entry per source archive
 * @returns The stories to validate
 *
 * @conformance ECMA-376 edition 5, Part 4 § 17.16.5
 * @see https://github.com/UseJunior/safe-docx/issues/212
 */
export declare function splitStories(
    documentXml: string,
    footnotesXmls: ReadonlyArray<string>,
    endnotesXmls: ReadonlyArray<string>,
): FieldStory[];

/**
 * Compare two DOCX documents using the atomizer-based approach.
 *
 * Pipeline steps:
 * 1. Load DOCX archives
 * 2. Extract document.xml
 * 3. Parse to WmlElement trees
 * 4. Atomize both documents
 * 5. (Optional) Apply numbering virtualization
 * 6. Run LCS on atom hashes
 * 7. Mark correlation status
 * 8. Run move detection
 * 9. Run format detection
 * 10. Reconstruct document with track changes
 * 11. Save and return result
 *
 * @param original - Original document as Buffer
 * @param revised - Revised document as Buffer
 * @param options - Pipeline options
 * @returns Comparison result with track changes document
 */
export declare function compareDocumentsAtomizer(
    original: Buffer,
    revised: Buffer,
    options?: AtomizerOptions,
): Promise<CompareResult>;

/**
 * NOTE(review): presumably the outcome of merging an auxiliary part
 * (footnotes/endnotes) into the base archive — confirm against the
 * implementation; this declaration file does not show where it is produced.
 */
export interface AuxiliaryMergeResult {
    /**
     * NOTE(review): element type restored as `string` (presumably `w:id`
     * values of the merged entries) — confirm against the implementation.
     */
    mergedIds: Set<string>;
    /** NOTE(review): presumably true when the merge had to create the part — confirm. */
    createdPart: boolean;
}

/**
 * Compute comparison statistics from merged atoms.
 *
 * Range counts are contiguous same-status runs in the merged atom stream, scoped
 * to a paragraph. Atom counts remain available under explicit names for callers
 * that need the old granular benchmark signal.
 *
 * @param mergedAtoms - The merged atom stream to summarize
 * @returns Aggregated comparison statistics
 */
export declare function computeAtomizerStats(mergedAtoms: ComparisonUnitAtom[]): CompareStats;
//# sourceMappingURL=pipeline.d.ts.map